{ "best_metric": 0.0484619140625, "best_model_checkpoint": "model_fewrel_1_6-task7/checkpoint-1470", "epoch": 10.0, "eval_steps": 500, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 0.1253662109375, "eval_rouge1": 93.102, "eval_rouge2": 89.1364, "eval_rougeL": 92.1128, "eval_rougeLsum": 93.0898, "eval_runtime": 30.5369, "eval_samples_per_second": 36.677, "eval_steps_per_second": 1.146, "step": 210 }, { "epoch": 2.0, "eval_loss": 0.07708740234375, "eval_rouge1": 94.6253, "eval_rouge2": 91.7196, "eval_rougeL": 93.9284, "eval_rougeLsum": 94.6153, "eval_runtime": 29.0183, "eval_samples_per_second": 38.596, "eval_steps_per_second": 1.206, "step": 420 }, { "epoch": 2.380952380952381, "grad_norm": 0.4770593047142029, "learning_rate": 0.0008665259359149131, "loss": 0.1042, "step": 500 }, { "epoch": 3.0, "eval_loss": 0.05633544921875, "eval_rouge1": 95.1246, "eval_rouge2": 92.2081, "eval_rougeL": 94.3701, "eval_rougeLsum": 95.1249, "eval_runtime": 28.82, "eval_samples_per_second": 38.862, "eval_steps_per_second": 1.214, "step": 630 }, { "epoch": 4.0, "eval_loss": 0.052490234375, "eval_rouge1": 95.9748, "eval_rouge2": 93.6071, "eval_rougeL": 95.3787, "eval_rougeLsum": 95.9622, "eval_runtime": 28.7568, "eval_samples_per_second": 38.947, "eval_steps_per_second": 1.217, "step": 840 }, { "epoch": 4.761904761904762, "grad_norm": 0.2157868593931198, "learning_rate": 0.0005373650467932121, "loss": 0.0397, "step": 1000 }, { "epoch": 5.0, "eval_loss": 0.052734375, "eval_rouge1": 96.4573, "eval_rouge2": 94.4045, "eval_rougeL": 95.96, "eval_rougeLsum": 96.4689, "eval_runtime": 28.2262, "eval_samples_per_second": 39.679, "eval_steps_per_second": 1.24, "step": 1050 }, { "epoch": 6.0, "eval_loss": 0.053009033203125, "eval_rouge1": 96.692, "eval_rouge2": 94.7143, "eval_rougeL": 96.2205, "eval_rougeLsum": 96.6725, "eval_runtime": 28.1396, "eval_samples_per_second": 39.802, "eval_steps_per_second": 1.244, "step": 1260 }, { "epoch": 7.0, "eval_loss": 0.0484619140625, "eval_rouge1": 96.2898, "eval_rouge2": 94.1342, "eval_rougeL": 95.7357, "eval_rougeLsum": 96.2989, "eval_runtime": 28.7901, "eval_samples_per_second": 38.902, "eval_steps_per_second": 1.216, "step": 1470 }, { "epoch": 7.142857142857143, "grad_norm": 0.13793426752090454, "learning_rate": 0.00018825509907063325, "loss": 0.0253, "step": 1500 }, { "epoch": 8.0, "eval_loss": 0.050872802734375, "eval_rouge1": 96.419, "eval_rouge2": 94.2908, "eval_rougeL": 95.887, "eval_rougeLsum": 96.431, "eval_runtime": 28.8197, "eval_samples_per_second": 38.862, "eval_steps_per_second": 1.214, "step": 1680 }, { "epoch": 9.0, "eval_loss": 0.050994873046875, "eval_rouge1": 96.5301, "eval_rouge2": 94.465, "eval_rougeL": 96.014, "eval_rougeLsum": 96.5445, "eval_runtime": 28.8499, "eval_samples_per_second": 38.822, "eval_steps_per_second": 1.213, "step": 1890 }, { "epoch": 9.523809523809524, "grad_norm": 0.44433069229125977, "learning_rate": 5.5845868874357386e-06, "loss": 0.0172, "step": 2000 }, { "epoch": 10.0, "eval_loss": 0.05072021484375, "eval_rouge1": 96.5339, "eval_rouge2": 94.4734, "eval_rougeL": 96.0133, "eval_rougeLsum": 96.5439, "eval_runtime": 28.8295, "eval_samples_per_second": 38.849, "eval_steps_per_second": 1.214, "step": 2100 } ], "logging_steps": 500, "max_steps": 2100, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.3099168784384e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }