{ "best_metric": 0.03607177734375, "best_model_checkpoint": "model_fewrel_1_5-task6/checkpoint-1260", "epoch": 10.0, "eval_steps": 500, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 0.051544189453125, "eval_rouge1": 97.5746, "eval_rouge2": 95.9962, "eval_rougeL": 96.8014, "eval_rougeLsum": 97.5759, "eval_runtime": 33.8471, "eval_samples_per_second": 33.09, "eval_steps_per_second": 1.034, "step": 210 }, { "epoch": 2.0, "eval_loss": 0.038238525390625, "eval_rouge1": 97.0504, "eval_rouge2": 95.0089, "eval_rougeL": 96.0484, "eval_rougeLsum": 97.0773, "eval_runtime": 33.7575, "eval_samples_per_second": 33.178, "eval_steps_per_second": 1.037, "step": 420 }, { "epoch": 2.380952380952381, "grad_norm": 0.15559855103492737, "learning_rate": 0.0008665259359149131, "loss": 0.0759, "step": 500 }, { "epoch": 3.0, "eval_loss": 0.039306640625, "eval_rouge1": 97.8822, "eval_rouge2": 96.4368, "eval_rougeL": 97.1693, "eval_rougeLsum": 97.886, "eval_runtime": 33.8292, "eval_samples_per_second": 33.107, "eval_steps_per_second": 1.035, "step": 630 }, { "epoch": 4.0, "eval_loss": 0.036468505859375, "eval_rouge1": 97.8707, "eval_rouge2": 96.3824, "eval_rougeL": 97.1565, "eval_rougeLsum": 97.88, "eval_runtime": 34.5353, "eval_samples_per_second": 32.431, "eval_steps_per_second": 1.013, "step": 840 }, { "epoch": 4.761904761904762, "grad_norm": 0.021474618464708328, "learning_rate": 0.0005373650467932121, "loss": 0.0211, "step": 1000 }, { "epoch": 5.0, "eval_loss": 0.03961181640625, "eval_rouge1": 97.5277, "eval_rouge2": 95.8576, "eval_rougeL": 96.691, "eval_rougeLsum": 97.527, "eval_runtime": 33.9424, "eval_samples_per_second": 32.997, "eval_steps_per_second": 1.031, "step": 1050 }, { "epoch": 6.0, "eval_loss": 0.03607177734375, "eval_rouge1": 98.1371, "eval_rouge2": 96.8791, "eval_rougeL": 97.5059, "eval_rougeLsum": 98.173, "eval_runtime": 33.9961, "eval_samples_per_second": 32.945, "eval_steps_per_second": 1.03, "step": 1260 }, { "epoch": 7.0, "eval_loss": 0.040924072265625, "eval_rouge1": 98.1004, "eval_rouge2": 96.774, "eval_rougeL": 97.4333, "eval_rougeLsum": 98.1087, "eval_runtime": 34.4539, "eval_samples_per_second": 32.507, "eval_steps_per_second": 1.016, "step": 1470 }, { "epoch": 7.142857142857143, "grad_norm": 0.08763577789068222, "learning_rate": 0.00018825509907063325, "loss": 0.0103, "step": 1500 }, { "epoch": 8.0, "eval_loss": 0.03826904296875, "eval_rouge1": 97.8337, "eval_rouge2": 96.3488, "eval_rougeL": 97.096, "eval_rougeLsum": 97.8664, "eval_runtime": 34.3599, "eval_samples_per_second": 32.596, "eval_steps_per_second": 1.019, "step": 1680 }, { "epoch": 9.0, "eval_loss": 0.038909912109375, "eval_rouge1": 97.9644, "eval_rouge2": 96.525, "eval_rougeL": 97.2236, "eval_rougeLsum": 97.9585, "eval_runtime": 33.8333, "eval_samples_per_second": 33.103, "eval_steps_per_second": 1.034, "step": 1890 }, { "epoch": 9.523809523809524, "grad_norm": 0.19326545298099518, "learning_rate": 5.5845868874357386e-06, "loss": 0.0071, "step": 2000 }, { "epoch": 10.0, "eval_loss": 0.039276123046875, "eval_rouge1": 98.0097, "eval_rouge2": 96.6105, "eval_rougeL": 97.2833, "eval_rougeLsum": 98.0092, "eval_runtime": 33.8974, "eval_samples_per_second": 33.041, "eval_steps_per_second": 1.033, "step": 2100 } ], "logging_steps": 500, "max_steps": 2100, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.3099168784384e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }