| { | |
| "best_metric": 0.03607177734375, | |
| "best_model_checkpoint": "model_fewrel_1_5-task6/checkpoint-1260", | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 2100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.051544189453125, | |
| "eval_rouge1": 97.5746, | |
| "eval_rouge2": 95.9962, | |
| "eval_rougeL": 96.8014, | |
| "eval_rougeLsum": 97.5759, | |
| "eval_runtime": 33.8471, | |
| "eval_samples_per_second": 33.09, | |
| "eval_steps_per_second": 1.034, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.038238525390625, | |
| "eval_rouge1": 97.0504, | |
| "eval_rouge2": 95.0089, | |
| "eval_rougeL": 96.0484, | |
| "eval_rougeLsum": 97.0773, | |
| "eval_runtime": 33.7575, | |
| "eval_samples_per_second": 33.178, | |
| "eval_steps_per_second": 1.037, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.380952380952381, | |
| "grad_norm": 0.15559855103492737, | |
| "learning_rate": 0.0008665259359149131, | |
| "loss": 0.0759, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.039306640625, | |
| "eval_rouge1": 97.8822, | |
| "eval_rouge2": 96.4368, | |
| "eval_rougeL": 97.1693, | |
| "eval_rougeLsum": 97.886, | |
| "eval_runtime": 33.8292, | |
| "eval_samples_per_second": 33.107, | |
| "eval_steps_per_second": 1.035, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.036468505859375, | |
| "eval_rouge1": 97.8707, | |
| "eval_rouge2": 96.3824, | |
| "eval_rougeL": 97.1565, | |
| "eval_rougeLsum": 97.88, | |
| "eval_runtime": 34.5353, | |
| "eval_samples_per_second": 32.431, | |
| "eval_steps_per_second": 1.013, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 4.761904761904762, | |
| "grad_norm": 0.021474618464708328, | |
| "learning_rate": 0.0005373650467932121, | |
| "loss": 0.0211, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.03961181640625, | |
| "eval_rouge1": 97.5277, | |
| "eval_rouge2": 95.8576, | |
| "eval_rougeL": 96.691, | |
| "eval_rougeLsum": 97.527, | |
| "eval_runtime": 33.9424, | |
| "eval_samples_per_second": 32.997, | |
| "eval_steps_per_second": 1.031, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.03607177734375, | |
| "eval_rouge1": 98.1371, | |
| "eval_rouge2": 96.8791, | |
| "eval_rougeL": 97.5059, | |
| "eval_rougeLsum": 98.173, | |
| "eval_runtime": 33.9961, | |
| "eval_samples_per_second": 32.945, | |
| "eval_steps_per_second": 1.03, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 0.040924072265625, | |
| "eval_rouge1": 98.1004, | |
| "eval_rouge2": 96.774, | |
| "eval_rougeL": 97.4333, | |
| "eval_rougeLsum": 98.1087, | |
| "eval_runtime": 34.4539, | |
| "eval_samples_per_second": 32.507, | |
| "eval_steps_per_second": 1.016, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 7.142857142857143, | |
| "grad_norm": 0.08763577789068222, | |
| "learning_rate": 0.00018825509907063325, | |
| "loss": 0.0103, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.03826904296875, | |
| "eval_rouge1": 97.8337, | |
| "eval_rouge2": 96.3488, | |
| "eval_rougeL": 97.096, | |
| "eval_rougeLsum": 97.8664, | |
| "eval_runtime": 34.3599, | |
| "eval_samples_per_second": 32.596, | |
| "eval_steps_per_second": 1.019, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 0.038909912109375, | |
| "eval_rouge1": 97.9644, | |
| "eval_rouge2": 96.525, | |
| "eval_rougeL": 97.2236, | |
| "eval_rougeLsum": 97.9585, | |
| "eval_runtime": 33.8333, | |
| "eval_samples_per_second": 33.103, | |
| "eval_steps_per_second": 1.034, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 9.523809523809524, | |
| "grad_norm": 0.19326545298099518, | |
| "learning_rate": 5.5845868874357386e-06, | |
| "loss": 0.0071, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.039276123046875, | |
| "eval_rouge1": 98.0097, | |
| "eval_rouge2": 96.6105, | |
| "eval_rougeL": 97.2833, | |
| "eval_rougeLsum": 98.0092, | |
| "eval_runtime": 33.8974, | |
| "eval_samples_per_second": 33.041, | |
| "eval_steps_per_second": 1.033, | |
| "step": 2100 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.3099168784384e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |