Michael Diskin 4 жил өмнө
parent
commit
b17cef6e9e

+ 1 - 2
examples/albert/run_trainer.py

@@ -77,10 +77,9 @@ def get_optimizer_and_scheduler(training_args, model):
         },
     ]
 
-    opt = AdamW(
+    opt = Adafactor(
         optimizer_grouped_parameters,
         lr=training_args.learning_rate,
-        betas=(training_args.adam_beta1, training_args.adam_beta2),
         eps=training_args.adam_epsilon,
         weight_decay=training_args.weight_decay,
     )