Jelajahi Sumber

explain total_steps vs max_steps

Alexander Borzunov 3 tahun lalu
induk
melakukan
cdfca180bf
1 mengubah file dengan 2 tambahan dan 2 penghapusan
  1. 2 2
      examples/albert/arguments.py

+ 2 - 2
examples/albert/arguments.py

@@ -127,7 +127,7 @@ class AlbertTrainingArguments(TrainingArguments):
     gradient_accumulation_steps: int = 2
     seq_length: int = 512
 
-    total_steps: int = 125_000  # please note: this only affects the learning rate schedule
+    total_steps: int = 125_000  # the number of global optimizer and scheduler steps
     learning_rate: float = 0.00176
     warmup_steps: int = 5000
     adam_epsilon: float = 1e-6
@@ -148,4 +148,4 @@ class AlbertTrainingArguments(TrainingArguments):
 
     save_total_limit: int = 2
     save_steps: int = 500
-    max_steps: int = 10 ** 30  # meant as "peer should compute gradients forever"
+    max_steps: int = 10 ** 30  # the maximum number of forward-backward passes before exit (defaults to "as many as you can")