
explain total_steps vs max_steps

Alexander Borzunov, 3 years ago
commit cdfca180bf
1 changed file with 2 additions and 2 deletions

examples/albert/arguments.py (+2, -2)

@@ -127,7 +127,7 @@ class AlbertTrainingArguments(TrainingArguments):
     gradient_accumulation_steps: int = 2
     seq_length: int = 512
 
-    total_steps: int = 125_000  # please note: this only affects the learning rate schedule
+    total_steps: int = 125_000  # the number of global optimizer and scheduler steps
     learning_rate: float = 0.00176
     warmup_steps: int = 5000
     adam_epsilon: float = 1e-6
@@ -148,4 +148,4 @@ class AlbertTrainingArguments(TrainingArguments):
 
     save_total_limit: int = 2
     save_steps: int = 500
-    max_steps: int = 10 ** 30  # meant as "peer should compute gradients forever"
+    max_steps: int = 10 ** 30  # the number of forward-backward passes before exit, defaults to "as many as you can"
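
For context, here is a minimal sketch of how the two settings interact, assuming a transformers-style setup rather than the repository's actual training loop (the model, optimizer, and loop below are placeholders; the hyperparameter values mirror the ones in arguments.py):

    import torch
    from transformers import get_linear_schedule_with_warmup

    model = torch.nn.Linear(8, 8)  # stand-in for the real ALBERT model
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.00176, eps=1e-6)

    # total_steps and warmup_steps define the learning-rate curve:
    # warm up over the first 5000 steps, then decay linearly to zero at 125_000.
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=5000, num_training_steps=125_000
    )

    # max_steps, by contrast, is only a loop bound; 10 ** 30 effectively
    # means "keep computing gradients until the peer is stopped".
    max_steps = 10 ** 30
    for step in range(max_steps):
        optimizer.step()   # forward/backward elided in this sketch
        scheduler.step()
        if step == 2:      # toy cutoff so the sketch terminates
            break

With a linear-decay schedule like the one sketched above, the learning rate stays at zero once total_steps is reached, so a peer that keeps running under the enormous max_steps bound does no further harm; this is why the two limits are deliberately distinct.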