
Update per_device_batch_sizes and shuffle_buffer_size

Aleksandr Borzunov, 3 years ago
commit c5493b9baf
2 files changed, 3 insertions(+), 3 deletions(-)
  1. arguments.py (+2 -2)
  2. data.py (+1 -1)

arguments.py (+2 -2)

@@ -9,8 +9,8 @@ from transformers import TrainingArguments
 class HFTrainerArguments(TrainingArguments):
     """Arguments for huggingface/transformers.Trainer"""
     dataloader_num_workers: int = 1
-    per_device_train_batch_size: int = 1
-    per_device_eval_batch_size: int = 1
+    per_device_train_batch_size: int = 2
+    per_device_eval_batch_size: int = 2
     gradient_accumulation_steps: int = 1
     text_seq_length: int = 256
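
With these defaults, instantiating the dataclass without overrides now yields a per-device batch size of 2 for both training and evaluation. A minimal sketch of the effect, assuming only the fields shown in this diff (the output_dir value is a hypothetical placeholder required by the transformers.TrainingArguments base class):

    args = HFTrainerArguments(output_dir="outputs")  # "outputs" is a placeholder path
    assert args.per_device_train_batch_size == 2
    assert args.per_device_eval_batch_size == 2
    # Samples consumed per optimizer step on one device:
    # per_device_train_batch_size * gradient_accumulation_steps = 2 * 1 = 2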
 
 

data.py (+1 -1)

@@ -34,7 +34,7 @@ def preprocess_batch(batch, tokenizer, max_sequence_length: int):
 def make_dataset(
     tokenizer,
     *,
-    shuffle_buffer_size: int = 10 ** 4,
+    shuffle_buffer_size: int = 8192,
     shuffle_seed: Optional[int],
     preprocessing_batch_size: int = 256,
     max_sequence_length: int,
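
The default shuffle buffer shrinks from 10 ** 4 (10,000) examples to 8192 (a power of two, 2**13). A sketch of how a buffer of this size is typically consumed in a streaming 🤗 datasets pipeline; the dataset name and call sites below are assumptions for illustration, not taken from this diff:

    from datasets import load_dataset

    # Hypothetical streaming dataset; make_dataset's body is not shown here.
    dataset = load_dataset("c4", "en", split="train", streaming=True)
    # Buffered shuffling keeps buffer_size examples in memory and samples
    # uniformly from the buffer, refilling it from the stream as it drains.
    dataset = dataset.shuffle(seed=42, buffer_size=8192)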