Selaa lähdekoodia

test model with reduced vocabulary

justheuristic 3 vuotta sitten
vanhempi
commit
90cda76573
2 muutettua tiedostoa jossa 22 lisäystä ja 3 poistoa
  1. 16 3
      .github/workflows/run-tests.yaml
  2. 6 0
      cli/convert_model.py

+ 16 - 3
.github/workflows/run-tests.yaml

@@ -35,7 +35,8 @@ jobs:
        run: |
          export HF_TAG=$(python -c "import os; print(os.environ.get('GITHUB_HEAD_REF') or os.environ.get('GITHUB_REF_NAME'))")
          python -m cli.convert_model --model bigscience/bloom-560m  --output_path ./converted_model \
-            --output_repo bloom-testing/test-bloomd-560m-$HF_TAG --use_auth_token $BLOOM_TESTING_WRITE_TOKEN
+            --output_repo bloom-testing/test-bloomd-560m-$HF_TAG --use_auth_token $BLOOM_TESTING_WRITE_TOKEN \
+            --resize_token_embeddings 10_000  # reduce embeddings size to save memory
 
 
 
 
  run-tests:
@@ -76,7 +77,8 @@ jobs:
          export REF_NAME=bigscience/bloom-560m

          python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 0:12 \
-            --torch_dtype float32 --identity tests/test.id --host_maddrs /ip4/127.0.0.1/tcp/31337 --throughput 1 &
+            --torch_dtype float32 --identity tests/test.id --host_maddrs /ip4/127.0.0.1/tcp/31337 \
+            --throughput 1 &> server1.log
          SERVER1_PID=$!

          sleep 5  # wait for the first server to initialize DHT
@@ -88,9 +90,20 @@ jobs:
            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server2.log &
          SERVER2_PID=$!
 
 
+#          python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 0:6 \
+#            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server3.log &
+#          SERVER3_PID=$!
+#
+#          python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 4:16 \
+#            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server4.log &
+#          SERVER4_PID=$!
+#
+          tail -f server*.log &
+          LOGS_PID=$!
+
          sleep 60  # wait for server to download layers

          PYTHONPATH=. pytest tests --durations=0 --durations-min=1.0 -v

-          kill -s SIGINT $SERVER1_PID $SERVER2_PID
+          kill -s SIGINT $SERVER1_PID $SERVER2_PID # $SERVER3_PID $SERVER4_PID $LOGS_PID
          echo "Done!"

+ 6 - 0
cli/convert_model.py

@@ -35,6 +35,8 @@ if __name__ == "__main__":
        "--commit_message", type=str, default="push-o-matic", help="Use this commit message for all parts"
    )
    parser.add_argument("--use_auth_token", type=str, default=None, help="auth token for from_pretrained")
+    parser.add_argument("--resize_token_embeddings", type=int, default=None,
+                        help="change the vocabulary size of the converted model to this value")
    args = parser.parse_args()

    free_ram_gb = psutil.virtual_memory().available / 2**30
@@ -56,6 +58,10 @@ if __name__ == "__main__":
    model = BloomModel.from_pretrained(
        args.model, use_auth_token=args.use_auth_token, revision=args.revision, torch_dtype=DTYPE_MAP[args.torch_dtype]
    )
+    if args.resize_token_embeddings:
+        logger.info(f"Resizing token embeddings, new size = {args.resize_token_embeddings}")
+        model.resize_token_embeddings(args.resize_token_embeddings)
+
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        args.model, use_auth_token=args.use_auth_token, revision=args.revision
    )