Selaa lähdekoodia

test model with reduced vocabulary

justheuristic 3 vuotta sitten
vanhempi
commit
90cda76573
2 muutettua tiedostoa jossa 22 lisäystä ja 3 poistoa
  1. 16 3
      .github/workflows/run-tests.yaml
  2. 6 0
      cli/convert_model.py

+ 16 - 3
.github/workflows/run-tests.yaml

@@ -35,7 +35,8 @@ jobs:
        run: |
          export HF_TAG=$(python -c "import os; print(os.environ.get('GITHUB_HEAD_REF') or os.environ.get('GITHUB_REF_NAME'))")
          python -m cli.convert_model --model bigscience/bloom-560m  --output_path ./converted_model \
-            --output_repo bloom-testing/test-bloomd-560m-$HF_TAG --use_auth_token $BLOOM_TESTING_WRITE_TOKEN
+            --output_repo bloom-testing/test-bloomd-560m-$HF_TAG --use_auth_token $BLOOM_TESTING_WRITE_TOKEN \
+            --resize_token_embeddings 10_000  # reduce embeddings size to save memory
 
 
 
 
  run-tests:
@@ -76,7 +77,8 @@ jobs:
          export REF_NAME=bigscience/bloom-560m

          python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 0:12 \
-            --torch_dtype float32 --identity tests/test.id --host_maddrs /ip4/127.0.0.1/tcp/31337 --throughput 1 &
+            --torch_dtype float32 --identity tests/test.id --host_maddrs /ip4/127.0.0.1/tcp/31337 \
+            --throughput 1 &> server1.log
          SERVER1_PID=$!

          sleep 5  # wait for the first server to initialize DHT
@@ -88,9 +90,20 @@ jobs:
            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server2.log &
          SERVER2_PID=$!
 
 
+#          python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 0:6 \
+#            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server3.log &
+#          SERVER3_PID=$!
+#
+#          python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 4:16 \
+#            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server4.log &
+#          SERVER4_PID=$!
+#
+          tail -f server*.log &
+          LOGS_PID=$!
+
          sleep 60  # wait for server to download layers

          PYTHONPATH=. pytest tests --durations=0 --durations-min=1.0 -v

-          kill -s SIGINT $SERVER1_PID $SERVER2_PID
+          kill -s SIGINT $SERVER1_PID $SERVER2_PID # $SERVER3_PID $SERVER4_PID $LOGS_PID
          echo "Done!"

+ 6 - 0
cli/convert_model.py

@@ -35,6 +35,8 @@ if __name__ == "__main__":
        "--commit_message", type=str, default="push-o-matic", help="Use this commit message for all parts"
    )
    parser.add_argument("--use_auth_token", type=str, default=None, help="auth token for from_pretrained")
+    parser.add_argument("--resize_token_embeddings", type=int, default=None,
+                        help="change the vocabulary size of the converted model to this value")
    args = parser.parse_args()

    free_ram_gb = psutil.virtual_memory().available / 2**30
@@ -56,6 +58,10 @@ if __name__ == "__main__":
    model = BloomModel.from_pretrained(
        args.model, use_auth_token=args.use_auth_token, revision=args.revision, torch_dtype=DTYPE_MAP[args.torch_dtype]
    )
+    if args.resize_token_embeddings:
+        logger.info(f"Resizing token embeddings, new size = {args.resize_token_embeddings}")
+        model.resize_token_embeddings(args.resize_token_embeddings)
+
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        args.model, use_auth_token=args.use_auth_token, revision=args.revision
    )