3 роки тому · 90cda76573
--- a/.github/workflows/run-tests.yaml
+++ b/.github/workflows/run-tests.yaml
@@ -35,7 +35,8 @@ jobs:
 
				         run: |
			
 
				           export HF_TAG=$(python -c "import os; print(os.environ.get('GITHUB_HEAD_REF') or os.environ.get('GITHUB_REF_NAME'))")
			
 
				           python -m cli.convert_model --model bigscience/bloom-560m  --output_path ./converted_model \
			
 
				-            --output_repo bloom-testing/test-bloomd-560m-$HF_TAG --use_auth_token $BLOOM_TESTING_WRITE_TOKEN
			
 
				+            --output_repo bloom-testing/test-bloomd-560m-$HF_TAG --use_auth_token $BLOOM_TESTING_WRITE_TOKEN \
			
 
				+            --resize_token_embeddings 10_000  # reduce embeddings size to save memory
			
 
				 
			
 
				 
			
 
				   run-tests:
			
@@ -76,7 +77,8 @@ jobs:
 
				           export REF_NAME=bigscience/bloom-560m
			
 
				 
			
 
				           python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 0:12 \
			
 
				-            --torch_dtype float32 --identity tests/test.id --host_maddrs /ip4/127.0.0.1/tcp/31337 --throughput 1 &
			
 
				+            --torch_dtype float32 --identity tests/test.id --host_maddrs /ip4/127.0.0.1/tcp/31337 \
			
 
				+            --throughput 1 &> server1.log &  # run in background so $! below is this server's PID
			
 
				           SERVER1_PID=$!
			
 
				           
			
 
				           sleep 5  # wait for the first server to initialize DHT
			
@@ -88,9 +90,20 @@ jobs:
 
				             --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server2.log &
			
 
				           SERVER2_PID=$!
			
 
				 
			
 
				+#          python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 0:6 \
			
 
				+#            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server3.log &
			
 
				+#          SERVER3_PID=$!
			
 
				+#
			
 
				+#          python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 4:16 \
			
 
				+#            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server4.log &
			
 
				+#          SERVER4_PID=$!
			
 
				+#
			
 
				+          tail -f server*.log &
			
 
				+          LOGS_PID=$!
			
 
				+
			
 
				           sleep 60  # wait for server to download layers
			
 
				           
			
 
				           PYTHONPATH=. pytest tests --durations=0 --durations-min=1.0 -v
			
 
				           
			
 
				-          kill -s SIGINT $SERVER1_PID $SERVER2_PID
			
 
				+          kill -s SIGINT $SERVER1_PID $SERVER2_PID # $SERVER3_PID $SERVER4_PID $LOGS_PID
			
 
				           echo "Done!"
			
--- a/cli/convert_model.py
+++ b/cli/convert_model.py
@@ -35,6 +35,8 @@ if __name__ == "__main__":
 
				         "--commit_message", type=str, default="push-o-matic", help="Use this commit message for all parts"
			
 
				     )
			
 
				     parser.add_argument("--use_auth_token", type=str, default=None, help="auth token for from_pretrained")
			
 
				+    parser.add_argument("--resize_token_embeddings", type=int, default=None,
			
 
				+                        help="change the vocabulary size of the converted model to this value")
			
 
				     args = parser.parse_args()
			
 
				 
			
 
				     free_ram_gb = psutil.virtual_memory().available / 2**30
			
@@ -56,6 +58,10 @@ if __name__ == "__main__":
 
				     model = BloomModel.from_pretrained(
			
 
				         args.model, use_auth_token=args.use_auth_token, revision=args.revision, torch_dtype=DTYPE_MAP[args.torch_dtype]
			
 
				     )
			
 
				+    if args.resize_token_embeddings:
			
 
				+        logger.info(f"Resizing token embeddings, new size = {args.resize_token_embeddings}")
			
 
				+        model.resize_token_embeddings(args.resize_token_embeddings)
			
 
				+
			
 
				     tokenizer = transformers.AutoTokenizer.from_pretrained(
			
 
				         args.model, use_auth_token=args.use_auth_token, revision=args.revision
			
 
				     )