浏览代码

add script for remote benchmarks

dbaranchuk 3 年之前
父节点
当前提交
afc1de2627
共有 2 个文件被更改，包括 29 次插入和 0 次删除
  1. 24 0
      cli/demo_deploy_server.sh
  2. 5 0
      demo-requirements.txt

+ 24 - 0
cli/demo_deploy_server.sh

@@ -0,0 +1,24 @@
+# Create the bloom-demo-benchmark conda env on first use, activate it, then launch a server.
+if conda env list | grep "bloom-demo-benchmark" >/dev/null 2>&1; then
+    conda activate bloom-demo-benchmark
+else
+    conda create -y --name bloom-demo-benchmark python=3.8.12 pip
+    conda activate bloom-demo-benchmark
+    conda install -y -c conda-forge cudatoolkit-dev==11.3.1 cudatoolkit==11.3.1 cudnn==8.2.1.32
+    pip install -i https://pypi.org/simple torch==1.12.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html
+    pip install -i https://test.pypi.org/simple/ bitsandbytes-cuda113
+    pip install -i https://pypi.org/simple -r demo-requirements.txt
+fi  # closes the if/else above; without it bash rejects the script with a syntax error
+
+# Please set up
+INITIAL_PEER="<DMITRY or YOZH WILL PROVIDE>"
+MODEL_NAME="bigscience/test-bloomd"
+HOST_MADDR="/ip4/0.0.0.0/tcp/30000"
+SERVER_ID_PATH="./server.id"
+GPU_ID="0"
+NUM_BLOCKS="3" # one converted block consumes ~3Gb
+export OMP_NUM_THREADS="16" # just in case
+# NOTE(review): INITIAL_PEER and HOST_MADDR stay unquoted in case they hold several space-separated values - confirm.
+CUDA_VISIBLE_DEVICES="${GPU_ID}" python -m cli.run_server --converted_model_name_or_path "${MODEL_NAME}" --torch_dtype float16 --initial_peer ${INITIAL_PEER} \
+                                                        --compression BLOCKWISE_8BIT --identity_path "${SERVER_ID_PATH}" --host_maddrs ${HOST_MADDR} \
+                                                        --num_blocks "${NUM_BLOCKS}" --load_in_8bit

+ 5 - 0
demo-requirements.txt

@@ -0,0 +1,5 @@
+torch==1.12.0  # cli/demo_deploy_server.sh installs the 1.12.0+cu113 build before reading this file
+accelerate==0.10.0
+huggingface-hub==0.7.0
+https://github.com/learning-at-home/hivemind/archive/20b3b3d5f225ed525515a5383a008a8f9fad8173.zip  # hivemind, pinned to a commit archive
+https://github.com/huggingface/transformers/archive/6589e510fa4e6c442059de2fab84752535de9b23.zip  # transformers, pinned to a commit archive