  1. #!/usr/bin/env bash
  2. source ~/miniconda3/etc/profile.d/conda.sh
  3. # If you use anaconda, uncomment:
  4. # source ~/anaconda3/etc/profile.d/conda.sh
  5. if conda env list | grep ".*bloom-demo-benchmark.*" >/dev/null 2>/dev/null; then
  6. conda activate bloom-demo-benchmark
  7. else
  8. conda create -y --name bloom-demo-benchmark python=3.8.12 pip
  9. conda activate bloom-demo-benchmark
  10. conda install -y -c conda-forge cudatoolkit-dev==11.3.1 cudatoolkit==11.3.1 cudnn==8.2.1.32
  11. pip install -i https://pypi.org/simple torch==1.12.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html
  12. pip install -i https://test.pypi.org/simple/ bitsandbytes-cuda113
  13. pip install -i https://pypi.org/simple -r demo-requirements.txt
  14. fi
  15. # Please set up
  16. INITIAL_PEER="/ip4/193.106.95.184/tcp/44653/p2p/QmPz8gEuppboaKuF3kV245iqaGAWA9juhBqG9mV3e7KZvg"
  17. MODEL_NAME="bigscience/test-bloomd"
  18. HOST_MADDR="/ip4/0.0.0.0/tcp/30000"
  19. SERVER_ID_PATH="./server.id"
  20. GPU_ID="0" # GPU must have Tensor Cores: RTX, Titan, A100, V100, ...
  21. NUM_BLOCKS="3" # one converted block consumes ~3.5Gb
  22. export OMP_NUM_THREADS="16" # just in case
  23. CUDA_VISIBLE_DEVICES=${GPU_ID} python -m cli.run_server --converted_model_name_or_path ${MODEL_NAME} --torch_dtype float16 --initial_peer ${INITIAL_PEER} \
  24. --compression BLOCKWISE_8BIT --identity_path ${SERVER_ID_PATH} --host_maddrs ${HOST_MADDR} \
  25. --num_blocks ${NUM_BLOCKS} --load_in_8bit --throughput 1.0