#!/usr/bin/env bash
#
# run_remote_servers.sh
#
# Starts a local hivemind DHT peer and then launches one server per config
# file on remote hosts over SSH (each inside a detached tmux session).

# Private key handed to `ssh -i`. Replace <YOUR_KEY> with the key file name.
# ${HOME} is used instead of "~" because a tilde inside double quotes is not
# expanded by the shell, which would leave a literal "~" in the path.
SSH_KEY_PATH="${HOME}/.ssh/<YOUR_KEY>"
  3. #################
  4. # Parse options #
  5. #################
  6. instructions() {
  7. echo "Usage: $0 [-u] [-n] [-c]" >&2
  8. echo " -u: username" >&2
  9. echo " -n: number of servers to run" >&2
  10. echo " -c: path to the server configs" >&2
  11. exit 1
  12. }
  13. if [ $# != 6 ]; then
  14. instructions
  15. fi
  16. while getopts ":u:n:c:" option; do
  17. case $option in
  18. u) USERNAME=${OPTARG}
  19. ;;
  20. n) NUM_SERVERS=${OPTARG}
  21. ;;
  22. c) CONFIG_PATH=${OPTARG}
  23. ;;
  24. \?) instructions
  25. ;;
  26. esac
  27. done
  28. ###########################
  29. # Install or activate env #
  30. ###########################
  31. source ~/miniconda3/etc/profile.d/conda.sh
  32. if conda env list | grep ".*bloom-demo.*" &>/dev/null; then
  33. conda activate bloom-demo
  34. else
  35. conda create -y --name bloom-demo python=3.8.12 pip
  36. conda activate bloom-demo
  37. conda install -y -c conda-forge cudatoolkit-dev==11.3.1 cudatoolkit==11.3.1 cudnn==8.2.1.32
  38. pip install -i https://pypi.org/simple torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html
  39. pip install -i https://pypi.org/simple accelerate==0.10.0 huggingface-hub==0.7.0 hivemind==1.1.0
  40. pip install -i https://pypi.org/simple bitsandbytes-cuda113==0.26.0
  41. pip install -i https://pypi.org/simple https://github.com/huggingface/transformers/archive/6589e510fa4e6c442059de2fab84752535de9b23.zip
  42. fi
  43. #######################
  44. # Create Initial peer #
  45. #######################
  46. hivemind-dht &> tmp.out &
  47. sleep 3
  48. INITIAL_PEER=$(python -c "with open('tmp.out') as f: print(f.readlines()[1].split()[-2])" )
  49. rm tmp.out
  50. echo "Initial peer: ${INITIAL_PEER}"
  51. ##############################
  52. # Initialize the config file #
  53. ##############################
  54. typeset -A cfg
  55. cfg=( # set default values in config array
  56. [name]=""
  57. [device]="cpu"
  58. [block_ids]="1:2"
  59. [id_path]="server.id"
  60. [maddr]="/ip4/0.0.0.0/tcp/30000"
  61. )
  62. ###############
  63. # Run servers #
  64. ###############
  65. for SERVER_ID in $(seq 0 $(( $NUM_SERVERS - 1 )) )
  66. do
  67. ###############
  68. # Read config #
  69. ###############
  70. while read line
  71. do
  72. if echo $line | grep -F = &>/dev/null
  73. then
  74. varname=$(echo "$line" | cut -d '=' -f 1)
  75. cfg[$varname]=$(echo "$line" | cut -d '=' -f 2-)
  76. fi
  77. done < ${CONFIG_PATH}/server_${SERVER_ID}.cfg
  78. SERVER_NAME="${USERNAME}@${cfg[name]}"
  79. echo "=== Server #${SERVER_ID} ==="
  80. echo "Server name ${SERVER_NAME}"
  81. echo "Server ID: ${cfg[id_path]}"
  82. echo "Device: ${cfg[device]}"
  83. echo "Bloom block ids: ${cfg[block_ids]}"
  84. echo "Host maddr: ${cfg[maddr]}"
  85. echo "================="
  86. ##############
  87. # Run server #
  88. ##############
  89. ssh -i ${SSH_KEY_PATH} ${SERVER_NAME} "tmux new-session -d -s 'Server_${SERVER_ID}' 'cd bloom-demo && bash cli/deploy_server.sh -i ${INITIAL_PEER} -d ${cfg[device]} -p ${cfg[id_path]} -b ${cfg[block_ids]} -a ${cfg[maddr]}'"
  90. done