dbaranchuk 3 роки тому
батько
коміт
cf86dcc457
4 змінених файлів з 7 додано та 6 видалено
  1. 3 2
      cli/deploy_server.sh
  2. 2 2
      cli/run_local_servers.sh
  3. 1 1
      cli/run_server.py
  4. 1 1
      requirements.txt

+ 3 - 2
cli/deploy_server.sh

@@ -81,5 +81,6 @@ fi
 #  ('UNIFORM_8BIT', 4),
 #  ('BLOCKWISE_8BIT', 5)]
 
-python -m cli.run_server --converted_model_name_or_path ${MODEL_NAME} --device ${DEVICE} --initial_peer ${INITIAL_PEER} \
-  --block_indices ${BLOCK_IDS} --compression UNIFORM_8BIT --identity_path ${SERVER_ID_PATH} --host_maddrs ${HOST_MADDR} --load_in_8bit # &> ${SERVER_ID_PATH}.log
+export OMP_NUM_THREADS=16
+CUDA_VISIBLE_DEVICES=${DEVICE} python -m cli.run_server --converted_model_name_or_path ${MODEL_NAME} --torch_dtype float16 --initial_peer ${INITIAL_PEER} --cache_dir '/extra_disk_1/dbaranchuk/test-bloomd' \
+  --block_indices ${BLOCK_IDS} --compression BLOCKWISE_8BIT --identity_path ${SERVER_ID_PATH} --host_maddrs ${HOST_MADDR} --load_in_8bit #&> logs/${SERVER_ID_PATH}.log

+ 2 - 2
cli/run_local_servers.sh

@@ -49,7 +49,7 @@ fi
 #######################
 
 hivemind-dht &> tmp.out &
-sleep 5
+sleep 20
 INITIAL_PEER=$(python -c "with open('tmp.out') as f: print(f.readlines()[1].split()[-1])" )
 echo "Initial peer: ${INITIAL_PEER}"
 
@@ -103,6 +103,6 @@ done
 # Kill initial peer #
 #####################
 
-sleep 10
+sleep 180
 pkill -f hivemind-dht # TODO: kill only particular pids of hivemind-dht
 rm tmp.out

+ 1 - 1
cli/run_server.py

@@ -27,7 +27,7 @@ def main():
 
 
     parser.add_argument('--compression', type=str, default='NONE', required=False, help='Tensor compression communication')
 
-    parser.add_argument('--num_handlers', type=int, default=16, required=False,
+    parser.add_argument('--num_handlers', type=int, default=8, required=False,
                         help='server will use this many processes to handle incoming requests')
     parser.add_argument('--min_batch_size', type=int, default=1,
                         help='Minimum required batch size for all expert operations')

+ 1 - 1
requirements.txt

@@ -2,5 +2,5 @@ torch==1.12.0
 accelerate==0.10.0
 huggingface-hub==0.7.0
 bitsandbytes-cuda113==0.26.0
-https://github.com/learning-at-home/hivemind/archive/28261470e44f2ae4157d08b563b4d2771f3a9549.zip
+https://github.com/learning-at-home/hivemind/archive/20b3b3d5f225ed525515a5383a008a8f9fad8173.zip # bnb_integration + dtype fix
 https://github.com/huggingface/transformers/archive/6589e510fa4e6c442059de2fab84752535de9b23.zip