2 年之前 · b28f5016ea
--- a/src/petals/cli/deploy_server.sh
+++ b/src/petals/cli/deploy_server.sh
@@ -1,79 +0,0 @@
 
				-#!/usr/bin/env bash
			
 
				-
			
 
				-#################
			
 
				-# Parse options #
			
 
				-#################
			
 
				-
			
 
				-instructions() {
			
 
				-  echo "Usage: $0 [-m] [-i] [ -d ] [ -p ] [ -b ] [-a] [-t]" >&2
			
 
				-  echo " -m: model name"
			
 
				-  echo " -i: initial peer"
			
 
				-  echo " -d: device" >&2
			
 
				-  echo " -p: server identity path" >&2
			
 
				-  echo " -b: block_ids" >&2
			
 
				-  echo " -a: host maddrs" >&2
			
 
				-  echo " -t: whether to run local tests" >&2
			
 
				-  exit 1
			
 
				-}
			
 
				-
			
 
				-if [ ! $# -ge 8 ]; then
			
 
				-    instructions
			
 
				-fi
			
 
				-
			
 
				-while getopts ":m:i:d:p:b:a:t:" option; do
			
 
				-    case $option in
			
 
				-        m)  MODEL_NAME=${OPTARG}
			
 
				-            ;;
			
 
				-        i)  INITIAL_PEER=${OPTARG}
			
 
				-            ;;
			
 
				-        d)  DEVICE=${OPTARG}
			
 
				-            ;;
			
 
				-        p)  SERVER_ID_PATH=${OPTARG}
			
 
				-            ;;
			
 
				-        b)  BLOCK_IDS=${OPTARG}
			
 
				-            ;;
			
 
				-        a)  HOST_MADDR=${OPTARG} # TODO: allow several maddrs
			
 
				-            ;;
			
 
				-        t)  RUN_LOCAL_TESTS=true
			
 
				-            ;;
			
 
				-        \?) instructions
			
 
				-            ;;
			
 
				-   esac
			
 
				-done
			
 
				-
			
 
				-
			
 
				-echo "=========="
			
 
				-echo "= Config ="
			
 
				-echo "=========="
			
 
				-echo "Model name: ${MODEL_NAME}"
			
 
				-echo "Initial peer: ${INITIAL_PEER}"
			
 
				-echo "Device: ${DEVICE}"
			
 
				-echo "Server name: ${SERVER_ID_PATH}"
			
 
				-echo "Server address: ${HOST_MADDR}"
			
 
				-echo "Bloom blocks: ${BLOCK_IDS}"
			
 
				-
			
 
				-
			
 
				-###########################
			
 
				-# Install or activate env #
			
 
				-###########################
			
 
				-
			
 
				-# TODO fix bug with self calling
			
 
				-source ~/miniconda3/etc/profile.d/conda.sh
			
 
				-if conda env list | grep ".*bloom-demo.*"  >/dev/null 2>/dev/null; then
			
 
				-    conda activate bloom-demo
			
 
				-else
			
 
				-    conda create -y --name bloom-demo python=3.8.12 pip
			
 
				-    conda activate bloom-demo
			
 
				-
			
 
				-    conda install -y -c conda-forge cudatoolkit-dev==11.3.1 cudatoolkit==11.3.1 cudnn==8.2.1.32
			
 
				-    pip install -i https://pypi.org/simple torch==1.12.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html
			
 
				-    pip install -i https://pypi.org/simple -r .
			
 
				-    pip install -i https://test.pypi.org/simple/ bitsandbytes-cuda113
			
 
				-fi
			
 
				-
			
 
				-##############
			
 
				-# Run server #
			
 
				-##############
			
 
				-
			
 
				-python -m petals.cli.run_server --converted_model_name_or_path ${MODEL_NAME} --device ${DEVICE} --initial_peer ${INITIAL_PEER} \
			
 
				-  --block_indices ${BLOCK_IDS} --compression UNIFORM_8BIT --identity_path ${SERVER_ID_PATH} --host_maddrs ${HOST_MADDR} --load_in_8bit &> ${SERVER_ID_PATH}.log
			
--- a/src/petals/cli/inference_one_block.py
+++ b/src/petals/cli/inference_one_block.py
@@ -1,51 +0,0 @@
 
				-import argparse
			
 
				-
			
 
				-import torch
			
 
				-from hivemind.utils.logging import get_logger
			
 
				-from tqdm.auto import trange
			
 
				-from transformers import BloomConfig
			
 
				-from transformers.models.bloom.modeling_bloom import build_alibi_tensor
			
 
				-
			
 
				-from petals.models.bloom.block import BloomBlock
			
 
				-
			
 
				-logger = get_logger(__name__)
			
 
				-
			
 
				-logger.warning("inference_one_block will soon be deprecated in favour of tests!")
			
 
				-
			
 
				-
			
 
				-def print_device_info(device=None):
			
 
				-    """Prints device stats. Code from https://stackoverflow.com/a/53374933/12891528"""
			
 
				-    device = torch.device(device or ("cuda" if torch.cuda.is_available() else "cpu"))
			
 
				-    logger.info(f"Using device: {device}")
			
 
				-
			
 
				-    # Additional Info when using cuda
			
 
				-    if device.type == "cuda":
			
 
				-        logger.info(torch.cuda.get_device_name(0))
			
 
				-        logger.info(f"Memory Usage:")
			
 
				-        logger.info(f"Allocated: {round(torch.cuda.memory_allocated(0) / 1024 ** 3, 1)} GB")
			
 
				-        logger.info(f"Cached:   {round(torch.cuda.memory_cached(0) / 1024 ** 3, 1)} GB")
			
 
				-
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    parser = argparse.ArgumentParser(description="Run a single bloom block locally on dummy data")
			
 
				-    parser.add_argument("--config", required=True, type=str, help="Path to a config json file")
			
 
				-    parser.add_argument("--state_dict", default=None, type=str, help="Optional path to saved block state dict")
			
 
				-    parser.add_argument("--num_steps", default=500, type=int, help="How many inference steps to run")
			
 
				-    parser.add_argument("--device", default=None, type=str, help="Run inference on this device")
			
 
				-    args = parser.parse_args()
			
 
				-
			
 
				-    if args.device is None:
			
 
				-        args.device = "cuda" if torch.cuda.is_available() else "cpu"
			
 
				-
			
 
				-    config = BloomConfig.from_json_file(args.config)
			
 
				-    block = BloomBlock(config).to(args.device)
			
 
				-
			
 
				-    cache = None
			
 
				-
			
 
				-    for i in trange(args.num_steps):
			
 
				-        dummy_input = torch.randn(1, 1, config.hidden_size, device=args.device)
			
 
				-        alibi = build_alibi_tensor(i + 1, config.num_attention_heads).to(args.device)
			
 
				-        with torch.no_grad():
			
 
				-            outputs, cache = block.forward(dummy_input, alibi=alibi, use_cache=True, layer_past=cache)
			
 
				-
			
 
				-    print_device_info(args.device)
			
--- a/src/petals/cli/local_server_config_example.cfg
+++ b/src/petals/cli/local_server_config_example.cfg
@@ -1,5 +0,0 @@
 
				-device=cpu
			
 
				-block_ids=2:3
			
 
				-id_path=./server.id
			
 
				-maddr=/ip4/127.0.0.1/tcp/30000
			
 
				-#
			
--- a/src/petals/cli/remote_server_config_example.cfg
+++ b/src/petals/cli/remote_server_config_example.cfg
@@ -1,6 +0,0 @@
 
				-name=bloom-peer-0.bloom.net
			
 
				-device=cpu
			
 
				-block_ids=1:3
			
 
				-id_path=./server.id
			
 
				-maddr=/ip4/0.0.0.0/tcp/30000
			
 
				-#
			
--- a/src/petals/cli/run_local_servers.sh
+++ b/src/petals/cli/run_local_servers.sh
@@ -1,109 +0,0 @@
 
#!/usr/bin/env bash
			
 
				-
			
 
				-#################
			
 
				-# Parse options #
			
 
				-#################
			
 
				-
			
 
				-instructions() {
			
 
				-  echo "Usage: $0 [-n] [-c]" >&2
			
 
				-  echo " -n: number of servers to run" >&2
			
 
				-  echo " -c: path to the server configs" >&2
			
 
				-  exit 1
			
 
				-}
			
 
				-
			
 
				-if [ $# != 4 ]; then
			
 
				-    instructions
			
 
				-fi
			
 
				-
			
 
				-while getopts ":n:c:t:" option; do
			
 
				-    case $option in
			
 
				-        n)  NUM_SERVERS=${OPTARG}
			
 
				-            ;;
			
 
				-        c)  CONFIG_PATH=${OPTARG}
			
 
				-            ;;
			
 
				-        \?) instructions
			
 
				-            ;;
			
 
				-   esac
			
 
				-done
			
 
				-
			
 
				-
			
 
				-###########################
			
 
				-# Install or activate env #
			
 
				-###########################
			
 
				-
			
 
				-source ~/miniconda3/etc/profile.d/conda.sh
			
 
				-if conda env list | grep ".*bloom-demo.*"  >/dev/null 2>/dev/null; then
			
 
				-    conda activate bloom-demo
			
 
				-else
			
 
				-    conda create -y --name bloom-demo python=3.8.12 pip
			
 
				-    conda activate bloom-demo
			
 
				-
			
 
				-    conda install -y -c conda-forge cudatoolkit-dev==11.3.1 cudatoolkit==11.3.1 cudnn==8.2.1.32
			
 
				-    pip install -i https://pypi.org/simple torch==1.12.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html
			
 
				-    pip install -i https://pypi.org/simple -r .
			
 
				-    pip install -i https://test.pypi.org/simple/ bitsandbytes-cuda113
			
 
				-fi
			
 
				-
			
 
				-
			
 
				-#######################
			
 
				-# Create Initial peer #
			
 
				-#######################
			
 
				-
			
 
				-hivemind-dht &> tmp.out &
			
 
				-sleep 5
			
 
				-INITIAL_PEER=$(python -c "with open('tmp.out') as f: print(f.readlines()[1].split()[-1])" )
			
 
				-echo "Initial peer: ${INITIAL_PEER}"
			
 
				-
			
 
				-
			
 
				-##############################
			
 
				-# Initialize the config file #
			
 
				-##############################
			
 
				-
			
 
				-typeset -A cfg
			
 
				-cfg=( # set default values in config array
			
 
				-    [device]="cpu"
			
 
				-    [block_ids]="1:2"
			
 
				-    [id_path]="server.id"
			
 
				-    [maddr]="/ip4/127.0.0.1/tcp/30000"
			
 
				-)
			
 
				-
			
 
				-###############
			
 
				-# Run servers #
			
 
				-###############
			
 
				-
			
 
				-for SERVER_ID in $(seq 0 $(( $NUM_SERVERS - 1 )) )
			
 
				-do
			
 
				-    ###############
			
 
				-    # Read config #
			
 
				-    ###############
			
 
				-
			
 
				-    while read line
			
 
				-    do
			
 
				-        if echo $line | grep -F = &>/dev/null
			
 
				-        then
			
 
				-            varname=$(echo "$line" | cut -d '=' -f 1)
			
 
				-            cfg[$varname]=$(echo "$line" | cut -d '=' -f 2-)
			
 
				-        fi
			
 
				-    done < ${CONFIG_PATH}/server_${SERVER_ID}.cfg
			
 
				-
			
 
				-    echo "=== Server #${SERVER_ID} ==="
			
 
				-    echo "Server ID: ${cfg[id_path]}"
			
 
				-    echo "Device: ${cfg[device]}"
			
 
				-    echo "Bloom block ids: ${cfg[block_ids]}"
			
 
				-    echo "Host maddr: ${cfg[maddr]}"
			
 
				-    echo ""
			
 
				-
			
 
				-    ##############
			
 
				-    # Run server #
			
 
				-    ##############
			
 
				-
			
 
				-    tmux new-session -d -s "Server_${SERVER_ID}" bash cli/deploy_server.sh -m "bigscience/test-bloomd" -i ${INITIAL_PEER} -d ${cfg[device]} -p ${cfg[id_path]} -b ${cfg[block_ids]} -a ${cfg[maddr]}
			
 
				-done
			
 
				-
			
 
				-#####################
			
 
				-# Kill initial peer #
			
 
				-#####################
			
 
				-
			
 
				-sleep 10
			
 
				-pkill -f hivemind-dht # TODO: kill only particular pids of hivemind-dht
			
 
				-rm tmp.out
			
--- a/src/petals/cli/run_remote_servers.sh
+++ b/src/petals/cli/run_remote_servers.sh
@@ -1,110 +0,0 @@
 
#!/usr/bin/env bash
			
 
				-
			
 
				-SSH_KEY_PATH="~/.ssh/<YOUR_KEY>"
			
 
				-
			
 
				-#################
			
 
				-# Parse options #
			
 
				-#################
			
 
				-
			
 
				-instructions() {
			
 
				-  echo "Usage: $0 [-u] [-n] [-c]" >&2
			
 
				-  echo " -u: username" >&2
			
 
				-  echo " -n: number of servers to run" >&2
			
 
				-  echo " -c: path to the server configs" >&2
			
 
				-  exit 1
			
 
				-}
			
 
				-
			
 
				-if [ $# != 6 ]; then
			
 
				-    instructions
			
 
				-fi
			
 
				-
			
 
				-while getopts ":u:n:c:" option; do
			
 
				-    case $option in
			
 
				-        u)  USERNAME=${OPTARG}
			
 
				-            ;;
			
 
				-        n)  NUM_SERVERS=${OPTARG}
			
 
				-            ;;
			
 
				-        c)  CONFIG_PATH=${OPTARG}
			
 
				-            ;;
			
 
				-        \?) instructions
			
 
				-            ;;
			
 
				-   esac
			
 
				-done
			
 
				-
			
 
				-
			
 
				-###########################
			
 
				-# Install or activate env #
			
 
				-###########################
			
 
				-
			
 
				-source ~/miniconda3/etc/profile.d/conda.sh
			
 
				-if conda env list | grep ".*bloom-demo.*"  >/dev/null 2>/dev/null; then
			
 
				-    conda activate bloom-demo
			
 
				-else
			
 
				-    conda create -y --name bloom-demo python=3.8.12 pip
			
 
				-    conda activate bloom-demo
			
 
				-
			
 
				-    conda install -y -c conda-forge cudatoolkit-dev==11.3.1 cudatoolkit==11.3.1 cudnn==8.2.1.32
			
 
				-    pip install -i https://pypi.org/simple torch==1.12.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html
			
 
				-    pip install -i https://pypi.org/simple -r .
			
 
				-fi
			
 
				-
			
 
				-
			
 
				-#######################
			
 
				-# Create Initial peer #
			
 
				-#######################
			
 
				-
			
 
				-hivemind-dht &> tmp.out &
			
 
				-
			
 
				-sleep 5
			
 
				-INITIAL_PEER=$(python -c "with open('tmp.out') as f: print(f.readlines()[1].split()[-2])" )
			
 
				-rm tmp.out
			
 
				-echo "Initial peer: ${INITIAL_PEER}"
			
 
				-
			
 
				-
			
 
				-##############################
			
 
				-# Initialize the config file #
			
 
				-##############################
			
 
				-
			
 
				-typeset -A cfg
			
 
				-cfg=( # set default values in config array
			
 
				-    [name]=""
			
 
				-    [device]="cpu"
			
 
				-    [block_ids]="1:2"
			
 
				-    [id_path]="server.id"
			
 
				-    [maddr]="/ip4/0.0.0.0/tcp/30000"
			
 
				-)
			
 
				-
			
 
				-###############
			
 
				-# Run servers #
			
 
				-###############
			
 
				-
			
 
				-for SERVER_ID in $(seq 0 $(( $NUM_SERVERS - 1 )) )
			
 
				-do
			
 
				-    ###############
			
 
				-    # Read config #
			
 
				-    ###############
			
 
				-
			
 
				-    while read line
			
 
				-    do
			
 
				-        if echo $line | grep -F = &>/dev/null
			
 
				-        then
			
 
				-            varname=$(echo "$line" | cut -d '=' -f 1)
			
 
				-            cfg[$varname]=$(echo "$line" | cut -d '=' -f 2-)
			
 
				-        fi
			
 
				-    done < ${CONFIG_PATH}/server_${SERVER_ID}.cfg
			
 
				-
			
 
				-    SERVER_NAME="${USERNAME}@${cfg[name]}"
			
 
				-    echo "=== Server #${SERVER_ID} ==="
			
 
				-    echo "Server name ${SERVER_NAME}"
			
 
				-    echo "Server ID: ${cfg[id_path]}"
			
 
				-    echo "Device: ${cfg[device]}"
			
 
				-    echo "Bloom block ids: ${cfg[block_ids]}"
			
 
				-    echo "Host maddr: ${cfg[maddr]}"
			
 
				-    echo "================="
			
 
				-
			
 
				-    ##############
			
 
				-    # Run server #
			
 
				-    ##############
			
 
				-
			
 
				-    ssh -i ${SSH_KEY_PATH} ${SERVER_NAME} "tmux new-session -d -s 'Server_${SERVER_ID}' 'cd bloom-demo && bash cli/deploy_server.sh -i ${INITIAL_PEER} -d ${cfg[device]} -p ${cfg[id_path]} -b ${cfg[block_ids]} -a ${cfg[maddr]}'"
			
 
				-done