瀏覽代碼

test with 5 servers

justheuristic 3 年之前
父節點
當前提交
c4bbf8f361
共有 3 個文件被更改,包括 5 次插入和 6 次刪除
  1. 4 4
      .github/workflows/run-tests.yaml
  2. 1 1
      src/client/sequence_manager.py
  3. 0 1
      src/client/sequential_autograd.py

+ 4 - 4
.github/workflows/run-tests.yaml

@@ -83,7 +83,7 @@ jobs:
 
           python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 0:12 \
             --torch_dtype float32 --identity tests/test.id --host_maddrs /ip4/127.0.0.1/tcp/31337 \
-            --throughput 1 &> server2.log &
+            --throughput 1 &> server1.log &
           SERVER1_PID=$!
           
           sleep 5  # wait for the first server to initialize DHT
@@ -95,6 +95,8 @@ jobs:
             --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server2.log &
           SERVER2_PID=$!
 
+          sleep 10 # wait for initial servers to declare blocks, then let server decide which blocks to serve
+
           python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 0:6 \
             --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server3.log &
           SERVER3_PID=$!
@@ -103,10 +105,8 @@ jobs:
             --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server4.log &
           SERVER4_PID=$!
           
-          sleep 10 # wait for initial servers to declare blocks, then let server decide which blocks to serve
-
           python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --num_blocks 3 \
-            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server4.log &
+            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server5.log &
           SERVER5_PID=$!
           
           #TODO tail server logs

+ 1 - 1
src/client/sequence_manager.py

@@ -54,7 +54,7 @@ class RemoteSequenceManager:
             chosen_span = random.choice(candidate_spans)  # TODO this should be replaced with proper load balancing
 
             assert chosen_span.start <= current_index < chosen_span.end
-            span_sequence.append(chosen_span)
+            span_sequence.append(RemoteSpanInfo(start=current_index, end=chosen_span.end, peer_id=chosen_span.peer_id))
             current_index = chosen_span.end
 
         return span_sequence

+ 0 - 1
src/client/sequential_autograd.py

@@ -191,7 +191,6 @@ async def sequential_backward(
 
     # For now, we do not support mixed dummy and grad prompts
     # Concat in num_layer dimension
-    assert not grad_prompts_reversed or len(grad_prompts_reversed) == len(prompts)
     grad_prompts = torch.cat(grad_prompts_reversed[::-1], dim=0) if grad_prompts_reversed else None
     return grad_outputs, grad_prompts