hai 4 anos · 9b466fcee7
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -233,4 +233,4 @@ if __name__ == "__main__":
 
				     elif args.preset == "nop":
			
 
				         benchmark_throughput(expert_cls="nop", backprop=False, num_batches_per_client=args.num_batches_per_client)
			
 
				     else:
			
 
				-        raise ValueError(f"No such benchmark preset: {args.preset}")
			
 
				+        raise ValueError(f"No such benchmark preset: {args.preset}")
			
--- a/benchmarks/benchmark_throughput_p2p.py
+++ b/benchmarks/benchmark_throughput_p2p.py
@@ -31,14 +31,14 @@ def print_device_info(device=None):
 
				 
			
 
				 
			
 
				 def client_process(
			
 
				-        can_start,
			
 
				-        benchmarking_failed,
			
 
				-        server_peer_info,
			
 
				-        num_experts,
			
 
				-        batch_size,
			
 
				-        hid_dim,
			
 
				-        num_batches,
			
 
				-        backprop=True,
			
 
				+    can_start,
			
 
				+    benchmarking_failed,
			
 
				+    server_peer_info,
			
 
				+    num_experts,
			
 
				+    batch_size,
			
 
				+    hid_dim,
			
 
				+    num_batches,
			
 
				+    backprop=True,
			
 
				 ) -> None:
			
 
				     torch.set_num_threads(1)
			
 
				     can_start.wait()
			
@@ -61,21 +61,21 @@ def client_process(
 
				 
			
 
				 
			
 
				 def benchmark_throughput(
			
 
				-        num_experts=16,
			
 
				-        num_handlers=None,
			
 
				-        num_clients=128,
			
 
				-        num_batches_per_client=16,
			
 
				-        expert_cls="ffn",
			
 
				-        hid_dim=1024,
			
 
				-        batch_size=2048,
			
 
				-        max_batch_size=None,
			
 
				-        backprop=True,
			
 
				-        device=None,
			
 
				+    num_experts=16,
			
 
				+    num_handlers=None,
			
 
				+    num_clients=128,
			
 
				+    num_batches_per_client=16,
			
 
				+    expert_cls="ffn",
			
 
				+    hid_dim=1024,
			
 
				+    batch_size=2048,
			
 
				+    max_batch_size=None,
			
 
				+    backprop=True,
			
 
				+    device=None,
			
 
				 ):
			
 
				     assert (
			
 
				-            not hasattr(torch.cuda, "is_initialized")
			
 
				-            or not torch.cuda.is_initialized()
			
 
				-            or torch.device(device) == torch.device("cpu")
			
 
				+        not hasattr(torch.cuda, "is_initialized")
			
 
				+        or not torch.cuda.is_initialized()
			
 
				+        or torch.device(device) == torch.device("cpu")
			
 
				     )
			
 
				     assert expert_cls in layers.name_to_block
			
 
				     max_batch_size = max_batch_size or batch_size * 4
			
@@ -244,7 +244,10 @@ if __name__ == "__main__":
 
				         )
			
 
				     elif args.preset == "minimalistic":
			
 
				         benchmark_throughput(
			
 
				-            num_experts=1, num_clients=1, num_handlers=1, num_batches_per_client=args.num_batches_per_client,
			
 
				+            num_experts=1,
			
 
				+            num_clients=1,
			
 
				+            num_handlers=1,
			
 
				+            num_batches_per_client=args.num_batches_per_client,
			
 
				             batch_size=512,
			
 
				         )
			
 
				     elif args.preset == "nop":
			
--- a/hivemind/moe/client/expert.py
+++ b/hivemind/moe/client/expert.py
@@ -9,11 +9,10 @@ import torch.nn as nn
 
				 from torch.autograd.function import once_differentiable
			
 
				 
			
 
				 import hivemind
			
 
				+from hivemind.compression import deserialize_torch_tensor, serialize_torch_tensor
			
 
				 from hivemind.p2p import P2P, PeerInfo, StubBase
			
 
				 from hivemind.proto import runtime_pb2
			
 
				-from hivemind.utils import switch_to_uvloop
			
 
				-from hivemind.compression import deserialize_torch_tensor, serialize_torch_tensor
			
 
				-from hivemind.utils import nested_compare, nested_flatten, nested_pack
			
 
				+from hivemind.utils import nested_compare, nested_flatten, nested_pack, switch_to_uvloop
			
 
				 
			
 
				 DUMMY = torch.empty(0, requires_grad=True)  # dummy tensor that triggers autograd in RemoteExpert
			
 
				 
			
--- a/hivemind/moe/server/connection_handler.py
+++ b/hivemind/moe/server/connection_handler.py
@@ -5,8 +5,8 @@ from typing import AsyncIterator, Dict
 
				 
			
 
				 import torch
			
 
				 
			
 
				-from hivemind.dht import DHT
			
 
				 from hivemind.compression import deserialize_torch_tensor, serialize_torch_tensor
			
 
				+from hivemind.dht import DHT
			
 
				 from hivemind.moe.server.expert_backend import ExpertBackend
			
 
				 from hivemind.p2p import P2PContext, ServicerBase
			
 
				 from hivemind.proto import runtime_pb2