Denis Mazur hai 4 anos
pai
achega
9b466fcee7

+ 1 - 1
benchmarks/benchmark_throughput.py

@@ -233,4 +233,4 @@ if __name__ == "__main__":
     elif args.preset == "nop":
         benchmark_throughput(expert_cls="nop", backprop=False, num_batches_per_client=args.num_batches_per_client)
     else:
-        raise ValueError(f"No such benchmark preset: {args.preset}")
+        raise ValueError(f"No such benchmark preset: {args.preset}")

+ 25 - 22
benchmarks/benchmark_throughput_p2p.py

@@ -31,14 +31,14 @@ def print_device_info(device=None):
 
 
 def client_process(
-        can_start,
-        benchmarking_failed,
-        server_peer_info,
-        num_experts,
-        batch_size,
-        hid_dim,
-        num_batches,
-        backprop=True,
+    can_start,
+    benchmarking_failed,
+    server_peer_info,
+    num_experts,
+    batch_size,
+    hid_dim,
+    num_batches,
+    backprop=True,
 ) -> None:
     torch.set_num_threads(1)
     can_start.wait()
@@ -61,21 +61,21 @@ def client_process(
 
 
 def benchmark_throughput(
-        num_experts=16,
-        num_handlers=None,
-        num_clients=128,
-        num_batches_per_client=16,
-        expert_cls="ffn",
-        hid_dim=1024,
-        batch_size=2048,
-        max_batch_size=None,
-        backprop=True,
-        device=None,
+    num_experts=16,
+    num_handlers=None,
+    num_clients=128,
+    num_batches_per_client=16,
+    expert_cls="ffn",
+    hid_dim=1024,
+    batch_size=2048,
+    max_batch_size=None,
+    backprop=True,
+    device=None,
 ):
     assert (
-            not hasattr(torch.cuda, "is_initialized")
-            or not torch.cuda.is_initialized()
-            or torch.device(device) == torch.device("cpu")
+        not hasattr(torch.cuda, "is_initialized")
+        or not torch.cuda.is_initialized()
+        or torch.device(device) == torch.device("cpu")
     )
     assert expert_cls in layers.name_to_block
     max_batch_size = max_batch_size or batch_size * 4
@@ -244,7 +244,10 @@ if __name__ == "__main__":
         )
     elif args.preset == "minimalistic":
         benchmark_throughput(
-            num_experts=1, num_clients=1, num_handlers=1, num_batches_per_client=args.num_batches_per_client,
+            num_experts=1,
+            num_clients=1,
+            num_handlers=1,
+            num_batches_per_client=args.num_batches_per_client,
             batch_size=512,
         )
     elif args.preset == "nop":

+ 2 - 3
hivemind/moe/client/expert.py

@@ -9,11 +9,10 @@ import torch.nn as nn
 from torch.autograd.function import once_differentiable
 
 import hivemind
+from hivemind.compression import deserialize_torch_tensor, serialize_torch_tensor
 from hivemind.p2p import P2P, PeerInfo, StubBase
 from hivemind.proto import runtime_pb2
-from hivemind.utils import switch_to_uvloop
-from hivemind.compression import deserialize_torch_tensor, serialize_torch_tensor
-from hivemind.utils import nested_compare, nested_flatten, nested_pack
+from hivemind.utils import nested_compare, nested_flatten, nested_pack, switch_to_uvloop
 
 DUMMY = torch.empty(0, requires_grad=True)  # dummy tensor that triggers autograd in RemoteExpert
 

+ 1 - 1
hivemind/moe/server/connection_handler.py

@@ -5,8 +5,8 @@ from typing import AsyncIterator, Dict
 
 import torch
 
-from hivemind.dht import DHT
 from hivemind.compression import deserialize_torch_tensor, serialize_torch_tensor
+from hivemind.dht import DHT
 from hivemind.moe.server.expert_backend import ExpertBackend
 from hivemind.p2p import P2PContext, ServicerBase
 from hivemind.proto import runtime_pb2