add benchmarks

Denis Mazur, 4 years ago
commit 87012861ff

+ 34 - 19
benchmarks/benchmark_throughput-p2p.py → benchmarks/benchmark_throughput_p2p.py

@@ -7,7 +7,7 @@ import time
 import torch

 import hivemind
-from hivemind import get_free_port
+from hivemind.dht import DHT
 from hivemind.moe.server import layers
 from hivemind.utils.limits import increase_file_limit
 from hivemind.utils.logging import get_logger
@@ -28,11 +28,20 @@ def print_device_info(device=None):
         logger.info(f"Cached:   {round(torch.cuda.memory_cached(0) / 1024 ** 3, 1)} GB")


-def client_process(can_start, benchmarking_failed, port, num_experts, batch_size, hid_dim, num_batches, backprop=True):
+def client_process(
+    can_start,
+    benchmarking_failed,
+    server_peer_info,
+    num_experts,
+    batch_size,
+    hid_dim,
+    num_batches,
+    backprop=True,
+) -> None:
     torch.set_num_threads(1)
     can_start.wait()
     experts = [
-        hivemind.RemoteExpert(f"expert{i}", endpoint=f"{hivemind.LOCALHOST}:{port}") for i in range(num_experts)
+        hivemind.RemoteExpert(f"expert.{i}", server_peer_info=server_peer_info) for i in range(num_experts)
     ]

     try:
@@ -48,17 +57,16 @@ def client_process(can_start, benchmarking_failed, port, num_experts, batch_size


 def benchmark_throughput(
-    num_experts=16,
+    num_experts=1,
     num_handlers=None,
-    num_clients=128,
+    num_clients=1,
     num_batches_per_client=16,
     expert_cls="ffn",
     hid_dim=1024,
-    batch_size=2048,
+    batch_size=16,
     max_batch_size=None,
     backprop=True,
     device=None,
-    port=None,
 ):
     assert (
         not hasattr(torch.cuda, "is_initialized")
@@ -66,7 +74,6 @@ def benchmark_throughput(
         or torch.device(device) == torch.device("cpu")
     )
     assert expert_cls in layers.name_to_block
-    port = port or get_free_port()
     max_batch_size = max_batch_size or batch_size * 4
     num_handlers = max(1, num_handlers or num_clients // 2)
     benchmarking_failed = mp.Event()
@@ -74,8 +81,12 @@ def benchmark_throughput(
     timestamps = dict(started=time.perf_counter())

     try:
-        # start clients and await server
-        # Note: client processes must be launched BEFORE touching gpu, even torch.cuda.is_available can cause trouble
+        server_dht = DHT(start=True)
+        server_dht_peer_info = hivemind.PeerInfo(
+            peer_id=server_dht.peer_id,
+            addrs=[addr.decapsulate("/p2p/" + addr.get("p2p")) for addr in server_dht.get_visible_maddrs()],
+        )
+
         clients = [
             mp.Process(
                 target=client_process,
@@ -83,30 +94,29 @@ def benchmark_throughput(
                 args=(
                     can_start,
                     benchmarking_failed,
-                    port,
+                    server_dht_peer_info,
                     num_experts,
                     batch_size,
                     hid_dim,
                     num_batches_per_client,
                     backprop,
                 ),
+                daemon=True,
             )
             for i in range(num_clients)
         ]

         for client in clients:
-            client.daemon = True
             client.start()

         timestamps["launched_clients"] = timestamps["began_launching_server"] = time.perf_counter()

-        # start server
         device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         experts = {}
         for i in range(num_experts):
             expert = torch.jit.script(layers.name_to_block[expert_cls](hid_dim))
-            experts[f"expert{i}"] = hivemind.ExpertBackend(
-                name=f"expert{i}",
+            experts[f"expert.{i}"] = hivemind.ExpertBackend(
+                name=f"expert.{i}",
                 expert=expert,
                 optimizer=torch.optim.Adam(expert.parameters()),
                 args_schema=(hivemind.BatchTensorDescriptor(hid_dim),),
@@ -114,11 +124,11 @@ def benchmark_throughput(
                 max_batch_size=max_batch_size,
             )
         timestamps["created_experts"] = time.perf_counter()
+
         server = hivemind.moe.Server(
-            None,
-            experts,
-            listen_on=f"{hivemind.LOCALHOST}:{port}",
-            num_connection_handlers=num_handlers,
+            dht=server_dht,
+            expert_backends=experts,
+            num_connection_handlers=1,  # TODO: support greater number
             device=device,
         )
         server.start()
@@ -127,8 +137,13 @@ def benchmark_throughput(
         can_start.set()

         for client in clients:
+            print("joining clients")
+            print(client.is_alive())
             client.join()
+            print("i'm here")
+
         timestamps["clients_finished"] = time.perf_counter()
+
     except BaseException as e:
         benchmarking_failed.set()
         raise e
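
Taken together, the benchmark's networking changes replace host:port endpoints with libp2p peer identities. A minimal sketch of the new wiring, assembled from the diff above (the uid "expert.0" follows the new dotted naming; treat this as illustrative rather than a tested snippet):

    import hivemind
    from hivemind.dht import DHT

    # Server side: start a DHT node and derive a PeerInfo that clients can dial.
    server_dht = DHT(start=True)
    server_peer_info = hivemind.PeerInfo(
        peer_id=server_dht.peer_id,
        # drop the trailing /p2p/<peer_id> component, keeping only transport addresses
        addrs=[addr.decapsulate("/p2p/" + addr.get("p2p")) for addr in server_dht.get_visible_maddrs()],
    )

    # Client side: dial an expert by peer info instead of f"{LOCALHOST}:{port}".
    expert = hivemind.RemoteExpert("expert.0", server_peer_info=server_peer_info)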

+ 1 - 2
hivemind/hivemind_cli/run_server.py

@@ -17,8 +17,7 @@ def main():
     # fmt:off
     parser = configargparse.ArgParser(default_config_files=["config.yml"])
     parser.add('-c', '--config', required=False, is_config_file=True, help='config file path')
-    parser.add_argument('--listen_on', type=str, default='0.0.0.0:*', required=False,
-                        help="'localhost' for local connections only, '0.0.0.0' for ipv4 '[::]' for ipv6")
+
     parser.add_argument('--num_experts', type=int, default=None, required=False, help="The number of experts to serve")
     parser.add_argument('--expert_pattern', type=str, default=None, required=False,
                         help='all expert uids will follow this pattern, e.g. "myexpert.[0:256].[0:1024]" will'

+ 8 - 1
hivemind/moe/__init__.py

@@ -1,2 +1,9 @@
 from hivemind.moe.client import RemoteExpert, RemoteMixtureOfExperts, RemoteSwitchMixtureOfExperts
-from hivemind.moe.server import ExpertBackend, Server, declare_experts, get_experts, register_expert_class, ConnectionHandler
+from hivemind.moe.server import (
+    ConnectionHandler,
+    ExpertBackend,
+    Server,
+    declare_experts,
+    get_experts,
+    register_expert_class,
+)

+ 15 - 22
hivemind/moe/client/expert.py

@@ -1,32 +1,23 @@
-import asyncio
-from concurrent.futures import Future
-import multiprocessing as mp
 import pickle
-from typing import Any, Dict, Optional, Tuple, Awaitable
-from threading import Thread
+from concurrent.futures import Future
 from queue import Queue
+from threading import Thread
+from typing import Any, Awaitable, Dict, Optional, Tuple

 import torch
 import torch.nn as nn
 from torch.autograd.function import once_differentiable

-
-#from hivemind.moe.server.connection_handler import ConnectionHandler
-from hivemind.utils.compression import deserialize_torch_tensor, serialize_torch_tensor
-from hivemind.utils import nested_compare, nested_flatten, nested_pack, switch_to_uvloop
+import hivemind
 from hivemind.p2p import P2P, PeerInfo, StubBase
 from hivemind.proto import runtime_pb2
-
+from hivemind.utils import nested_compare, nested_flatten, nested_pack, switch_to_uvloop
+from hivemind.utils.compression import deserialize_torch_tensor, serialize_torch_tensor

 DUMMY = torch.empty(0, requires_grad=True)  # dummy tensor that triggers autograd in RemoteExpert


-import hivemind
-
-#ConnectionHandlerStub = hivemind.moe.server.connection_handler.ConnectionHandler._stub_type
-
-
-def _get_expert_stub(p2p: P2P, server_peer_info: PeerInfo): # -> ConnectionHandlerStub:
+def _get_expert_stub(p2p: P2P, server_peer_info: PeerInfo):  # -> ConnectionHandlerStub:
     return hivemind.moe.server.connection_handler.ConnectionHandler.get_stub(p2p, server_peer_info.peer_id)


@@ -39,7 +30,7 @@ class RemoteExpert(nn.Module):
     :param uid: unique expert identifier
     """

-    def __init__(self, uid, server_peer_info: PeerInfo, p2p: Optional[P2P] = None):
+    def __init__(self, uid, server_peer_info: PeerInfo, p2p: Optional[P2P] = None, connect: bool = True):
         super().__init__()
         self.uid, self.server_peer_info = uid, server_peer_info
         self._info = None
@@ -49,7 +40,8 @@ class RemoteExpert(nn.Module):
         else:
             self.p2p = p2p

-        _RemoteModuleCall.run_coroutine(self.p2p._client.connect(server_peer_info.peer_id, server_peer_info.addrs))
+        if connect:
+            _RemoteModuleCall.run_coroutine(self.p2p._client.connect(server_peer_info.peer_id, server_peer_info.addrs))

     @property
     def stub(self) -> StubBase:
@@ -66,6 +58,7 @@ class RemoteExpert(nn.Module):

         if not nested_compare(forward_inputs, self.info["forward_schema"]):
             raise TypeError(f"Inputs do not match expert input schema. Did you pass the right number of parameters?")
+
         flat_outputs = _RemoteModuleCall.apply(DUMMY, self.uid, self.stub, self.info, *nested_flatten(forward_inputs))

         # Note: we send DUMMY to prevent torch from excluding expert from backward if no other inputs require grad
@@ -100,7 +93,6 @@ class _RemoteModuleCall(torch.autograd.Function):
                 except Exception as e:
                     future.set_exception(e)
                     continue
-
                 future.set_result(result)

         loop.run_until_complete(receive_tasks())
@@ -108,7 +100,7 @@ class _RemoteModuleCall(torch.autograd.Function):
     @classmethod
     def run_coroutine(cls, coro: Awaitable, return_future: bool = False):
         if cls._event_thread is None:
-            cls._event_thread = Thread(target=cls._run)
+            cls._event_thread = Thread(target=cls._run, daemon=True)
             cls._event_thread.start()

         future = Future()
@@ -117,7 +109,8 @@ class _RemoteModuleCall(torch.autograd.Function):
         if return_future:
             return future

-        return future.result()
+        result = future.result()
+        return result

     @classmethod
     def forward(
@@ -125,7 +118,7 @@ class _RemoteModuleCall(torch.autograd.Function):
         ctx,
         dummy: torch.Tensor,
         uid: str,
-        stub,#: ConnectionHandlerStub,
+        stub,  #: ConnectionHandlerStub,
         info: Dict[str, Any],
         *inputs: torch.Tensor,
     ) -> Tuple[torch.Tensor, ...]:
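
The run_coroutine changes above rely on a common pattern: a single daemon thread owns an asyncio event loop, and synchronous callers submit coroutines through a queue and block on a concurrent.futures.Future. A self-contained sketch of that pattern (not hivemind's exact code; the names below are illustrative):

    import asyncio
    from concurrent.futures import Future
    from queue import Queue
    from threading import Thread

    _task_queue: Queue = Queue()

    def _loop_worker() -> None:
        # The daemon thread owns its own event loop and serves submitted coroutines forever.
        loop = asyncio.new_event_loop()

        async def receive_tasks():
            while True:
                # The blocking Queue.get runs in the default executor so the loop stays responsive.
                coro, future = await loop.run_in_executor(None, _task_queue.get)
                try:
                    result = await coro
                except Exception as e:
                    future.set_exception(e)
                    continue
                future.set_result(result)

        loop.run_until_complete(receive_tasks())

    _event_thread = Thread(target=_loop_worker, daemon=True)  # daemon=True mirrors the fix above
    _event_thread.start()

    def run_coroutine(coro, return_future: bool = False):
        # Synchronous callers block on the Future unless they ask for it back.
        future = Future()
        _task_queue.put((coro, future))
        return future if return_future else future.result()

Marking the thread as a daemon matters here: without it, the forever-running loop thread keeps the process alive after the main thread exits.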

+ 2 - 9
hivemind/moe/server/__init__.py

@@ -58,7 +58,6 @@ class Server(threading.Thread):
         self,
         dht: Optional[DHT],
         expert_backends: Dict[str, ExpertBackend],
-        listen_on: Endpoint = "0.0.0.0:*",
         num_connection_handlers: int = 1,
         update_period: int = 30,
         start=False,
@@ -67,9 +66,6 @@ class Server(threading.Thread):
     ):
         super().__init__()
         self.dht, self.experts, self.update_period = dht, expert_backends, update_period
-        if get_port(listen_on) is None:
-            listen_on = replace_port(listen_on, new_port=get_free_port())
-        self.listen_on, self.port = listen_on, get_port(listen_on)

         self.conn_handlers = [ConnectionHandler(dht, self.experts) for _ in range(1)]
         if checkpoint_dir is not None:
@@ -82,7 +78,7 @@ class Server(threading.Thread):
             self.dht_handler_thread = DHTHandlerThread(
                 experts=self.experts,
                 dht=self.dht,
-                endpoint=self.listen_on,
+                peer_id=self.dht.peer_id,
                 update_period=self.update_period,
                 daemon=True,
             )
@@ -93,7 +89,6 @@ class Server(threading.Thread):
     @classmethod
     def create(
         cls,
-        listen_on="0.0.0.0:*",
         num_experts: int = None,
         expert_uids: str = None,
         expert_pattern: str = None,
@@ -222,7 +217,6 @@ class Server(threading.Thread):
         return cls(
             dht,
             experts,
-            listen_on=listen_on,
             num_connection_handlers=num_handlers,
             device=device,
             checkpoint_dir=checkpoint_dir,
@@ -235,8 +229,7 @@ class Server(threading.Thread):
         Starts Server in the current thread. Initializes dht if necessary, starts connection handlers,
         runs Runtime (self.runtime) to process incoming requests.
         """
-        logger.info(f"Server started at {self.listen_on}")
-        logger.info(f"Got {len(self.experts)} experts:")
+        logger.info(f"Server started with {len(self.experts)} experts:")
         for expert_name, backend in self.experts.items():
             num_parameters = sum(p.numel() for p in backend.expert.parameters() if p.requires_grad)
             logger.info(f"{expert_name}: {backend.expert.__class__.__name__}, {num_parameters} parameters")

+ 6 - 3
hivemind/moe/server/connection_handler.py

@@ -5,11 +5,11 @@ from typing import Dict

 import torch

-from hivemind.p2p import P2PContext, ServicerBase
 from hivemind.dht import DHT
 from hivemind.moe.server.expert_backend import ExpertBackend
+from hivemind.p2p import P2PContext, ServicerBase
 from hivemind.proto import runtime_pb2
-from hivemind.utils import get_logger, nested_flatten, MPFuture
+from hivemind.utils import MPFuture, get_logger, nested_flatten
 from hivemind.utils.asyncio import switch_to_uvloop
 from hivemind.utils.compression import deserialize_torch_tensor, serialize_torch_tensor

@@ -45,6 +45,7 @@ class ConnectionHandler(mp.context.ForkProcess, ServicerBase):
             except Exception as e:
                 self.ready.set_exception(e)
                 return
+
         self.ready.set_result(None)

         try:
@@ -65,7 +66,9 @@ class ConnectionHandler(mp.context.ForkProcess, ServicerBase):

         return runtime_pb2.ExpertResponse(tensors=serialized_response)

-    async def rpc_backward(self, request: runtime_pb2.ExpertRequest, context: P2PContext) -> runtime_pb2.ExpertResponse:
+    async def rpc_backward(
+        self, request: runtime_pb2.ExpertRequest, context: P2PContext
+    ) -> runtime_pb2.ExpertResponse:
         inputs_and_grad_outputs = [deserialize_torch_tensor(tensor) for tensor in request.tensors]
         future = self.experts[request.uid].backward_pool.submit_task(*inputs_and_grad_outputs)
         serialized_response = [
+ 13 - 13
hivemind/moe/server/dht_handler.py

@@ -14,27 +14,27 @@ from hivemind.moe.server.expert_uid import (
     is_valid_uid,
     split_uid,
 )
-from hivemind.utils import Endpoint, get_dht_time, get_port
+from hivemind.p2p import PeerID, PeerInfo
+from hivemind.utils import get_dht_time


 class DHTHandlerThread(threading.Thread):
-    def __init__(self, experts, dht: DHT, endpoint: Endpoint, update_period: int = 5, **kwargs):
+    def __init__(self, experts, dht: DHT, peer_id: PeerID, update_period: int = 5, **kwargs):
         super().__init__(**kwargs)
-        assert get_port(endpoint) is not None
-        self.endpoint = endpoint
+        self.peer_id = peer_id
         self.experts = experts
         self.dht = dht
         self.update_period = update_period
         self.stop = threading.Event()

     def run(self) -> None:
-        declare_experts(self.dht, self.experts.keys(), self.endpoint)
+        declare_experts(self.dht, self.experts.keys(), self.peer_id)
         while not self.stop.wait(self.update_period):
-            declare_experts(self.dht, self.experts.keys(), self.endpoint)
+            declare_experts(self.dht, self.experts.keys(), self.peer_id)


 def declare_experts(
-    dht: DHT, uids: Sequence[ExpertUID], endpoint: Endpoint, expiration: DHTExpiration = 300, wait: bool = True
+    dht: DHT, uids: Sequence[ExpertUID], peer_id: PeerID, expiration: DHTExpiration = 300, wait: bool = True
 ) -> Dict[ExpertUID, bool]:
     """
     Make experts visible to all DHT peers; update timestamps if declared previously.
@@ -49,22 +49,22 @@ def declare_experts(
     for uid in uids:
         assert is_valid_uid(uid), f"{uid} is not a valid expert uid. All uids must follow {UID_PATTERN.pattern}"
     return dht.run_coroutine(
-        partial(_declare_experts, uids=list(uids), endpoint=endpoint, expiration=expiration), return_future=not wait
+        partial(_declare_experts, uids=list(uids), peer_id=peer_id, expiration=expiration), return_future=not wait
     )


 async def _declare_experts(
-    dht: DHT, node: DHTNode, uids: List[ExpertUID], endpoint: Endpoint, expiration: DHTExpiration
+    dht: DHT, node: DHTNode, uids: List[ExpertUID], peer_id: PeerID, expiration: DHTExpiration
 ) -> Dict[ExpertUID, bool]:
     num_workers = len(uids) if dht.num_workers is None else min(len(uids), dht.num_workers)
     expiration_time = get_dht_time() + expiration
     data_to_store: Dict[Tuple[ExpertPrefix, Optional[Coordinate]], DHTValue] = {}
     for uid in uids:
-        data_to_store[uid, None] = endpoint
+        data_to_store[uid, None] = peer_id.to_base58()
         prefix = uid if uid.count(UID_DELIMITER) > 1 else f"{uid}{UID_DELIMITER}{FLAT_EXPERT}"
         for i in range(prefix.count(UID_DELIMITER) - 1):
             prefix, last_coord = split_uid(prefix)
-            data_to_store[prefix, last_coord] = [uid, endpoint]
+            data_to_store[prefix, last_coord] = [uid, peer_id.to_base58()]

     keys, maybe_subkeys, values = zip(*((key, subkey, value) for (key, subkey), value in data_to_store.items()))
     store_ok = await node.store_many(keys, values, expiration_time, subkeys=maybe_subkeys, num_workers=num_workers)
@@ -94,6 +94,6 @@ async def _get_experts(

     experts: List[Optional[RemoteExpert]] = [None] * len(uids)
     for i, uid in enumerate(uids):
-        if found[uid] is not None and isinstance(found[uid].value, Endpoint):
-            experts[i] = RemoteExpert(uid, found[uid].value)
+        if found[uid] is not None and isinstance(found[uid].value, PeerID):
+            experts[i] = RemoteExpert(uid, PeerInfo(peer_id=found[uid].value, addrs=[]))
     return experts
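
With endpoints gone, the DHT stores a base58 peer id under each expert uid. A minimal sketch of the intended declare/lookup round trip (assuming a single in-process DHT; "expert.0" is illustrative):

    from hivemind.dht import DHT
    from hivemind.moe.server import declare_experts, get_experts

    dht = DHT(start=True)

    # Server side: publish expert uids keyed by this peer's id (stored as base58 text).
    declare_experts(dht, ["expert.0"], dht.peer_id)

    # Client side: resolve the uid back to a RemoteExpert that carries a PeerInfo.
    expert, = get_experts(dht, ["expert.0"])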