@@ -11,7 +11,7 @@ import torch
 import torch.nn as nn
 from torch.autograd.function import once_differentiable

-import hivemind
+from hivemind import moe
 from hivemind.compression import deserialize_torch_tensor, serialize_torch_tensor
 from hivemind.dht import DHT
 from hivemind.p2p import P2P, PeerInfo, StubBase
@@ -32,8 +32,8 @@ from hivemind.utils.streaming import gather_from_streaming, split_for_streaming
 DUMMY = torch.empty(0, requires_grad=True)  # dummy tensor that triggers autograd in RemoteExpert


-def _get_expert_stub(p2p: P2P, server_peer_info: PeerInfo):  # -> ConnectionHandlerStub:
-    return hivemind.moe.server.connection_handler.ConnectionHandler.get_stub(p2p, server_peer_info.peer_id)
+def _get_expert_stub(p2p: P2P, server_peer_info: PeerInfo) -> "ConnectionHandlerStub":
+    return moe.server.connection_handler.ConnectionHandler.get_stub(p2p, server_peer_info.peer_id)


 @dataclass(frozen=True)
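
Note on the new return annotation: a quoted annotation such as `"ConnectionHandlerStub"` is stored as a plain string and never evaluated at runtime, so the function gains a usable type hint without the eager import that the old `# -> ConnectionHandlerStub:` comment was avoiding. A minimal sketch of the usual `typing.TYPE_CHECKING` companion pattern; the import path, the helper name, and the assumption that the stub class is importable at all are illustrative, not taken from this diff:

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Evaluated by type checkers only; skipped at runtime, so it cannot
        # introduce a circular import. The path is an assumption for illustration.
        from hivemind.moe.server.connection_handler import ConnectionHandlerStub


    def get_stub_sketch(p2p, peer_info) -> "ConnectionHandlerStub":
        # The quoted annotation stays a string in __annotations__;
        # no name lookup happens when this module is imported.
        ...
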
@@ -251,13 +251,12 @@ async def expert_forward(uid: str, inputs: Sequence[torch.Tensor], compressions:
 class _RemoteModuleCall(torch.autograd.Function):
     """Internal autograd-friendly call of a remote module. For applications, use RemoteExpert instead."""

-    @classmethod
+    @staticmethod
     def forward(
-        cls,
         ctx,
         dummy: torch.Tensor,
         uid: str,
-        stub,  #: ConnectionHandlerStub,
+        stub: "ConnectionHandlerStub",
         info: Dict[str, Any],
         *inputs: torch.Tensor,
     ) -> Tuple[torch.Tensor, ...]:
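
The `dummy: torch.Tensor` argument above is the `DUMMY` tensor defined earlier: a zero-element tensor with `requires_grad=True` whose only job is to force autograd to record the remote call even when the real inputs arrive with `requires_grad=False`. A self-contained toy (hypothetical, not part of hivemind) showing the effect:

    import torch

    DUMMY = torch.empty(0, requires_grad=True)  # same trick as in the diff

    class _TripleWithDummy(torch.autograd.Function):
        @staticmethod
        def forward(ctx, dummy: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
            return x * 3

        @staticmethod
        def backward(ctx, grad_output: torch.Tensor):
            # One gradient per forward input: None for the dummy, d(3x)/dx = 3 for x.
            return None, grad_output * 3

    x = torch.ones(2)  # requires_grad=False, e.g. activations from a frozen layer
    out = _TripleWithDummy.apply(DUMMY, x)
    print(out.requires_grad)  # True: the dummy keeps the call on the autograd tape
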
@@ -273,9 +272,9 @@ class _RemoteModuleCall(torch.autograd.Function):

         return tuple(deserialized_outputs)

-    @classmethod
+    @staticmethod
     @once_differentiable
-    def backward(cls, ctx, *grad_outputs) -> Tuple[Optional[torch.Tensor], ...]:
+    def backward(ctx, *grad_outputs) -> Tuple[Optional[torch.Tensor], ...]:
         grad_outputs_cpu = tuple(tensor.cpu() for tensor in grad_outputs)
         inputs_and_grad_outputs = tuple(nested_flatten((ctx.saved_tensors, grad_outputs_cpu)))
         backward_schema = tuple(nested_flatten((ctx.info["forward_schema"], ctx.info["outputs_schema"])))
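
For reference, `torch.autograd.Function` documents `forward` and `backward` as `@staticmethod`s that receive `ctx` as their first argument, which is what both hunks above switch to; `@once_differentiable` additionally guards against anyone trying to differentiate through `backward` itself. A self-contained toy Function (hypothetical, not part of hivemind) with the same shape:

    import torch
    from torch.autograd.function import once_differentiable

    class _ScaleByTwo(torch.autograd.Function):
        @staticmethod
        def forward(ctx, inp: torch.Tensor) -> torch.Tensor:
            ctx.save_for_backward(inp)  # analogous to the ctx.saved_tensors usage above
            return inp * 2

        @staticmethod
        @once_differentiable  # backward runs without grad tracking; double-backward raises
        def backward(ctx, grad_output: torch.Tensor) -> torch.Tensor:
            (inp,) = ctx.saved_tensors
            return grad_output * 2  # d(2x)/dx = 2

    x = torch.randn(3, requires_grad=True)
    _ScaleByTwo.apply(x).sum().backward()
    assert torch.allclose(x.grad, torch.full_like(x, 2.0))
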