View source code

GRPC connection handlers (#61)

* Add connection handlers with grpc

* Implement grpc for client/server

* Cache channel, get rid of warnings

* Awaitable interactions with TaskPool

* [personal] parallel gRPC handlers (#69)

* spawn multiple connection handlers

* remove reserve_port

* minor: make preset_minimalistic actually care about num_batches_per_client

* RemoteExpert can no longer be pickled

* fix broken moe.py after changes in RemoteModuleCall

* write-through

* fix moe.py (broken by changes in _RemoteModuleCall)

* fix a bug where connection handlers set ready flag prematurely

* fix wrong gradient type if not all experts survived

* connection_handler now sets ready only when actually ready

* create stub in a lazy manner

* rollback changes to DHT

* Update TODO, remove message limits

* Connection is gone 🦀🦀🦀

* Cleanup

* Switch to absolute imports (#70)

Co-authored-by: xtinkt <ant.sinitsin@gmail.com>
Co-authored-by: justheuristic <justheuristic@gmail.com>
Max Ryabinin 5 years ago
parent
commit
f1565ef7af

+ 1 - 1
.circleci/config.yml

@@ -27,7 +27,7 @@ jobs:
           command: sudo python setup.py develop
           name: setup
       - run:
-          command: pytest ./tests --full-trace
+          command: for test_file in tests/test*.py; do pytest $test_file --full-trace; done
           name: tests
       - run:
           command: python tests/benchmark_throughput.py --preset minimalistic

+ 5 - 5
hivemind/__init__.py

@@ -1,7 +1,7 @@
-from .client import *
-from .dht import *
-from .server import Server
-from .utils import *
-from .runtime import *
+from hivemind.client import *
+from hivemind.dht import *
+from hivemind.server import Server
+from hivemind.utils import *
+from hivemind.runtime import *
 
 __version__ = '0.7.1'

+ 2 - 2
hivemind/client/__init__.py

@@ -1,2 +1,2 @@
-from .moe import RemoteMixtureOfExperts
-from .expert import RemoteExpert
+from hivemind.client.expert import RemoteExpert
+from hivemind.client.moe import RemoteMixtureOfExperts

+ 41 - 19
hivemind/client/expert.py

@@ -1,10 +1,14 @@
+import pickle
 from typing import Tuple, Optional
 
+import grpc
+import grpc.experimental.aio
 import torch
 import torch.nn as nn
 from torch.autograd.function import once_differentiable
 
-from ..utils import nested_flatten, DUMMY, PytorchSerializer, nested_pack, nested_compare, Connection
+from hivemind.utils import nested_flatten, DUMMY, nested_pack, nested_compare
+from hivemind.utils.grpc import serialize_torch_tensor, deserialize_torch_tensor, runtime_pb2, runtime_grpc
 
 
 class RemoteExpert(nn.Module):
@@ -23,12 +27,29 @@ class RemoteExpert(nn.Module):
     def __init__(self, uid, host='127.0.0.1', port=8080):
         super().__init__()
         self.uid, self.host, self.port = uid, host, port
+        self._channel, self._stub = None, None
         self._info = None
 
+    @property
+    def stub(self):
+        if self._channel is None:
+            self._channel = grpc.insecure_channel(f'{self.host}:{self.port}', options=[
+                ('grpc.max_send_message_length', -1),
+                ('grpc.max_receive_message_length', -1)
+            ])
+        if self._stub is None:
+            self._stub = runtime_grpc.ConnectionHandlerStub(self._channel)
+        return self._stub
+
+    def __del__(self):
+        if self._channel is not None:
+            self._channel.close()
+
     def forward(self, *args, **kwargs):
         """ Call RemoteExpert for the specified inputs and return its output(s). Compatible with pytorch.autograd. """
         assert len(kwargs) == len(self.info['keyword_names']), f"Keyword args should be {self.info['keyword_names']}"
         kwargs = {key: kwargs[key] for key in self.info['keyword_names']}
+
         # Note: we put keyword arguments in the same order as on a server to prevent f(a=1, b=2) != f(b=2, a=1) errors
 
         forward_inputs = (args, kwargs)
@@ -36,16 +57,16 @@ class RemoteExpert(nn.Module):
         if not nested_compare(forward_inputs, self.info['forward_schema']):
             raise TypeError(f"Inputs do not match expert input schema. Did you pass the right number of parameters?")
 
-        flat_outputs = _RemoteModuleCall.apply(DUMMY, self.uid, self.host, self.port, *nested_flatten(forward_inputs))
+        flat_outputs = _RemoteModuleCall.apply(DUMMY, self.uid, self.host, self.port, self.stub,
+                                               *nested_flatten(forward_inputs))
         # Note: we send DUMMY to prevent torch from excluding expert from backward if no other inputs require grad
         return nested_pack(flat_outputs, structure=self.info['outputs_schema'])
 
     @property
     def info(self):
         if self._info is None:
-            connection = Connection.create(self.host, self.port)
-            connection.send_raw('info', PytorchSerializer.dumps(self.uid))
-            self._info = PytorchSerializer.loads(connection.recv_message()[1])
+            outputs = self.stub.info(runtime_pb2.ExpertUID(uid=self.uid))
+            self._info = pickle.loads(outputs.serialized_info)
         return self._info
 
     def extra_repr(self):
@@ -56,26 +77,27 @@ class _RemoteModuleCall(torch.autograd.Function):
     """ Internal autograd-friendly call of a remote module. For applications, use RemoteExpert instead. """
 
     @staticmethod
-    def forward(ctx, dummy: torch.Tensor,
-                uid: str, host: str, port: int, *inputs: torch.Tensor) -> Tuple[torch.Tensor, ...]:
+    def forward(ctx, dummy: torch.Tensor, uid: str, host: str, port: int, stub: runtime_grpc.ConnectionHandlerStub,
+                *inputs: torch.Tensor) -> Tuple[torch.Tensor, ...]:
         # Note: *inputs are flattened input tensors that follow the expert's info['input_schema']
         inputs = tuple(map(torch.Tensor.detach, inputs))  # detach to avoid pickling the computation graph
-        ctx.uid, ctx.host, ctx.port = uid, host, port
+        ctx.uid, ctx.host, ctx.port, ctx.stub = uid, host, port, stub
         ctx.save_for_backward(*inputs)
 
-        connection = Connection.create(ctx.host, ctx.port)
-        connection.send_raw('fwd_', PytorchSerializer.dumps((ctx.uid, inputs)))
-        rtype, msg = connection.recv_message()
-        assert len(msg) != 0, "ExpertBackend.forward did not respond"
-        return tuple(PytorchSerializer.loads(msg))  # flattened expert outputs
+        outputs = stub.forward(
+            runtime_pb2.ExpertRequest(uid=ctx.uid, tensors=[serialize_torch_tensor(tensor) for tensor in inputs]))
+
+        deserialized_outputs = [deserialize_torch_tensor(tensor) for tensor in outputs.tensors]
+
+        return tuple(deserialized_outputs)
 
     @staticmethod
     @once_differentiable
     def backward(ctx, *grad_outputs) -> Tuple[Optional[torch.Tensor], ...]:
-        connection = Connection.create(ctx.host, ctx.port)
         payload = tuple(nested_flatten((ctx.saved_tensors, grad_outputs)))
-        connection.send_raw('bwd_', PytorchSerializer.dumps((ctx.uid, payload)))
-        rtype, msg = connection.recv_message()
-        assert len(msg) != 0, "ExpertBackend.backward did not respond"
-        grad_inputs = PytorchSerializer.loads(msg)
-        return (DUMMY, None, None, None, *grad_inputs)
+
+        grad_inputs = ctx.stub.backward(
+            runtime_pb2.ExpertRequest(uid=ctx.uid, tensors=[serialize_torch_tensor(tensor) for tensor in payload]))
+
+        deserialized_grad_inputs = [deserialize_torch_tensor(tensor) for tensor in grad_inputs.tensors]
+        return (DUMMY, None, None, None, None, *deserialized_grad_inputs)
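
A minimal usage sketch (not part of the commit) of the new lazily-created gRPC stub in expert.py above; it assumes a hivemind server with an expert registered under uid "expert.0" (taking a single [batch, 1024] input) is already listening on 127.0.0.1:8080.

import torch
import hivemind

# Assumptions: a server on 127.0.0.1:8080 serves an expert "expert.0" with hidden_dim=1024.
# The gRPC channel and ConnectionHandlerStub are created lazily on first access to .stub;
# both forward() and backward() go through that stub instead of the removed Connection class.
expert = hivemind.RemoteExpert(uid="expert.0", host="127.0.0.1", port=8080)
x = torch.randn(4, 1024)
y = expert(x)            # _RemoteModuleCall.forward -> stub.forward(ExpertRequest)
y.sum().backward()       # _RemoteModuleCall.backward -> stub.backward(ExpertRequest)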

+ 12 - 11
hivemind/client/moe.py

@@ -1,16 +1,14 @@
-import multiprocessing as mp
-import multiprocessing.pool
 from functools import partial
-from typing import Tuple, List, Dict, Optional
+from typing import Tuple, List, Optional
 
 import numpy as np
 import torch
 import torch.nn as nn
 from torch.autograd.function import once_differentiable
 
-from .expert import RemoteExpert, _RemoteModuleCall
-from ..utils import nested_map, check_numpy, run_and_await_k, nested_pack, nested_flatten, DUMMY, run_in_background
-from ..utils import run_isolated_forward, EmulatedAutogradContext, run_isolated_backward, map_with_parallel_backward
+from hivemind.client.expert import RemoteExpert, _RemoteModuleCall
+from hivemind.utils import nested_map, run_and_await_k, nested_pack, nested_flatten, DUMMY, run_in_background, \
+    run_isolated_forward, EmulatedAutogradContext, run_isolated_backward, map_with_parallel_backward
 
 
 class RemoteMixtureOfExperts(nn.Module):
@@ -37,6 +35,7 @@ class RemoteMixtureOfExperts(nn.Module):
      allow_broadcasting=True will flatten first d-1 input dimensions, apply RemoteMixtureOfExperts and un-flatten again
      allow_broadcasting=False will raise an error
     """
+
     def __init__(self, *, in_features, grid_size: Tuple[int], dht, k_best, k_min=1,
                  forward_timeout=None, timeout_after_k_min=1.0, backward_k_min=1, backward_timeout=None,
                  uid_prefix='', expert_padding=None, allow_broadcasting=True):
@@ -107,7 +106,7 @@ class RemoteMixtureOfExperts(nn.Module):
         delimeters = np.array(self.dht.UID_DELIMETER)[None, None, None]  # pre-compute numpy array for fast concat
 
         for dim_index, dim_scores in enumerate(grid_scores):
-            dim_scores = check_numpy(dim_scores)
+            dim_scores = dim_scores.detach().cpu().numpy()
             assert dim_scores.shape[-1] == self.grid_size[dim_index]
 
             # create all possible successsors from current beam
@@ -194,6 +193,7 @@ class _RemoteMoECall(torch.autograd.Function):
     This function that can recover from individual failures during forward and/or backward passes.
     For user-friendly version of this function, use RemoteMixtureOfExperts module.
     """
+
     @classmethod
     def forward(cls, ctx, expert_logits: torch.Tensor, experts: List[RemoteExpert],
                 k_min: int, timeout_after_k_min: float, backward_k_min: int, timeout_total: Optional[float],
@@ -250,18 +250,19 @@ class _RemoteMoECall(torch.autograd.Function):
             for grad_out, stacked_avive_out in zip(grad_outputs_flat, stacked_alive_outputs)
         ))
         softmax_jacobian = torch.diagflat(survived_probas) - torch.ger(survived_probas, survived_probas)
-        grad_wrt_logits = grad_wrt_probs @ softmax_jacobian
+        grad_wrt_survived_logits = grad_wrt_probs @ softmax_jacobian
+        grad_wrt_logits = torch.zeros_like(expert_logits).scatter(0, backward_survivors_ix, grad_wrt_survived_logits)
 
         return (grad_wrt_logits, None, None, None, None, None, None, None, *flat_grad_inputs)
 
     @staticmethod
     def _run_expert_forward(expert: RemoteExpert, *args: torch.Tensor, **kwargs: torch.Tensor):
         """ Call remote expert and return flattened outputs. Compatible with concurrent autograd. """
-        flat_inputs = nested_flatten((args, kwargs))
-        return run_isolated_forward(_RemoteModuleCall, DUMMY, expert.uid, expert.host, expert.port, *flat_inputs)
+        return run_isolated_forward(_RemoteModuleCall, DUMMY, expert.uid, expert.host, expert.port, expert.stub,
+                                    *nested_flatten((args, kwargs)))
 
     @staticmethod
     def _run_expert_backward(ctx: EmulatedAutogradContext, weight: torch.Tensor, *grad_outputs: torch.Tensor):
         backward_result = run_isolated_backward(_RemoteModuleCall, ctx, *(grad * weight for grad in grad_outputs))
-        grad_dummy, no_grad_uid, no_grad_hostname, no_grad_port, *grad_inputs = backward_result
+        grad_dummy, no_grad_uid, no_grad_hostname, no_grad_port, no_grad_stub, *grad_inputs = backward_result
         return grad_inputs
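
To illustrate the gradient fix above (gradients of surviving experts are now scattered back into a full-size logits vector, so experts that did not respond receive zero gradient), here is a standalone sketch with made-up values, not part of the commit.

import torch

# Standalone illustration of the scatter-based fix; all values below are invented.
expert_logits = torch.zeros(4)                        # scores for 4 candidate experts
backward_survivors_ix = torch.tensor([0, 2])          # only experts 0 and 2 responded in backward
grad_wrt_survived_logits = torch.tensor([0.5, -1.0])  # gradient w.r.t. the survivors' logits
grad_wrt_logits = torch.zeros_like(expert_logits).scatter(0, backward_survivors_ix, grad_wrt_survived_logits)
print(grad_wrt_logits)  # tensor([ 0.5000,  0.0000, -1.0000,  0.0000])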

+ 5 - 5
hivemind/dht/__init__.py

@@ -17,13 +17,13 @@ import ctypes
 import multiprocessing as mp
 import warnings
 from typing import List, Optional
-import uvloop
 
-from .node import DHTNode, DHTID, DHTExpiration
-from .routing import get_dht_time
+import uvloop
 
-from ..client import RemoteExpert
-from ..utils import SharedFuture, find_open_port, Endpoint, Port, run_in_background, LOCALHOST
+from hivemind.client import RemoteExpert
+from hivemind.dht.node import DHTNode, DHTID, DHTExpiration
+from hivemind.dht.routing import get_dht_time
+from hivemind.utils import SharedFuture, Endpoint, run_in_background
 
 
 class DHT(mp.Process):

+ 27 - 27
hivemind/dht/dht.proto

@@ -4,52 +4,52 @@ syntax = "proto3";
 // For more info, see https://learning-at-home.readthedocs.io/en/latest/modules/dht.html or help(hivemind.dht.DHTNode)
 
 service DHT {
-    // find out recipient's DHTID and possibly update its routing table
-    rpc rpc_ping(NodeInfo) returns (NodeInfo);
+  // find out recipient's DHTID and possibly update its routing table
+  rpc rpc_ping(NodeInfo) returns (NodeInfo);
 
-    // request a node to store one or multiple data items (key - value - expiration)
-    rpc rpc_store(StoreRequest) returns (StoreResponse);
+  // request a node to store one or multiple data items (key - value - expiration)
+  rpc rpc_store(StoreRequest) returns (StoreResponse);
 
-    // for given keys, request values (if stored) or a list of peers that are likely to have them
-    rpc rpc_find(FindRequest) returns (FindResponse);
+  // for given keys, request values (if stored) or a list of peers that are likely to have them
+  rpc rpc_find(FindRequest) returns (FindResponse);
 }
 
 message NodeInfo {
-    // note: both node_id and port are optional: if specified, ask peer to add you to its routing table;
-    // if either node_id or port is absent, simply request recipient info (for client-only mode)
-    bytes node_id = 1;                // sender's own node id serialized with DHTID.to_bytes()
-    int32 rpc_port = 2;               // port to which sender listens for DHT RPCs
+  // note: both node_id and port are optional: if specified, ask peer to add you to its routing table;
+  // if either node_id or port is absent, simply request recipient info (for client-only mode)
+  bytes node_id = 1;                // sender's own node id serialized with DHTID.to_bytes()
+  int32 rpc_port = 2;               // port to which sender listens for DHT RPCs
 }
 
 message StoreRequest {
-    // three lists of the same length representing dht keys, dht values and expiration
-    repeated bytes keys = 1;          // keys in the form of DHTID.generate(raw_key).to_bytes()
-    repeated bytes values = 2;        // binary-encoded value for i-th key
-    repeated double expiration = 3;   // expirations for i-th key (type = DHTExpiration)
-    repeated bool in_cache = 4;       // if in_cache[i], store i-th key in cache, else store normally
-    NodeInfo peer = 5;                // (optional) sender's own node info, same behavior as in DHT.rpc_ping
+  // three lists of the same length representing dht keys, dht values and expiration
+  repeated bytes keys = 1;          // keys in the form of DHTID.generate(raw_key).to_bytes()
+  repeated bytes values = 2;        // binary-encoded value for i-th key
+  repeated double expiration = 3;   // expirations for i-th key (type = DHTExpiration)
+  repeated bool in_cache = 4;       // if in_cache[i], store i-th key in cache, else store normally
+  NodeInfo peer = 5;                // (optional) sender's own node info, same behavior as in DHT.rpc_ping
 }
 
 message StoreResponse {
-    repeated bool store_ok = 1;       // for every key, True means store accepted, False means store rejected/failed
-    NodeInfo peer = 2;                // respondent's node id, for you to update routing table
+  repeated bool store_ok = 1;       // for every key, True means store accepted, False means store rejected/failed
+  NodeInfo peer = 2;                // respondent's node id, for you to update routing table
 }
 
 message FindRequest {
-    repeated bytes keys = 1;          // a list of DHTID search keys encoded as bytes
-    NodeInfo peer = 2;                // optional, same behavior as in DHT.ping
+  repeated bytes keys = 1;          // a list of DHTID search keys encoded as bytes
+  NodeInfo peer = 2;                // optional, same behavior as in DHT.ping
 }
 
 message Peers {
-   // two aligned arrays: DHTIDs and Endpoints, i-th endpoint corresponds to peer with i-th node id
-   repeated bytes node_ids = 1;       // DHTID serialized with node_id.to_bytes()
-   repeated string endpoints = 2;     // e.g. 123.123.123.123:1337 or [2a21:6с8:b192:2105]:8888
+  // two aligned arrays: DHTIDs and Endpoints, i-th endpoint corresponds to peer with i-th node id
+  repeated bytes node_ids = 1;       // DHTID serialized with node_id.to_bytes()
+  repeated string endpoints = 2;     // e.g. 123.123.123.123:1337 or [2a21:6с8:b192:2105]:8888
 }
 
 message FindResponse {
-    repeated bytes values = 1;        // value for i-th key, b'' means not found locally
-    repeated double expiration = 2;   // expiration time for i-th value, only valid value is found
-    repeated Peers nearest = 3;       // peers ordered from nearest to farthest based on distance to i-th key
-    NodeInfo peer = 4;                // respondent's node id, for you to update routing table
+  repeated bytes values = 1;        // value for i-th key, b'' means not found locally
+  repeated double expiration = 2;   // expiration time for i-th value, only valid value is found
+  repeated Peers nearest = 3;       // peers ordered from nearest to farthest based on distance to i-th key
+  NodeInfo peer = 4;                // respondent's node id, for you to update routing table
 }
 

+ 5 - 4
hivemind/dht/node.py

@@ -1,14 +1,15 @@
 from __future__ import annotations
+
 import asyncio
 import random
 from collections import namedtuple
 from typing import Optional, Tuple, List, Dict, Collection, Union, Set
 from warnings import warn
 
-from .protocol import DHTProtocol
-from .routing import DHTID, DHTExpiration, DHTKey, get_dht_time, DHTValue
-from .traverse import traverse_dht
-from ..utils import Endpoint, LOCALHOST, MSGPackSerializer
+from hivemind.dht.protocol import DHTProtocol
+from hivemind.dht.routing import DHTID, DHTExpiration, DHTKey, get_dht_time, DHTValue
+from hivemind.dht.traverse import traverse_dht
+from hivemind.utils import Endpoint, LOCALHOST, MSGPackSerializer
 
 
 class DHTNode:

+ 2 - 2
hivemind/dht/protocol.py

@@ -11,8 +11,8 @@ from warnings import warn
 import grpc
 import grpc.experimental.aio
 
-from .routing import RoutingTable, DHTID, BinaryDHTValue, DHTExpiration, get_dht_time
-from ..utils import Endpoint, compile_grpc, get_logger
+from hivemind.dht.routing import RoutingTable, DHTID, BinaryDHTValue, DHTExpiration, get_dht_time
+from hivemind.utils import Endpoint, compile_grpc, get_logger
 
 logger = get_logger(__name__)
 

+ 7 - 8
hivemind/dht/routing.py

@@ -2,16 +2,18 @@
 from __future__ import annotations
 
 import hashlib
+import heapq
 import os
 import random
-
 import time
-import heapq
 from collections.abc import Iterable
 from itertools import chain
-from typing import Tuple, Optional, List, Dict, Set, Union, Any, Sequence, Iterator
+from typing import Tuple, Optional, List, Dict, Set, Union, Any, Sequence
 
-from ..utils import Endpoint, PickleSerializer
+from hivemind.utils import Endpoint, PickleSerializer
+
+DHTKey, DHTValue, DHTExpiration, BinaryDHTID, BinaryDHTValue, = Any, Any, float, bytes, bytes  # flavour types
+get_dht_time = time.time  # time used by all dht functionality. You can replace this with any infrastructure-wide time
 
 
 class RoutingTable:
@@ -160,6 +162,7 @@ class KBucket:
     A bucket containing up to :size: of DHTIDs in [lower, upper) semi-interval.
     Maps DHT node ids to their endpoints (hostname, addr)
     """
+
     def __init__(self, lower: int, upper: int, size: int, depth: int = 0):
         assert upper - lower == 2 ** (DHTID.HASH_NBYTES * 8 - depth)
         self.lower, self.upper, self.size, self.depth = lower, upper, size, depth
@@ -288,7 +291,3 @@ class DHTID(int):
 
     def __bytes__(self):
         return self.to_bytes()
-
-
-DHTKey, DHTValue, DHTExpiration, BinaryDHTID, BinaryDHTValue, = Any, Any, float, bytes, bytes  # flavour types
-get_dht_time = time.time  # time used by all dht functionality. You can replace this with any infrastructure-wide time

+ 8 - 10
hivemind/dht/traverse.py

@@ -2,9 +2,9 @@
 import asyncio
 import heapq
 from collections import Counter
-from warnings import warn
 from typing import Dict, Awaitable, Callable, Any, Tuple, List, Set, Collection, Optional
-from .routing import DHTID
+
+from hivemind.dht.routing import DHTID
 
 ROOT = 0  # alias for heap root
 
@@ -107,13 +107,13 @@ async def traverse_dht(
     if len(queries) == 0:
         return {}, dict(visited_nodes)
 
-    unfinished_queries = set(queries)                             # all queries that haven't triggered finish_search yet
-    candidate_nodes: Dict[DHTID, List[Tuple[int, DHTID]]] = {}    # heap: unvisited nodes, ordered nearest-to-farthest
-    nearest_nodes: Dict[DHTID, List[Tuple[int, DHTID]]] = {}      # heap: top-k nearest nodes, farthest-to-nearest
-    known_nodes: Dict[DHTID, Set[DHTID]] = {}                     # all nodes ever added to the heap (for deduplication)
+    unfinished_queries = set(queries)  # all queries that haven't triggered finish_search yet
+    candidate_nodes: Dict[DHTID, List[Tuple[int, DHTID]]] = {}  # heap: unvisited nodes, ordered nearest-to-farthest
+    nearest_nodes: Dict[DHTID, List[Tuple[int, DHTID]]] = {}  # heap: top-k nearest nodes, farthest-to-nearest
+    known_nodes: Dict[DHTID, Set[DHTID]] = {}  # all nodes ever added to the heap (for deduplication)
     visited_nodes: Dict[DHTID, Set[DHTID]] = dict(visited_nodes)  # where we requested get_neighbors for a given query
-    pending_tasks = set()                                         # all active tasks (get_neighbors and found_callback)
-    active_workers = Counter({q: 0 for q in queries})             # count workers that search for this query
+    pending_tasks = set()  # all active tasks (get_neighbors and found_callback)
+    active_workers = Counter({q: 0 for q in queries})  # count workers that search for this query
 
     search_finished_event = asyncio.Event()  # used to immediately stop all workers when the search is finished
     heap_updated_event = asyncio.Event()  # if a worker has no nodes to explore, it will await other workers
@@ -228,5 +228,3 @@ async def traverse_dht(
         for query in queries
     }
     return nearest_neighbors_per_query, visited_nodes
-
-

+ 2 - 2
hivemind/runtime/__init__.py

@@ -7,8 +7,8 @@ from typing import Dict
 import torch
 from prefetch_generator import BackgroundGenerator
 
-from .expert_backend import ExpertBackend
-from .task_pool import TaskPool, TaskPoolBase
+from hivemind.runtime.expert_backend import ExpertBackend
+from hivemind.runtime.task_pool import TaskPool, TaskPoolBase
 from hivemind.utils import get_logger
 
 logger = get_logger(__name__)

+ 2 - 2
hivemind/runtime/expert_backend.py

@@ -3,8 +3,8 @@ from typing import Dict, Sequence, Any, Tuple, Union
 import torch
 from torch import nn
 
-from .task_pool import TaskPool
-from ..utils import nested_flatten, nested_pack, nested_compare, BatchTensorDescriptor, DUMMY_BATCH_SIZE, nested_map
+from hivemind.runtime.task_pool import TaskPool
+from hivemind.utils import nested_flatten, nested_pack, nested_compare, BatchTensorDescriptor, DUMMY_BATCH_SIZE, nested_map
 
 
 class ExpertBackend(nn.Module):

+ 11 - 38
hivemind/server/__init__.py

@@ -1,16 +1,12 @@
 import multiprocessing as mp
-import os
 import threading
-from socket import socket, AF_INET, SOCK_STREAM, SO_REUSEADDR, SOL_SOCKET, timeout
 from typing import Dict, Optional
 
-import torch
-
-from .connection_handler import handle_connection
-from .dht_handler import DHTHandlerThread
-from .checkpoint_saver import CheckpointSaver
-from ..dht import DHT
-from ..runtime import Runtime, ExpertBackend
+from hivemind.dht import DHT
+from hivemind.runtime import Runtime, ExpertBackend
+from hivemind.server.checkpoint_saver import CheckpointSaver
+from hivemind.server.connection_handler import ConnectionHandler
+from hivemind.server.dht_handler import DHTHandlerThread
 
 
 class Server(threading.Thread):
@@ -37,12 +33,12 @@ class Server(threading.Thread):
     """
 
     def __init__(self, dht: Optional[DHT], expert_backends: Dict[str, ExpertBackend], addr='127.0.0.1',
-                 port: int = 8080, conn_handler_processes: int = 1, update_period: int = 30, start=False,
-                 checkpoint_dir=None, **kwargs):
+                 port: int = 8080, conn_handler_processes: int = 1, update_period: int = 30, start=False, checkpoint_dir=None, **kwargs):
         super().__init__()
         self.dht, self.experts, self.update_period = dht, expert_backends, update_period
         self.addr, self.port = addr, port
-        self.conn_handlers = self._create_connection_handlers(conn_handler_processes)
+        self.conn_handlers = [ConnectionHandler(f"{self.addr}:{port}", self.experts)
+                              for _ in range(conn_handler_processes)]
         if checkpoint_dir is not None:
             self.checkpoint_saver = CheckpointSaver(expert_backends, checkpoint_dir, update_period)
         else:
@@ -71,6 +67,9 @@ class Server(threading.Thread):
             if not process.is_alive():
                 process.start()
 
+        for process in self.conn_handlers:
+            process.ready.wait()
+
         self.runtime.run()
 
         for process in self.conn_handlers:
@@ -104,18 +103,6 @@ class Server(threading.Thread):
         """
         return self.runtime.ready  # mp.Event that is true if self is ready to process batches
 
-    def _create_connection_handlers(self, num_handlers):
-        sock = socket(AF_INET, SOCK_STREAM)
-        sock.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)
-        sock.bind(('', self.port))
-        sock.listen(1024)
-        sock.settimeout(self.update_period)
-
-        processes = [mp.context.ForkProcess(
-            target=socket_loop, name=f"socket_loop-{i}", args=(sock, self.experts), daemon=True)
-            for i in range(num_handlers)]
-        return processes
-
     def shutdown(self):
         """
         Gracefully terminate a hivemind server, process-safe.
@@ -130,17 +117,3 @@ class Server(threading.Thread):
             self.dht.shutdown()
 
         self.runtime.shutdown()
-
-
-def socket_loop(sock, experts):
-    """ catch connections, send tasks to processing, respond with results """
-    torch.set_num_threads(1)
-    print(f'Spawned connection handler pid={os.getpid()}')
-    while True:
-        try:
-            handle_connection(sock.accept(), experts)
-        except KeyboardInterrupt as e:
-            print(f'Socket loop has caught {type(e)}, exiting')
-            break
-        except (timeout, BrokenPipeError, ConnectionResetError, NotImplementedError):
-            continue

+ 1 - 1
hivemind/server/checkpoint_saver.py

@@ -8,7 +8,7 @@ from typing import Dict
 
 import torch
 
-from ..runtime import ExpertBackend
+from hivemind.runtime import ExpertBackend
 
 
 class CheckpointSaver(threading.Thread):

+ 35 - 0
hivemind/server/connection_handler.proto

@@ -0,0 +1,35 @@
+syntax = "proto3";
+
+
+service ConnectionHandler {
+  // Listens to incoming requests for expert computation
+  rpc info(ExpertUID) returns (ExpertInfo);
+  rpc forward(ExpertRequest) returns (ExpertResponse);
+  rpc backward(ExpertRequest) returns (ExpertResponse);
+}
+
+
+message ExpertUID {
+  string uid = 1;
+}
+
+message ExpertInfo {
+  bytes serialized_info = 1;
+}
+
+message ExpertRequest {
+  string uid = 1;
+  repeated Tensor tensors = 2;
+}
+
+message ExpertResponse {
+  repeated Tensor tensors = 2;
+}
+
+message Tensor {
+  bytes buffer = 1;
+  repeated uint32 size = 2;
+  bool requires_grad = 3;
+  string dtype = 4;
+}
+
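
As a hedged illustration of how this proto is consumed (not part of the commit): the file is compiled at import time by hivemind/utils/grpc.py further down, so a bare client can query an expert's schema without going through RemoteExpert, assuming a ConnectionHandler serving "expert.0" is listening on 127.0.0.1:8080.

import pickle
import grpc
from hivemind.utils.grpc import runtime_pb2, runtime_grpc

# Assumption: a ConnectionHandler for expert "expert.0" is listening on 127.0.0.1:8080.
channel = grpc.insecure_channel("127.0.0.1:8080")
stub = runtime_grpc.ConnectionHandlerStub(channel)
response = stub.info(runtime_pb2.ExpertUID(uid="expert.0"))
info = pickle.loads(response.serialized_info)  # schema dict produced by ExpertBackend.get_info()
print(info.keys())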

+ 69 - 27
hivemind/server/connection_handler.py

@@ -1,29 +1,71 @@
-from socket import socket
-from typing import Tuple, Dict
+import asyncio
+import multiprocessing as mp
+import os
+import pickle
+from typing import Dict
+
+import grpc.experimental.aio
+import torch
+import uvloop
 
 from hivemind.runtime.expert_backend import ExpertBackend
-from hivemind.utils import PytorchSerializer, Connection
-
-
-def handle_connection(connection_tuple: Tuple[socket, str], experts: Dict[str, ExpertBackend]):
-    with Connection(*connection_tuple) as connection:
-        try:
-            header = connection.recv_header()
-            payload = PytorchSerializer.loads(connection.recv_raw())
-
-            if header == 'fwd_':
-                uid, inputs = payload
-                response = experts[uid].forward_pool.submit_task(*inputs).result()
-            elif header == 'bwd_':
-                uid, inputs_and_grad_outputs = payload
-                response = experts[uid].backward_pool.submit_task(*inputs_and_grad_outputs).result()
-            elif header == 'info':
-                uid = payload
-                response = experts[uid].get_info()
-            else:
-                raise NotImplementedError(f"Unknown header: {header}")
-
-            connection.send_raw('rest', PytorchSerializer.dumps(response))
-        except RuntimeError:
-            # socket connection broken
-            pass
+from hivemind.utils import get_logger, serialize_torch_tensor, deserialize_torch_tensor, Endpoint, runtime_pb2, runtime_grpc
+
+logger = get_logger(__name__)
+
+
+class ConnectionHandler(mp.Process):
+    """
+    A process that accepts incoming requests to experts and submits them into the corresponding TaskPool.
+
+    :note: ConnectionHandler is designed so as to allow using multiple handler processes for the same port.
+    :param listen_on: network interface, e.g. "0.0.0.0:1337" or "localhost:*" (* means pick any port) or "[::]:7654"
+    :param experts: a dict [UID -> ExpertBackend] with all active experts
+    """
+
+    def __init__(self, listen_on: Endpoint, experts: Dict[str, ExpertBackend]):
+        super().__init__()
+        self.listen_on, self.experts = listen_on, experts
+        self.ready = mp.Event()
+
+    def run(self):
+        torch.set_num_threads(1)
+        uvloop.install()
+        loop = asyncio.new_event_loop()
+
+        async def _run():
+            grpc.experimental.aio.init_grpc_aio()
+            logger.debug(f'Starting, pid {os.getpid()}')
+            server = grpc.experimental.aio.server(options=[
+                ('grpc.so_reuseport', 1),
+                ('grpc.max_send_message_length', -1),
+                ('grpc.max_receive_message_length', -1)
+            ])
+            runtime_grpc.add_ConnectionHandlerServicer_to_server(self, server)
+
+            found_port = server.add_insecure_port(self.listen_on)
+            assert found_port != 0, f"Failed to listen to {self.listen_on}"
+
+            await server.start()
+            self.ready.set()
+            await server.wait_for_termination()
+            logger.debug(f"ConnectionHandler terminated: (pid={os.getpid()})")
+
+        loop.run_until_complete(_run())
+
+    async def info(self, request: runtime_pb2.ExpertUID, context: grpc.ServicerContext):
+        return runtime_pb2.ExpertInfo(serialized_info=pickle.dumps(self.experts[request.uid].get_info()))
+
+    async def forward(self, request: runtime_pb2.ExpertRequest, context: grpc.ServicerContext):
+        inputs = [deserialize_torch_tensor(tensor) for tensor in request.tensors]
+        future = self.experts[request.uid].forward_pool.submit_task(*inputs)
+        response = await future.async_result()
+        serialized_response = [serialize_torch_tensor(tensor) for tensor in response]
+        return runtime_pb2.ExpertResponse(tensors=serialized_response)
+
+    async def backward(self, request: runtime_pb2.ExpertRequest, context: grpc.ServicerContext):
+        inputs_and_grad_outputs = [deserialize_torch_tensor(tensor) for tensor in request.tensors]
+        future = self.experts[request.uid].backward_pool.submit_task(*inputs_and_grad_outputs)
+        response = await future.async_result()
+        serialized_response = [serialize_torch_tensor(tensor) for tensor in response]
+        return runtime_pb2.ExpertResponse(tensors=serialized_response)
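
A short sketch (assumptions noted in comments, not part of the commit) of how these handlers are meant to be spawned: because the aio server is created with grpc.so_reuseport, several ConnectionHandler processes can bind the same port, which is what Server.run() above relies on.

from hivemind.server.connection_handler import ConnectionHandler

# Sketch only: an empty experts dict means the handlers start but serve nothing;
# in Server, the dict maps expert uids to ExpertBackend instances.
handlers = [ConnectionHandler("127.0.0.1:8080", experts={}) for _ in range(4)]
for handler in handlers:
    handler.start()        # each process builds its own aio gRPC server with so_reuseport
for handler in handlers:
    handler.ready.wait()   # mirrors Server.run(): proceed only once every handler is listening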

+ 2 - 3
hivemind/server/dht_handler.py

@@ -1,12 +1,11 @@
 import threading
 import time
 
-from ..dht import DHT
+from hivemind.dht import DHT
 
 
 class DHTHandlerThread(threading.Thread):
-    def __init__(self, experts, dht: DHT,
-                 update_period: int = 5, addr: str = '127.0.0.1', port: int = 8080):
+    def __init__(self, experts, dht: DHT, update_period: int = 5, addr: str = '127.0.0.1', port: int = 8080):
         super(DHTHandlerThread, self).__init__()
         self.port = port
         self.addr = addr

+ 10 - 10
hivemind/utils/__init__.py

@@ -1,10 +1,10 @@
-from .connection import *
-from .data import *
-from .nested import *
-from .tensor_descr import *
-from .serializer import *
-from .shared_future import *
-from .threading import *
-from .autograd import *
-from .grpc import *
-from .logging import get_logger
+from hivemind.utils.connection import *
+from hivemind.utils.data import *
+from hivemind.utils.nested import *
+from hivemind.utils.tensor_descr import *
+from hivemind.utils.serializer import *
+from hivemind.utils.shared_future import *
+from hivemind.utils.threading import *
+from hivemind.utils.autograd import *
+from hivemind.utils.grpc import *
+from hivemind.utils.logging import get_logger

+ 3 - 1
hivemind/utils/autograd.py

@@ -10,7 +10,7 @@ import numpy as np
 import torch
 import torch.autograd.function
 
-from .threading import run_in_background
+from hivemind.utils.threading import run_in_background
 
 
 class EmulatedAutogradContext(torch.autograd.function._ContextMethodMixin):
@@ -19,6 +19,7 @@ class EmulatedAutogradContext(torch.autograd.function._ContextMethodMixin):
     such as running several parallel backwards or transferring backward to a separate device.
     This class is not tested outside its use cases in RemoteMixtureOfExperts and we do not recommend using it elsewhere.
     """
+
     @property
     def saved_tensors(self):
         return tuple(self.to_save)
@@ -71,6 +72,7 @@ class _ParallelApplyFunction(torch.autograd.Function):
     Please do not call this function directly. Use apply_with_parallel_backward instead.
     Unlike default pytorch behavior, the backward pass for each function will also happen in parallel.
     """
+
     @staticmethod
     def forward(ctx, func: torch.autograd.Function, num_calls: int, num_args_per_call: int,
                 output_strides_ph: Future, *args_flat) -> Tuple[torch.Tensor, ...]:

+ 1 - 53
hivemind/utils/connection.py

@@ -1,63 +1,11 @@
 import socket
-from contextlib import AbstractContextManager, closing
-from typing import Tuple
+from contextlib import closing
 
 Hostname, Port = str, int  # flavour types
 Endpoint = str  # e.g. 1.2.3.4:1337 or [2a21:6с8:b192:2105]:8888, https://networkengineering.stackexchange.com/a/9435
 LOCALHOST = '127.0.0.1'
 
 
-class Connection(AbstractContextManager):
-    header_size = 4  # number of characters in all headers
-    payload_length_size = 8  # number of bytes used to encode payload length
-
-    __slots__ = ('conn', 'addr')
-
-    def __init__(self, conn: socket, addr: Tuple[Hostname, Port]):
-        self.conn, self.addr = conn, addr
-
-    @staticmethod
-    def create(host: str, port: int):
-        sock = socket.socket()
-        addr = (host, port)
-        sock.connect(addr)
-        return Connection(sock, addr)
-
-    def send_raw(self, header: str, content: bytes):
-        self.conn.send(header.encode())
-        self.conn.send(len(content).to_bytes(self.payload_length_size, byteorder='big'))
-
-        total_sent = 0
-        while total_sent < len(content):
-            sent = self.conn.send(content[total_sent:])
-            if sent == 0:
-                raise RuntimeError("socket connection broken")
-            total_sent = total_sent + sent
-
-    def recv_header(self) -> str:
-        return self.conn.recv(self.header_size).decode()
-
-    def recv_raw(self, max_package: int = 2048) -> bytes:
-        length = int.from_bytes(self.conn.recv(self.payload_length_size), byteorder='big')
-        chunks = []
-        bytes_recd = 0
-        while bytes_recd < length:
-            chunk = self.conn.recv(min(length - bytes_recd, max_package))
-            if chunk == b'':
-                raise RuntimeError("socket connection broken")
-            chunks.append(chunk)
-            bytes_recd = bytes_recd + len(chunk)
-        ret = b''.join(chunks)
-        assert len(ret) == length
-        return ret
-
-    def recv_message(self) -> Tuple[str, bytes]:
-        return self.recv_header(), self.recv_raw()
-
-    def __exit__(self, *exc_info):
-        self.conn.close()
-
-
 def find_open_port(params=(socket.AF_INET, socket.SOCK_STREAM), opt=(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)):
     """ Finds a tcp port that can be occupied with a socket with *params and use *opt options """
     try:

+ 0 - 10
hivemind/utils/data.py

@@ -1,13 +1,3 @@
-import numpy as np
 import torch
 
-
-def check_numpy(x):
-    """ Makes sure x is a numpy array """
-    if isinstance(x, torch.Tensor):
-        return x.detach().cpu().numpy()
-    else:
-        return np.asarray(x)
-
-
 DUMMY = torch.empty(0, requires_grad=True)

+ 24 - 1
hivemind/utils/grpc.py

@@ -5,9 +5,12 @@ import functools
 import os
 import sys
 import tempfile
-from typing import Tuple
 from argparse import Namespace
+from typing import Tuple
+
 import grpc_tools.protoc
+import numpy as np
+import torch
 
 
 @functools.lru_cache(maxsize=None)
@@ -42,3 +45,23 @@ def compile_grpc(proto: str, *args: str) -> Tuple[Namespace, Namespace]:
         finally:
             if sys.path.pop() != build_dir:
                 raise ImportError("Something changed sys.path while compile_grpc was in progress.")
+
+
+with open(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'server', 'connection_handler.proto')) as f_proto:
+    runtime_pb2, runtime_grpc = compile_grpc(f_proto.read())
+
+
+def serialize_torch_tensor(tensor: torch.Tensor) -> runtime_pb2.Tensor:
+    array = tensor.numpy()
+    proto = runtime_pb2.Tensor(
+        buffer=array.tobytes(),
+        size=array.shape,
+        dtype=array.dtype.name,
+        requires_grad=tensor.requires_grad)
+    return proto
+
+
+def deserialize_torch_tensor(tensor: runtime_pb2.Tensor) -> torch.Tensor:
+    # TODO avoid copying the array (need to silence pytorch warning, because array is not writable)
+    array = np.frombuffer(tensor.buffer, dtype=np.dtype(tensor.dtype)).copy()
+    return torch.as_tensor(array).view(tuple(tensor.size)).requires_grad_(tensor.requires_grad)
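
A minimal round-trip sketch for the serialization helpers added above (not part of the commit):

import torch
from hivemind.utils.grpc import serialize_torch_tensor, deserialize_torch_tensor

# Round-trip a tensor through runtime_pb2.Tensor: raw bytes + shape + dtype + requires_grad.
original = torch.randn(2, 3)
proto = serialize_torch_tensor(original)
restored = deserialize_torch_tensor(proto)
assert restored.shape == original.shape and torch.allclose(original, restored)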

+ 1 - 1
hivemind/utils/serializer.py

@@ -57,7 +57,7 @@ class MSGPackSerializer(SerializerBase):
 
     @staticmethod
     def dumps(obj: object) -> bytes:
-        return umsgpack.dumps(obj, use_bin_type=False) # TODO strict https://github.com/msgpack/msgpack-python/pull/158
+        return umsgpack.dumps(obj, use_bin_type=False)  # TODO strict https://github.com/msgpack/msgpack-python/pull/158
 
     @staticmethod
     def loads(buf: bytes) -> object:

+ 48 - 6
hivemind/utils/shared_future.py

@@ -2,6 +2,7 @@ import multiprocessing as mp
 import multiprocessing.connection
 from concurrent.futures import Future, CancelledError
 from warnings import warn
+import asyncio
 
 
 class SharedFuture(Future):
@@ -22,14 +23,21 @@ class SharedFuture(Future):
         connection1, connection2 = mp.Pipe()
         return cls(connection1), cls(connection2)
 
+    def poll_and_recv(self, timeout):
+        available = self.connection.poll(timeout)
+        if not available:
+            raise TimeoutError
+        try:
+            status, payload = self.connection.recv()
+            self.connection.close()
+        except BrokenPipeError as e:
+            status, payload = self.STATE_EXCEPTION, e
+        return status, payload
+
     def _recv(self, timeout):
+
         if self.state in (self.STATE_PENDING, self.STATE_RUNNING):
-            if not self.connection.poll(timeout):
-                raise TimeoutError()
-            try:
-                status, payload = self.connection.recv()
-            except BrokenPipeError as e:
-                status, payload = self.STATE_EXCEPTION, e
+            status, payload = self.poll_and_recv(timeout)
 
             assert status in self.STATES
             self.state = status
@@ -47,6 +55,7 @@ class SharedFuture(Future):
         try:
             self.state, self._result = self.STATE_FINISHED, result
             self.connection.send((self.STATE_FINISHED, result))
+            self.connection.close()
             return True
         except BrokenPipeError:
             return False
@@ -55,6 +64,7 @@ class SharedFuture(Future):
         try:
             self.state, self._exception = self.STATE_EXCEPTION, exception
             self.connection.send((self.STATE_EXCEPTION, exception))
+            self.connection.close()
             return True
         except BrokenPipeError:
             return False
@@ -103,3 +113,35 @@ class SharedFuture(Future):
             return "<MPFuture at 0x{:x} state=finished raised {}>".format(id(self), type(self._exception))
         else:
             return "<MPFuture at 0x{:x} state={}>".format(id(self), self.state)
+
+    async def _async_recv(self, timeout):
+        loop = asyncio.get_running_loop()
+
+        if self.state in (self.STATE_PENDING, self.STATE_RUNNING):
+            status, payload = await loop.run_in_executor(None, self.poll_and_recv, timeout)
+
+            assert status in self.STATES
+            self.state = status
+
+            if status == self.STATE_FINISHED:
+                self._result = payload
+            elif status == self.STATE_EXCEPTION:
+                self._exception = payload
+            elif status in (self.STATE_RUNNING, self.STATE_CANCELLED):
+                pass  # only update self.state
+            else:
+                raise ValueError("Result status should not be self.STATE_PENDING")
+
+    async def async_result(self, timeout=None):
+        await self._async_recv(timeout)
+        if self.state == self.STATE_FINISHED:
+            return self._result
+        elif self.state == self.STATE_EXCEPTION:
+            raise self._exception
+        else:
+            assert self.state == self.STATE_CANCELLED
+            raise CancelledError()
+
+    async def async_exception(self, timeout=None):
+        await self._async_recv(timeout)
+        return self._exception
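
A hedged sketch of the new awaitable API (not part of the commit): poll_and_recv runs in the default executor, so awaiting a SharedFuture does not block the event loop.

import asyncio
from hivemind.utils.shared_future import SharedFuture

async def main():
    sender, receiver = SharedFuture.make_pair()
    sender.set_result(42)                 # normally fulfilled by another process (e.g. a TaskPool)
    print(await receiver.async_result())  # 42; ConnectionHandler awaits TaskPool results this way

asyncio.run(main())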

+ 2 - 1
hivemind/utils/threading.py

@@ -14,9 +14,11 @@ def run_in_background(func: callable, *args, **kwargs) -> Future:
 
 def run_forever(func: callable, *args, **kwargs):
     """ A function that runs a :func: in background forever. Returns a future that catches exceptions """
+
     def repeat():
         while True:
             func(*args, **kwargs)
+
     return run_in_background(repeat)
 
 
@@ -65,4 +67,3 @@ def run_and_await_k(jobs: List[callable], k: int,
             future.cancel()
             outputs[index] = future.result() if not future.exception() else future.exception()
     return outputs
-

+ 0 - 1
requirements.txt

@@ -2,7 +2,6 @@ torch>=1.3.0
 joblib>=0.13
 numpy>=1.17
 prefetch_generator>=1.0.1
-pytest
 umsgpack
 uvloop>=0.14.0
 grpcio

+ 3 - 2
tests/benchmark_throughput.py

@@ -6,9 +6,9 @@ import time
 
 import torch
 from test_utils import layers, print_device_info, increase_file_limit
-from hivemind import find_open_port
 
 import hivemind
+from hivemind import find_open_port
 
 
 def client_process(can_start, benchmarking_failed, port, num_experts, batch_size, hid_dim, num_batches, backprop=True):
@@ -142,7 +142,8 @@ if __name__ == "__main__":
         benchmark_throughput(backprop=False, num_clients=512, batch_size=512,
                              max_batch_size=8192, num_batches_per_client=args.num_batches_per_client)
     elif args.preset == 'minimalistic':
-        benchmark_throughput(num_experts=1, num_clients=1, num_handlers=1)
+        benchmark_throughput(num_experts=1, num_clients=1, num_handlers=1,
+                             num_batches_per_client=args.num_batches_per_client)
     elif args.preset == 'nop':
         benchmark_throughput(expert_cls='nop', backprop=False, num_batches_per_client=args.num_batches_per_client)
     else:

+ 8 - 7
tests/test_utils/run_server.py

@@ -1,11 +1,12 @@
-import resource
-from contextlib import contextmanager
-import multiprocessing as mp
 import argparse
+import multiprocessing as mp
+from contextlib import contextmanager
 
+import resource
 import torch
+
 import hivemind
-from .layers import name_to_block, name_to_input
+from tests.test_utils.layers import name_to_block, name_to_input
 
 
 def make_dummy_server(interface='0.0.0.0', port=None, num_experts=1, expert_cls='ffn', hidden_dim=1024,
@@ -147,12 +148,12 @@ if __name__ == '__main__':
     parser.add_argument('--no_optimizer', action='store_true', help='if specified, all optimizers use learning rate=0')
     parser.add_argument('--no_dht', action='store_true', help='if specified, the server will not be attached to a dht')
     parser.add_argument('--initial_peers', type=str, default="[]", required=False, help='a list of peers that will'
-                        ' introduce this node to the dht, e.g. [("1.2.3.4", 1337), ("127.0.0.1", 4321)]')
+                                                                                        ' introduce this node to the dht, e.g. [("1.2.3.4", 1337), ("127.0.0.1", 4321)]')
     parser.add_argument('--dht_port', type=int, default=None, required=False, help='DHT node will listen on this port')
     parser.add_argument('--root_port', type=int, default=None, required=False, help='If this server does not have peers'
-                        ', it will create a virtual dht node on this port. You can then use this node as initial peer.')
+                                                                                    ', it will create a virtual dht node on this port. You can then use this node as initial peer.')
     parser.add_argument('--increase_file_limit', action='store_true', help='On *nix, this will increase the max number'
-                        ' of processes a server can spawn before hitting "Too many open files"; Use at your own risk.')
+                                                                           ' of processes a server can spawn before hitting "Too many open files"; Use at your own risk.')
 
     args = vars(parser.parse_args())