4 lat temu · f0c5627139
--- a/benchmarks/benchmark_dht.py
+++ b/benchmarks/benchmark_dht.py
@@ -25,7 +25,7 @@ def benchmark_dht(num_peers: int, initial_peers: int, num_experts: int, expert_b
 
				     for _ in trange(num_peers):
			
 
				         neighbors = [f'0.0.0.0:{node.port}' for node in random.sample(peers, min(initial_peers, len(peers)))]
			
 
				         peer = hivemind.DHT(initial_peers=neighbors, start=True, wait_timeout=wait_timeout,
			
 
				-                            expiration=expiration, listen_on=f'0.0.0.0:*')
			
 
				+                            listen_on=f'0.0.0.0:*')
			
 
				         peers.append(peer)
			
 
				 
			
 
				     store_peer, get_peer = peers[-2:]
			
@@ -43,7 +43,8 @@ def benchmark_dht(num_peers: int, initial_peers: int, num_experts: int, expert_b
 
				     for start in trange(0, num_experts, expert_batch_size):
			
 
				         store_start = time.perf_counter()
			
 
				         endpoints.append(random_endpoint())
			
 
				-        store_ok = hivemind.declare_experts(store_peer, expert_uids[start: start + expert_batch_size], endpoints[-1])
			
 
				+        store_ok = hivemind.declare_experts(store_peer, expert_uids[start: start + expert_batch_size], endpoints[-1],
			
 
				+                                            expiration=expiration)
			
 
				         successes = store_ok.values()
			
 
				         total_store_time += time.perf_counter() - store_start
			
 
				 
			
--- a/docs/user/quickstart.md
+++ b/docs/user/quickstart.md
@@ -154,7 +154,7 @@ dht = hivemind.DHT(initial_peers=["localhost:1338"], listen=False, start=True)
 
				 # note: listen=False means that your peer will operate in "client only" mode: 
			
 
				 # this means that it can request other peers, but will not accept requests in return 
			
 
				 
			
 
				-expert1, expert4 = dht.get_experts(["expert.1", "expert.4"])
			
 
				+expert1, expert4 = hivemind.get_experts(dht, ["expert.1", "expert.4"])
			
 
				 assert expert1 is not None and expert4 is not None, "server hasn't declared experts (yet?)"
			
 
				 ```
			
 
				 
			
--- a/hivemind/client/averaging/__init__.py
+++ b/hivemind/client/averaging/__init__.py
@@ -183,7 +183,7 @@ class DecentralizedAverager(mp.Process, averaging_pb2_grpc.DecentralizedAveragin
 
				                     self._port.value = found_port
			
 
				                     await server.start()
			
 
				                 else:
			
 
				-                    logger.info(f"The averager running in an experimental client mode, please report any bugs.")
			
 
				+                    logger.debug(f"The averager is running in client mode.")
			
 
				 
			
 
				                 self._matchmaking = Matchmaking(self.endpoint, self.schema_hash, self.dht, **self.matchmaking_kwargs,
			
 
				                                                 client_mode=not self.listen)
			
@@ -422,47 +422,50 @@ class DecentralizedAverager(mp.Process, averaging_pb2_grpc.DecentralizedAveragin
 
				         return future.result() if wait else future
			
 
				 
			
 
				     async def _load_state_from_peers(self, future: MPFuture):
			
 
				-        key_manager = self._matchmaking.group_key_manager
			
 
				-        peer_priority, _ = self.dht.get(f"{key_manager.prefix}.all_averagers", latest=True) or ({}, None)
			
 
				-        peer_priority = {peer: float(info.value) for peer, info in peer_priority.items()
			
 
				-                         if isinstance(info, ValueWithExpiration) and isinstance(info.value, (float, int))}
			
 
				-
			
 
				-        if not isinstance(peer_priority, dict) or len(peer_priority) == 0:
			
 
				-            logger.info(f"Averager could not load state from peers: peer dict is absent or corrupted {peer_priority}.")
			
 
				-            future.set_result(None)
			
 
				-            return
			
 
				-
			
 
				-        metadata = None
			
 
				-        for peer in sorted(peer_priority.keys(), key=peer_priority.get, reverse=True):
			
 
				-            if peer != self.endpoint:
			
 
				-                logger.info(f"Downloading parameters from peer {peer}")
			
 
				-                stream = None
			
 
				-                try:
			
 
				-                    leader_stub = ChannelCache.get_stub(peer, averaging_pb2_grpc.DecentralizedAveragingStub, aio=True)
			
 
				-                    stream = leader_stub.rpc_download_state(averaging_pb2.DownloadRequest())
			
 
				-                    current_tensor_parts, tensors = [], []
			
 
				-                    async for message in stream:
			
 
				-                        if message.metadata:
			
 
				-                            metadata = self.serializer.loads(message.metadata)
			
 
				-                        if message.tensor_part.dtype and current_tensor_parts:
			
 
				-                            # tensor_part.dtype indicates the start of the new tensor, so we should wrap up this one
			
 
				+        try:
			
 
				+            key_manager = self._matchmaking.group_key_manager
			
 
				+            peer_priority, _ = self.dht.get(f"{key_manager.prefix}.all_averagers", latest=True) or ({}, None)
			
 
				+            peer_priority = {peer: float(info.value) for peer, info in peer_priority.items()
			
 
				+                             if isinstance(info, ValueWithExpiration) and isinstance(info.value, (float, int))}
			
 
				+
			
 
				+            if not isinstance(peer_priority, dict) or len(peer_priority) == 0:
			
 
				+                logger.info(f"Averager could not load state from peers: peer dict empty or corrupted {peer_priority}.")
			
 
				+                future.set_result(None)
			
 
				+                return
			
 
				+
			
 
				+            metadata = None
			
 
				+            for peer in sorted(peer_priority.keys(), key=peer_priority.get, reverse=True):
			
 
				+                if peer != self.endpoint:
			
 
				+                    logger.info(f"Downloading parameters from peer {peer}")
			
 
				+                    stream = None
			
 
				+                    try:
			
 
				+                        stub = ChannelCache.get_stub(peer, averaging_pb2_grpc.DecentralizedAveragingStub, aio=True)
			
 
				+                        stream = stub.rpc_download_state(averaging_pb2.DownloadRequest())
			
 
				+                        current_tensor_parts, tensors = [], []
			
 
				+                        async for message in stream:
			
 
				+                            if message.metadata:
			
 
				+                                metadata = self.serializer.loads(message.metadata)
			
 
				+                            if message.tensor_part.dtype and current_tensor_parts:
			
 
				+                                # tensor_part.dtype indicates the start of the new tensor, so we should wrap up this one
			
 
				+                                tensors.append(deserialize_torch_tensor(combine_from_streaming(current_tensor_parts)))
			
 
				+                                current_tensor_parts = []
			
 
				+                            current_tensor_parts.append(message.tensor_part)
			
 
				+                        if current_tensor_parts:
			
 
				                             tensors.append(deserialize_torch_tensor(combine_from_streaming(current_tensor_parts)))
			
 
				-                            current_tensor_parts = []
			
 
				-                        current_tensor_parts.append(message.tensor_part)
			
 
				-                    if current_tensor_parts:
			
 
				-                        tensors.append(deserialize_torch_tensor(combine_from_streaming(current_tensor_parts)))
			
 
				-                    future.set_result((metadata, tensors))
			
 
				-                    self.last_updated = get_dht_time()
			
 
				-                    return
			
 
				-                except grpc.aio.AioRpcError as e:
			
 
				-                    logger.info(f"Failed to download state from {peer} - {e}")
			
 
				-                finally:
			
 
				-                    if stream is not None:
			
 
				-                        await stream.code()
			
 
				+                        logger.info(f"Finished downloading state from {peer}")
			
 
				+                        future.set_result((metadata, tensors))
			
 
				+                        self.last_updated = get_dht_time()
			
 
				+                        return
			
 
				+                    except BaseException as e:
			
 
				+                        logger.exception(f"Failed to download state from {peer} - {repr(e)}")
			
 
				+                    finally:
			
 
				+                        if stream is not None:
			
 
				+                            await stream.code()
			
 
				 
			
 
				-        else:
			
 
				-            logger.warning("Averager could not load state from peers: found no active peers.")
			
 
				-            future.set_result(None)
			
 
				+        finally:
			
 
				+            if not future.done():
			
 
				+                logger.warning("Averager could not load state from peers: all requests have failed.")
			
 
				+                future.set_result(None)
			
 
				 
			
 
				     def get_group_bits(self, wait: bool = True):
			
 
				         """
			
--- a/hivemind/client/beam_search.py
+++ b/hivemind/client/beam_search.py
@@ -4,7 +4,7 @@ from collections import deque
 
				 from functools import partial
			
 
				 from typing import Sequence, Optional, List, Tuple, Dict, Deque, Union, Set, Iterator
			
 
				 
			
 
				-from hivemind.dht import DHT, DHTNode
			
 
				+from hivemind.dht import DHT, DHTNode, DHTExpiration
			
 
				 from hivemind.client.expert import RemoteExpert
			
 
				 from hivemind.server.expert_uid import (ExpertUID, ExpertPrefix, FLAT_EXPERT, UidEndpoint, Score, Coordinate,
			
 
				                                         PREFIX_PATTERN, UID_DELIMITER, is_valid_prefix)
			
@@ -22,7 +22,7 @@ class MoEBeamSearcher:
 
				         * optional prefix that determines expert role, experiment name, etc.
			
 
				         * one or more integers that determine that expert's position in an N-dimensional grid
			
 
				 
			
 
				-    A hivemind.Server can ``DHT.declare_experts(expert_uids: List[str])`` to make its experts visible to everyone.
			
 
				+    A hivemind.Server can ``declare_experts(dht, expert_uids: List[str])`` to make its experts visible to everyone.
			
 
				     When declaring experts, DHT will store each expert's uid and all its prefixes until :expiration: (specified at init)
			
 
				     For instance, declaring "ffn_expert.98.76.54.32.10" will store the following keys in a DHT:
			
 
				     ``"ffn_expert.98", "ffn_expert.98.76", "ffn_expert.98.76.54", ..., "ffn_expert.98.76.54.32.10"``
			
@@ -63,8 +63,8 @@ class MoEBeamSearcher:
 
				          Though, this is a pathological case (e.g. only 90 experts in an oversized 100x100 grid) that should be avoided.
			
 
				     """
			
 
				 
			
 
				-    def __init__(self, dht: DHT, uid_prefix: ExpertPrefix, grid_size: Tuple[int, ...],
			
 
				-                 num_workers: Optional[int] = None, negative_caching: bool = True, **kwargs):
			
 
				+    def __init__(self, dht: DHT, uid_prefix: ExpertPrefix, grid_size: Sequence[int], num_workers: Optional[int] = None,
			
 
				+                 negative_caching: bool = True, cache_expiration: DHTExpiration = 300, **kwargs):
			
 
				         if not uid_prefix.endswith(UID_DELIMITER):
			
 
				             uid_prefix += UID_DELIMITER
			
 
				             logger.info(f"Prefix must end with '{UID_DELIMITER}'. Changing to {uid_prefix}{UID_DELIMITER}")
			
@@ -72,7 +72,8 @@ class MoEBeamSearcher:
 
				         self.dht = dht
			
 
				         self.uid_prefix, self.grid_size = uid_prefix, grid_size
			
 
				         self.total_grid_size = sum(grid_size)
			
 
				-        self.negative_caching, self.num_workers, self.dht_kwargs = negative_caching, num_workers, kwargs
			
 
				+        self.negative_caching, self.cache_expiration = negative_caching, cache_expiration
			
 
				+        self.num_workers, self.dht_kwargs = num_workers, kwargs
			
 
				 
			
 
				     def get_initial_beam(self, scores: Sequence[float], beam_size: int, return_future: bool = False
			
 
				                          ) -> List[Tuple[Score, ExpertPrefix, Dict[Coordinate, UidEndpoint]]]:
			
@@ -84,12 +85,14 @@ class MoEBeamSearcher:
 
				         """
			
 
				         return self.dht.run_coroutine(partial(self._get_initial_beam, prefix=self.uid_prefix, beam_size=beam_size,
			
 
				                                               scores=tuple(scores), negative_caching=self.negative_caching,
			
 
				-                                              num_workers=self.num_workers), return_future)
			
 
				+                                              cache_expiration=self.cache_expiration, num_workers=self.num_workers),
			
 
				+                                      return_future)
			
 
				 
			
 
				     @staticmethod
			
 
				-    async def _get_initial_beam(dht: DHT, node: DHTNode, prefix: ExpertPrefix, beam_size: int,
			
 
				-                                scores: Tuple[float, ...], negative_caching: bool, num_workers: Optional[int] = None
			
 
				-                                ) -> List[Tuple[Score, ExpertPrefix, Dict[Coordinate, UidEndpoint]]]:
			
 
				+    async def _get_initial_beam(
			
 
				+            dht: DHT, node: DHTNode, prefix: ExpertPrefix, beam_size: int, scores: Tuple[float, ...],
			
 
				+            negative_caching: bool, cache_expiration: DHTExpiration, num_workers: Optional[int] = None,
			
 
				+    ) -> List[Tuple[Score, ExpertPrefix, Dict[Coordinate, UidEndpoint]]]:
			
 
				         num_workers = num_workers or dht.max_workers or beam_size
			
 
				         beam: List[Tuple[Score, ExpertPrefix, Dict[Coordinate, UidEndpoint]]] = []
			
 
				         unattempted_indices: List[Coordinate] = sorted(range(len(scores)), key=scores.__getitem__)  # from worst to best
			
@@ -115,7 +118,7 @@ class MoEBeamSearcher:
 
				                 elif maybe_prefix_data is None and negative_caching:
			
 
				                     logger.debug(f"DHT negative caching: storing a 'no prefix' entry for {pending_best_prefix}")
			
 
				                     asyncio.create_task(node.store(pending_best_prefix, subkey=-1, value=None,
			
 
				-                                                   expiration_time=get_dht_time() + dht.default_expiration))
			
 
				+                                                   expiration_time=get_dht_time() + cache_expiration))
			
 
				 
			
 
				             except asyncio.CancelledError:
			
 
				                 for _, pending_task in pending_tasks:
			
@@ -137,12 +140,14 @@ class MoEBeamSearcher:
 
				             assert is_valid_prefix(prefix), f"prefix '{prefix}' is invalid, it must follow {PREFIX_PATTERN.pattern}"
			
 
				         return self.dht.run_coroutine(partial(
			
 
				             self._get_active_successors, prefixes=list(prefixes), grid_size=grid_size,
			
 
				-            negative_caching=self.negative_caching, num_workers=self.num_workers), return_future=return_future)
			
 
				+            negative_caching=self.negative_caching, cache_expiration=self.cache_expiration,
			
 
				+            num_workers=self.num_workers), return_future=return_future)
			
 
				 
			
 
				     @staticmethod
			
 
				-    async def _get_active_successors(dht: DHT, node: DHTNode, prefixes: List[ExpertPrefix], grid_size: Optional[int],
			
 
				-                                     negative_caching: bool, num_workers: Optional[int] = None
			
 
				-                                     ) -> Dict[ExpertPrefix, Dict[Coordinate, UidEndpoint]]:
			
 
				+    async def _get_active_successors(
			
 
				+            dht: DHT, node: DHTNode, prefixes: List[ExpertPrefix], grid_size: Optional[int],
			
 
				+            negative_caching: bool, cache_expiration: DHTExpiration, num_workers: Optional[int] = None
			
 
				+    ) -> Dict[ExpertPrefix, Dict[Coordinate, UidEndpoint]]:
			
 
				         grid_size = grid_size or float('inf')
			
 
				         num_workers = num_workers or min(len(prefixes), dht.max_workers or len(prefixes))
			
 
				         dht_responses = await node.get_many(keys=prefixes, num_workers=num_workers)
			
@@ -157,7 +162,7 @@ class MoEBeamSearcher:
 
				                 if found is None and negative_caching:
			
 
				                     logger.debug(f"DHT negative caching: storing a 'no prefix' entry for {prefix}")
			
 
				                     asyncio.create_task(node.store(prefix, subkey=-1, value=None,
			
 
				-                                                   expiration_time=get_dht_time() + dht.default_expiration))
			
 
				+                                                   expiration_time=get_dht_time() + cache_expiration))
			
 
				         return successors
			
 
				 
			
 
				     def find_best_experts(self, grid_scores: Sequence[Sequence[float]], beam_size: int, return_future: bool = False
			
@@ -176,14 +181,16 @@ class MoEBeamSearcher:
 
				         :returns: a list that contains *up to* k_best RemoteExpert instances
			
 
				         """
			
 
				         assert len(grid_scores) == len(self.grid_size) and beam_size > 0
			
 
				-        return self.dht.run_coroutine(partial(self._find_best_experts, prefix=self.uid_prefix, beam_size=beam_size,
			
 
				-                                              grid_scores=list(grid_scores), negative_caching=self.negative_caching,
			
 
				-                                              num_workers=self.num_workers), return_future)
			
 
				+        return self.dht.run_coroutine(partial(
			
 
				+            self._find_best_experts, prefix=self.uid_prefix, beam_size=beam_size, grid_scores=list(grid_scores),
			
 
				+            negative_caching=self.negative_caching, cache_expiration=self.cache_expiration,
			
 
				+            num_workers=self.num_workers), return_future)
			
 
				 
			
 
				     @classmethod
			
 
				     async def _find_best_experts(
			
 
				             cls, dht: DHT, node: DHTNode, prefix: str, grid_scores: List[Tuple[float]], beam_size: int,
			
 
				-            negative_caching: bool, num_workers: Optional[int] = None) -> List[RemoteExpert]:
			
 
				+            negative_caching: bool, cache_expiration: DHTExpiration, num_workers: Optional[int] = None
			
 
				+    ) -> List[RemoteExpert]:
			
 
				         num_workers = num_workers or min(beam_size, dht.max_workers or beam_size)
			
 
				 
			
 
				         # form initial beam from top-k active L1 prefixes, each row is (score, uid prefix, possible suffixes)
			
@@ -209,8 +216,9 @@ class MoEBeamSearcher:
 
				             _, best_uid_prefixes = zip(*best_active_pairs)
			
 
				 
			
 
				             # search DHT for next step suffixes
			
 
				-            successors = await cls._get_active_successors(dht, node, best_uid_prefixes, grid_size=None,
			
 
				-                                                          negative_caching=negative_caching, num_workers=num_workers)
			
 
				+            successors = await cls._get_active_successors(
			
 
				+                dht, node, best_uid_prefixes, grid_size=None, negative_caching=negative_caching,
			
 
				+                cache_expiration=cache_expiration, num_workers=num_workers)
			
 
				             beam = [(score, prefix, successors[prefix]) for score, prefix in best_active_pairs if successors[prefix]]
			
 
				             if not beam:
			
 
				                 logger.warning(f"Beam search had to terminate prematurely because of empty beam (dim 0)")
			
--- a/hivemind/dht/__init__.py
+++ b/hivemind/dht/__init__.py
@@ -51,13 +51,11 @@ class DHT(mp.Process):
 
				 
			
 
				     def __init__(self, listen_on: Endpoint = "0.0.0.0:*", initial_peers: Sequence[Endpoint] = (), *, start: bool,
			
 
				                  daemon: bool = True, max_workers: Optional[int] = None, parallel_rpc: Optional[int] = None,
			
 
				-                 expiration: float = 300, record_validators: Iterable[RecordValidatorBase] = (),
			
 
				-                 **kwargs):
			
 
				+                 record_validators: Iterable[RecordValidatorBase] = (), **kwargs):
			
 
				         super().__init__()
			
 
				         assert not isinstance(initial_peers, str), "please specify a list/tuple of initial peers (even if there's one)"
			
 
				         self.listen_on, self.initial_peers, self.kwargs = listen_on, initial_peers, kwargs
			
 
				         self.max_workers, self.parallel_rpc = max_workers, parallel_rpc
			
 
				-        self.default_expiration = expiration
			
 
				         self._record_validator = CompositeValidator(record_validators)
			
 
				         self._port = mp.Value(ctypes.c_int32, 0)  # initialized after dht starts
			
 
				         self._pipe, self.pipe = mp.Pipe(duplex=True)
			
@@ -257,12 +255,3 @@ class DHT(mp.Process):
 
				         else:
			
 
				             future.set_exception(ValueError(f"Can't get address: DHT node has no peers and no public endpoint."
			
 
				                                             f" Please ensure the node is connected or specify peers=... manually."))
			
 
				-
			
 
				-    def declare_experts(self, uids, endpoint, wait: bool = True):
			
 
				-        logger.warning("dht.declare_experts is scheduled for removal in 0.9.8, please use hivemind.declare_experts.")
			
 
				-        return hivemind.declare_experts(self, uids, endpoint, wait=wait)
			
 
				-
			
 
				-    def get_experts(self, uids, expiration_time: Optional[DHTExpiration] = None,
			
 
				-                    return_future: bool = False) -> List[Optional[RemoteExpert]]:
			
 
				-        logger.warning("dht.get_experts is scheduled for removal in 0.9.8, please use hivemind.get_experts.")
			
 
				-        return hivemind.get_experts(self, uids, expiration_time, return_future)
			
--- a/hivemind/server/__init__.py
+++ b/hivemind/server/__init__.py
@@ -67,7 +67,7 @@ class Server(threading.Thread):
 
				 
			
 
				         if self.dht and self.experts:
			
 
				             self.dht_handler_thread = DHTHandlerThread(experts=self.experts, dht=self.dht, endpoint=self.listen_on,
			
 
				-                                                       update_period=self.update_period)
			
 
				+                                                       update_period=self.update_period, daemon=True)
			
 
				 
			
 
				         if start:
			
 
				             self.run_in_background(await_ready=True)
			
@@ -261,7 +261,8 @@ class Server(threading.Thread):
 
				             self.dht.join()
			
 
				 
			
 
				         logger.debug(f"Shutting down runtime")
			
 
				-        self.runtime.stop.set()
			
 
				+
			
 
				+        self.runtime.shutdown()
			
 
				         logger.info("Server shutdown succesfully")
			
 
				 
			
 
				 
			
--- a/hivemind/server/dht_handler.py
+++ b/hivemind/server/dht_handler.py
@@ -10,8 +10,8 @@ from hivemind.utils import Endpoint, get_dht_time, get_port
 
				 
			
 
				 
			
 
				 class DHTHandlerThread(threading.Thread):
			
 
				-    def __init__(self, experts, dht: DHT, endpoint: Endpoint, update_period: int = 5):
			
 
				-        super().__init__()
			
 
				+    def __init__(self, experts, dht: DHT, endpoint: Endpoint, update_period: int = 5, **kwargs):
			
 
				+        super().__init__(**kwargs)
			
 
				         assert get_port(endpoint) is not None
			
 
				         self.endpoint = endpoint
			
 
				         self.experts = experts
			
@@ -25,7 +25,7 @@ class DHTHandlerThread(threading.Thread):
 
				             declare_experts(self.dht, self.experts.keys(), self.endpoint)
			
 
				 
			
 
				 
			
 
				-def declare_experts(dht: DHT, uids: Sequence[ExpertUID], endpoint: Endpoint,
			
 
				+def declare_experts(dht: DHT, uids: Sequence[ExpertUID], endpoint: Endpoint, expiration: DHTExpiration = 300,
			
 
				                     wait: bool = True) -> Dict[ExpertUID, bool]:
			
 
				     """
			
 
				     Make experts visible to all DHT peers; update timestamps if declared previously.
			
@@ -33,18 +33,20 @@ def declare_experts(dht: DHT, uids: Sequence[ExpertUID], endpoint: Endpoint,
 
				     :param uids: a list of expert ids to update
			
 
				     :param endpoint: endpoint that serves these experts, usually your server endpoint (e.g. "201.111.222.333:1337")
			
 
				     :param wait: if True, awaits for declaration to finish, otherwise runs in background
			
 
				-    :param timeout: waits for the procedure to finish for up to this long, None means wait indefinitely
			
 
				+    :param expiration: experts will be visible for this many seconds
			
 
				     :returns: if wait, returns store status for every key (True = store succeeded, False = store rejected)
			
 
				     """
			
 
				     assert not isinstance(uids, str), "Please send a list / tuple of expert uids."
			
 
				     for uid in uids:
			
 
				         assert is_valid_uid(uid), f"{uid} is not a valid expert uid. All uids must follow {UID_PATTERN.pattern}"
			
 
				-    return dht.run_coroutine(partial(_declare_experts, uids=list(uids), endpoint=endpoint), return_future=not wait)
			
 
				+    return dht.run_coroutine(partial(_declare_experts, uids=list(uids), endpoint=endpoint, expiration=expiration),
			
 
				+                             return_future=not wait)
			
 
				 
			
 
				 
			
 
				-async def _declare_experts(dht: DHT, node: DHTNode, uids: List[ExpertUID], endpoint: Endpoint) -> Dict[ExpertUID, bool]:
			
 
				+async def _declare_experts(dht: DHT, node: DHTNode, uids: List[ExpertUID], endpoint: Endpoint,
			
 
				+                           expiration: DHTExpiration) -> Dict[ExpertUID, bool]:
			
 
				     num_workers = len(uids) if dht.max_workers is None else min(len(uids), dht.max_workers)
			
 
				-    expiration_time = get_dht_time() + dht.default_expiration  # TODO use local expiration
			
 
				+    expiration_time = get_dht_time() + expiration
			
 
				     data_to_store: Dict[Tuple[ExpertPrefix, Optional[Coordinate]], DHTValue] = {}
			
 
				     for uid in uids:
			
 
				         data_to_store[uid, None] = endpoint
			
--- a/hivemind/server/expert_uid.py
+++ b/hivemind/server/expert_uid.py
@@ -2,6 +2,7 @@ import random
 
				 import re
			
 
				 from typing import NamedTuple, Union, Tuple, Optional, List
			
 
				 
			
 
				+import hivemind
			
 
				 from hivemind.dht import DHT
			
 
				 from hivemind.utils import Endpoint, get_logger
			
 
				 
			
@@ -81,7 +82,7 @@ def generate_uids_from_pattern(num_experts: int, expert_pattern: Optional[str],
 
				 
			
 
				         # 2. look into DHT (if given) and remove duplicates
			
 
				         if dht:
			
 
				-            existing_expert_uids = {found_expert.uid for found_expert in dht.get_experts(new_uids)
			
 
				+            existing_expert_uids = {found_expert.uid for found_expert in hivemind.get_experts(dht, new_uids)
			
 
				                                     if found_expert is not None}
			
 
				             new_uids = [new_uid for new_uid in new_uids if new_uid not in existing_expert_uids]
			
 
				 
			
--- a/hivemind/server/runtime.py
+++ b/hivemind/server/runtime.py
@@ -41,6 +41,7 @@ class Runtime(threading.Thread):
 
				 
			
 
				     :param stats_report_interval: interval to collect and log statistics about runtime performance
			
 
				     """
			
 
				+    SHUTDOWN_TRIGGER = "RUNTIME SHUTDOWN TRIGGERED"
			
 
				 
			
 
				     def __init__(self, expert_backends: Dict[str, ExpertBackend], prefetch_batches=64, sender_threads: int = 1,
			
 
				                  device: torch.device = None, stats_report_interval: Optional[int] = None):
			
@@ -48,8 +49,9 @@ class Runtime(threading.Thread):
 
				         self.expert_backends = expert_backends
			
 
				         self.pools = tuple(chain(*(expert.get_pools() for expert in expert_backends.values())))
			
 
				         self.device, self.prefetch_batches, self.sender_threads = device, prefetch_batches, sender_threads
			
 
				+        self.shutdown_recv, self.shutdown_send = mp.Pipe(duplex=False)
			
 
				+        self.shutdown_trigger = mp.Event()
			
 
				         self.ready = mp.Event()  # event is set iff server is currently running and ready to accept batches
			
 
				-        self.stop = threading.Event()
			
 
				 
			
 
				         self.stats_report_interval = stats_report_interval
			
 
				         if self.stats_report_interval is not None:
			
@@ -86,18 +88,18 @@ class Runtime(threading.Thread):
 
				 
			
 
				                     output_sender_pool.apply_async(pool.send_outputs_from_runtime, args=[batch_index, outputs])
			
 
				             finally:
			
 
				-                self.shutdown()
			
 
				+                if not self.shutdown_trigger.is_set():
			
 
				+                    self.shutdown()
			
 
				 
			
 
				     def shutdown(self):
			
 
				         """ Gracefully terminate a running runtime. """
			
 
				         logger.info("Shutting down")
			
 
				+        self.ready.clear()
			
 
				 
			
 
				         if self.stats_report_interval is not None:
			
 
				             self.stats_reporter.stop.set()
			
 
				             self.stats_reporter.join()
			
 
				 
			
 
				-        self.stop.set()  # trigger background thread to shutdown
			
 
				-
			
 
				         logger.debug("Terminating pools")
			
 
				         for pool in self.pools:
			
 
				             if pool.is_alive():
			
@@ -105,6 +107,10 @@ class Runtime(threading.Thread):
 
				                 pool.join()
			
 
				         logger.debug("Pools terminated")
			
 
				 
			
 
				+        # trigger background thread to shutdown
			
 
				+        self.shutdown_send.send(self.SHUTDOWN_TRIGGER)
			
 
				+        self.shutdown_trigger.set()
			
 
				+
			
 
				     def iterate_minibatches_from_pools(self, timeout=None):
			
 
				         """
			
 
				         Chooses pool according to priority, then copies exposed batch and frees the buffer
			
@@ -112,12 +118,15 @@ class Runtime(threading.Thread):
 
				         with DefaultSelector() as selector:
			
 
				             for pool in self.pools:
			
 
				                 selector.register(pool.batch_receiver, EVENT_READ, pool)
			
 
				+            # selector.register(self.shutdown_recv, EVENT_READ, self.SHUTDOWN_TRIGGER)
			
 
				 
			
 
				-            while not self.stop.is_set():
			
 
				+            while True:
			
 
				                 # wait until at least one batch_receiver becomes available
			
 
				                 logger.debug("Waiting for inputs from task pools")
			
 
				                 ready_fds = selector.select()
			
 
				                 ready_objects = {key.data for (key, events) in ready_fds}
			
 
				+                if self.SHUTDOWN_TRIGGER in ready_objects:
			
 
				+                    break  # someone asked us to shutdown, break from the loop
			
 
				 
			
 
				                 logger.debug("Choosing the pool with highest priority")
			
 
				                 pool = max(ready_objects, key=lambda pool: pool.priority)
			
--- a/hivemind/server/task_pool.py
+++ b/hivemind/server/task_pool.py
@@ -136,7 +136,7 @@ class TaskPool(TaskPoolBase):
 
				         pending_batches = {}  # Dict[batch uuid, List[MPFuture]] for each batch currently in runtime
			
 
				 
			
 
				         output_thread = threading.Thread(target=self._pool_output_loop, args=[pending_batches],
			
 
				-                                         name=f'{self.name}_output')
			
 
				+                                         name=f'{self.name}_output', daemon=True)
			
 
				 
			
 
				         try:
			
 
				             output_thread.start()
			
--- a/hivemind/utils/timed_storage.py
+++ b/hivemind/utils/timed_storage.py
@@ -9,10 +9,11 @@ from dataclasses import dataclass
 
				 KeyType = TypeVar('KeyType')
			
 
				 ValueType = TypeVar('ValueType')
			
 
				 get_dht_time = time.time  # a global (weakly synchronized) time
			
 
				-MAX_DHT_TIME_DISCREPANCY_SECONDS = 3  # max allowed difference between get_dht_time for two DHT nodes. Enforced when joining DHT.(TODO)
			
 
				+MAX_DHT_TIME_DISCREPANCY_SECONDS = 3  # max allowed difference between get_dht_time for two DHT nodes
			
 
				 DHTExpiration = float
			
 
				 ROOT = 0
			
 
				 
			
 
				+
			
 
				 @dataclass(init=True, repr=True, frozen=True)
			
 
				 class ValueWithExpiration(Generic[ValueType]):
			
 
				     value: ValueType
			
@@ -37,11 +38,13 @@ class ValueWithExpiration(Generic[ValueType]):
 
				         else:
			
 
				             return False
			
 
				 
			
 
				+
			
 
				 @dataclass(init=True, repr=True, order=True, frozen=True)
			
 
				 class HeapEntry(Generic[KeyType]):
			
 
				     expiration_time: DHTExpiration
			
 
				     key: KeyType
			
 
				 
			
 
				+
			
 
				 class TimedStorage(Generic[KeyType, ValueType]):
			
 
				     """ A dictionary that maintains up to :maxsize: key-value-expiration tuples until their expiration_time """
			
 
				     frozen = False  # can be set to True. If true, do not remove outdated elements
			
--- a/tests/test_dht_experts.py
+++ b/tests/test_dht_experts.py
@@ -6,7 +6,7 @@ import pytest
 
				 
			
 
				 import hivemind
			
 
				 import hivemind.server.expert_uid
			
 
				-from hivemind import LOCALHOST
			
 
				+from hivemind import LOCALHOST, declare_experts, get_experts
			
 
				 from hivemind.client.beam_search import MoEBeamSearcher
			
 
				 from hivemind.server.expert_uid import UidEndpoint, is_valid_uid, is_valid_prefix, split_uid
			
 
				 
			
@@ -26,13 +26,13 @@ def test_store_get_experts():
 
				     for batch_start in range(0, len(expert_uids), batch_size):
			
 
				         hivemind.declare_experts(first_peer, expert_uids[batch_start: batch_start + batch_size], 'localhost:1234')
			
 
				 
			
 
				-    found = other_peer.get_experts(random.sample(expert_uids, 5) + ['foo', 'bar'])
			
 
				+    found = get_experts(other_peer, random.sample(expert_uids, 5) + ['foo', 'bar'])
			
 
				     assert all(res is not None for res in found[:-2]), "Could not find some existing experts"
			
 
				     assert all(res is None for res in found[-2:]), "Found non-existing experts"
			
 
				 
			
 
				     other_expert, other_port = "my_other_expert.1337", random.randint(1000, 9999)
			
 
				     hivemind.declare_experts(other_peer, [other_expert], f'that_host:{other_port}')
			
 
				-    first_notfound, first_found = hivemind.get_experts(first_peer, ['foobar', other_expert])
			
 
				+    first_notfound, first_found = get_experts(first_peer, ['foobar', other_expert])
			
 
				     assert isinstance(first_found, hivemind.RemoteExpert)
			
 
				     assert first_found.endpoint == f'that_host:{other_port}'
			
 
				 
			
@@ -46,19 +46,18 @@ def test_beam_search(dht_size=20, total_experts=128, batch_size=32, initial_peer
 
				     dht = []
			
 
				     for i in range(dht_size):
			
 
				         neighbors_i = [f'{LOCALHOST}:{node.port}' for node in random.sample(dht, min(initial_peers, len(dht)))]
			
 
				-        dht.append(hivemind.DHT(start=True, expiration=999999, initial_peers=neighbors_i, parallel_rpc=parallel_rpc))
			
 
				+        dht.append(hivemind.DHT(start=True, initial_peers=neighbors_i, parallel_rpc=parallel_rpc))
			
 
				 
			
 
				     real_experts = sorted({
			
 
				         'expert.' + '.'.join([str(random.randint(0, dim - 1)) for dim in grid_dims])
			
 
				         for _ in range(total_experts)
			
 
				     })
			
 
				     for batch_start in range(0, len(real_experts), batch_size):
			
 
				-        random.choice(dht).declare_experts(
			
 
				-            real_experts[batch_start: batch_start + batch_size], wait=True,
			
 
				-            endpoint=f"host{batch_start // batch_size}:{random.randint(0, 65536)}")
			
 
				+        declare_experts(random.choice(dht), real_experts[batch_start: batch_start + batch_size], wait=True,
			
 
				+                        endpoint=f"host{batch_start // batch_size}:{random.randint(0, 65536)}")
			
 
				 
			
 
				     neighbors_i = [f'{LOCALHOST}:{node.port}' for node in random.sample(dht, min(initial_peers, len(dht)))]
			
 
				-    you = hivemind.DHT(start=True, expiration=999999, initial_peers=neighbors_i, parallel_rpc=parallel_rpc)
			
 
				+    you = hivemind.DHT(start=True, initial_peers=neighbors_i, parallel_rpc=parallel_rpc)
			
 
				     beam_search = MoEBeamSearcher(you, 'expert.', grid_dims)
			
 
				 
			
 
				     for i in range(10):
			
@@ -76,17 +75,17 @@ def test_beam_search(dht_size=20, total_experts=128, batch_size=32, initial_peer
 
				 
			
 
				 @pytest.mark.forked
			
 
				 def test_dht_single_node():
			
 
				-    node = hivemind.DHT(start=True, expiration=999)
			
 
				+    node = hivemind.DHT(start=True)
			
 
				     beam_search = MoEBeamSearcher(node, 'expert.', grid_size=(10,))
			
 
				 
			
 
				-    assert all(node.declare_experts(['expert.1', 'expert.2', 'expert.3'], f"{hivemind.LOCALHOST}:1337").values())
			
 
				-    assert len(node.declare_experts(["ffn.1", "ffn.2"], endpoint="that_place")) == 4
			
 
				-    assert len(node.declare_experts(['e.1.2.3', 'e.1.2.5', 'e.2.0'], f"{hivemind.LOCALHOST}:42")) == 7
			
 
				+    assert all(declare_experts(node, ['expert.1', 'expert.2', 'expert.3'], f"{hivemind.LOCALHOST}:1337").values())
			
 
				+    assert len(declare_experts(node, ["ffn.1", "ffn.2"], endpoint="that_place")) == 4
			
 
				+    assert len(declare_experts(node, ['e.1.2.3', 'e.1.2.5', 'e.2.0'], f"{hivemind.LOCALHOST}:42")) == 7
			
 
				 
			
 
				-    for expert in node.get_experts(['expert.3', 'expert.2']):
			
 
				+    for expert in get_experts(node, ['expert.3', 'expert.2']):
			
 
				         assert expert.endpoint == f"{hivemind.LOCALHOST}:1337"
			
 
				 
			
 
				-    assert all(node.declare_experts(['expert.5', 'expert.2'], f"{hivemind.LOCALHOST}:1337").values())
			
 
				+    assert all(declare_experts(node, ['expert.5', 'expert.2'], f"{hivemind.LOCALHOST}:1337").values())
			
 
				     found_experts = beam_search.find_best_experts([(0., 1., 2., 3., 4., 5., 6., 7., 8.)], beam_size=2)
			
 
				     assert len(found_experts) == 2 and [expert.uid for expert in found_experts] == ['expert.5', 'expert.3']
			
 
				 
			
--- a/tests/test_moe.py
+++ b/tests/test_moe.py
@@ -15,7 +15,7 @@ def test_moe():
 
				                        for _ in range(10)]
			
 
				     with background_server(expert_uids=all_expert_uids, device='cpu', expert_cls='ffn', num_handlers=1,
			
 
				                            hidden_dim=16) as (server_endpoint, dht_endpoint):
			
 
				-        dht = hivemind.DHT(start=True, expiration=999, initial_peers=[dht_endpoint])
			
 
				+        dht = hivemind.DHT(start=True, initial_peers=[dht_endpoint])
			
 
				 
			
 
				         dmoe = hivemind.RemoteMixtureOfExperts(
			
 
				             in_features=16, grid_size=(4, 4, 4), dht=dht, k_best=3, uid_prefix='ffn.')
			
@@ -31,7 +31,7 @@ def test_no_experts():
 
				                        for _ in range(10)]
			
 
				     with background_server(expert_uids=all_expert_uids, device='cpu', expert_cls='nop_delay', num_handlers=1,
			
 
				                            hidden_dim=16) as (server_endpoint, dht_endpoint):
			
 
				-        dht = hivemind.DHT(start=True, expiration=999, initial_peers=[dht_endpoint])
			
 
				+        dht = hivemind.DHT(start=True, initial_peers=[dht_endpoint])
			
 
				 
			
 
				         dmoe = hivemind.RemoteSwitchMixtureOfExperts(
			
 
				             in_features=16, grid_size=(4, 4, 4), dht=dht, uid_prefix='expert.', forward_timeout=0.1,
			
@@ -119,8 +119,8 @@ def test_remote_module_call(hidden_dim=16):
 
				 @pytest.mark.forked
			
 
				 def test_beam_search_correctness():
			
 
				     all_expert_uids = [f'ffn.{5 + i}.{10 + j}.{15 + k}' for i in range(10) for j in range(10) for k in range(10)]
			
 
				-    dht = hivemind.DHT(start=True, expiration=999)
			
 
				-    assert all(dht.declare_experts(all_expert_uids, endpoint='fake-endpoint'))
			
 
				+    dht = hivemind.DHT(start=True)
			
 
				+    assert all(hivemind.declare_experts(dht, all_expert_uids, endpoint='fake-endpoint'))
			
 
				 
			
 
				     dmoe = hivemind.RemoteMixtureOfExperts(
			
 
				         in_features=32, grid_size=(32, 32, 32), dht=dht, k_best=4, uid_prefix='ffn.')
			
@@ -208,7 +208,7 @@ def test_client_anomaly_detection():
 
				 
			
 
				     experts['expert.3'].expert.ffn.weight.data[0, 0] = float('nan')
			
 
				 
			
 
				-    dht = hivemind.DHT(start=True, expiration=999)
			
 
				+    dht = hivemind.DHT(start=True)
			
 
				     server = hivemind.Server(dht, experts, num_connection_handlers=1)
			
 
				     server.start()
			
 
				     try:
			
--- a/tests/test_training.py
+++ b/tests/test_training.py
@@ -48,7 +48,7 @@ def test_moe_training(max_steps: int = 100, threshold: float = 0.9, num_experts=
 
				     all_expert_uids = [f'expert.{i}' for i in range(num_experts)]
			
 
				     with background_server(expert_uids=all_expert_uids, device='cpu', optim_cls=SGD, hidden_dim=64, num_handlers=1) \
			
 
				             as (server_endpoint, dht_endpoint):
			
 
				-        dht = DHT(start=True, expiration=999, initial_peers=[dht_endpoint])
			
 
				+        dht = DHT(start=True, initial_peers=[dht_endpoint])
			
 
				 
			
 
				         moe = RemoteMixtureOfExperts(in_features=64, grid_size=(num_experts,), dht=dht, uid_prefix='expert.', k_best=2)
			
 
				         model = nn.Sequential(moe, nn.Linear(64, 2))
			
@@ -91,7 +91,7 @@ def test_switch_training(max_steps: int = 10, threshold: float = 0.9, num_expert
 
				     all_expert_uids = [f'expert.{i}' for i in range(num_experts)]
			
 
				     with background_server(expert_uids=all_expert_uids, device='cpu', optim_cls=SGD, hidden_dim=64,
			
 
				                            num_handlers=1) as (server_endpoint, dht_endpoint):
			
 
				-        dht = DHT(start=True, expiration=999, initial_peers=[dht_endpoint])
			
 
				+        dht = DHT(start=True, initial_peers=[dht_endpoint])
			
 
				 
			
 
				         model = SwitchNetwork(dht, 64, 2, num_experts)
			
 
				         opt = SGD(model.parameters(), lr=0.05)