3 gadi atpakaļ · 52ea24730b
--- a/cli/run_server.py
+++ b/cli/run_server.py
@@ -124,10 +124,9 @@ def main():
 
				     use_auth_token = args.pop("use_auth_token")
			
 
				     args["use_auth_token"] = True if use_auth_token in ("True", "true", "") else use_auth_token
			
 
				 
			
 
				-    server = Server(**args, compression=compression, attn_cache_size=attn_cache_size, start=True)
			
 
				-
			
 
				+    server = Server(**args, compression=compression, attn_cache_size=attn_cache_size)
			
 
				     try:
			
 
				-        server.join()
			
 
				+        server.run()
			
 
				     except KeyboardInterrupt:
			
 
				         logger.info("Caught KeyboardInterrupt, shutting down")
			
 
				     finally:
			
--- a/src/server/server.py
+++ b/src/server/server.py
@@ -32,7 +32,7 @@ use_hivemind_log_handler("in_root_logger")
 
				 logger = get_logger(__file__)
			
 
				 
			
 
				 
			
 
				-class Server(threading.Thread):
			
 
				+class Server:
			
 
				     """
			
 
				     Runs ModuleContainer, periodically checks that the network is balanced,
			
 
				     restarts the ModuleContainer with other layers if the imbalance is significant
			
@@ -68,13 +68,10 @@ class Server(threading.Thread):
 
				         mean_block_selection_delay: float = 0.5,
			
 
				         use_auth_token: Optional[str] = None,
			
 
				         load_in_8bit: bool = False,
			
 
				-        start: bool,
			
 
				         **kwargs,
			
 
				     ):
			
 
				         """Create a server with one or more bloom blocks. See run_server.py for documentation."""
			
 
				 
			
 
				-        super().__init__()
			
 
				-
			
 
				         self.converted_model_name_or_path = converted_model_name_or_path
			
 
				         self.num_handlers = num_handlers
			
 
				         self.min_batch_size, self.max_batch_size = min_batch_size, max_batch_size
			
@@ -147,8 +144,6 @@ class Server(threading.Thread):
 
				         self.mean_block_selection_delay = mean_block_selection_delay
			
 
				 
			
 
				         self.stop = threading.Event()
			
 
				-        if start:
			
 
				-            self.start()
			
 
				 
			
 
				     def run(self):
			
 
				         while True:
			
@@ -312,7 +307,19 @@ class ModuleContainer(threading.Thread):
 
				                     min_batch_size=min_batch_size,
			
 
				                     max_batch_size=max_batch_size,
			
 
				                 )
			
 
				-        finally:
			
 
				+        except:
			
 
				+            joining_announcer.stop.set()
			
 
				+            joining_announcer.join()
			
 
				+            declare_active_modules(
			
 
				+                dht,
			
 
				+                module_uids,
			
 
				+                expiration_time=get_dht_time() + expiration,
			
 
				+                state=ServerState.OFFLINE,
			
 
				+                throughput=throughput,
			
 
				+            )
			
 
				+            logger.info(f"Announced that blocks {module_uids} are offline")
			
 
				+            raise
			
 
				+        else:
			
 
				             joining_announcer.stop.set()
			
 
				             joining_announcer.join()