Explorar o código

load state timeout

justheuristic hai 4 anos
pai
achega
97bd64fc7e
Modificáronse 2 ficheiros con 3 adicións e 2 borrados
  1. 1 1
      hivemind/averaging/averager.py
  2. 2 1
      hivemind/optim/collaborative.py

+ 1 - 1
hivemind/averaging/averager.py

@@ -616,7 +616,7 @@ class DecentralizedAverager(mp.Process, ServicerBase):
 
         finally:
             if not future.done():
-                logger.warning("Averager could not load state from peers: all requests have failed.")
+                logger.warning("Averager could not load state from peers: none of the requests succeeded.")
                 future.set_result(None)
 
     def get_group_bits(self, wait: bool = True):

+ 2 - 1
hivemind/optim/collaborative.py

@@ -191,8 +191,9 @@ class CollaborativeOptimizer(DecentralizedOptimizerBase):
             while True:
                 try:
                     self.averager.load_state_from_peers(timeout=self.load_state_timeout, **kwargs)
+                    break
                 except BaseException as e:
-                    logger.exception(f"Failed to load state from peers: {e}, will retry now")
+                    logger.exception(f"Failed to load state from peers: {e}, retrying ...")
                     continue
 
             self.local_samples_accumulated = self.local_steps_accumulated = 0