justheuristic 4 роки тому
батько
коміт
97bd64fc7e
2 змінені файли: 3 рядки додано та 2 видалено
  1. 1 1
      hivemind/averaging/averager.py
  2. 2 1
      hivemind/optim/collaborative.py

+ 1 - 1
hivemind/averaging/averager.py

@@ -616,7 +616,7 @@ class DecentralizedAverager(mp.Process, ServicerBase):
 
         finally:
             if not future.done():
-                logger.warning("Averager could not load state from peers: all requests have failed.")
+                logger.warning("Averager could not load state from peers: none of the requests succeeded.")
                 future.set_result(None)
 
     def get_group_bits(self, wait: bool = True):

+ 2 - 1
hivemind/optim/collaborative.py

@@ -191,8 +191,9 @@ class CollaborativeOptimizer(DecentralizedOptimizerBase):
             while True:
                 try:
                     self.averager.load_state_from_peers(timeout=self.load_state_timeout, **kwargs)
+                    break
                 except BaseException as e:
-                    logger.exception(f"Failed to load state from peers: {e}, will retry now")
+                    logger.exception(f"Failed to load state from peers: {e}, retrying ...")
                     continue
 
             self.local_samples_accumulated = self.local_steps_accumulated = 0