
black style

Artem Chumachenko 3 years ago
parent
commit
68a7d022f0

+ 2 - 1
hivemind/optim/grad_averager.py

@@ -11,7 +11,7 @@ from hivemind.utils import DHTExpiration, get_dht_time, get_logger
 logger = get_logger(__name__)
 
 
-TGradientAverager = TypeVar('TGradientAverager', bound='GradientAverager')
+TGradientAverager = TypeVar("TGradientAverager", bound="GradientAverager")
 GradientAveragerFactory = Callable[[Type[TGradientAverager], Any], TGradientAverager]
 
 
@@ -235,4 +235,5 @@ class GradientAverager(DecentralizedAverager):
     def get_factory(cls, **kwargs1) -> GradientAveragerFactory:
         def _factory(**kwargs2):
             return cls(**kwargs1, **kwargs2)
+
         return _factory

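For context, the get_factory helper reformatted above implements a two-stage constructor: keyword arguments passed to get_factory are bound immediately, and the remainder are supplied when the returned factory is called, with both sets forwarded to cls(**kwargs1, **kwargs2). A minimal standalone sketch of that pattern (Dummy is a stand-in class, not part of this commit):

    # Self-contained illustration of the get_factory() pattern from the hunk above.
    class Dummy:
        def __init__(self, a, b):
            self.a, self.b = a, b

        @classmethod
        def get_factory(cls, **kwargs1):
            def _factory(**kwargs2):
                # both keyword-argument sets end up in the real constructor
                return cls(**kwargs1, **kwargs2)

            return _factory

    factory = Dummy.get_factory(a=1)  # bind some kwargs up front
    obj = factory(b=2)                # supply the rest later
    assert (obj.a, obj.b) == (1, 2)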
+ 3 - 4
hivemind/optim/optimizer.py

@@ -248,7 +248,8 @@ class Optimizer(torch.optim.Optimizer):
             assert grad_averager is not None, "Use external gradient buffers only with working gradient averager."
             averaged_grads = [
                 torch.zeros_like(param, device="cpu").share_memory_()
-                for param_group in params for param in param_group["params"]
+                for param_group in params
+                for param in param_group["params"]
             ]
             extra_tensors = [e for e in extra_tensors] + [ag for ag in averaged_grads]
         self.state_averager = self._make_state_averager(
@@ -265,9 +266,7 @@ class Optimizer(torch.optim.Optimizer):
         )
         if grad_averager:
             self.grad_averager = self._make_gradient_averager(
-                reuse_grad_buffers=reuse_grad_buffers,
-                grad_averager=grad_averager,
-                averaged_grads=averaged_grads
+                reuse_grad_buffers=reuse_grad_buffers, grad_averager=grad_averager, averaged_grads=averaged_grads
             )
         else:
             self.grad_averager = None

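The reflowed comprehension in the first optimizer.py hunk flattens every parameter group into a single list of zero-initialized, shared-memory CPU buffers, one per parameter. A standalone sketch of what it produces (the toy parameter groups below are illustrative only, not taken from the commit):

    import torch

    # Toy parameter groups mimicking the structure of `params` in the hunk above.
    params = [
        {"params": [torch.nn.Parameter(torch.randn(2, 2))]},
        {"params": [torch.nn.Parameter(torch.randn(3))]},
    ]
    averaged_grads = [
        torch.zeros_like(param, device="cpu").share_memory_()
        for param_group in params
        for param in param_group["params"]
    ]
    assert len(averaged_grads) == 2                        # one buffer per parameter
    assert all(buf.is_shared() for buf in averaged_grads)  # buffers live in shared memory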
+ 6 - 2
hivemind/optim/power_ef_averager.py

@@ -126,7 +126,9 @@ class PowerEFGradientAverager(GradientAverager):
             )
 
             async with enter_asynchronously(self.get_tensors()) as averaged_grads:
-                cs = [rest for idx, rest in enumerate(self._gradient_residual) if idx not in self._uncompressed_gradients]
+                cs = [
+                    rest for idx, rest in enumerate(self._gradient_residual) if idx not in self._uncompressed_gradients
+                ]
                 ps = [
                     torch.zeros((grad.size(0), self.rank), device="cpu")
                     for idx, grad in enumerate(averaged_grads)
@@ -212,7 +214,9 @@ class PowerEFGradientAverager(GradientAverager):
         # divide locally accumulated gradients by the number of times they were accumulated
         grad_scale = (1.0 / self.local_times_accumulated) if self.local_times_accumulated != 0 else 0.0
         with self.get_tensors() as averaged_grads:
-            for grad_acc, averaged_grad, rest in zip(self._grad_accumulators(), averaged_grads, self._gradient_residual):
+            for grad_acc, averaged_grad, rest in zip(
+                self._grad_accumulators(), averaged_grads, self._gradient_residual
+            ):
                 rest.copy_(grad_acc, non_blocking=False).mul_(grad_scale).sub_(averaged_grad)
 
 

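The second power_ef_averager.py hunk only rewraps a long zip(...) call; the in-place chain inside it sets each residual to the scaled local gradient accumulator minus the current averaged gradient. An equivalent out-of-place form with toy tensors (purely illustrative, not from the commit):

    import torch

    grad_acc = torch.randn(4)       # locally accumulated gradient
    averaged_grad = torch.randn(4)  # result of the latest averaging round
    local_times_accumulated = 3
    grad_scale = 1.0 / local_times_accumulated

    # rest.copy_(grad_acc).mul_(grad_scale).sub_(averaged_grad) computes:
    rest = grad_acc * grad_scale - averaged_grad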
+ 1 - 4
tests/test_optimizer.py

@@ -290,10 +290,7 @@ def test_progress_tracker():
 @pytest.mark.forked
 @pytest.mark.parametrize(
     "grad_averager",
-    [
-        (GradientAverager.get_factory(),),
-        (PowerEFGradientAverager.get_factory(averager_rank=1),)
-    ],
+    [(GradientAverager.get_factory(),), (PowerEFGradientAverager.get_factory(averager_rank=1),)],
 )
 def test_optimizer(
     grad_averager: GradientAveragerFactory,