Pin isort version, bump black (#456)

* Pin isort version, bump black
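
Black 22.1.0 stabilized the "power operator hugging" rule: spaces around ** are removed when both operands are simple, so nearly every hunk below is mechanical reformatting. For example (illustrative snippet, not from this repo):

    x = 2 ** 19   # formatted by black 21.6b0
    x = 2**19     # formatted by black 22.1.0

Pinning isort to 5.10.1 in both CI and requirements-dev.txt keeps the two environments in sync.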

Co-authored-by: Alexander Borzunov <hxrussia@gmail.com>
Max Ryabinin, 3 years ago
commit 762f116ffc

+ 3 - 1
.github/workflows/check-style.yml

@@ -13,7 +13,7 @@ jobs:
       - uses: psf/black@stable
         with:
           options: "--check --diff"
-          version: "21.6b0"
+          version: "22.1.0"
   isort:
     runs-on: ubuntu-latest
     steps:
@@ -22,3 +22,5 @@ jobs:
         with:
           python-version: 3.8
       - uses: isort/isort-action@master
+        with:
+          isortVersion: "5.10.1"
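
To reproduce the CI checks locally with the same pinned versions (a hedged sketch; the exact CI invocation may differ):

    pip install black==22.1.0 isort==5.10.1
    black --check --diff .
    isort --check-only .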

+ 1 - 1
examples/albert/arguments.py

@@ -148,4 +148,4 @@ class AlbertTrainingArguments(TrainingArguments):
 
     save_total_limit: int = 2
     save_steps: int = 500
-    max_steps: int = 10 ** 30  # meant as "peer should compute gradients forever"
+    max_steps: int = 10**30  # meant as "peer should compute gradients forever"

+ 1 - 1
hivemind/averaging/load_balancing.py

@@ -65,7 +65,7 @@ def optimize_parts_lp(vector_size: int, bandwidths: np.ndarray, min_size: int =
     # the constraints below are tuples (A, b) such that Ax <= b
     nonnegative_weights = -np.eye(group_size, num_variables, dtype=c.dtype), np.zeros(group_size, c.dtype)
     weights_sum_to_one = c[None, :] - 1.0, np.array([-1.0])
-    coeff_per_variable = (group_size - 2.0) / np.maximum(bandwidths, 10 ** -LOAD_BALANCING_LP_DECIMALS)
+    coeff_per_variable = (group_size - 2.0) / np.maximum(bandwidths, 10**-LOAD_BALANCING_LP_DECIMALS)
     coeff_matrix_minus_xi = np.hstack([np.diag(coeff_per_variable), -np.ones((group_size, 1), c.dtype)])
     xi_is_maximum = coeff_matrix_minus_xi[is_nonzero], -1.0 / bandwidths[is_nonzero]
     force_max_weights = np.eye(group_size, M=num_variables, dtype=c.dtype), is_nonzero.astype(c.dtype)
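
For context, the (A, b) tuples built above are the standard inequality form A x <= b of a linear program. A minimal, self-contained sketch of how such tuples feed a solver, using scipy (the toy c, A_ub, b_ub below are hypothetical, not hivemind's actual constraints):

    import numpy as np
    from scipy.optimize import linprog

    # Toy LP: minimize c @ x subject to A_ub @ x <= b_ub and x >= 0.
    c = np.array([-1.0, -2.0])                 # i.e. maximize x0 + 2*x1
    A_ub = np.array([[1.0, 1.0], [1.0, 0.0]])  # x0 + x1 <= 1, x0 <= 0.5
    b_ub = np.array([1.0, 0.5])
    result = linprog(c=c, A_ub=A_ub, b_ub=b_ub, bounds=(0, None), method="highs")
    print(result.x)  # approximately [0.0, 1.0]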

+ 1 - 1
hivemind/averaging/partition.py

@@ -13,7 +13,7 @@ from hivemind.proto import runtime_pb2
 from hivemind.utils import amap_in_executor, as_aiter, get_logger
 
 T = TypeVar("T")
-DEFAULT_PART_SIZE_BYTES = 2 ** 19
+DEFAULT_PART_SIZE_BYTES = 2**19
 logger = get_logger(__name__)
 
 

+ 2 - 2
hivemind/compression/quantization.py

@@ -48,7 +48,7 @@ class Quantization(CompressionBase, ABC):
 
     @property
     def n_bins(self):
-        return 2 ** self.n_bits
+        return 2**self.n_bits
 
 
 class Uniform8BitQuantization(Quantization):
@@ -94,7 +94,7 @@ def get_chunk_size(num_elements: int, min_chunk_size: int) -> int:
     return min_chunk_size + (leftover_elements - 1) // num_chunks + 1
 
 
-def quantile_qq_approximation(array: np.ndarray, n_quantiles: int, min_chunk_size: int = 10 ** 5) -> np.ndarray:
+def quantile_qq_approximation(array: np.ndarray, n_quantiles: int, min_chunk_size: int = 10**5) -> np.ndarray:
     """Estimate uniform quantiles of data using quantile-of-quantiles. Runs in parallel."""
     if not array.data.c_contiguous and array.data.f_contiguous:
         array = array.T
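
The docstring above describes a quantile-of-quantiles estimator. A minimal sequential sketch of the idea (hypothetical helper, not the parallel hivemind implementation):

    import numpy as np

    def approximate_quantiles(array: np.ndarray, n_quantiles: int, chunk_size: int) -> np.ndarray:
        # Split the flattened data into chunks, estimate quantiles per chunk,
        # then take quantiles of those per-chunk estimates.
        flat = array.ravel()
        n_chunks = max(1, flat.size // chunk_size)
        grid = np.linspace(0.0, 1.0, n_quantiles)
        per_chunk = [np.quantile(chunk, grid) for chunk in np.array_split(flat, n_chunks)]
        return np.quantile(np.stack(per_chunk), grid)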

+ 1 - 1
hivemind/moe/client/switch_moe.py

@@ -150,7 +150,7 @@ class RemoteSwitchMixtureOfExperts(RemoteMixtureOfExperts):
         # for each grid dimension, sum across all indices for a dimension. Optimizing this leads to uniform allocation
         balancing_loss = torch.stack(
             [
-                torch.mean(dim_softmax.mean(0) * dim_utilization) * (dim_size ** 2)
+                torch.mean(dim_softmax.mean(0) * dim_utilization) * dim_size**2
                 for dim_softmax, dim_utilization, dim_size in zip(
                     grid_softmax, self.grid_utilization, self.beam_search.grid_size
                 )

+ 1 - 1
hivemind/p2p/p2p_daemon.py

@@ -265,7 +265,7 @@ class P2P:
         return self._daemon_listen_maddr
 
     @staticmethod
-    async def send_raw_data(data: bytes, writer: asyncio.StreamWriter, *, chunk_size: int = 2 ** 16) -> None:
+    async def send_raw_data(data: bytes, writer: asyncio.StreamWriter, *, chunk_size: int = 2**16) -> None:
         writer.write(len(data).to_bytes(P2P.HEADER_LEN, P2P.BYTEORDER))
         data = memoryview(data)
         for offset in range(0, len(data), chunk_size):
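
send_raw_data length-prefixes the payload (P2P.HEADER_LEN bytes in P2P.BYTEORDER order) and writes it in chunks. A minimal sketch of the matching read side (the library's actual counterpart is P2P.receive_raw_data; this standalone version only illustrates the framing):

    import asyncio

    async def receive_framed(reader: asyncio.StreamReader, header_len: int, byteorder: str) -> bytes:
        # Read the fixed-size length prefix, then exactly that many payload bytes.
        header = await reader.readexactly(header_len)
        length = int.from_bytes(header, byteorder)
        return await reader.readexactly(length)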

+ 1 - 1
hivemind/p2p/p2p_daemon_bindings/control.py

@@ -26,7 +26,7 @@ SUPPORT_CONN_PROTOCOLS = (
 SUPPORTED_PROTOS = (protocols.protocol_with_code(proto) for proto in SUPPORT_CONN_PROTOCOLS)
 logger = get_logger(__name__)
 
-DEFAULT_MAX_MSG_SIZE = 4 * 1024 ** 2
+DEFAULT_MAX_MSG_SIZE = 4 * 1024**2
 
 
 def parse_conn_protocol(maddr: Multiaddr) -> int:

+ 1 - 1
hivemind/utils/grpc.py

@@ -175,7 +175,7 @@ class ChannelCache(TimedStorage[ChannelInfo, Tuple[Union[grpc.Channel, grpc.aio.
         raise ValueError(f"Please use {self.__class__.__name__}.get_stub to get or create stubs")
 
 
-STREAMING_CHUNK_SIZE_BYTES = 2 ** 16
+STREAMING_CHUNK_SIZE_BYTES = 2**16
 
 
 def split_for_streaming(

+ 1 - 1
hivemind/utils/limits.py

@@ -3,7 +3,7 @@ from hivemind.utils.logging import get_logger
 logger = get_logger(__name__)
 
 
-def increase_file_limit(new_soft=2 ** 15, new_hard=2 ** 15):
+def increase_file_limit(new_soft=2**15, new_hard=2**15):
     """Increase the maximum number of open files. On Linux, this allows spawning more processes/threads."""
     try:
         import resource  # local import to avoid ImportError for Windows users
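
On POSIX systems, raising the limit boils down to resource.setrlimit; a minimal sketch under that assumption (illustrative, not the exact hivemind body):

    import resource

    def raise_file_limit(new_soft: int = 2**15, new_hard: int = 2**15) -> None:
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        # Only ever raise the limits; raising the hard limit may require privileges.
        resource.setrlimit(resource.RLIMIT_NOFILE, (max(soft, new_soft), max(hard, new_hard)))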

+ 1 - 1
pyproject.toml

@@ -1,6 +1,6 @@
 [tool.black]
 line-length = 119
-required-version = "21.6b0"
+required-version = "22.1.0"
 
 [tool.isort]
 profile = "black"

+ 2 - 2
requirements-dev.txt

@@ -6,6 +6,6 @@ coverage==6.0.2  # see https://github.com/pytest-dev/pytest-cov/issues/520
 tqdm
 scikit-learn
 torchvision
-black==21.6b0
-isort
+black==22.1.0
+isort==5.10.1
 psutil

+ 9 - 9
tests/test_allreduce.py

@@ -33,7 +33,7 @@ async def test_partitioning():
 
     # note: this test does _not_ use parameterization to reuse sampled tensors
     for num_tensors in 1, 3, 5:
-        for part_size_bytes in 31337, 2 ** 20, 10 ** 10:
+        for part_size_bytes in 31337, 2**20, 10**10:
             for weights in [(1, 1), (0.333, 0.1667, 0.5003), (1.0, 0.0), [0.0, 0.4, 0.6, 0.0]]:
                 tensors = random.choices(all_tensors, k=num_tensors)
                 partition = TensorPartContainer(tensors, weights, part_size_bytes=part_size_bytes)
@@ -157,16 +157,16 @@ NODE, CLIENT, AUX = AveragingMode.NODE, AveragingMode.CLIENT, AveragingMode.AUX
 @pytest.mark.parametrize(
     "peer_modes, averaging_weights, peer_fractions, part_size_bytes",
     [
-        ((NODE, NODE, NODE, NODE), (1, 1, 1, 1), (1, 1, 1, 1), 2 ** 20),
-        ((NODE, NODE, NODE, NODE), (0.1, 0.2, 0.3, 0.4), (1, 1, 1, 1), 2 ** 20),
-        ((NODE, NODE, NODE, NODE), (1, 1, 1, 1), (1, 2, 3, 0), 2 ** 20),
-        ((NODE, NODE, NODE, CLIENT), (1, 1, 1, 1), (1, 2, 3, 0), 2 ** 20),
-        ((NODE, NODE, NODE, AUX), (1, 1, 1, 0), (1, 2, 3, 4), 2 ** 20),
-        ((NODE, NODE, NODE, NODE), (0.15, 0.0, 0.35, 0.45), (1, 1, 1, 1), 2 ** 20),
-        ((NODE, AUX, NODE, CLIENT), (0.15, 0.0, 0.35, 0.45), (150, 200, 67, 0), 2 ** 20),
+        ((NODE, NODE, NODE, NODE), (1, 1, 1, 1), (1, 1, 1, 1), 2**20),
+        ((NODE, NODE, NODE, NODE), (0.1, 0.2, 0.3, 0.4), (1, 1, 1, 1), 2**20),
+        ((NODE, NODE, NODE, NODE), (1, 1, 1, 1), (1, 2, 3, 0), 2**20),
+        ((NODE, NODE, NODE, CLIENT), (1, 1, 1, 1), (1, 2, 3, 0), 2**20),
+        ((NODE, NODE, NODE, AUX), (1, 1, 1, 0), (1, 2, 3, 4), 2**20),
+        ((NODE, NODE, NODE, NODE), (0.15, 0.0, 0.35, 0.45), (1, 1, 1, 1), 2**20),
+        ((NODE, AUX, NODE, CLIENT), (0.15, 0.0, 0.35, 0.45), (150, 200, 67, 0), 2**20),
         ((NODE, AUX, NODE, CLIENT), (0.15, 0.0, 0.35, 0.45), (150, 200, 67, 0), 256),
         ((NODE, AUX, NODE, CLIENT), (0.15, 0.0, 0.35, 0.45), (150, 200, 67, 0), 19),
-        ((AUX, AUX, AUX, AUX), (0.0, 0.0, 0.0, 0.0), (1, 2, 3, 4), 2 ** 20),
+        ((AUX, AUX, AUX, AUX), (0.0, 0.0, 0.0, 0.0), (1, 2, 3, 4), 2**20),
     ],
 )
 @pytest.mark.forked

+ 1 - 1
tests/test_allreduce_fault_tolerance.py

@@ -155,7 +155,7 @@ def test_fault_tolerance(fault0: Fault, fault1: Fault):
             min_matchmaking_time=1.0,
             next_chunk_timeout=0.5,
             allreduce_timeout=5,
-            part_size_bytes=2 ** 16,
+            part_size_bytes=2**16,
             client_mode=(i == 1),
             start=True,
             fault=fault0 if i == 0 else fault1 if i == 1 else Fault.NONE,

+ 1 - 1
tests/test_averaging.py

@@ -283,7 +283,7 @@ def test_load_balancing():
         load_balance_peers(100, (0, 0, 0))
 
     for i in range(10):
-        vector_size = np.random.randint(1, 1024 ** 3)
+        vector_size = np.random.randint(1, 1024**3)
         num_peers = np.random.randint(1, 256)
         scale = 1e-9 + np.random.rand() * 1e5
         bandwidths = np.random.rand(num_peers) * scale + 1e-6

+ 1 - 1
tests/test_compression.py

@@ -53,7 +53,7 @@ def test_serialize_tensor():
         assert torch.allclose(deserialize_torch_tensor(restored), tensor, rtol=rtol, atol=atol)
 
     tensor = torch.randn(512, 12288)
-    for chunk_size in [1024, 64 * 1024, 64 * 1024 + 1, 10 ** 9]:
+    for chunk_size in [1024, 64 * 1024, 64 * 1024 + 1, 10**9]:
         _check(tensor, CompressionType.NONE, chunk_size=chunk_size)
 
     _check(tensor, CompressionType.FLOAT16, rtol=0.0, atol=1e-2)

+ 2 - 2
tests/test_dht.py

@@ -72,7 +72,7 @@ async def dummy_dht_coro_stateful(self, node):
 
 async def dummy_dht_coro_long(self, node):
     await asyncio.sleep(0.25)
-    return self._x_dummy ** 2
+    return self._x_dummy**2
 
 
 async def dummy_dht_coro_for_cancel(self, node):
@@ -94,7 +94,7 @@ def test_run_coroutine():
     assert dht.run_coroutine(dummy_dht_coro_stateful) == 125
     assert dht.run_coroutine(dummy_dht_coro_stateful) == 126
     assert not hasattr(dht, "_x_dummy")
-    assert bg_task.result() == 126 ** 2
+    assert bg_task.result() == 126**2
 
     future = dht.run_coroutine(dummy_dht_coro_for_cancel, return_future=True)
     time.sleep(0.25)

+ 3 - 3
tests/test_p2p_daemon.py

@@ -89,7 +89,7 @@ async def test_unary_handler_edge_cases():
     p2p_replica = await P2P.replicate(p2p.daemon_listen_maddr)
 
     async def square_handler(data: test_pb2.TestRequest, context):
-        return test_pb2.TestResponse(number=data.number ** 2)
+        return test_pb2.TestResponse(number=data.number**2)
 
     await p2p.add_protobuf_handler("square", square_handler, test_pb2.TestRequest)
 
@@ -202,7 +202,7 @@ async def handle_square_stream(_, reader: asyncio.StreamReader, writer: asyncio.
             except asyncio.IncompleteReadError:
                 break
 
-            result = x ** 2
+            result = x**2
 
             await P2P.send_raw_data(MSGPackSerializer.dumps(result), writer)
 
@@ -215,7 +215,7 @@ async def validate_square_stream(reader: asyncio.StreamReader, writer: asyncio.S
             await P2P.send_raw_data(MSGPackSerializer.dumps(x), writer)
             result = MSGPackSerializer.loads(await P2P.receive_raw_data(reader))
 
-            assert result == x ** 2
+            assert result == x**2
 
 
 @pytest.mark.asyncio

+ 7 - 7
tests/test_p2p_daemon_bindings.py

@@ -38,15 +38,15 @@ PAIRS_INT_SERIALIZED_VALID = (
     (0, b"\x00"),
     (1, b"\x01"),
     (128, b"\x80\x01"),
-    (2 ** 32, b"\x80\x80\x80\x80\x10"),
-    (2 ** 64 - 1, b"\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01"),
+    (2**32, b"\x80\x80\x80\x80\x10"),
+    (2**64 - 1, b"\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01"),
 )
 
 PAIRS_INT_SERIALIZED_OVERFLOW = (
-    (2 ** 64, b"\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02"),
-    (2 ** 64 + 1, b"\x81\x80\x80\x80\x80\x80\x80\x80\x80\x02"),
+    (2**64, b"\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02"),
+    (2**64 + 1, b"\x81\x80\x80\x80\x80\x80\x80\x80\x80\x02"),
     (
-        2 ** 128,
+        2**128,
         b"\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x04",
     ),
 )
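
The serialized byte strings above follow the unsigned varint (LEB128-style) encoding: seven payload bits per byte, least-significant group first, with the high bit set on every byte except the last. A hypothetical encoder that reproduces the valid pairs:

    def encode_unsigned_varint(value: int) -> bytes:
        assert value >= 0
        out = bytearray()
        while True:
            byte = value & 0x7F
            value >>= 7
            if value:
                out.append(byte | 0x80)  # continuation bit: more bytes follow
            else:
                out.append(byte)
                return bytes(out)

    assert encode_unsigned_varint(128) == b"\x80\x01"
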
@@ -94,7 +94,7 @@ async def test_write_unsigned_varint_overflow(integer):
         await write_unsigned_varint(s, integer)
 
 
-@pytest.mark.parametrize("integer", (-1, -(2 ** 32), -(2 ** 64), -(2 ** 128)))
+@pytest.mark.parametrize("integer", (-1, -(2**32), -(2**64), -(2**128)))
 @pytest.mark.asyncio
 async def test_write_unsigned_varint_negative(integer):
     s = MockWriter()
@@ -125,7 +125,7 @@ async def test_read_write_unsigned_varint_max_bits_edge(max_bits):
     Test edge cases with different `max_bits`
     """
     for i in range(-3, 0):
-        integer = i + (2 ** max_bits)
+        integer = i + 2**max_bits
         s = MockReaderWriter()
         await write_unsigned_varint(s, integer, max_bits=max_bits)
         s.seek(0, 0)

+ 3 - 3
tests/test_p2p_servicer.py

@@ -21,7 +21,7 @@ async def server_client():
 async def test_unary_unary(server_client):
     class ExampleServicer(ServicerBase):
         async def rpc_square(self, request: test_pb2.TestRequest, _context: P2PContext) -> test_pb2.TestResponse:
-            return test_pb2.TestResponse(number=request.number ** 2)
+            return test_pb2.TestResponse(number=request.number**2)
 
     server, client = server_client
     servicer = ExampleServicer()
@@ -83,8 +83,8 @@ async def test_stream_stream(server_client):
             self, stream: AsyncIterator[test_pb2.TestRequest], _context: P2PContext
         ) -> AsyncIterator[test_pb2.TestResponse]:
             async for item in stream:
-                yield test_pb2.TestResponse(number=item.number ** 2)
-                yield test_pb2.TestResponse(number=item.number ** 3)
+                yield test_pb2.TestResponse(number=item.number**2)
+                yield test_pb2.TestResponse(number=item.number**3)
 
     server, client = server_client
     servicer = ExampleServicer()

+ 3 - 3
tests/test_util_modules.py

@@ -397,7 +397,7 @@ def test_split_parts():
     chunks2 = list(hivemind.utils.split_for_streaming(serialized_tensor_part, 10_000))
     assert len(chunks2) == int(np.ceil(tensor.numel() * tensor.element_size() / 10_000))
 
-    chunks3 = list(hivemind.utils.split_for_streaming(serialized_tensor_part, 10 ** 9))
+    chunks3 = list(hivemind.utils.split_for_streaming(serialized_tensor_part, 10**9))
     assert len(chunks3) == 1
 
     compressed_tensor_part = serialize_torch_tensor(tensor, CompressionType.FLOAT16, allow_inplace=False)
@@ -440,8 +440,8 @@ async def test_asyncio_utils():
     assert res == list(range(len(res)))
 
     num_steps = 0
-    async for elem in amap_in_executor(lambda x: x ** 2, as_aiter(*range(100)), max_prefetch=5):
-        assert elem == num_steps ** 2
+    async for elem in amap_in_executor(lambda x: x**2, as_aiter(*range(100)), max_prefetch=5):
+        assert elem == num_steps**2
         num_steps += 1
     assert num_steps == 100