Pin isort version, bump black (#456)

* Pin isort version, bump black
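
Black 22.1.0 stabilized the "power operator hugging" rule: spaces around ** are removed when both operands are simple, so nearly every hunk below is mechanical reformatting. For example (illustrative snippet, not from this repo):

    x = 2 ** 19   # formatted by black 21.6b0
    x = 2**19     # formatted by black 22.1.0

Pinning isort to 5.10.1 in both CI and requirements-dev.txt keeps the two environments in sync.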

Co-authored-by: Alexander Borzunov <hxrussia@gmail.com>
Max Ryabinin, 3 years ago
commit 762f116ffc

+ 3 - 1
.github/workflows/check-style.yml

@@ -13,7 +13,7 @@ jobs:
       - uses: psf/black@stable
         with:
           options: "--check --diff"
-          version: "21.6b0"
+          version: "22.1.0"
   isort:
     runs-on: ubuntu-latest
     steps:
@@ -22,3 +22,5 @@ jobs:
         with:
           python-version: 3.8
       - uses: isort/isort-action@master
+        with:
+          isortVersion: "5.10.1"
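
To reproduce the CI checks locally with the same pinned versions (a hedged sketch; the exact CI invocation may differ):

    pip install black==22.1.0 isort==5.10.1
    black --check --diff .
    isort --check-only .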

+ 1 - 1
examples/albert/arguments.py

@@ -148,4 +148,4 @@ class AlbertTrainingArguments(TrainingArguments):
 
     save_total_limit: int = 2
     save_steps: int = 500
-    max_steps: int = 10 ** 30  # meant as "peer should compute gradients forever"
+    max_steps: int = 10**30  # meant as "peer should compute gradients forever"

+ 1 - 1
hivemind/averaging/load_balancing.py

@@ -65,7 +65,7 @@ def optimize_parts_lp(vector_size: int, bandwidths: np.ndarray, min_size: int =
     # the constraints below are tuples (A, b) such that Ax <= b
     nonnegative_weights = -np.eye(group_size, num_variables, dtype=c.dtype), np.zeros(group_size, c.dtype)
     weights_sum_to_one = c[None, :] - 1.0, np.array([-1.0])
-    coeff_per_variable = (group_size - 2.0) / np.maximum(bandwidths, 10 ** -LOAD_BALANCING_LP_DECIMALS)
+    coeff_per_variable = (group_size - 2.0) / np.maximum(bandwidths, 10**-LOAD_BALANCING_LP_DECIMALS)
     coeff_matrix_minus_xi = np.hstack([np.diag(coeff_per_variable), -np.ones((group_size, 1), c.dtype)])
     xi_is_maximum = coeff_matrix_minus_xi[is_nonzero], -1.0 / bandwidths[is_nonzero]
     force_max_weights = np.eye(group_size, M=num_variables, dtype=c.dtype), is_nonzero.astype(c.dtype)
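
For context, the (A, b) tuples built above are the standard inequality form A x <= b of a linear program. A minimal, self-contained sketch of how such tuples feed a solver, using scipy (the toy c, A_ub, b_ub below are hypothetical, not hivemind's actual constraints):

    import numpy as np
    from scipy.optimize import linprog

    # Toy LP: minimize c @ x subject to A_ub @ x <= b_ub and x >= 0.
    c = np.array([-1.0, -2.0])                 # i.e. maximize x0 + 2*x1
    A_ub = np.array([[1.0, 1.0], [1.0, 0.0]])  # x0 + x1 <= 1, x0 <= 0.5
    b_ub = np.array([1.0, 0.5])
    result = linprog(c=c, A_ub=A_ub, b_ub=b_ub, bounds=(0, None), method="highs")
    print(result.x)  # approximately [0.0, 1.0]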

+ 1 - 1
hivemind/averaging/partition.py

@@ -13,7 +13,7 @@ from hivemind.proto import runtime_pb2
 from hivemind.utils import amap_in_executor, as_aiter, get_logger
 
 T = TypeVar("T")
-DEFAULT_PART_SIZE_BYTES = 2 ** 19
+DEFAULT_PART_SIZE_BYTES = 2**19
 logger = get_logger(__name__)
 
 

+ 2 - 2
hivemind/compression/quantization.py

@@ -48,7 +48,7 @@ class Quantization(CompressionBase, ABC):
 
     @property
     def n_bins(self):
-        return 2 ** self.n_bits
+        return 2**self.n_bits
 
 
 class Uniform8BitQuantization(Quantization):
@@ -94,7 +94,7 @@ def get_chunk_size(num_elements: int, min_chunk_size: int) -> int:
     return min_chunk_size + (leftover_elements - 1) // num_chunks + 1
 
 
-def quantile_qq_approximation(array: np.ndarray, n_quantiles: int, min_chunk_size: int = 10 ** 5) -> np.ndarray:
+def quantile_qq_approximation(array: np.ndarray, n_quantiles: int, min_chunk_size: int = 10**5) -> np.ndarray:
     """Estimate uniform quantiles of data using quantile-of-quantiles. Runs in parallel."""
     if not array.data.c_contiguous and array.data.f_contiguous:
         array = array.T
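
The docstring above describes a quantile-of-quantiles estimator. A minimal sequential sketch of the idea (hypothetical helper, not the parallel hivemind implementation):

    import numpy as np

    def approximate_quantiles(array: np.ndarray, n_quantiles: int, chunk_size: int) -> np.ndarray:
        # Split the flattened data into chunks, estimate quantiles per chunk,
        # then take quantiles of those per-chunk estimates.
        flat = array.ravel()
        n_chunks = max(1, flat.size // chunk_size)
        grid = np.linspace(0.0, 1.0, n_quantiles)
        per_chunk = [np.quantile(chunk, grid) for chunk in np.array_split(flat, n_chunks)]
        return np.quantile(np.stack(per_chunk), grid)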

+ 1 - 1
hivemind/moe/client/switch_moe.py

@@ -150,7 +150,7 @@ class RemoteSwitchMixtureOfExperts(RemoteMixtureOfExperts):
         # for each grid dimension, sum across all indices for a dimension. Optimizing this leads to uniform allocation
         balancing_loss = torch.stack(
             [
-                torch.mean(dim_softmax.mean(0) * dim_utilization) * (dim_size ** 2)
+                torch.mean(dim_softmax.mean(0) * dim_utilization) * dim_size**2
                 for dim_softmax, dim_utilization, dim_size in zip(
                     grid_softmax, self.grid_utilization, self.beam_search.grid_size
                 )

+ 1 - 1
hivemind/p2p/p2p_daemon.py

@@ -265,7 +265,7 @@ class P2P:
         return self._daemon_listen_maddr
 
     @staticmethod
-    async def send_raw_data(data: bytes, writer: asyncio.StreamWriter, *, chunk_size: int = 2 ** 16) -> None:
+    async def send_raw_data(data: bytes, writer: asyncio.StreamWriter, *, chunk_size: int = 2**16) -> None:
         writer.write(len(data).to_bytes(P2P.HEADER_LEN, P2P.BYTEORDER))
         data = memoryview(data)
         for offset in range(0, len(data), chunk_size):
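
send_raw_data length-prefixes the payload (P2P.HEADER_LEN bytes in P2P.BYTEORDER order) and writes it in chunks. A minimal sketch of the matching read side (the library's actual counterpart is P2P.receive_raw_data; this standalone version only illustrates the framing):

    import asyncio

    async def receive_framed(reader: asyncio.StreamReader, header_len: int, byteorder: str) -> bytes:
        # Read the fixed-size length prefix, then exactly that many payload bytes.
        header = await reader.readexactly(header_len)
        length = int.from_bytes(header, byteorder)
        return await reader.readexactly(length)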

+ 1 - 1
hivemind/p2p/p2p_daemon_bindings/control.py

@@ -26,7 +26,7 @@ SUPPORT_CONN_PROTOCOLS = (
 SUPPORTED_PROTOS = (protocols.protocol_with_code(proto) for proto in SUPPORT_CONN_PROTOCOLS)
 logger = get_logger(__name__)
 
-DEFAULT_MAX_MSG_SIZE = 4 * 1024 ** 2
+DEFAULT_MAX_MSG_SIZE = 4 * 1024**2
 
 
 def parse_conn_protocol(maddr: Multiaddr) -> int:

+ 1 - 1
hivemind/utils/grpc.py

@@ -175,7 +175,7 @@ class ChannelCache(TimedStorage[ChannelInfo, Tuple[Union[grpc.Channel, grpc.aio.
         raise ValueError(f"Please use {self.__class__.__name__}.get_stub to get or create stubs")
 
 
-STREAMING_CHUNK_SIZE_BYTES = 2 ** 16
+STREAMING_CHUNK_SIZE_BYTES = 2**16
 
 
 def split_for_streaming(

+ 1 - 1
hivemind/utils/limits.py

@@ -3,7 +3,7 @@ from hivemind.utils.logging import get_logger
 logger = get_logger(__name__)
 
 
-def increase_file_limit(new_soft=2 ** 15, new_hard=2 ** 15):
+def increase_file_limit(new_soft=2**15, new_hard=2**15):
     """Increase the maximum number of open files. On Linux, this allows spawning more processes/threads."""
     try:
         import resource  # local import to avoid ImportError for Windows users
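
On POSIX systems, raising the limit boils down to resource.setrlimit; a minimal sketch under that assumption (illustrative, not the exact hivemind body):

    import resource

    def raise_file_limit(new_soft: int = 2**15, new_hard: int = 2**15) -> None:
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        # Only ever raise the limits; raising the hard limit may require privileges.
        resource.setrlimit(resource.RLIMIT_NOFILE, (max(soft, new_soft), max(hard, new_hard)))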

+ 1 - 1
pyproject.toml

@@ -1,6 +1,6 @@
 [tool.black]
 line-length = 119
-required-version = "21.6b0"
+required-version = "22.1.0"
 
 [tool.isort]
 profile = "black"

+ 2 - 2
requirements-dev.txt

@@ -6,6 +6,6 @@ coverage==6.0.2  # see https://github.com/pytest-dev/pytest-cov/issues/520
 tqdm
 scikit-learn
 torchvision
-black==21.6b0
-isort
+black==22.1.0
+isort==5.10.1
 psutil

+ 9 - 9
tests/test_allreduce.py

@@ -33,7 +33,7 @@ async def test_partitioning():
 
     # note: this test does _not_ use parameterization to reuse sampled tensors
     for num_tensors in 1, 3, 5:
-        for part_size_bytes in 31337, 2 ** 20, 10 ** 10:
+        for part_size_bytes in 31337, 2**20, 10**10:
             for weights in [(1, 1), (0.333, 0.1667, 0.5003), (1.0, 0.0), [0.0, 0.4, 0.6, 0.0]]:
                 tensors = random.choices(all_tensors, k=num_tensors)
                 partition = TensorPartContainer(tensors, weights, part_size_bytes=part_size_bytes)
@@ -157,16 +157,16 @@ NODE, CLIENT, AUX = AveragingMode.NODE, AveragingMode.CLIENT, AveragingMode.AUX
 @pytest.mark.parametrize(
     "peer_modes, averaging_weights, peer_fractions, part_size_bytes",
     [
-        ((NODE, NODE, NODE, NODE), (1, 1, 1, 1), (1, 1, 1, 1), 2 ** 20),
-        ((NODE, NODE, NODE, NODE), (0.1, 0.2, 0.3, 0.4), (1, 1, 1, 1), 2 ** 20),
-        ((NODE, NODE, NODE, NODE), (1, 1, 1, 1), (1, 2, 3, 0), 2 ** 20),
-        ((NODE, NODE, NODE, CLIENT), (1, 1, 1, 1), (1, 2, 3, 0), 2 ** 20),
-        ((NODE, NODE, NODE, AUX), (1, 1, 1, 0), (1, 2, 3, 4), 2 ** 20),
-        ((NODE, NODE, NODE, NODE), (0.15, 0.0, 0.35, 0.45), (1, 1, 1, 1), 2 ** 20),
-        ((NODE, AUX, NODE, CLIENT), (0.15, 0.0, 0.35, 0.45), (150, 200, 67, 0), 2 ** 20),
+        ((NODE, NODE, NODE, NODE), (1, 1, 1, 1), (1, 1, 1, 1), 2**20),
+        ((NODE, NODE, NODE, NODE), (0.1, 0.2, 0.3, 0.4), (1, 1, 1, 1), 2**20),
+        ((NODE, NODE, NODE, NODE), (1, 1, 1, 1), (1, 2, 3, 0), 2**20),
+        ((NODE, NODE, NODE, CLIENT), (1, 1, 1, 1), (1, 2, 3, 0), 2**20),
+        ((NODE, NODE, NODE, AUX), (1, 1, 1, 0), (1, 2, 3, 4), 2**20),
+        ((NODE, NODE, NODE, NODE), (0.15, 0.0, 0.35, 0.45), (1, 1, 1, 1), 2**20),
+        ((NODE, AUX, NODE, CLIENT), (0.15, 0.0, 0.35, 0.45), (150, 200, 67, 0), 2**20),
         ((NODE, AUX, NODE, CLIENT), (0.15, 0.0, 0.35, 0.45), (150, 200, 67, 0), 256),
         ((NODE, AUX, NODE, CLIENT), (0.15, 0.0, 0.35, 0.45), (150, 200, 67, 0), 19),
-        ((AUX, AUX, AUX, AUX), (0.0, 0.0, 0.0, 0.0), (1, 2, 3, 4), 2 ** 20),
+        ((AUX, AUX, AUX, AUX), (0.0, 0.0, 0.0, 0.0), (1, 2, 3, 4), 2**20),
     ],
 )
 @pytest.mark.forked

+ 1 - 1
tests/test_allreduce_fault_tolerance.py

@@ -155,7 +155,7 @@ def test_fault_tolerance(fault0: Fault, fault1: Fault):
             min_matchmaking_time=1.0,
             next_chunk_timeout=0.5,
             allreduce_timeout=5,
-            part_size_bytes=2 ** 16,
+            part_size_bytes=2**16,
             client_mode=(i == 1),
             start=True,
             fault=fault0 if i == 0 else fault1 if i == 1 else Fault.NONE,

+ 1 - 1
tests/test_averaging.py

@@ -283,7 +283,7 @@ def test_load_balancing():
         load_balance_peers(100, (0, 0, 0))
 
     for i in range(10):
-        vector_size = np.random.randint(1, 1024 ** 3)
+        vector_size = np.random.randint(1, 1024**3)
         num_peers = np.random.randint(1, 256)
         scale = 1e-9 + np.random.rand() * 1e5
         bandwidths = np.random.rand(num_peers) * scale + 1e-6

+ 1 - 1
tests/test_compression.py

@@ -53,7 +53,7 @@ def test_serialize_tensor():
         assert torch.allclose(deserialize_torch_tensor(restored), tensor, rtol=rtol, atol=atol)
 
     tensor = torch.randn(512, 12288)
-    for chunk_size in [1024, 64 * 1024, 64 * 1024 + 1, 10 ** 9]:
+    for chunk_size in [1024, 64 * 1024, 64 * 1024 + 1, 10**9]:
         _check(tensor, CompressionType.NONE, chunk_size=chunk_size)
 
     _check(tensor, CompressionType.FLOAT16, rtol=0.0, atol=1e-2)

+ 2 - 2
tests/test_dht.py

@@ -72,7 +72,7 @@ async def dummy_dht_coro_stateful(self, node):
 
 async def dummy_dht_coro_long(self, node):
     await asyncio.sleep(0.25)
-    return self._x_dummy ** 2
+    return self._x_dummy**2
 
 
 async def dummy_dht_coro_for_cancel(self, node):
@@ -94,7 +94,7 @@ def test_run_coroutine():
     assert dht.run_coroutine(dummy_dht_coro_stateful) == 125
     assert dht.run_coroutine(dummy_dht_coro_stateful) == 126
     assert not hasattr(dht, "_x_dummy")
-    assert bg_task.result() == 126 ** 2
+    assert bg_task.result() == 126**2
 
     future = dht.run_coroutine(dummy_dht_coro_for_cancel, return_future=True)
     time.sleep(0.25)

+ 3 - 3
tests/test_p2p_daemon.py

@@ -89,7 +89,7 @@ async def test_unary_handler_edge_cases():
     p2p_replica = await P2P.replicate(p2p.daemon_listen_maddr)
 
     async def square_handler(data: test_pb2.TestRequest, context):
-        return test_pb2.TestResponse(number=data.number ** 2)
+        return test_pb2.TestResponse(number=data.number**2)
 
     await p2p.add_protobuf_handler("square", square_handler, test_pb2.TestRequest)
 
@@ -202,7 +202,7 @@ async def handle_square_stream(_, reader: asyncio.StreamReader, writer: asyncio.
             except asyncio.IncompleteReadError:
                 break
 
-            result = x ** 2
+            result = x**2
 
             await P2P.send_raw_data(MSGPackSerializer.dumps(result), writer)
 
@@ -215,7 +215,7 @@ async def validate_square_stream(reader: asyncio.StreamReader, writer: asyncio.S
             await P2P.send_raw_data(MSGPackSerializer.dumps(x), writer)
             result = MSGPackSerializer.loads(await P2P.receive_raw_data(reader))
 
-            assert result == x ** 2
+            assert result == x**2
 
 
 @pytest.mark.asyncio

+ 7 - 7
tests/test_p2p_daemon_bindings.py

@@ -38,15 +38,15 @@ PAIRS_INT_SERIALIZED_VALID = (
     (0, b"\x00"),
     (1, b"\x01"),
     (128, b"\x80\x01"),
-    (2 ** 32, b"\x80\x80\x80\x80\x10"),
-    (2 ** 64 - 1, b"\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01"),
+    (2**32, b"\x80\x80\x80\x80\x10"),
+    (2**64 - 1, b"\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01"),
 )
 
 PAIRS_INT_SERIALIZED_OVERFLOW = (
-    (2 ** 64, b"\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02"),
-    (2 ** 64 + 1, b"\x81\x80\x80\x80\x80\x80\x80\x80\x80\x02"),
+    (2**64, b"\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02"),
+    (2**64 + 1, b"\x81\x80\x80\x80\x80\x80\x80\x80\x80\x02"),
     (
-        2 ** 128,
+        2**128,
         b"\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x04",
     ),
 )
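
The serialized byte strings above follow the unsigned varint (LEB128-style) encoding: seven payload bits per byte, least-significant group first, with the high bit set on every byte except the last. A hypothetical encoder that reproduces the valid pairs:

    def encode_unsigned_varint(value: int) -> bytes:
        assert value >= 0
        out = bytearray()
        while True:
            byte = value & 0x7F
            value >>= 7
            if value:
                out.append(byte | 0x80)  # continuation bit: more bytes follow
            else:
                out.append(byte)
                return bytes(out)

    assert encode_unsigned_varint(128) == b"\x80\x01"
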
@@ -94,7 +94,7 @@ async def test_write_unsigned_varint_overflow(integer):
         await write_unsigned_varint(s, integer)
 
 
-@pytest.mark.parametrize("integer", (-1, -(2 ** 32), -(2 ** 64), -(2 ** 128)))
+@pytest.mark.parametrize("integer", (-1, -(2**32), -(2**64), -(2**128)))
 @pytest.mark.asyncio
 async def test_write_unsigned_varint_negative(integer):
     s = MockWriter()
@@ -125,7 +125,7 @@ async def test_read_write_unsigned_varint_max_bits_edge(max_bits):
     Test edge cases with different `max_bits`
     """
     for i in range(-3, 0):
-        integer = i + (2 ** max_bits)
+        integer = i + 2**max_bits
         s = MockReaderWriter()
         await write_unsigned_varint(s, integer, max_bits=max_bits)
         s.seek(0, 0)

+ 3 - 3
tests/test_p2p_servicer.py

@@ -21,7 +21,7 @@ async def server_client():
 async def test_unary_unary(server_client):
     class ExampleServicer(ServicerBase):
         async def rpc_square(self, request: test_pb2.TestRequest, _context: P2PContext) -> test_pb2.TestResponse:
-            return test_pb2.TestResponse(number=request.number ** 2)
+            return test_pb2.TestResponse(number=request.number**2)
 
     server, client = server_client
     servicer = ExampleServicer()
@@ -83,8 +83,8 @@ async def test_stream_stream(server_client):
             self, stream: AsyncIterator[test_pb2.TestRequest], _context: P2PContext
         ) -> AsyncIterator[test_pb2.TestResponse]:
             async for item in stream:
-                yield test_pb2.TestResponse(number=item.number ** 2)
-                yield test_pb2.TestResponse(number=item.number ** 3)
+                yield test_pb2.TestResponse(number=item.number**2)
+                yield test_pb2.TestResponse(number=item.number**3)
 
     server, client = server_client
     servicer = ExampleServicer()

+ 3 - 3
tests/test_util_modules.py

@@ -397,7 +397,7 @@ def test_split_parts():
     chunks2 = list(hivemind.utils.split_for_streaming(serialized_tensor_part, 10_000))
     assert len(chunks2) == int(np.ceil(tensor.numel() * tensor.element_size() / 10_000))
 
-    chunks3 = list(hivemind.utils.split_for_streaming(serialized_tensor_part, 10 ** 9))
+    chunks3 = list(hivemind.utils.split_for_streaming(serialized_tensor_part, 10**9))
     assert len(chunks3) == 1
 
     compressed_tensor_part = serialize_torch_tensor(tensor, CompressionType.FLOAT16, allow_inplace=False)
@@ -440,8 +440,8 @@ async def test_asyncio_utils():
     assert res == list(range(len(res)))
 
     num_steps = 0
-    async for elem in amap_in_executor(lambda x: x ** 2, as_aiter(*range(100)), max_prefetch=5):
-        assert elem == num_steps ** 2
+    async for elem in amap_in_executor(lambda x: x**2, as_aiter(*range(100)), max_prefetch=5):
+        assert elem == num_steps**2
         num_steps += 1
     assert num_steps == 100