backport PerformanceEMA from server_side_averaging (#397)

This PR adds thread-safe performance measurement to PerformanceEMA, previously introduced in #365.
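
For reference, a minimal usage sketch of the backported API (PerformanceEMA, update, update_threadsafe, and samples_per_second all come from the diff below; the sleep stands in for a hypothetical workload):

    import time
    from hivemind.optim.performance_ema import PerformanceEMA

    ema = PerformanceEMA(alpha=0.1)

    # single-threaded path: the interval is inferred from the time since the last call
    ema.update(task_size=32)

    # multi-threaded path: each thread wraps its workload in the context manager;
    # the elapsed interval is registered under an internal lock on exit
    with ema.update_threadsafe(task_size=32):
        time.sleep(0.1)  # stand-in for a real training step

    print(ema.samples_per_second)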

Co-authored-by: Max Ryabinin <mryabinin0@gmail.com>
justheuristic 3 years ago
commit 025e095d55

+ 1 - 1
hivemind/optim/collaborative.py

@@ -263,7 +263,7 @@ class CollaborativeOptimizer(DecentralizedOptimizerBase):
         with self.lock_local_progress:
             self.local_samples_accumulated += batch_size
             self.local_updates_accumulated += 1
-            self.performance_ema.update(num_processed=batch_size)
+            self.performance_ema.update(task_size=batch_size)
             self.should_report_progress.set()
 
         if not self.collaboration_state.ready_for_step:

+ 38 - 13
hivemind/optim/performance_ema.py

@@ -1,6 +1,7 @@
+import time
 from contextlib import contextmanager
-
-from hivemind.utils import get_dht_time
+from threading import Lock
+from typing import Optional
 
 
 class PerformanceEMA:
@@ -9,22 +10,28 @@ class PerformanceEMA:
     :param alpha: Smoothing factor in range [0, 1], [default: 0.1].
     """
 
-    def __init__(self, alpha: float = 0.1, eps: float = 1e-20):
+    def __init__(self, alpha: float = 0.1, eps: float = 1e-20, paused: bool = False):
         self.alpha, self.eps, self.num_updates = alpha, eps, 0
         self.ema_seconds_per_sample, self.samples_per_second = 0, eps
-        self.timestamp = get_dht_time()
-        self.paused = False
+        self.timestamp = time.perf_counter()
+        self.paused = paused
+        self.lock = Lock()
 
-    def update(self, num_processed: int) -> float:
+    def update(self, task_size: float, interval: Optional[float] = None) -> float:
         """
-        :param num_processed: how many items were processed since last call
+        :param task_size: how many items were processed since last call
+        :param interval: optionally provide the time delta it took to process this task
         :returns: current estimate of performance (samples per second), but at most
         """
-        assert not self.paused, "PerformanceEMA is currently paused"
-        assert num_processed > 0, f"Can't register processing {num_processed} samples"
-        self.timestamp, old_timestamp = get_dht_time(), self.timestamp
-        seconds_per_sample = max(0, self.timestamp - old_timestamp) / num_processed
-        self.ema_seconds_per_sample = self.alpha * seconds_per_sample + (1 - self.alpha) * self.ema_seconds_per_sample
+        assert task_size > 0, f"Can't register processing {task_size} samples"
+        if not self.paused:
+            self.timestamp, old_timestamp = time.perf_counter(), self.timestamp
+            interval = interval if interval is not None else self.timestamp - old_timestamp
+        else:
+            assert interval is not None, "If PerformanceEMA is paused, please specify the time interval"
+        self.ema_seconds_per_sample = (
+            self.alpha * interval / task_size + (1 - self.alpha) * self.ema_seconds_per_sample
+        )
         self.num_updates += 1
         adjusted_seconds_per_sample = self.ema_seconds_per_sample / (1 - (1 - self.alpha) ** self.num_updates)
         self.samples_per_second = 1 / max(adjusted_seconds_per_sample, self.eps)
@@ -37,5 +44,23 @@ class PerformanceEMA:
         try:
             yield
         finally:
-            self.timestamp = get_dht_time()
+            self.timestamp = time.perf_counter()
             self.paused = was_paused
+
+    def __repr__(self):
+        return f"{self.__class__.__name__}(ema={self.samples_per_second:.5f}, num_updates={self.num_updates})"
+
+    @contextmanager
+    def update_threadsafe(self, task_size: float):
+        """
+        Update the EMA throughput of the code that runs inside the context manager; supports multiple concurrent threads.
+
+        :param task_size: how many items were processed since last call
+        """
+        start_timestamp = time.perf_counter()
+        yield
+        with self.lock:
+            self.update(task_size, interval=time.perf_counter() - max(start_timestamp, self.timestamp))
+            # note: we define interval as such to support two distinct scenarios:
+            # (1) if this is the first call to update_threadsafe after a pause, count time from entering this context
+            # (2) if there are concurrent calls to update_threadsafe, respect the timestamp updates from these calls
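
One detail worth noting in update above: ema_seconds_per_sample starts at zero, so the raw EMA is biased toward zero for the first few calls, and the 1 - (1 - alpha) ** num_updates divisor removes that bias (the same zero-debiasing used in Adam). A quick numeric sketch with an assumed constant measurement of 2.0 seconds per sample:

    # zero-debiasing sketch: an EMA initialized at 0 underestimates until it warms up;
    # dividing by 1 - (1 - alpha) ** num_updates recovers the unbiased value
    alpha = 0.1
    ema = 0.0
    for _ in range(3):                      # three identical measurements
        ema = alpha * 2.0 + (1 - alpha) * ema
    print(ema)                              # 0.542 -- biased toward the zero init
    print(ema / (1 - (1 - alpha) ** 3))     # 2.0   -- debiased estimate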

+ 28 - 0
tests/test_util_modules.py

@@ -3,6 +3,7 @@ import concurrent.futures
 import multiprocessing as mp
 import random
 import time
+from concurrent.futures import ThreadPoolExecutor
 
 import numpy as np
 import pytest
@@ -10,6 +11,7 @@ import torch
 
 import hivemind
 from hivemind.compression import deserialize_torch_tensor, serialize_torch_tensor
+from hivemind.optim.performance_ema import PerformanceEMA
 from hivemind.proto.dht_pb2_grpc import DHTStub
 from hivemind.proto.runtime_pb2 import CompressionType
 from hivemind.proto.runtime_pb2_grpc import ConnectionHandlerStub
@@ -549,3 +551,29 @@ def test_batch_tensor_descriptor_msgpack():
         and tensor_descr.pin_memory == tensor_descr.pin_memory
         and tensor_descr.compression == tensor_descr.compression
     )
+
+
+@pytest.mark.parametrize("max_workers", [1, 2, 10])
+def test_performance_ema_threadsafe(
+    max_workers: int,
+    interval: float = 0.01,
+    num_updates: int = 100,
+    alpha: float = 0.05,
+    bias_power: float = 0.7,
+    tolerance: float = 0.05,
+):
+    def run_task(ema):
+        task_size = random.randint(1, 4)
+        with ema.update_threadsafe(task_size):
+            time.sleep(task_size * interval * (0.9 + 0.2 * random.random()))
+            return task_size
+
+    with ThreadPoolExecutor(max_workers) as pool:
+        ema = PerformanceEMA(alpha=alpha)
+        start_time = time.perf_counter()
+        futures = [pool.submit(run_task, ema) for i in range(num_updates)]
+        total_size = sum(future.result() for future in futures)
+        end_time = time.perf_counter()
+        target = total_size / (end_time - start_time)
+        assert ema.samples_per_second >= (1 - tolerance) * target * max_workers ** (bias_power - 1)
+        assert ema.samples_per_second <= (1 + tolerance) * target
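
A note on the final assertions: each update_threadsafe interval overlaps with the intervals measured by other threads, so the EMA tends to undercount aggregate throughput as max_workers grows; the max_workers ** (bias_power - 1) factor appears to be an empirical allowance for this. A quick sketch of the resulting acceptance window, using the test's default parameters at max_workers=10:

    # acceptance window for ema.samples_per_second relative to the measured target
    max_workers, bias_power, tolerance = 10, 0.7, 0.05
    lower = (1 - tolerance) * max_workers ** (bias_power - 1)
    upper = 1 + tolerance
    print(f"{lower:.3f}x .. {upper:.2f}x")  # ~0.476x .. 1.05x of target throughput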