View source

Fix "too many open files" issue (#444)

Using memory sharing strategy not relying on file descriptors
Michael Diskin 3 years ago
parent
commit
aa6d65a818

+ 2 - 0
hivemind/utils/mpfuture.py

@@ -18,6 +18,8 @@ from hivemind.utils.logging import get_logger
 
 logger = get_logger(__name__)
 
+torch.multiprocessing.set_sharing_strategy(os.environ.get("HIVEMIND_MEMORY_SHARING_STRATEGY", "file_system"))
+
 # flavour types
 ResultType = TypeVar("ResultType")
 PID, UID, State, PipeEnd = int, int, str, mp.connection.Connection

+ 3 - 0
tests/test_util_modules.py

@@ -313,6 +313,8 @@ def test_many_futures():
     p.start()
 
     some_fork_futures = receiver.recv()
+
+    time.sleep(0.1)  # giving enough time for the futures to be destroyed
     assert len(hivemind.MPFuture._active_futures) == 700
 
     for future in some_fork_futures:
@@ -323,6 +325,7 @@ def test_many_futures():
     evt.set()
     for future in main_futures:
         future.cancel()
+    time.sleep(0.1)  # giving enough time for the futures to be destroyed
     assert len(hivemind.MPFuture._active_futures) == 0
     p.join()