
Require bitsandbytes == 0.38.0.post2, hivemind == 1.1.7 (#302)

In particular, this PR fixes 8-bit support on NVIDIA 16-series GPUs (such as the GTX 1660) by including https://github.com/TimDettmers/bitsandbytes/pull/292. This support was requested multiple times on Discord.
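
A quick way to confirm that an environment matches the new pins is a small runtime check. The snippet below is a minimal sketch, assuming only the standard library's importlib.metadata (Python 3.8+); the package names are the PyPI names from setup.cfg.

```python
from importlib.metadata import version

# Sanity check that installed versions match the pins introduced by this PR.
expected = {"bitsandbytes": "0.38.0.post2", "hivemind": "1.1.7"}

for name, pinned in expected.items():
    installed = version(name)
    status = "OK" if installed == pinned else f"expected {pinned}"
    print(f"{name}: {installed} ({status})")
```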
Alexander Borzunov, 2 years ago
Commit 35662b4a16
3 files changed: 4 insertions, 4 deletions
  1. setup.cfg (+2 −2)
  2. src/petals/cli/run_server.py (+1 −1)
  3. src/petals/client/remote_model.py (+1 −1)

+ 2 - 2
setup.cfg

@@ -32,12 +32,12 @@ packages = find:
 python_requires = >=3.7
 install_requires =
     torch>=1.12
-    bitsandbytes==0.37.1
+    bitsandbytes==0.38.0.post2
     accelerate==0.15.0
     huggingface-hub==0.11.1
     transformers>=4.25.1,<5.0.0
     speedtest-cli==2.1.3
-    hivemind @ git+https://github.com/learning-at-home/hivemind.git
+    hivemind==1.1.7
     tensor_parallel==1.0.23
     humanfriendly
     async-timeout>=4.0.2

+ 1 - 1
src/petals/cli/run_server.py

@@ -47,7 +47,7 @@ def main():
     parser.add_argument('--announce_maddrs', nargs='+', required=False,
                         help='Visible multiaddrs the host announces for external connections from other peers')
 
-    parser.add_argument('--daemon_startup_timeout', type=float, default=120,
+    parser.add_argument('--daemon_startup_timeout', type=float, default=60,
                         help='Timeout for the libp2p daemon connecting to initial peers')
 
     parser.add_argument('--compression', type=str, default='NONE', required=False, help='Tensor compression communication')
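
The new default of 60 seconds can still be overridden on the command line. The sketch below mirrors just this one argument with argparse to show how an explicit flag restores the previous 120-second behaviour; the real parser in src/petals/cli/run_server.py defines many more options.

```python
import argparse

# Mirror only the changed argument from run_server.py.
parser = argparse.ArgumentParser()
parser.add_argument('--daemon_startup_timeout', type=float, default=60,
                    help='Timeout for the libp2p daemon connecting to initial peers')

# Passing the flag explicitly restores the old 120-second timeout,
# e.g. on networks that are slow to bootstrap.
args = parser.parse_args(['--daemon_startup_timeout', '120'])
print(args.daemon_startup_timeout)  # 120.0
```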

+ 1 - 1
src/petals/client/remote_model.py

@@ -32,7 +32,7 @@ class DistributedBloomConfig(BloomConfig):
 
     initial_peers: List[str] = PUBLIC_INITIAL_PEERS  # a list of initial peers for hivemind DHT
     dht_prefix: str  # a prefix for all dht keys that correspond to this model (usually equal to model name)
-    daemon_startup_timeout: int = 120  # timeout for the libp2p daemon connecting to initial peers
+    daemon_startup_timeout: int = 60  # timeout for the libp2p daemon connecting to initial peers
     dht: Optional[hivemind.DHT] = None  # a running DHT instance, e.g. when using the same DHT for multiple models
     request_timeout: int = 3 * 60  # a number of seconds for waiting result from each node
     max_retries: Optional[int] = None  # max number retries before the client raises an exception (default: inf)
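
Clients on slow or congested networks can raise the timeout on the config object instead of relying on the new default. The sketch below is illustrative only: the model name is a placeholder and the from_pretrained call is assumed from the Hugging Face config API that DistributedBloomConfig inherits, not taken from this PR.

```python
from petals.client.remote_model import DistributedBloomConfig

# Hypothetical usage: "bigscience/bloom-petals" is a placeholder model name.
config = DistributedBloomConfig.from_pretrained("bigscience/bloom-petals")

# Restore the previous, more lenient timeout if the libp2p daemon
# needs longer to connect to the initial peers.
config.daemon_startup_timeout = 120
```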