Răsfoiți Sursa

Remove cpufeature from setup.cfg

Aleksandr Borzunov 2 ani în urmă
părinte
comite
84bcc8090c
3 fișiere modificate, cu 1 adăugare și 13 ștergeri
  1. 0 1
      setup.cfg
  2. 0 10
      src/petals/bloom/modeling_utils.py
  3. 1 2
      src/petals/client/remote_model.py

+ 0 - 1
setup.cfg

@@ -41,7 +41,6 @@ install_requires =
     tensor_parallel==1.0.23
     humanfriendly
     async-timeout>=4.0.2
-    cpufeature>=0.2.0
     packaging>=20.9
 
 [options.extras_require]

+ 0 - 10
src/petals/bloom/modeling_utils.py

@@ -29,16 +29,6 @@ class LMHead(nn.Module):
         self.word_embeddings = word_embeddings
 
         self.use_chunked_forward = config.use_chunked_forward
-        if self.use_chunked_forward == "auto":
-            if platform.machine() == "x86_64":
-                # Import of cpufeature may crash on non-x86_64 machines
-                from cpufeature import CPUFeature
-
-                # If the CPU supports AVX512, plain bfloat16 is ~10x faster than chunked_forward().
-                # Otherwise, it's ~8x slower.
-                self.use_chunked_forward = not (CPUFeature["AVX512f"] and CPUFeature["OS_AVX512"])
-            else:
-                self.use_chunked_forward = True
         self.chunked_forward_step = config.chunked_forward_step
         self._bf16_warning_shown = False
 

+ 1 - 2
src/petals/client/remote_model.py

@@ -44,8 +44,7 @@ class DistributedBloomConfig(BloomConfig):
     tuning_mode: Optional[str] = None  # One of the finetune options: [None, 'shallow_ptune', 'deep_ptune', 'adapters']
 
     # These settings matter for running the client with dtype bfloat16 on CPU.
-    # If the CPU doesn't support AVX512, chunked_forward() significantly speeds up computations.
-    use_chunked_forward: Union[str, bool] = "auto"
+    use_chunked_forward: bool = True
     chunked_forward_step: int = 16384