Răsfoiți Sursa

Try 5% fails

Aleksandr Borzunov 1 an în urmă
părinte
comite
63282afb4e
2 fișiere modificate, cu 7 adăugiri și 1 ștergere
  1. 1 1
      src/petals/client/config.py
  2. 6 0
      src/petals/client/inference_session.py

+ 1 - 1
src/petals/client/config.py

@@ -27,7 +27,7 @@ class ClientConfig:
 
     max_retries: Optional[int] = DEFAULT_MAX_RETRIES  # max number of retries before an exception (default: inf)
     min_backoff: float = 1  # after a repeated failure, sleep for this many seconds times 2 ** (num_failures - 1)
-    max_backoff: float = 60  # limit maximal sleep time between retries to this value
+    max_backoff: float = 5  # limit maximal sleep time between retries to this value
     ban_timeout: float = 15  # when a remote peer fails to respond, prevent routing to that peer for this many seconds
     active_adapter: Optional[str] = None  # name of active LoRA adapter (usually, Hugging Face repo)
 

+ 6 - 0
src/petals/client/inference_session.py

@@ -144,6 +144,12 @@ class _ServerInferenceSession:
                 )
             )
         )
+
+        import random
+
+        if random.random() < 0.05:
+            raise Exception("fail")
+
         outputs = list(map(deserialize_torch_tensor, outputs_serialized.tensors))
         assert (
             outputs[0].shape == inputs.shape