2 ani în urmă · 63282afb4e
--- a/src/petals/client/config.py
+++ b/src/petals/client/config.py
@@ -27,7 +27,7 @@ class ClientConfig:
 
				 
			
 
				     max_retries: Optional[int] = DEFAULT_MAX_RETRIES  # max number of retries before an exception (default: inf)
			
 
				     min_backoff: float = 1  # after a repeated failure, sleep for this many seconds times 2 ** (num_failures - 1)
			
 
				-    max_backoff: float = 60  # limit maximal sleep time between retries to this value
			
 
				+    max_backoff: float = 5  # limit maximal sleep time between retries to this value
			
 
				     ban_timeout: float = 15  # when a remote peer fails to respond, prevent routing to that peer for this many seconds
			
 
				     active_adapter: Optional[str] = None  # name of active LoRA adapter (usually, Hugging Face repo)
			
 
				 
			
--- a/src/petals/client/inference_session.py
+++ b/src/petals/client/inference_session.py
@@ -144,6 +144,12 @@ class _ServerInferenceSession:
 
				                 )
			
 
				             )
			
 
				         )
			
 
				+
			
 
				+        import random
			
 
				+
			
 
				+        if random.random() < 0.05:
			
 
				+            raise Exception("fail")
			
 
				+
			
 
				         outputs = list(map(deserialize_torch_tensor, outputs_serialized.tensors))
			
 
				         assert (
			
 
				             outputs[0].shape == inputs.shape