@@ -155,9 +155,11 @@ class TrainingStateAverager(DecentralizedAverager):
 
         params_with_grad = sum(p.numel() for p in parameters if p.requires_grad)
        params_no_grad = sum(p.numel() for p in parameters if not p.requires_grad)
        if params_no_grad >= params_with_grad:
-            logging.warning("The majority of parameters have requires_grad=False, but they are still synchronized"
-                            " with peers. If these parameters are frozen (not updated), please do not feed them into "
-                            "the optimizer at all in order to avoid communication overhead. Proceeding anyway.")
+            logging.warning(
+                "The majority of parameters have requires_grad=False, but they are still synchronized"
+                " with peers. If these parameters are frozen (not updated), please do not feed them into "
+                "the optimizer at all in order to avoid communication overhead. Proceeding anyway."
+            )
 
         return param_groups, parameters, parameter_names