justheuristic 5 éve
szülő
commit
b17f0259c6
2 módosított fájl, 1 hozzáadás és 3 törlés
  1. + 0 - 2
      tesseract/client/moe.py
  2. + 1 - 1
      tests/test_moe.py

+ 0 - 2
tesseract/client/moe.py

@@ -203,8 +203,6 @@ class _RemoteMoECall(torch.autograd.Function):
         flat_average_outputs = tuple(dot_along_first_axis(alive_expert_probs, stacked_out)
                                      for stacked_out in stacked_alive_outputs)
 
-        print(f'ours {[flat_average_outputs[0].min(), flat_average_outputs[0].max(), flat_average_outputs[0].norm()]}')
-
         # 3. save individual outputs for backward pass
         ctx.save_for_backward(expert_logits, alive_ix, alive_expert_probs, *stacked_alive_outputs)
         ctx._alive_contexts = alive_contexts

+ 1 - 1
tests/test_moe.py

@@ -12,7 +12,7 @@ def test_remote_module_call():
     timeout_total = None
     backward_timeout = None
     rtol = 1e-3
-    atol = 1e-5
+    atol = 1e-6
 
     xx = torch.randn(32, 1024, requires_grad=True)
     logits = torch.randn(3, requires_grad=True)