
grad logits wrt actual logits

justheuristic 5 years ago
parent
commit
b20f3ee985
1 changed file with 1 addition and 1 deletion

tests/test_moe.py (+1, -1)

@@ -20,7 +20,7 @@ def test_remote_module_call():
         [(None,), {}], xx)
 
     grad_xx_moe, = torch.autograd.grad(torch.sum(random_proj * moe_output), xx, retain_graph=True)
-    grad_logits_moe, = torch.autograd.grad(torch.sum(random_proj * moe_output), xx, retain_graph=True)
+    grad_logits_moe, = torch.autograd.grad(torch.sum(random_proj * moe_output), logits, retain_graph=True)
 
     # reference outputs: call all experts manually and average their outputs with softmax probabilities
     probs = torch.softmax(logits, 0)
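
The one-line change swaps the second argument of torch.autograd.grad from xx to logits, so the test actually checks the gradient with respect to the gating logits instead of computing the input gradient twice. Below is a minimal, self-contained sketch of that distinction; the toy tensors and "expert" weight matrices are stand-ins and not the repository's RemoteMixtureOfExperts setup.

    import torch

    xx = torch.randn(4, 8, requires_grad=True)       # input batch
    logits = torch.randn(3, requires_grad=True)       # gating logits, one per expert
    experts = [torch.randn(8, 8) for _ in range(3)]   # toy "experts": fixed linear maps

    # mixture output: softmax-weighted average of expert outputs
    probs = torch.softmax(logits, 0)
    moe_output = sum(p * (xx @ w) for p, w in zip(probs, experts))

    random_proj = torch.randn_like(moe_output)
    scalar = torch.sum(random_proj * moe_output)

    # gradient w.r.t. the *input* -- what the old line computed (twice)
    grad_xx, = torch.autograd.grad(scalar, xx, retain_graph=True)

    # gradient w.r.t. the *gating logits* -- what the fixed line computes
    grad_logits, = torch.autograd.grad(scalar, logits, retain_graph=True)

    print(grad_xx.shape)      # torch.Size([4, 8]) -- same shape as xx
    print(grad_logits.shape)  # torch.Size([3])    -- same shape as logits

Passing a different tensor as the second argument changes which leaf the returned gradient corresponds to, which is why the original line made grad_logits_moe a duplicate of grad_xx_moe rather than a check on the gating path.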