justheuristic 5 年之前
父节点
当前提交
f1589d5653
共有 1 个文件被更改,包括 1 次插入和 1 次删除
  1. 1 1
      tesseract/client/moe.py

+ 1 - 1
tesseract/client/moe.py

@@ -84,7 +84,7 @@ class RemoteMixtureOfExperts(nn.Module):
             for i in range(len(input))
         )
 
-        averaged_outputs_flat = map_with_parallel_backward(_RemoteMoECall, *batch_jobs_args)
+        averaged_outputs_flat = map(torch.cat, map_with_parallel_backward(_RemoteMoECall, *batch_jobs_args)
         return nested_pack(averaged_outputs_flat, self.outputs_schema)
 
     def beam_search(self, grid_scores: List[torch.Tensor], k_best: int, **kwargs) -> List[List[RemoteExpert]]: