瀏覽代碼

Fix dummy cache allocation (#574)

* Fix dummy cache allocation

* Try mps device selecting

* Rechain reloc
Artem Chumachenko 1 年之前
父節點
當前提交
30f522d1a0
共有 1 個文件被更改,包括 1 次插入和 1 次刪除
  1. 1 1
      src/petals/server/throughput.py

+ 1 - 1
src/petals/server/throughput.py

@@ -206,7 +206,7 @@ def measure_compute_rps(
         block = block.to(dtype)
         block = convert_block(block, 0, config, tensor_parallel_devices, device, quant_type=quant_type, freeze=True)
 
-        cache = (DUMMY_KEY_PAST.to(dtype), DUMMY_KEY_PAST.to(dtype))
+        cache = (DUMMY_KEY_PAST.to(dtype=dtype, device=device), DUMMY_KEY_PAST.to(dtype=dtype, device=device))
         elapsed = 0
         dummy_input = torch.randn(1, n_tokens, config.hidden_size, device=device, dtype=dtype)