|
@@ -436,30 +436,31 @@ class ModuleContainer(threading.Thread):
|
|
|
|
|
|
blocks = {}
|
|
|
try:
|
|
|
+ module_uid, block_index = module_uids[0], block_indices[0]
|
|
|
+ block = load_pretrained_block(
|
|
|
+ converted_model_name_or_path,
|
|
|
+ block_index,
|
|
|
+ config=block_config,
|
|
|
+ torch_dtype=torch_dtype,
|
|
|
+ revision=revision,
|
|
|
+ token=token,
|
|
|
+ cache_dir=cache_dir,
|
|
|
+ max_disk_space=max_disk_space,
|
|
|
+ )
|
|
|
+ block = convert_block(
|
|
|
+ block,
|
|
|
+ block_index,
|
|
|
+ block_config,
|
|
|
+ tensor_parallel_devices,
|
|
|
+ device,
|
|
|
+ quant_type,
|
|
|
+ adapters=server_info.adapters,
|
|
|
+ freeze=True,
|
|
|
+ token=token,
|
|
|
+ cache_dir=cache_dir,
|
|
|
+ max_disk_space=max_disk_space,
|
|
|
+ )
|
|
|
for module_uid, block_index in zip(module_uids, block_indices):
|
|
|
- block = load_pretrained_block(
|
|
|
- converted_model_name_or_path,
|
|
|
- block_index,
|
|
|
- config=block_config,
|
|
|
- torch_dtype=torch_dtype,
|
|
|
- revision=revision,
|
|
|
- token=token,
|
|
|
- cache_dir=cache_dir,
|
|
|
- max_disk_space=max_disk_space,
|
|
|
- )
|
|
|
- block = convert_block(
|
|
|
- block,
|
|
|
- block_index,
|
|
|
- block_config,
|
|
|
- tensor_parallel_devices,
|
|
|
- device,
|
|
|
- quant_type,
|
|
|
- adapters=server_info.adapters,
|
|
|
- freeze=True,
|
|
|
- token=token,
|
|
|
- cache_dir=cache_dir,
|
|
|
- max_disk_space=max_disk_space,
|
|
|
- )
|
|
|
blocks[module_uid] = TransformerBackend(
|
|
|
module_uid,
|
|
|
block,
|