Skip to content
7 changes: 5 additions & 2 deletions vllm/model_executor/model_loader/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -995,8 +995,11 @@ def _get_bnb_target_modules(self, model: nn.Module) -> None:
for sub_name in sub_modules:
self.target_modules.append(
name.replace(last_name, sub_name))
else:
self.target_modules.append(name)
# Add the original module name even if the module has a stacked map,
# in case the model has a mixture of disk-merged and disk-split
# weights with the same last name.
self.target_modules.append(name)

assert (self.target_modules
), "vllm currently does not support BNB quantization for"
f" {type(model).__name__}"
Expand Down
Loading