Skip to content

Commit 842df18

Browse files
kylesayrs authored and shreyankg committed
[Quant] Molmo SupportsQuant (vllm-project#13336)
1 parent 4d2d0e0 commit 842df18

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

vllm/model_executor/models/molmo.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,8 @@
5252
from vllm.sequence import IntermediateTensors
5353
from vllm.utils import JSONTree, json_map_leaves
5454

55-
from .interfaces import SupportsLoRA, SupportsMultiModal, SupportsPP
55+
from .interfaces import (SupportsLoRA, SupportsMultiModal, SupportsPP,
56+
SupportsQuant)
5657
from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn,
5758
is_pp_missing_parameter,
5859
make_empty_intermediate_tensors_factory, make_layers,
@@ -633,7 +634,8 @@ def forward(
633634
return hidden_states, residual
634635

635636

636-
class MolmoVisionBackbone(nn.Module):
637+
class MolmoVisionBackbone(nn.Module, SupportsQuant):
638+
packed_modules_mapping = {"merged_linear": ["gate_proj", "up_proj"]}
637639

638640
def __init__(
639641
self,
@@ -794,7 +796,7 @@ def load_weights(self, weights: Iterable[Tuple[str,
794796

795797

796798
@support_torch_compile
797-
class MolmoModel(nn.Module):
799+
class MolmoModel(nn.Module, SupportsQuant):
798800

799801
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
800802
super().__init__()
@@ -1402,8 +1404,8 @@ def get_replacement_molmo(item_idx: int):
14021404
@MULTIMODAL_REGISTRY.register_processor(MolmoMultiModalProcessor,
14031405
info=MolmoProcessingInfo,
14041406
dummy_inputs=MolmoDummyInputsBuilder)
1405-
class MolmoForCausalLM(nn.Module, SupportsMultiModal, SupportsPP,
1406-
SupportsLoRA):
1407+
class MolmoForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA,
1408+
SupportsQuant):
14071409
hf_to_vllm_mapper = WeightsMapper(
14081410
orig_to_new_substr={
14091411
# vision backbone mapping

0 commit comments

Comments
 (0)