@@ -52,7 +52,8 @@
 from vllm.sequence import IntermediateTensors
 from vllm.utils import JSONTree, json_map_leaves
 
-from .interfaces import SupportsLoRA, SupportsMultiModal, SupportsPP
+from .interfaces import (SupportsLoRA, SupportsMultiModal, SupportsPP,
+                         SupportsQuant)
 from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn,
                     is_pp_missing_parameter,
                     make_empty_intermediate_tensors_factory, make_layers,
@@ -633,7 +634,8 @@ def forward(
         return hidden_states, residual
 
 
-class MolmoVisionBackbone(nn.Module):
+class MolmoVisionBackbone(nn.Module, SupportsQuant):
+    packed_modules_mapping = {"merged_linear": ["gate_proj", "up_proj"]}
 
     def __init__(
         self,
@@ -794,7 +796,7 @@ def load_weights(self, weights: Iterable[Tuple[str,
 
 
 @support_torch_compile
-class MolmoModel(nn.Module):
+class MolmoModel(nn.Module, SupportsQuant):
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
@@ -1402,8 +1404,8 @@ def get_replacement_molmo(item_idx: int):
 @MULTIMODAL_REGISTRY.register_processor(MolmoMultiModalProcessor,
                                         info=MolmoProcessingInfo,
                                         dummy_inputs=MolmoDummyInputsBuilder)
-class MolmoForCausalLM(nn.Module, SupportsMultiModal, SupportsPP,
-                       SupportsLoRA):
+class MolmoForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA,
+                       SupportsQuant):
     hf_to_vllm_mapper = WeightsMapper(
         orig_to_new_substr={
             # vision backbone mapping
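For context on the `packed_modules_mapping` added to `MolmoVisionBackbone`: vLLM's quantization support uses such mappings to resolve checkpoint entries recorded for the original per-shard projections (`gate_proj`, `up_proj`) onto the fused `merged_linear` module. A minimal sketch of the idea, with a hypothetical helper and an illustrative weight path (not vLLM's actual loader code):

```python
# Minimal sketch (hypothetical helper, not vLLM's actual loader code).
# A packed-modules mapping records that a fused module was produced by
# packing several checkpoint modules, so per-shard quantization entries
# can be resolved for the fused name.
from typing import Dict, List

# Mirrors the mapping added to MolmoVisionBackbone in this diff.
packed_modules_mapping: Dict[str, List[str]] = {
    "merged_linear": ["gate_proj", "up_proj"],
}


def shard_names_for(param_name: str,
                    mapping: Dict[str, List[str]]) -> List[str]:
    """Expand a fused module name in a weight path to its original shards."""
    for fused, shards in mapping.items():
        if fused in param_name:
            return [param_name.replace(fused, shard) for shard in shards]
    return [param_name]


# Illustrative weight path; the real prefix depends on the model definition.
assert shard_names_for("image_projector.merged_linear",
                       packed_modules_mapping) == [
    "image_projector.gate_proj",
    "image_projector.up_proj",
]
```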