Skip to content

Commit c6f7151

Browse files
Mohit Soni
authored and committed
[Qwen2_5_vl] - Onboarding Qwen2_5_vl model in QEfficient
Signed-off-by: Mohit Soni <[email protected]> Signed-off-by: Mohit Soni <[email protected]>
1 parent faab245 commit c6f7151

File tree

5 files changed

+721
-3
lines changed

5 files changed

+721
-3
lines changed

QEfficient/transformers/models/modeling_auto.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -817,9 +817,13 @@ def kv_offload_generate(
817817
vision_end = perf_counter()
818818

819819
lang_inputs = {k: v for k, v in inputs.items() if k not in vision_inputs}
820-
lang_inputs["position_ids"] = np.where(
821-
lang_inputs.pop("attention_mask"), np.arange(padded_len), -1
822-
) # Need to use -1 as position_ids for invalid tokens
820+
821+
if "position_ids" in inputs:
822+
lang_inputs["position_ids"] = inputs["position_ids"]
823+
else:
824+
lang_inputs["position_ids"] = np.where(
825+
lang_inputs.pop("attention_mask"), np.arange(padded_len), -1
826+
) # Need to use -1 as position_ids for invalid tokens
823827

824828
not_mllama = hasattr(self.model.config, "model_type") and self.model.config.model_type != "mllama"
825829
if not_mllama:

QEfficient/transformers/models/pytorch_transforms.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -136,6 +136,15 @@
136136
Qwen2Model,
137137
Qwen2RMSNorm,
138138
)
139+
from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
140+
Qwen2_5_VLAttention,
141+
Qwen2_5_VLDecoderLayer,
142+
Qwen2_5_VLForConditionalGeneration,
143+
Qwen2_5_VLModel,
144+
)
145+
from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
146+
Qwen2RMSNorm as Qwen2_5RMSNorm,
147+
)
139148
from transformers.models.starcoder2.modeling_starcoder2 import (
140149
Starcoder2Attention,
141150
Starcoder2DecoderLayer,
@@ -303,6 +312,12 @@
303312
QEffQwen2ForCausalLM,
304313
QEffQwen2Model,
305314
)
315+
from QEfficient.transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
316+
QEffQwen2_5_VLAttention,
317+
QEffQwen2_5_VLDecoderLayer,
318+
QEffQwen2_5_VLModel,
319+
QEffQwen_2_5_vl_ForConditionalGeneration,
320+
)
306321
from QEfficient.transformers.models.starcoder2.modeling_starcoder2 import (
307322
QEffStarcoder2Attention,
308323
QEFFStarcoder2DecoderLayer,
@@ -335,6 +350,7 @@ class CustomOpsTransform(ModuleMappingTransform):
335350
MixtralRMSNorm: CustomRMSNormAIC,
336351
Phi3RMSNorm: CustomRMSNormAIC,
337352
Qwen2RMSNorm: CustomRMSNormAIC,
353+
Qwen2_5RMSNorm: CustomRMSNormAIC,
338354
MllamaTextRMSNorm: CustomRMSNormAIC,
339355
GraniteRMSNorm: CustomRMSNormAIC,
340356
GraniteMoeRMSNorm: CustomRMSNormAIC,
@@ -452,6 +468,11 @@ class KVCacheTransform(ModuleMappingTransform):
452468
Qwen2DecoderLayer: QEffQwen2DecoderLayer,
453469
Qwen2Model: QEffQwen2Model,
454470
Qwen2ForCausalLM: QEffQwen2ForCausalLM,
471+
# Qwen2.5 VL
472+
Qwen2_5_VLForConditionalGeneration: QEffQwen_2_5_vl_ForConditionalGeneration,
473+
Qwen2_5_VLModel: QEffQwen2_5_VLModel,
474+
Qwen2_5_VLAttention: QEffQwen2_5_VLAttention,
475+
Qwen2_5_VLDecoderLayer: QEffQwen2_5_VLDecoderLayer,
455476
# Starcoder2
456477
Starcoder2Attention: QEffStarcoder2Attention,
457478
Starcoder2DecoderLayer: QEFFStarcoder2DecoderLayer,
Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
# -----------------------------------------------------------------------------
2+
#
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
4+
# SPDX-License-Identifier: BSD-3-Clause
5+
#
6+
# -----------------------------------------------------------------------------

0 commit comments

Comments
 (0)