quic
diff --git a/‎QEfficient/transformers/models/modeling_auto.py‎
Lines changed: 7 additions & 3 deletions b/‎QEfficient/transformers/models/modeling_auto.py‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎QEfficient/transformers/models/pytorch_transforms.py‎
Lines changed: 21 additions & 0 deletions b/‎QEfficient/transformers/models/pytorch_transforms.py‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎QEfficient/transformers/models/qwen2_5_vl/__init__.py‎
Lines changed: 6 additions & 0 deletions b/‎QEfficient/transformers/models/qwen2_5_vl/__init__.py‎
Lines changed: 6 additions & 0 deletions
@@ -817,9 +817,13 @@ def kv_offload_generate(
         vision_end = perf_counter()
 
         lang_inputs = {k: v for k, v in inputs.items() if k not in vision_inputs}
-        lang_inputs["position_ids"] = np.where(
-            lang_inputs.pop("attention_mask"), np.arange(padded_len), -1
-        )  # Need to use -1 as position_ids for invalid tokens
+
+        if "position_ids" in inputs:
+            lang_inputs["position_ids"] = inputs["position_ids"]
+        else:
+            lang_inputs["position_ids"] = np.where(
+                lang_inputs.pop("attention_mask"), np.arange(padded_len), -1
+            )  # Need to use -1 as position_ids for invalid tokens
 
         not_mllama = hasattr(self.model.config, "model_type") and self.model.config.model_type != "mllama"
         if not_mllama:
 
@@ -136,6 +136,15 @@
     Qwen2Model,
     Qwen2RMSNorm,
 )
+from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
+    Qwen2_5_VLAttention,
+    Qwen2_5_VLDecoderLayer,
+    Qwen2_5_VLForConditionalGeneration,
+    Qwen2_5_VLModel,
+)
+from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
+    Qwen2RMSNorm as Qwen2_5RMSNorm,
+)
 from transformers.models.starcoder2.modeling_starcoder2 import (
     Starcoder2Attention,
     Starcoder2DecoderLayer,
@@ -303,6 +312,12 @@
     QEffQwen2ForCausalLM,
     QEffQwen2Model,
 )
+from QEfficient.transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
+    QEffQwen2_5_VLAttention,
+    QEffQwen2_5_VLDecoderLayer,
+    QEffQwen2_5_VLModel,
+    QEffQwen_2_5_vl_ForConditionalGeneration,
+)
 from QEfficient.transformers.models.starcoder2.modeling_starcoder2 import (
     QEffStarcoder2Attention,
     QEFFStarcoder2DecoderLayer,
@@ -335,6 +350,7 @@ class CustomOpsTransform(ModuleMappingTransform):
         MixtralRMSNorm: CustomRMSNormAIC,
         Phi3RMSNorm: CustomRMSNormAIC,
         Qwen2RMSNorm: CustomRMSNormAIC,
+        Qwen2_5RMSNorm: CustomRMSNormAIC,
         MllamaTextRMSNorm: CustomRMSNormAIC,
         GraniteRMSNorm: CustomRMSNormAIC,
         GraniteMoeRMSNorm: CustomRMSNormAIC,
@@ -452,6 +468,11 @@ class KVCacheTransform(ModuleMappingTransform):
         Qwen2DecoderLayer: QEffQwen2DecoderLayer,
         Qwen2Model: QEffQwen2Model,
         Qwen2ForCausalLM: QEffQwen2ForCausalLM,
+        # Qwen2.5 VL
+        Qwen2_5_VLForConditionalGeneration: QEffQwen_2_5_vl_ForConditionalGeneration,
+        Qwen2_5_VLModel: QEffQwen2_5_VLModel,
+        Qwen2_5_VLAttention: QEffQwen2_5_VLAttention,
+        Qwen2_5_VLDecoderLayer: QEffQwen2_5_VLDecoderLayer,
         # Starcoder2
         Starcoder2Attention: QEffStarcoder2Attention,
         Starcoder2DecoderLayer: QEFFStarcoder2DecoderLayer,
 
@@ -0,0 +1,6 @@
+# -----------------------------------------------------------------------------
+#
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# -----------------------------------------------------------------------------