diff --git a/setup.py b/setup.py
index c635db02e33c..e4d73a3d956f 100644
--- a/setup.py
+++ b/setup.py
@@ -79,14 +79,12 @@
 stale_egg_info = Path(__file__).parent / "transformers.egg-info"
 if stale_egg_info.exists():
     print(
-        (
-            "Warning: {} exists.\n\n"
-            "If you recently updated transformers to 3.0 or later, this is expected,\n"
-            "but it may prevent transformers from installing in editable mode.\n\n"
-            "This directory is automatically generated by Python's packaging tools.\n"
-            "I will remove it now.\n\n"
-            "See https://github.com/pypa/pip/issues/5466 for details.\n"
-        ).format(stale_egg_info)
+        f"Warning: {stale_egg_info} exists.\n\n"
+        "If you recently updated transformers to 3.0 or later, this is expected,\n"
+        "but it may prevent transformers from installing in editable mode.\n\n"
+        "This directory is automatically generated by Python's packaging tools.\n"
+        "I will remove it now.\n\n"
+        "See https://github.com/pypa/pip/issues/5466 for details.\n"
     )
     shutil.rmtree(stale_egg_info)
diff --git a/src/transformers/generation/configuration_utils.py b/src/transformers/generation/configuration_utils.py
index bd7f02e64cee..05caed152c6e 100644
--- a/src/transformers/generation/configuration_utils.py
+++ b/src/transformers/generation/configuration_utils.py
@@ -564,12 +564,6 @@ def validate(self, strict=False):
             )
         # 1.4. Watermarking attributes
         if self.watermarking_config is not None:
-            if not (isinstance(self.watermarking_config, (WatermarkingConfig, SynthIDTextWatermarkingConfig))):
-                minor_issues["watermarking_config"] = (
-                    "`watermarking_config` as a dict is deprecated and will be removed in v4.54.0. Please construct "
-                    "`watermarking_config` object with `WatermarkingConfig` or `SynthIDTextWatermarkingConfig` class."
-                )
-                self.watermarking_config = WatermarkingConfig.from_dict(self.watermarking_config)
             self.watermarking_config.validate()
 
         # 2. Validation of attribute combinations
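With the dict-to-object shim removed, `watermarking_config` must be a config object before `validate()` runs. A minimal sketch of the migration the deleted branch used to perform automatically (parameter values are illustrative):

```python
from transformers import GenerationConfig, WatermarkingConfig

# Before (no longer converted for you):
# generation_config = GenerationConfig(watermarking_config={"greenlist_ratio": 0.25})

# After: build the object explicitly; `SynthIDTextWatermarkingConfig` works the same way.
watermarking = WatermarkingConfig(greenlist_ratio=0.25, bias=2.0)
generation_config = GenerationConfig(do_sample=True, watermarking_config=watermarking)
generation_config.validate()
```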
diff --git a/src/transformers/models/bloom/modeling_bloom.py b/src/transformers/models/bloom/modeling_bloom.py
index 64576b683952..699a177fc6c1 100644
--- a/src/transformers/models/bloom/modeling_bloom.py
+++ b/src/transformers/models/bloom/modeling_bloom.py
@@ -545,10 +545,6 @@ def forward(
         if inputs_embeds is None:
             inputs_embeds = self.word_embeddings(input_ids)
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/codegen/modeling_codegen.py b/src/transformers/models/codegen/modeling_codegen.py
index 6c4f16ed1cb5..6ccb502766cb 100644
--- a/src/transformers/models/codegen/modeling_codegen.py
+++ b/src/transformers/models/codegen/modeling_codegen.py
@@ -376,10 +376,6 @@ def forward(
         if inputs_embeds is None:
             inputs_embeds = self.wte(input_ids)
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/cohere2/configuration_cohere2.py b/src/transformers/models/cohere2/configuration_cohere2.py
index 96efb4460f34..49ddd30ce755 100644
--- a/src/transformers/models/cohere2/configuration_cohere2.py
+++ b/src/transformers/models/cohere2/configuration_cohere2.py
@@ -19,8 +19,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import warnings
-
 from ...configuration_utils import PretrainedConfig, layer_type_validation
 from ...modeling_rope_utils import rope_config_validation
 
@@ -230,17 +228,5 @@ def __init__(
         ]
         layer_type_validation(self.layer_types)
 
-    @property
-    def sliding_window_pattern(self):
-        warnings.warn(
-            "The `sliding_window_pattern` attribute is deprecated and will be removed in v4.55.0.",
-            FutureWarning,
-        )
-        return self._sliding_window_pattern
-
-    @sliding_window_pattern.setter
-    def sliding_window_pattern(self, value):
-        self._sliding_window_pattern = value
-
 
 __all__ = ["Cohere2Config"]
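The deprecated `sliding_window_pattern` property is dropped rather than extended; the per-layer layout is already explicit in `layer_types`. A rough sketch of the replacement read path (the `"sliding_attention"` value follows the `layer_types` convention used elsewhere in the library):

```python
from transformers import Cohere2Config

config = Cohere2Config()
# Before: the sliding/full cadence was inferred from `config.sliding_window_pattern` (removed).
# After: read the explicit per-layer layout instead.
sliding_layers = [i for i, t in enumerate(config.layer_types) if t == "sliding_attention"]
```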
diff --git a/src/transformers/models/cohere2/modular_cohere2.py b/src/transformers/models/cohere2/modular_cohere2.py
index 56a72b102203..85d18429f9ce 100644
--- a/src/transformers/models/cohere2/modular_cohere2.py
+++ b/src/transformers/models/cohere2/modular_cohere2.py
@@ -13,7 +13,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import warnings
 from typing import Callable, Optional
 
 import torch
@@ -250,18 +249,6 @@ def __init__(
         ]
         layer_type_validation(self.layer_types)
 
-    @property
-    def sliding_window_pattern(self):
-        warnings.warn(
-            "The `sliding_window_pattern` attribute is deprecated and will be removed in v4.55.0.",
-            FutureWarning,
-        )
-        return self._sliding_window_pattern
-
-    @sliding_window_pattern.setter
-    def sliding_window_pattern(self, value):
-        self._sliding_window_pattern = value
-
 
 class Cohere2RotaryEmbedding(CohereRotaryEmbedding):
     pass
diff --git a/src/transformers/models/dbrx/modeling_dbrx.py b/src/transformers/models/dbrx/modeling_dbrx.py
index 8517a235389d..1f80a48455fe 100644
--- a/src/transformers/models/dbrx/modeling_dbrx.py
+++ b/src/transformers/models/dbrx/modeling_dbrx.py
@@ -910,10 +910,6 @@ def forward(
         inputs_embeds = nn.functional.dropout(inputs_embeds, p=self.emb_pdrop, training=self.training)
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/decision_transformer/modeling_decision_transformer.py b/src/transformers/models/decision_transformer/modeling_decision_transformer.py
index b13127a6f86f..78db13c42d68 100755
--- a/src/transformers/models/decision_transformer/modeling_decision_transformer.py
+++ b/src/transformers/models/decision_transformer/modeling_decision_transformer.py
@@ -570,15 +570,6 @@ def forward(
         if use_cache:
             if past_key_values is None:
                 past_key_values = DynamicCache(config=self.config)
-            elif isinstance(past_key_values, tuple):
-                logger.warning_once(
-                    "Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.53.0. "
-                    "You should pass an instance of `Cache` instead, e.g. "
-                    "`past_key_values=DynamicCache.from_legacy_cache(past_key_values)`."
-                )
-                past_key_values = DynamicCache.from_legacy_cache(past_key_values)
-            elif past_key_values is None:
-                past_key_values = DynamicCache(config=self.config)
 
         if self.config.add_cross_attention and not isinstance(past_key_values, EncoderDecoderCache):
             past_key_values = EncoderDecoderCache(past_key_values, DynamicCache(config=self.config))
diff --git a/src/transformers/models/falcon/modeling_falcon.py b/src/transformers/models/falcon/modeling_falcon.py
index 2489efaf2e95..c6e248a30fcf 100644
--- a/src/transformers/models/falcon/modeling_falcon.py
+++ b/src/transformers/models/falcon/modeling_falcon.py
@@ -760,10 +760,6 @@ def forward(
         if inputs_embeds is None:
             inputs_embeds = self.word_embeddings(input_ids)
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
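These cache hunks, repeated across the decoder models below, drop both the curated `ValueError` and the silent tuple-to-`Cache` conversion: `past_key_values` must now be a `Cache` instance or `None`. A minimal migration sketch for callers still holding a legacy cache (`model`, `input_ids`, and `legacy_cache` are assumed to exist; `legacy_cache` is the old tuple-of-tensors format):

```python
from transformers import DynamicCache

# Convert once at the call boundary, as the removed warning itself suggested.
cache = DynamicCache.from_legacy_cache(legacy_cache)
outputs = model(input_ids, past_key_values=cache, use_cache=True)

# Or start from an empty cache and let the model populate it.
outputs = model(input_ids, past_key_values=DynamicCache(), use_cache=True)
```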
diff --git a/src/transformers/models/gemma3/configuration_gemma3.py b/src/transformers/models/gemma3/configuration_gemma3.py
index b1ec3311ba66..04e614765b1b 100644
--- a/src/transformers/models/gemma3/configuration_gemma3.py
+++ b/src/transformers/models/gemma3/configuration_gemma3.py
@@ -19,7 +19,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import warnings
 from typing import Any, Optional, Union
 
 from ...configuration_utils import PretrainedConfig, layer_type_validation
@@ -241,18 +240,6 @@ def __init__(
         ]
         layer_type_validation(self.layer_types)
 
-    @property
-    def sliding_window_pattern(self):
-        warnings.warn(
-            "The `sliding_window_pattern` attribute is deprecated and will be removed in v4.55.0.",
-            FutureWarning,
-        )
-        return self._sliding_window_pattern
-
-    @sliding_window_pattern.setter
-    def sliding_window_pattern(self, value):
-        self._sliding_window_pattern = value
-
 
 class Gemma3Config(PretrainedConfig):
     r"""
diff --git a/src/transformers/models/gemma3/modular_gemma3.py b/src/transformers/models/gemma3/modular_gemma3.py
index fc70fa6e9d8e..95d863e57bed 100644
--- a/src/transformers/models/gemma3/modular_gemma3.py
+++ b/src/transformers/models/gemma3/modular_gemma3.py
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import copy
-import warnings
 from collections.abc import Callable
 from typing import Any, Optional, Union
 
@@ -252,18 +251,6 @@ def __init__(
         ]
         layer_type_validation(self.layer_types)
 
-    @property
-    def sliding_window_pattern(self):
-        warnings.warn(
-            "The `sliding_window_pattern` attribute is deprecated and will be removed in v4.55.0.",
-            FutureWarning,
-        )
-        return self._sliding_window_pattern
-
-    @sliding_window_pattern.setter
-    def sliding_window_pattern(self, value):
-        self._sliding_window_pattern = value
-
 
 class Gemma3Config(PretrainedConfig):
     r"""
diff --git a/src/transformers/models/git/modeling_git.py b/src/transformers/models/git/modeling_git.py
index 2b69cf07a046..c6436c834062 100644
--- a/src/transformers/models/git/modeling_git.py
+++ b/src/transformers/models/git/modeling_git.py
@@ -405,10 +405,6 @@ def forward(
             )
             use_cache = False
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/glm4v/modeling_glm4v.py b/src/transformers/models/glm4v/modeling_glm4v.py
index 0f2d574193fc..2f5fc91c967b 100644
--- a/src/transformers/models/glm4v/modeling_glm4v.py
+++ b/src/transformers/models/glm4v/modeling_glm4v.py
@@ -38,14 +38,11 @@
 from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
-from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, is_torchdynamo_compiling, logging
+from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, is_torchdynamo_compiling
 from ...utils.generic import check_model_inputs
 from .configuration_glm4v import Glm4vConfig, Glm4vTextConfig, Glm4vVisionConfig
 
 
-logger = logging.get_logger(__name__)
-
-
 @use_kernel_forward_from_hub("RMSNorm")
 class Glm4vRMSNorm(nn.Module):
     def __init__(self, hidden_size, eps=1e-6):
@@ -304,18 +301,7 @@ def forward(
         query_states, key_states, value_states = (
             self.qkv(hidden_states).reshape(seq_length, 3, self.num_heads, -1).permute(1, 0, 2, 3).unbind(0)
         )
-        if position_embeddings is None:
-            logger.warning_once(
-                "The attention layers in this model are transitioning from computing the RoPE embeddings internally "
-                "through `rotary_pos_emb` (2D tensor of RoPE theta values), to using externally computed "
-                "`position_embeddings` (Tuple of tensors, containing cos and sin). In v4.54 `rotary_pos_emb` will be "
-                "removed and `position_embeddings` will be mandatory."
-            )
-            emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
-            cos = emb.cos()
-            sin = emb.sin()
-        else:
-            cos, sin = position_embeddings
+        cos, sin = position_embeddings
         query_states, key_states = apply_rotary_pos_emb_vision(query_states, key_states, cos, sin)
 
         query_states = query_states.transpose(0, 1).unsqueeze(0)
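Here and in the GLM-4V-MoE and Qwen2-VL variants further down, the vision attention now requires callers to pass `position_embeddings` instead of relying on the removed `rotary_pos_emb` fallback. A sketch of the external computation, mirroring the deleted branch (the shape of `rotary_pos_emb` is illustrative):

```python
import torch

# RoPE angles previously consumed inside the layer; (seq_len, head_dim // 2) is illustrative.
rotary_pos_emb = torch.randn(16, 32)

# Exactly what the removed fallback did: duplicate the angles, then take cos/sin.
emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
position_embeddings = (emb.cos(), emb.sin())

# Hypothetical call into the vision attention layer:
# attn_output = vision_attention(hidden_states, position_embeddings=position_embeddings)
```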
diff --git a/src/transformers/models/glm4v_moe/modeling_glm4v_moe.py b/src/transformers/models/glm4v_moe/modeling_glm4v_moe.py
index 1511ca34833b..141374b84dd9 100644
--- a/src/transformers/models/glm4v_moe/modeling_glm4v_moe.py
+++ b/src/transformers/models/glm4v_moe/modeling_glm4v_moe.py
@@ -38,15 +38,12 @@
 from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
-from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, is_torchdynamo_compiling, logging
+from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, is_torchdynamo_compiling
 from ...utils.deprecation import deprecate_kwarg
 from ...utils.generic import check_model_inputs
 from .configuration_glm4v_moe import Glm4vMoeConfig, Glm4vMoeTextConfig, Glm4vMoeVisionConfig
 
 
-logger = logging.get_logger(__name__)
-
-
 @use_kernel_forward_from_hub("RMSNorm")
 class Glm4vMoeRMSNorm(nn.Module):
     def __init__(self, hidden_size, eps=1e-6):
@@ -648,18 +645,7 @@ def forward(
         query_states, key_states, value_states = (
             self.qkv(hidden_states).reshape(seq_length, 3, self.num_heads, -1).permute(1, 0, 2, 3).unbind(0)
         )
-        if position_embeddings is None:
-            logger.warning_once(
-                "The attention layers in this model are transitioning from computing the RoPE embeddings internally "
-                "through `rotary_pos_emb` (2D tensor of RoPE theta values), to using externally computed "
-                "`position_embeddings` (Tuple of tensors, containing cos and sin). In v4.54 `rotary_pos_emb` will be "
-                "removed and `position_embeddings` will be mandatory."
-            )
-            emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
-            cos = emb.cos()
-            sin = emb.sin()
-        else:
-            cos, sin = position_embeddings
+        cos, sin = position_embeddings
         query_states, key_states = apply_rotary_pos_emb_vision(query_states, key_states, cos, sin)
 
         query_states = query_states.transpose(0, 1).unsqueeze(0)
diff --git a/src/transformers/models/gpt_neo/modeling_gpt_neo.py b/src/transformers/models/gpt_neo/modeling_gpt_neo.py
index 9622620ca376..f7cf160cbb21 100755
--- a/src/transformers/models/gpt_neo/modeling_gpt_neo.py
+++ b/src/transformers/models/gpt_neo/modeling_gpt_neo.py
@@ -571,10 +571,6 @@ def forward(
         if inputs_embeds is None:
             inputs_embeds = self.wte(input_ids)
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/gpt_neox/modeling_gpt_neox.py b/src/transformers/models/gpt_neox/modeling_gpt_neox.py
index 4125946e1206..4a8dd649c99a 100755
--- a/src/transformers/models/gpt_neox/modeling_gpt_neox.py
+++ b/src/transformers/models/gpt_neox/modeling_gpt_neox.py
@@ -428,10 +428,6 @@ def forward(
         if inputs_embeds is None:
             inputs_embeds = self.embed_in(input_ids)
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/gpt_neox/modular_gpt_neox.py b/src/transformers/models/gpt_neox/modular_gpt_neox.py
index 08e8f182f9d0..1d808304c306 100644
--- a/src/transformers/models/gpt_neox/modular_gpt_neox.py
+++ b/src/transformers/models/gpt_neox/modular_gpt_neox.py
@@ -306,10 +306,6 @@ def forward(
         if inputs_embeds is None:
             inputs_embeds = self.embed_in(input_ids)
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py b/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py
index 108a68ef70d1..e25548d90f0c 100755
--- a/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py
+++ b/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py
@@ -450,10 +450,6 @@ def forward(
         if inputs_embeds is None:
             inputs_embeds = self.embed_in(input_ids)
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/gptj/modeling_gptj.py b/src/transformers/models/gptj/modeling_gptj.py
index 88c66089aead..5681398972fc 100644
--- a/src/transformers/models/gptj/modeling_gptj.py
+++ b/src/transformers/models/gptj/modeling_gptj.py
@@ -654,10 +654,6 @@ def forward(
         if inputs_embeds is None:
             inputs_embeds = self.wte(input_ids)
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/granitemoe/modeling_granitemoe.py b/src/transformers/models/granitemoe/modeling_granitemoe.py
index 7f864395ccb6..cb4258c1a1ac 100644
--- a/src/transformers/models/granitemoe/modeling_granitemoe.py
+++ b/src/transformers/models/granitemoe/modeling_granitemoe.py
@@ -673,10 +673,6 @@ def forward(
         inputs_embeds = inputs_embeds * self.embedding_multiplier
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/granitemoeshared/modeling_granitemoeshared.py b/src/transformers/models/granitemoeshared/modeling_granitemoeshared.py
index 9bbb7b768a10..1ef28d710e2d 100644
--- a/src/transformers/models/granitemoeshared/modeling_granitemoeshared.py
+++ b/src/transformers/models/granitemoeshared/modeling_granitemoeshared.py
@@ -649,10 +649,6 @@ def forward(
         inputs_embeds = inputs_embeds * self.embedding_multiplier
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/idefics/modeling_idefics.py b/src/transformers/models/idefics/modeling_idefics.py
index 00d449b7aaa1..6f16fe0b7871 100644
--- a/src/transformers/models/idefics/modeling_idefics.py
+++ b/src/transformers/models/idefics/modeling_idefics.py
@@ -1048,10 +1048,6 @@ def forward(
         if inputs_embeds is None:
             inputs_embeds = self.embed_tokens(input_ids)
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/idefics2/modeling_idefics2.py b/src/transformers/models/idefics2/modeling_idefics2.py
index ec3d8b64cb92..fcd2691da47a 100644
--- a/src/transformers/models/idefics2/modeling_idefics2.py
+++ b/src/transformers/models/idefics2/modeling_idefics2.py
@@ -1053,10 +1053,6 @@ def forward(
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/jetmoe/modeling_jetmoe.py b/src/transformers/models/jetmoe/modeling_jetmoe.py
index 64a93b71c9cd..e0dfaeaf700f 100644
--- a/src/transformers/models/jetmoe/modeling_jetmoe.py
+++ b/src/transformers/models/jetmoe/modeling_jetmoe.py
@@ -921,10 +921,6 @@ def forward(
         if inputs_embeds is None:
             inputs_embeds = self.embed_tokens(input_ids)
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py b/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py
index c10a0f80acf1..10dc2b629fbf 100644
--- a/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py
+++ b/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py
@@ -868,10 +868,6 @@ def forward(
         # embed positions
         hidden_states = inputs_embeds
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/layoutlm/configuration_layoutlm.py b/src/transformers/models/layoutlm/configuration_layoutlm.py
index 95bc2eda6fab..18bfacb75592 100644
--- a/src/transformers/models/layoutlm/configuration_layoutlm.py
+++ b/src/transformers/models/layoutlm/configuration_layoutlm.py
@@ -14,7 +14,6 @@
 # limitations under the License.
 """LayoutLM model configuration"""
 
-import warnings
 from collections import OrderedDict
 from collections.abc import Mapping
 from typing import Any, Optional
@@ -68,12 +67,6 @@ class LayoutLMConfig(PretrainedConfig):
             The epsilon used by the layer normalization layers.
         pad_token_id (`int`, *optional*, defaults to 0):
             The value used to pad input_ids.
-        position_embedding_type (`str`, *optional*, defaults to `"absolute"`):
-            Type of position embedding. Choose one of `"absolute"`, `"relative_key"`, `"relative_key_query"`. For
-            positional embeddings use `"absolute"`. For more information on `"relative_key"`, please refer to
-            [Self-Attention with Relative Position Representations (Shaw et al.)](https://huggingface.co/papers/1803.02155).
-            For more information on `"relative_key_query"`, please refer to *Method 4* in [Improve Transformer Models
-            with Better Relative Position Embeddings (Huang et al.)](https://huggingface.co/papers/2009.13658).
         use_cache (`bool`, *optional*, defaults to `True`):
             Whether or not the model should return the last key/values attentions (not used by all models). Only
             relevant if `config.is_decoder=True`.
@@ -113,7 +106,6 @@ def __init__(
         initializer_range=0.02,
         layer_norm_eps=1e-12,
         pad_token_id=0,
-        position_embedding_type="absolute",
         use_cache=True,
         max_2d_position_embeddings=1024,
         **kwargs,
@@ -131,22 +123,9 @@
         self.type_vocab_size = type_vocab_size
         self.initializer_range = initializer_range
         self.layer_norm_eps = layer_norm_eps
-        self._position_embedding_type = position_embedding_type
         self.use_cache = use_cache
         self.max_2d_position_embeddings = max_2d_position_embeddings
 
-    @property
-    def position_embedding_type(self):
-        warnings.warn(
-            "The `position_embedding_type` attribute is deprecated and will be removed in v4.55.",
-            FutureWarning,
-        )
-        return self._position_embedding_type
-
-    @position_embedding_type.setter
-    def position_embedding_type(self, value):
-        self._position_embedding_type = value
-
 
 class LayoutLMOnnxConfig(OnnxConfig):
     def __init__(
diff --git a/src/transformers/models/markuplm/configuration_markuplm.py b/src/transformers/models/markuplm/configuration_markuplm.py
index e5945cb3307b..34c2083df85a 100644
--- a/src/transformers/models/markuplm/configuration_markuplm.py
+++ b/src/transformers/models/markuplm/configuration_markuplm.py
@@ -14,8 +14,6 @@
 # limitations under the License.
 """MarkupLM model configuration"""
 
-import warnings
-
 from ...configuration_utils import PretrainedConfig
 from ...utils import logging
 
@@ -120,7 +118,6 @@ def __init__(
         subs_pad_id=1001,
         xpath_unit_hidden_size=32,
         max_depth=50,
-        position_embedding_type="absolute",
         use_cache=True,
         classifier_dropout=None,
         **kwargs,
@@ -143,7 +140,6 @@
         self.type_vocab_size = type_vocab_size
         self.initializer_range = initializer_range
         self.layer_norm_eps = layer_norm_eps
-        self._position_embedding_type = position_embedding_type
         self.use_cache = use_cache
         self.classifier_dropout = classifier_dropout
         # additional properties
@@ -154,17 +150,5 @@ def __init__(
         self.subs_pad_id = subs_pad_id
         self.xpath_unit_hidden_size = xpath_unit_hidden_size
 
-    @property
-    def position_embedding_type(self):
-        warnings.warn(
-            "The `position_embedding_type` attribute is deprecated and will be removed in v4.55.",
-            FutureWarning,
-        )
-        return self._position_embedding_type
-
-    @position_embedding_type.setter
-    def position_embedding_type(self, value):
-        self._position_embedding_type = value
-
 
 __all__ = ["MarkupLMConfig"]
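With the property shim gone, `position_embedding_type` disappears from the LayoutLM and MarkupLM configs entirely; it was stored but never read. A sketch of the caller-side cleanup (LayoutLM shown, MarkupLM is analogous):

```python
from transformers import LayoutLMConfig

# Before: LayoutLMConfig(position_embedding_type="absolute")  # accepted but unused
# After: simply omit the kwarg; the attribute no longer exists on the config.
config = LayoutLMConfig(max_2d_position_embeddings=1024)
```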
diff --git a/src/transformers/models/mimi/modeling_mimi.py b/src/transformers/models/mimi/modeling_mimi.py
index b14f8217c71d..119f4a4d1afb 100644
--- a/src/transformers/models/mimi/modeling_mimi.py
+++ b/src/transformers/models/mimi/modeling_mimi.py
@@ -1098,10 +1098,6 @@ def forward(
             )
             use_cache = False
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/moshi/modeling_moshi.py b/src/transformers/models/moshi/modeling_moshi.py
index df254675450d..27c08626115d 100644
--- a/src/transformers/models/moshi/modeling_moshi.py
+++ b/src/transformers/models/moshi/modeling_moshi.py
@@ -1275,10 +1275,6 @@ def forward(
         # embed positions
         hidden_states = inputs_embeds
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/olmoe/modeling_olmoe.py b/src/transformers/models/olmoe/modeling_olmoe.py
index 0b427f9bef19..277d601df04b 100644
--- a/src/transformers/models/olmoe/modeling_olmoe.py
+++ b/src/transformers/models/olmoe/modeling_olmoe.py
@@ -788,10 +788,6 @@ def forward(
         if inputs_embeds is None:
             inputs_embeds = self.embed_tokens(input_ids)
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/persimmon/modeling_persimmon.py b/src/transformers/models/persimmon/modeling_persimmon.py
index e6ac1d8311fc..4c7cfd236ac5 100644
--- a/src/transformers/models/persimmon/modeling_persimmon.py
+++ b/src/transformers/models/persimmon/modeling_persimmon.py
@@ -472,10 +472,6 @@ def forward(
             )
             use_cache = False
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/phimoe/modeling_phimoe.py b/src/transformers/models/phimoe/modeling_phimoe.py
index ec311754efab..295af2a6c736 100644
--- a/src/transformers/models/phimoe/modeling_phimoe.py
+++ b/src/transformers/models/phimoe/modeling_phimoe.py
@@ -974,10 +974,6 @@ def forward(
             )
             use_cache = False
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py b/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py
index fd67771f542e..1e5065fa23c4 100644
--- a/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py
+++ b/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py
@@ -205,18 +205,7 @@ def forward(
         query_states, key_states, value_states = (
             self.qkv(hidden_states).reshape(seq_length, 3, self.num_heads, -1).permute(1, 0, 2, 3).unbind(0)
         )
-        if position_embeddings is None:
-            logger.warning_once(
-                "The attention layers in this model are transitioning from computing the RoPE embeddings internally "
-                "through `rotary_pos_emb` (2D tensor of RoPE theta values), to using externally computed "
-                "`position_embeddings` (Tuple of tensors, containing cos and sin). In v4.54 `rotary_pos_emb` will be "
-                "removed and `position_embeddings` will be mandatory."
-            )
-            emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
-            cos = emb.cos()
-            sin = emb.sin()
-        else:
-            cos, sin = position_embeddings
+        cos, sin = position_embeddings
         query_states, key_states = apply_rotary_pos_emb_vision(query_states, key_states, cos, sin)
 
         query_states = query_states.transpose(0, 1).unsqueeze(0)
diff --git a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py
index 238ca1fee4cb..f9540485e656 100644
--- a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py
+++ b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py
@@ -823,10 +823,6 @@ def forward(
             )
             use_cache = False
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py b/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py
index 745829856db0..bc173085a6dc 100644
--- a/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py
+++ b/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py
@@ -347,18 +347,7 @@ def forward(
         query_states, key_states, value_states = (
             self.qkv(hidden_states).reshape(seq_length, 3, self.num_heads, -1).permute(1, 0, 2, 3).unbind(0)
         )
-        if position_embeddings is None:
-            logger.warning_once(
-                "The attention layers in this model are transitioning from computing the RoPE embeddings internally "
-                "through `rotary_pos_emb` (2D tensor of RoPE theta values), to using externally computed "
-                "`position_embeddings` (Tuple of tensors, containing cos and sin). In v4.54 `rotary_pos_emb` will be "
-                "removed and `position_embeddings` will be mandatory."
-            )
-            emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
-            cos = emb.cos()
-            sin = emb.sin()
-        else:
-            cos, sin = position_embeddings
+        cos, sin = position_embeddings
         query_states, key_states = apply_rotary_pos_emb_vision(query_states, key_states, cos, sin)
 
         query_states = query_states.transpose(0, 1).unsqueeze(0)
diff --git a/src/transformers/models/stablelm/modeling_stablelm.py b/src/transformers/models/stablelm/modeling_stablelm.py
index 69cf41bcfdd8..5413f54ee584 100755
--- a/src/transformers/models/stablelm/modeling_stablelm.py
+++ b/src/transformers/models/stablelm/modeling_stablelm.py
@@ -700,10 +700,6 @@ def forward(
             )
             use_cache = False
 
-        # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
-        if not isinstance(past_key_values, (type(None), Cache)):
-            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
-
         if use_cache and past_key_values is None:
             past_key_values = DynamicCache(config=self.config)
diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py
index b0c4bace52a3..38d092e9aa66 100644
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -141,9 +141,6 @@
     is_spqr_available,
     is_sudachi_available,
    is_sudachi_projection_available,
-    is_tensorflow_probability_available,
-    is_tensorflow_text_available,
-    is_tf2onnx_available,
     is_tf_available,
     is_tiktoken_available,
     is_timm_available,
@@ -514,14 +511,6 @@ def require_jinja(test_case):
     return unittest.skipUnless(is_jinja_available(), "test requires jinja")(test_case)
 
 
-def require_tf2onnx(test_case):
-    logger.warning_once(
-        "TensorFlow test-related code, including `require_tf2onnx`, is deprecated and will be removed in "
-        "Transformers v4.55"
-    )
-    return unittest.skipUnless(is_tf2onnx_available(), "test requires tf2onnx")(test_case)
-
-
 def require_onnx(test_case):
     return unittest.skipUnless(is_onnx_available(), "test requires ONNX")(test_case)
 
@@ -716,22 +705,6 @@ def require_intel_extension_for_pytorch(test_case):
     )(test_case)
 
 
-def require_tensorflow_probability(test_case):
-    """
-    Decorator marking a test that requires TensorFlow probability.
-
-    These tests are skipped when TensorFlow probability isn't installed.
-
-    """
-    logger.warning_once(
-        "TensorFlow test-related code, including `require_tensorflow_probability`, is deprecated and will be "
-        "removed in Transformers v4.55"
-    )
-    return unittest.skipUnless(is_tensorflow_probability_available(), "test requires TensorFlow probability")(
-        test_case
-    )
-
-
 def require_torchaudio(test_case):
     """
     Decorator marking a test that requires torchaudio. These tests are skipped when torchaudio isn't installed.
@@ -739,26 +712,6 @@ def require_torchaudio(test_case):
     """
     return unittest.skipUnless(is_torchaudio_available(), "test requires torchaudio")(test_case)
 
 
-def require_tf(test_case):
-    """
-    Decorator marking a test that requires TensorFlow. These tests are skipped when TensorFlow isn't installed.
-    """
-    logger.warning_once(
-        "TensorFlow test-related code, including `require_tf`, is deprecated and will be removed in Transformers v4.55"
-    )
-    return unittest.skipUnless(is_tf_available(), "test requires TensorFlow")(test_case)
-
-
-def require_flax(test_case):
-    """
-    Decorator marking a test that requires JAX & Flax. These tests are skipped when one / both are not installed
-    """
-    logger.warning_once(
-        "JAX test-related code, including `require_flax`, is deprecated and will be removed in Transformers v4.55"
-    )
-    return unittest.skipUnless(is_flax_available(), "test requires JAX & Flax")(test_case)
-
-
 def require_sentencepiece(test_case):
     """
     Decorator marking a test that requires SentencePiece. These tests are skipped when SentencePiece isn't installed.
@@ -794,18 +747,6 @@ def require_tokenizers(test_case):
     return unittest.skipUnless(is_tokenizers_available(), "test requires tokenizers")(test_case)
 
 
-def require_tensorflow_text(test_case):
-    """
-    Decorator marking a test that requires tensorflow_text. These tests are skipped when tensroflow_text isn't
-    installed.
-    """
-    logger.warning_once(
-        "TensorFlow test-related code, including `require_tensorflow_text`, is deprecated and will be "
-        "removed in Transformers v4.55"
-    )
-    return unittest.skipUnless(is_tensorflow_text_available(), "test requires tensorflow_text")(test_case)
-
-
 def require_keras_nlp(test_case):
     """
     Decorator marking a test that requires keras_nlp. These tests are skipped when keras_nlp isn't installed.
diff --git a/utils/check_config_attributes.py b/utils/check_config_attributes.py
index 34bf0511a320..22c7f67972ee 100644
--- a/utils/check_config_attributes.py
+++ b/utils/check_config_attributes.py
@@ -303,9 +303,6 @@
         "local_attention",
         "local_rope_theta",
     ],
-    # position_embedding_type not used and deprecated. Should be deleted in v4.55
-    "LayoutLMConfig": ["position_embedding_type"],
-    "MarkupLMConfig": ["position_embedding_type"],
     "SmolLM3Config": ["no_rope_layer_interval"],
     "Gemma3nVisionConfig": ["architecture", "do_pooling", "model_args"],  # this is for use in `timm`
 }
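Test suites that relied on the removed TF/Flax decorators can reproduce them locally, since the availability checks themselves remain in place (note that `is_tf_available` is still imported by `testing_utils` above). A sketch of an equivalent local decorator, minus the deprecation warning:

```python
import unittest

from transformers.utils import is_tf_available


def require_tf(test_case):
    # Local stand-in for the removed `transformers.testing_utils.require_tf`.
    return unittest.skipUnless(is_tf_available(), "test requires TensorFlow")(test_case)
```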