Skip to content

Commit 9ab6078

Browse files
authored
Remove Gemma's eager training warning (#40744)
* Removed the eager-attention training warning
* Removed the remaining warnings
1 parent 2a1eb5b commit 9ab6078

File tree

6 files changed

+1
-40
lines changed

6 files changed

+1
-40
lines changed

src/transformers/models/gemma2/modeling_gemma2.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -534,11 +534,6 @@ def forward(
534534
"What is your favorite condiment?"
535535
```"""
536536

537-
if self.training and self.config._attn_implementation != "eager":
538-
logger.warning_once(
539-
"It is strongly recommended to train Gemma2 models with the `eager` attention implementation "
540-
f"instead of `{self.config._attn_implementation}`. Use `eager` with `AutoModelForCausalLM.from_pretrained('<path-to-checkpoint>', attn_implementation='eager')`."
541-
)
542537
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
543538
output_hidden_states = (
544539
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states

src/transformers/models/gemma2/modular_gemma2.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -520,11 +520,6 @@ def forward(
520520
"What is your favorite condiment?"
521521
```"""
522522

523-
if self.training and self.config._attn_implementation != "eager":
524-
logger.warning_once(
525-
"It is strongly recommended to train Gemma2 models with the `eager` attention implementation "
526-
f"instead of `{self.config._attn_implementation}`. Use `eager` with `AutoModelForCausalLM.from_pretrained('<path-to-checkpoint>', attn_implementation='eager')`."
527-
)
528523
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
529524
output_hidden_states = (
530525
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states

src/transformers/models/gemma3/modeling_gemma3.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -654,11 +654,6 @@ def forward(
654654
"What is your favorite condiment?"
655655
```"""
656656

657-
if self.training and self.config._attn_implementation != "eager":
658-
logger.warning_once(
659-
"It is strongly recommended to train Gemma3 models with the `eager` attention implementation "
660-
f"instead of `{self.config._attn_implementation}`. Use `eager` with `AutoModelForCausalLM.from_pretrained('<path-to-checkpoint>', attn_implementation='eager')`."
661-
)
662657
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
663658
output_hidden_states = (
664659
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states

src/transformers/models/gemma3n/modeling_gemma3n.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1808,11 +1808,6 @@ def forward(
18081808
"What is your favorite condiment?"
18091809
```"""
18101810

1811-
if self.training and self.config._attn_implementation != "eager":
1812-
logger.warning_once(
1813-
"It is strongly recommended to train Gemma3n models with the `eager` attention implementation "
1814-
f"instead of `{self.config._attn_implementation}`. Use `eager` with `AutoModelForCausalLM.from_pretrained('<path-to-checkpoint>', attn_implementation='eager')`."
1815-
)
18161811
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
18171812
output_hidden_states = (
18181813
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states

src/transformers/models/t5gemma/modeling_t5gemma.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
4242
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
4343
from ...processing_utils import Unpack
44-
from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, is_torchdynamo_compiling, logging
44+
from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, logging
4545
from ...utils.deprecation import deprecate_kwarg
4646
from ...utils.generic import OutputRecorder, check_model_inputs
4747
from .configuration_t5gemma import T5GemmaConfig, T5GemmaModuleConfig
@@ -1064,15 +1064,6 @@ def forward(
10641064
config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
10651065
(masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
10661066
"""
1067-
if self.training and self.config._attn_implementation != "eager":
1068-
msg = (
1069-
"It is strongly recommended to train T5Gemma models with the `eager` attention implementation "
1070-
f"instead of `{self.config._attn_implementation}`. Use `eager` with `AutoModelForCausalLM.from_pretrained('<path-to-checkpoint>', attn_implementation='eager')`."
1071-
)
1072-
if is_torchdynamo_compiling():
1073-
raise ValueError(msg)
1074-
else:
1075-
logger.warning_once(msg)
10761067

10771068
if labels is not None and decoder_input_ids is None and decoder_inputs_embeds is None:
10781069
# get decoder inputs from shifting lm labels to the right

src/transformers/models/t5gemma/modular_t5gemma.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
TransformersKwargs,
3838
auto_docstring,
3939
can_return_tuple,
40-
is_torchdynamo_compiling,
4140
logging,
4241
)
4342
from ...utils.deprecation import deprecate_kwarg
@@ -921,15 +920,6 @@ def forward(
921920
config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
922921
(masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
923922
"""
924-
if self.training and self.config._attn_implementation != "eager":
925-
msg = (
926-
"It is strongly recommended to train T5Gemma models with the `eager` attention implementation "
927-
f"instead of `{self.config._attn_implementation}`. Use `eager` with `AutoModelForCausalLM.from_pretrained('<path-to-checkpoint>', attn_implementation='eager')`."
928-
)
929-
if is_torchdynamo_compiling():
930-
raise ValueError(msg)
931-
else:
932-
logger.warning_once(msg)
933923

934924
if labels is not None and decoder_input_ids is None and decoder_inputs_embeds is None:
935925
# get decoder inputs from shifting lm labels to the right

0 commit comments

Comments (0)