
Commit 613153e

fix
1 parent 9ba85d1 commit 613153e

File tree

4 files changed, +35 -32 lines changed

tests/models/embedding/language/test_snowflake_arctic_embed.py

Lines changed: 10 additions & 10 deletions

@@ -18,35 +18,35 @@
     EmbedModelInfo("Snowflake/snowflake-arctic-embed-xs",
                    is_matryoshka=False,
                    architecture="BertModel",
-                   enable_ci_test=True),
+                   enable_test=True),
     EmbedModelInfo("Snowflake/snowflake-arctic-embed-s",
                    is_matryoshka=False,
                    architecture="BertModel",
-                   enable_ci_test=False),
+                   enable_test=False),
     EmbedModelInfo("Snowflake/snowflake-arctic-embed-m",
                    is_matryoshka=False,
                    architecture="BertModel",
-                   enable_ci_test=False),
+                   enable_test=False),
     EmbedModelInfo("Snowflake/snowflake-arctic-embed-m-long",
                    is_matryoshka=False,
                    architecture="NomicBertModel",
-                   enable_ci_test=True),
+                   enable_test=True),
     EmbedModelInfo("Snowflake/snowflake-arctic-embed-l",
                    is_matryoshka=False,
                    architecture="BertModel",
-                   enable_ci_test=False),
+                   enable_test=False),
     EmbedModelInfo("Snowflake/snowflake-arctic-embed-m-v1.5",
                    is_matryoshka=True,
                    architecture="BertModel",
-                   enable_ci_test=True),
+                   enable_test=True),
     EmbedModelInfo("Snowflake/snowflake-arctic-embed-l-v2.0",
                    is_matryoshka=True,
                    architecture="XLMRobertaModel",
-                   enable_ci_test=True),
+                   enable_test=True),
     EmbedModelInfo("Snowflake/snowflake-arctic-embed-m-v2.0",
                    is_matryoshka=True,
                    architecture="GteModel",
-                   enable_ci_test=True),
+                   enable_test=True),
 ]


@@ -60,10 +60,10 @@ def test_models(
     dtype: str,
     monkeypatch,
 ) -> None:
-    if not model_info.enable_ci_test:
+    if not model_info.enable_test:
         # A model family has many models with the same architecture,
         # and we don't need to test each one.
-        pytest.skip("Skipping CI test.")
+        pytest.skip("Skipping test.")

     example_prompts = example_prompts + EMBEDDING_PROMPTS
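For context, the renamed flag is what gates the parametrized suite: a model family shares one architecture, so only one representative entry needs enable_test=True. A self-contained sketch of the pattern (the SimpleNamespace stand-ins are illustrative, not the file's real EmbedModelInfo entries):

import pytest
from types import SimpleNamespace

# Illustrative stand-ins for EmbedModelInfo entries; only the gating field matters here.
MODELS = [
    SimpleNamespace(name="Snowflake/snowflake-arctic-embed-xs", enable_test=True),
    SimpleNamespace(name="Snowflake/snowflake-arctic-embed-s", enable_test=False),
]

@pytest.mark.parametrize("model_info", MODELS)
def test_models(model_info) -> None:
    if not model_info.enable_test:
        # One representative per architecture is enough; skip the rest.
        pytest.skip("Skipping test.")
    # ... the real test compares vLLM embeddings against HF outputs here ...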
tests/models/embedding/utils.py

Lines changed: 1 addition & 1 deletion

@@ -44,4 +44,4 @@ class EmbedModelInfo(NamedTuple):
     name: str
     is_matryoshka: bool
     architecture: str = ""
-    enable_ci_test: bool = True
+    enable_test: bool = True
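Because the renamed field keeps its True default, any call site that omits it is unaffected by this commit; a quick check, assuming nothing but the field name changed (as the hunk shows):

from typing import NamedTuple

class EmbedModelInfo(NamedTuple):
    name: str
    is_matryoshka: bool
    architecture: str = ""
    enable_test: bool = True

info = EmbedModelInfo("Snowflake/snowflake-arctic-embed-xs", is_matryoshka=False)
assert info.enable_test        # defaults to True, so omitted entries still run
assert info.architecture == ""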

tests/models/registry.py

Lines changed: 4 additions & 2 deletions

@@ -245,13 +245,15 @@ def check_available_online(
     "BertModel": _HfExamplesInfo("BAAI/bge-base-en-v1.5"),
     "Gemma2Model": _HfExamplesInfo("BAAI/bge-multilingual-gemma2"),
     "GritLM": _HfExamplesInfo("parasail-ai/GritLM-7B-vllm"),
-    "GteModel": _HfExamplesInfo("Snowflake/snowflake-arctic-embed-m-v2.0"),
+    "GteModel": _HfExamplesInfo("Snowflake/snowflake-arctic-embed-m-v2.0",
+                                trust_remote_code=True),
     "InternLM2ForRewardModel": _HfExamplesInfo("internlm/internlm2-1_8b-reward",
                                                trust_remote_code=True),
     "JambaForSequenceClassification": _HfExamplesInfo("ai21labs/Jamba-tiny-reward-dev"),  # noqa: E501
     "LlamaModel": _HfExamplesInfo("llama", is_available_online=False),
     "MistralModel": _HfExamplesInfo("intfloat/e5-mistral-7b-instruct"),
-    "NomicBertModel": _HfExamplesInfo("Snowflake/snowflake-arctic-embed-m-long"),  # noqa: E501
+    "NomicBertModel": _HfExamplesInfo("Snowflake/snowflake-arctic-embed-m-long",  # noqa: E501
+                                      trust_remote_code=True),
     "Qwen2Model": _HfExamplesInfo("ssmits/Qwen2-7B-Instruct-embed-base"),
     "Qwen2ForRewardModel": _HfExamplesInfo("Qwen/Qwen2.5-Math-RM-72B"),
     "Qwen2ForProcessRewardModel": _HfExamplesInfo("Qwen/Qwen2.5-Math-PRM-7B"),

vllm/model_executor/models/bert.py

Lines changed: 20 additions & 19 deletions

@@ -111,7 +111,6 @@ class BertEncoder(nn.Module):
     def __init__(self,
                  vllm_config: VllmConfig,
                  bias: bool = True,
-                 gate_up_proj_bias: bool = True,
                  rotary_kwargs: Optional[dict] = None,
                  prefix: str = ""):
         super().__init__()
@@ -123,7 +122,6 @@ def __init__(self,
                       cache_config=cache_config,
                       quant_config=quant_config,
                       bias=bias,
-                      gate_up_proj_bias=gate_up_proj_bias,
                       rotary_kwargs=rotary_kwargs,
                       prefix=f"{prefix}.layer.{layer_idx}")
             for layer_idx in range(config.num_hidden_layers)
@@ -146,7 +144,6 @@ def __init__(self,
                  cache_config: Optional[CacheConfig] = None,
                  quant_config: Optional[QuantizationConfig] = None,
                  bias: bool = True,
-                 gate_up_proj_bias: bool = True,
                  rotary_kwargs: Optional[dict] = None,
                  prefix: str = ""):
         super().__init__()
@@ -166,7 +163,7 @@ def __init__(self,
                 hidden_size=config.hidden_size,
                 intermediate_size=config.intermediate_size,
                 hidden_act=config.hidden_act,
-                gate_up_proj_bias=gate_up_proj_bias,
+                bias=bias,
                 quant_config=quant_config,
                 prefix=f"{prefix}.intermediate")
         else:
@@ -350,15 +347,15 @@ def __init__(self,
                  hidden_size: int,
                  intermediate_size: int,
                  hidden_act: str,
-                 gate_up_proj_bias: bool = True,
+                 bias: bool = True,
                  quant_config: Optional[QuantizationConfig] = None,
                  prefix: str = ""):
         super().__init__()
         self.act_fn = get_act_and_mul_fn(hidden_act)
         self.gate_up_proj = MergedColumnParallelLinear(
             hidden_size,
             [intermediate_size] * 2,
-            bias=gate_up_proj_bias,
+            bias=bias,
             quant_config=quant_config,
             prefix=f"{prefix}.gate_up_proj",
         )
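For readers new to this file: gate_up_proj fuses the gate and up projections into a single matmul of width 2 * intermediate_size, and get_act_and_mul_fn returns the matching gated activation that splits the output back apart, which is why a single bias flag can now cover the whole MLP. A single-GPU stand-in for the pairing (illustrative; vLLM's MergedColumnParallelLinear additionally handles tensor-parallel sharding and quantization):

import torch
import torch.nn.functional as F
from torch import nn

class NaiveGatedMLP(nn.Module):
    """Toy equivalent of gate_up_proj + gelu_and_mul, without parallelism."""

    def __init__(self, hidden_size: int, intermediate_size: int, bias: bool = True):
        super().__init__()
        # Fused gate/up projection: one weight matrix, doubled output width.
        self.gate_up_proj = nn.Linear(hidden_size, 2 * intermediate_size, bias=bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        gate, up = self.gate_up_proj(x).chunk(2, dim=-1)
        return F.gelu(gate) * up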
@@ -410,24 +407,18 @@ def __init__(self,
                  prefix: str = "",
                  embedding_class: type = BertEmbedding,
                  bias: bool = True,
-                 gate_up_proj_bias: bool = True,
                  rotary_kwargs: Optional[dict] = None,
                  add_pooling_layer: bool = False):
         super().__init__()
         """
         For BertModel, all linear layers have bias.
-        For NomicBertModel, all linear layers do not have bias,
-        the bias parameter intended to control all linear layers.
-        For GteModel, only up_gate_proj layer does not have bias,
-        so the gate_up_proj_bias parameter must be added.
-        see #16649
+        For NomicBertModel, all linear layers do not have bias.
         """

         config = vllm_config.model_config.hf_config
         self.embeddings = embedding_class(config)
         self.encoder = BertEncoder(vllm_config=vllm_config,
                                    bias=bias,
-                                   gate_up_proj_bias=gate_up_proj_bias,
                                    rotary_kwargs=rotary_kwargs,
                                    prefix=f"{prefix}.encoder")
         self.pooler = BertPooler(config) if add_pooling_layer else None
@@ -672,7 +663,6 @@ def _build_model(self,
         return BertModel(vllm_config=vllm_config,
                          prefix=prefix,
                          bias=False,
-                         gate_up_proj_bias=False,
                          rotary_kwargs=rotary_kwargs,
                          embedding_class=BertEmbedding)

@@ -694,6 +684,7 @@ def _build_model(self,

         assert config.__class__.__name__ == "GteConfig"
         assert config.position_embedding_type == "rope"
+        assert config.hidden_act == "gelu"

         config.position_embedding_type = "rotary"
         config.hidden_act = "gelu_and_mul"
@@ -706,11 +697,21 @@ def _build_model(self,
             "base": config.rope_theta,
         }

-        return BertModel(vllm_config=vllm_config,
-                         prefix=prefix,
-                         gate_up_proj_bias=False,
-                         rotary_kwargs=rotary_kwargs,
-                         embedding_class=BertEmbedding)
+        model = BertModel(vllm_config=vllm_config,
+                          prefix=prefix,
+                          rotary_kwargs=rotary_kwargs,
+                          embedding_class=BertEmbedding)
+
+        # GteModel only gate_up_proj does not have bias.
+        for layer in model.encoder.layer:
+            layer.intermediate.gate_up_proj = MergedColumnParallelLinear(
+                config.hidden_size,
+                [config.intermediate_size] * 2,
+                bias=False,
+                quant_config=vllm_config.quant_config,
+                prefix=f"{prefix}.gate_up_proj",
+            )
+        return model

     def split_up_gate_proj(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         n = "mlp.up_gate_proj"
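Since the GTE build path now constructs the shared BertModel with default biases and then rebuilds only gate_up_proj, a quick sanity check of the result could look like this (a sketch assuming model is the value returned by _build_model, and that vLLM's linear layers register bias as None when created with bias=False):

for layer in model.encoder.layer:
    # Only the fused gate/up projection loses its bias under GteModel;
    # every other linear layer keeps the BertModel default (bias=True).
    assert layer.intermediate.gate_up_proj.bias is None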
