Commit 71995c0

Merge pull request ikawrakow#13 from Thireus/glm-4.5-testing
GLM-4.5 llama.cpp final port
2 parents 292300d + a90aec1, commit 71995c0

File tree

8 files changed, +415 -217 lines

convert_hf_to_gguf.py

Lines changed: 37 additions & 111 deletions
@@ -618,12 +618,15 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b":
             # ref: https://huggingface.co/THUDM/glm-4-9b-chat
             res = "chatglm-bpe"
+        if chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516":
+            # ref: https://huggingface.co/THUDM/glm-4-9b-chat
+            res = "chatglm-bpe"
         if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
             # ref: https://huggingface.co/THUDM/glm-4-9b-hf
             res = "glm4"
         if chkhsh == "9ca2dd618e8afaf09731a7cf6e2105b373ba6a1821559f258b272fe83e6eb902":
             # ref: https://huggingface.co/zai-org/GLM-4.5-Air, https://huggingface.co/zai-org/GLM-4.5
-            res = "gpt-2"
+            res = "glm4"
         if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee":
             # ref: https://huggingface.co/LumiOpen/Viking-7B
             res = "viking"
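
For orientation: the chkhsh values in this chain are SHA-256 digests of the token IDs the tokenizer produces for a fixed probe string. A minimal sketch of the detection step, assuming the upstream convert_hf_to_gguf.py convention (the real multilingual probe text is elided here):

# Sketch of chkhsh-based pre-tokenizer detection; `chktxt` stands in for the
# long probe string used by the real script, so the digest below will only
# match when the genuine probe text is substituted.
from hashlib import sha256
from transformers import AutoTokenizer

chktxt = "..."  # placeholder probe text

def detect_pre_tokenizer(model_dir: str) -> str:
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    # hash the list of token IDs produced for the probe text
    chkhsh = sha256(str(tokenizer.encode(chktxt)).encode()).hexdigest()
    if chkhsh == "9ca2dd618e8afaf09731a7cf6e2105b373ba6a1821559f258b272fe83e6eb902":
        return "glm4"  # GLM-4.5 / GLM-4.5-Air now resolve to glm4 instead of gpt-2
    raise NotImplementedError(f"unknown tokenizer, chkhsh = {chkhsh}")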
@@ -3961,33 +3964,32 @@ class Glm4MoeModel(Model):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         # GLM4_MOE has num_hidden_layers + 1 actual layers (including NextN layer)
-        self.block_count = self.hparams["num_hidden_layers"] + 1
+        self.block_count = self.hparams["num_hidden_layers"] + self.hparams.get("num_nextn_predict_layers", 0)
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
-
+
     def set_vocab(self):
         from transformers import AutoTokenizer
 
-        tokenizer = AutoTokenizer.from_pretrained(
-            self.dir_model, trust_remote_code=True
-        )
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
         special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
         tokens, toktypes, tokpre = self.get_vocab_base()
         self.gguf_writer.add_tokenizer_model("gpt2")
         self.gguf_writer.add_tokenizer_pre(tokpre)
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
 
-        # Set special tokens
-        special_vocab._set_special_token(
-            "eos", tokenizer.get_added_vocab()["<|endoftext|>"]
-        )
-        special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"])
-        special_vocab._set_special_token(
-            "unk", tokenizer.get_added_vocab()["<|endoftext|>"]
-        )
-        special_vocab._set_special_token(
-            "bos", tokenizer.get_added_vocab()["<|endoftext|>"]
-        )
+        # Special tokens
+        # Note: Using <|endoftext|> (151329) for eot causes endless generation
+        special_vocab._set_special_token("bos", tokenizer.get_added_vocab()["[gMASK]"])  # 151331
+        special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"])  # 151336
+        special_vocab._set_special_token("unk", tokenizer.get_added_vocab()["<|endoftext|>"])  # 151329
+        special_vocab._set_special_token("eom", tokenizer.get_added_vocab()["<|observation|>"])  # 151338
+
+        # Patch broken chat template
+        if isinstance(special_vocab.chat_template, str) and "visible_text(m.content).endswith" in special_vocab.chat_template:
+            special_vocab.chat_template = special_vocab.chat_template.replace(
+                """{{ visible_text(m.content) }}\n{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}}""",
+                """{% set content = visible_text(m.content) %}{{ content }}\n{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not content.endswith("/nothink")) else '' -}}""")
 
         special_vocab.add_to_gguf(self.gguf_writer)
 
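The token IDs cited in the comments above can be sanity-checked against the published tokenizer; a small sketch, assuming the zai-org/GLM-4.5-Air tokenizer referenced earlier in this diff is reachable:

# Verify the special-token IDs cited in the set_vocab() comments.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("zai-org/GLM-4.5-Air")
added = tokenizer.get_added_vocab()  # dict of added token text -> token id
for tok in ("[gMASK]", "<|user|>", "<|endoftext|>", "<|observation|>"):
    print(tok, added[tok])
# expected per the diff comments: 151331, 151336, 151329, 151338

Note the design choice documented in the diff itself: eot is mapped to <|user|> rather than <|endoftext|>, since the latter reportedly causes endless generation.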
@@ -4001,10 +4003,9 @@ def set_gguf_parameters(self):
             int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5))
         )
 
-        # MoE parameters
-        if (n_experts := self.hparams.get("n_routed_experts")) is not None:
-            self.gguf_writer.add_expert_count(n_experts)
-        # Note: expert_used_count is already set by parent class using num_experts_per_tok
+        # MoE parameters - Use only routed expert count (shared experts handled separately)
+        if (n_routed_experts := self.hparams.get("n_routed_experts")) is not None:
+            self.gguf_writer.add_expert_count(n_routed_experts)
         if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
             self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
         if (n_shared_experts := self.hparams.get("n_shared_experts")) is not None:
@@ -4023,8 +4024,11 @@ def set_gguf_parameters(self):
         if (norm_topk_prob := self.hparams.get("norm_topk_prob")) is not None:
             self.gguf_writer.add_expert_weights_norm(norm_topk_prob)
 
+        # NextN/MTP prediction layers
+        if (num_nextn_predict_layers := self.hparams.get("num_nextn_predict_layers")) is not None:
+            self.gguf_writer.add_nextn_predict_layers(num_nextn_predict_layers)
+
     _experts: list[dict[str, Tensor]] | None = None
-    _shared_experts: list[dict[str, Tensor]] | None = None
 
     def modify_tensors(
         self, data_torch: Tensor, name: str, bid: int | None
@@ -4035,21 +4039,17 @@ def modify_tensors(
         name = name.replace("language_model.", "")  # for multimodal variants
 
         # Handle main token embedding (but not layer-specific NextN embeddings)
-        if name == "model.embed_tokens.weight":
+        if name == "model.embed_tokens.weight" and ".layers." not in name:
             return [(self.map_tensor_name("token_embd.weight"), data_torch)]
 
         # Handle routed experts
-        if name.find("mlp.experts") != -1 and "shared_experts" not in name:
+        if name.find("mlp.experts") != -1:
             n_experts = self.hparams["n_routed_experts"]
             assert bid is not None
 
             if self._experts is None:
                 self._experts = [{} for _ in range(self.block_count)]
 
-            # Extend experts array if needed (for models where actual layers > num_hidden_layers)
-            while len(self._experts) <= bid:
-                self._experts.append({})
-
             self._experts[bid][name] = data_torch
 
             if len(self._experts[bid]) >= n_experts * 3:
@@ -4065,95 +4065,21 @@ def modify_tensors(
                         del self._experts[bid][ename]
 
                     data_torch = torch.stack(datas, dim=0)
-                    # Generate GGUF tensor names for merged experts
-                    if w_name == "down_proj":
-                        new_name = f"blk.{bid}.ffn_down_exps.weight"
-                    elif w_name == "gate_proj":
-                        new_name = f"blk.{bid}.ffn_gate_exps.weight"
-                    elif w_name == "up_proj":
-                        new_name = f"blk.{bid}.ffn_up_exps.weight"
-                    else:
-                        merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
-                        new_name = self.map_tensor_name(merged_name)
+
+                    merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
+
+                    new_name = self.map_tensor_name(merged_name)
                     tensors.append((new_name, data_torch))
                 return tensors
             else:
                 return []
 
-        # Handle expert gating input (routing gate)
-        if ".mlp.gate.e_score_correction_bias" in name:
-            new_name = name.replace("model.layers.", "blk.").replace(
-                ".mlp.gate.e_score_correction_bias", ".ffn_gate_inp.bias"  # *NOTE* this is ".exp_probs_b" in mainline PR
-            )
-            return [(new_name, data_torch)]
-        elif ".mlp.gate.weight" in name:
-            new_name = name.replace("model.layers.", "blk.").replace(
-                ".mlp.gate.weight", ".ffn_gate_inp.weight"
-            )
-            return [(new_name, data_torch)]
-
-        # Handle shared expert tensors
-        if ".mlp.shared_experts." in name:
-            new_name = name.replace("model.layers.", "blk.").replace(".mlp.shared_experts.", ".ffn_")
-            if "gate_proj" in new_name:
-                new_name = new_name.replace("gate_proj", "gate_shexp")
-            elif "down_proj" in new_name:
-                new_name = new_name.replace("down_proj", "down_shexp")
-            elif "up_proj" in new_name:
-                new_name = new_name.replace("up_proj", "up_shexp")
-            return [(new_name, data_torch)]
-
-        # Handle regular dense FFN layers (for hybrid dense/MoE architecture)
-        if ".mlp." in name and "experts" not in name and "_shexp" not in name:
-            if "gate_proj" in name:
-                new_name = name.replace("model.layers.", "blk.").replace(
-                    ".mlp.gate_proj.weight", ".ffn_gate.weight"
-                )
-            elif "up_proj" in name:
-                new_name = name.replace("model.layers.", "blk.").replace(
-                    ".mlp.up_proj.weight", ".ffn_up.weight"
-                )
-            elif "down_proj" in name:
-                new_name = name.replace("model.layers.", "blk.").replace(
-                    ".mlp.down_proj.weight", ".ffn_down.weight"
-                )
-            else:
-                new_name = name
-            return [(self.map_tensor_name(new_name), data_torch)]
-
-        # Handle special NextN tensors - preserve for future MTP support - See https://github.com/ggml-org/llama.cpp/pull/13236
-        if (
-            ".embed_tokens." in name
-            or ".shared_head." in name
-            or ".eh_proj." in name
-            or ".enorm." in name
-            or ".hnorm." in name
-        ):
-            new_name = name.replace("model.layers.", "blk.").replace("model.", "").replace(".weight", "")
-            # logger.debug(f"Skipping MTP tensor: {new_name}")
-            return [(new_name, data_torch)]
-
-        # GLM tensor mapping - handle directly without map_tensor_name
-        if ".input_layernorm." in name:
-            new_name = name.replace("model.layers.", "blk.").replace(".input_layernorm.", ".attn_norm.")
-            return [(new_name, data_torch)]
-        elif ".post_attention_layernorm." in name:
-            new_name = name.replace("model.layers.", "blk.").replace(".post_attention_layernorm.", ".ffn_norm.")
-            return [(new_name, data_torch)]
-        elif ".self_attn." in name:
-            # Map GLM self_attn to standard attention naming
-            new_name = name.replace("model.layers.", "blk.").replace(".self_attn.", ".attn_")
-            if "q_proj" in new_name:
-                new_name = new_name.replace("q_proj", "q")
-            elif "k_proj" in new_name:
-                new_name = new_name.replace("k_proj", "k")
-            elif "v_proj" in new_name:
-                new_name = new_name.replace("v_proj", "v")
-            elif "o_proj" in new_name:
-                new_name = new_name.replace("o_proj", "output")
-            return [(new_name, data_torch)]
+        if name.endswith("e_score_correction_bias"):
+            name = name.replace("e_score_correction_bias", "e_score_correction.bias")
 
-        return super().modify_tensors(data_torch, name, bid)
+        new_name = self.map_tensor_name(name)
+
+        return [(new_name, data_torch)]
 
     def prepare_tensors(self):
         super().prepare_tensors()
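
The surviving expert-merge path buffers per-expert tensors until a block has collected all n_routed_experts × 3 projections, then stacks each projection group into one 3-D tensor and resolves its name through map_tensor_name instead of the removed hand-rolled rename branches. A minimal sketch of the stacking step (shapes are illustrative, not taken from the real checkpoints):

# Sketch: merge one block's per-expert projections into a single 3-D tensor,
# mirroring the torch.stack(datas, dim=0) call retained in the diff above.
import torch

n_experts, n_embd, n_ff = 4, 8, 16
datas = [torch.randn(n_ff, n_embd) for _ in range(n_experts)]  # one 2-D tensor per expert
merged = torch.stack(datas, dim=0)  # becomes blk.{bid}.ffn_*_exps in the GGUF file
print(merged.shape)  # torch.Size([4, 16, 8]) -> [n_experts, n_ff, n_embd]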

convert_hf_to_gguf_update.py

Lines changed: 2 additions & 0 deletions
@@ -96,6 +96,8 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "smollm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/HuggingFaceTB/SmolLM-135M", },
     {"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
     {"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
+    {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2", },
+    {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.5-Air", "chkhsh": "9ca2dd618e8afaf09731a7cf6e2105b373ba6a1821559f258b272fe83e6eb902", },
     {"name": "kimi-k2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/moonshotai/Kimi-K2-Base", "chkhsh": "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890", },
 ]
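
A pinned "chkhsh" field lets the update script emit a detection branch without re-hashing a tokenizer it may not be able to download freely; that rationale and the exact codegen shape are assumptions here, but the hashes are the ones added in this diff. A sketch:

# Sketch: turn pinned model entries into the chkhsh dispatch branches used by
# convert_hf_to_gguf.py (output format mirrors the hunk earlier in this page).
models = [
    {"name": "glm4", "repo": "https://huggingface.co/THUDM/glm-4-9b-hf",
     "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2"},
    {"name": "glm4", "repo": "https://huggingface.co/zai-org/GLM-4.5-Air",
     "chkhsh": "9ca2dd618e8afaf09731a7cf6e2105b373ba6a1821559f258b272fe83e6eb902"},
]
for m in models:
    print(f'        if chkhsh == "{m["chkhsh"]}":')
    print(f'            # ref: {m["repo"]}')
    print(f'            res = "{m["name"]}"')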

gguf-py/gguf/constants.py

Lines changed: 31 additions & 20 deletions
@@ -91,6 +91,7 @@ class LLM:
         EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
         EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm"
         EXPERT_GATING_FUNC = "{arch}.expert_gating_func"
+        NEXTN_PREDICT_LAYERS = "{arch}.nextn_predict_layers"
         POOLING_TYPE = "{arch}.pooling_type"
         LOGIT_SCALE = "{arch}.logit_scale"
         DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
@@ -159,6 +160,13 @@ class Tokenizer:
         CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
         CHAT_TEMPLATES = "tokenizer.chat_templates"
         # FIM/Infill special tokens constants
+        FIM_PRE_ID = "tokenizer.ggml.fim_pre_token_id"
+        FIM_SUF_ID = "tokenizer.ggml.fim_suf_token_id"
+        FIM_MID_ID = "tokenizer.ggml.fim_mid_token_id"
+        FIM_PAD_ID = "tokenizer.ggml.fim_pad_token_id"
+        FIM_REP_ID = "tokenizer.ggml.fim_rep_token_id"
+        FIM_SEP_ID = "tokenizer.ggml.fim_sep_token_id"
+        # FIM/Infill special tokens constants
         PREFIX_ID = "tokenizer.ggml.prefix_token_id"
         SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
         MIDDLE_ID = "tokenizer.ggml.middle_token_id"
@@ -263,9 +271,6 @@ class MODEL_TENSOR(IntEnum):
     FFN_GATE_EXP = auto()
     FFN_DOWN_EXP = auto()
     FFN_UP_EXP = auto()
-    FFN_GATE_EXPS = auto()  # merged experts
-    FFN_DOWN_EXPS = auto()  # merged experts
-    FFN_UP_EXPS = auto()  # merged experts
     FFN_GATE_SHEXP = auto()
     FFN_DOWN_SHEXP = auto()
     FFN_UP_SHEXP = auto()
@@ -415,9 +420,6 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
     MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
     MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
-    MODEL_TENSOR.FFN_GATE_EXPS: "blk.{bid}.ffn_gate_exps",  # merged experts
-    MODEL_TENSOR.FFN_DOWN_EXPS: "blk.{bid}.ffn_down_exps",  # merged experts
-    MODEL_TENSOR.FFN_UP_EXPS: "blk.{bid}.ffn_up_exps",  # merged experts
     MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
     MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
     MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
@@ -465,13 +467,13 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
     MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
     MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
-    # NextN/MTP tensors (GLM4_MOE)
-    MODEL_TENSOR.NEXTN_EH_PROJ: "blk.{bid}.eh_proj",
-    MODEL_TENSOR.NEXTN_EMBED_TOKENS: "blk.{bid}.embed_tokens",
-    MODEL_TENSOR.NEXTN_ENORM: "blk.{bid}.enorm",
-    MODEL_TENSOR.NEXTN_HNORM: "blk.{bid}.hnorm",
-    MODEL_TENSOR.NEXTN_SHARED_HEAD_HEAD: "blk.{bid}.shared_head.head",
-    MODEL_TENSOR.NEXTN_SHARED_HEAD_NORM: "blk.{bid}.shared_head.norm",
+    # NextN/MTP
+    MODEL_TENSOR.NEXTN_EH_PROJ: "blk.{bid}.nextn.eh_proj",
+    MODEL_TENSOR.NEXTN_EMBED_TOKENS: "blk.{bid}.nextn.embed_tokens",
+    MODEL_TENSOR.NEXTN_ENORM: "blk.{bid}.nextn.enorm",
+    MODEL_TENSOR.NEXTN_HNORM: "blk.{bid}.nextn.hnorm",
+    MODEL_TENSOR.NEXTN_SHARED_HEAD_HEAD: "blk.{bid}.nextn.shared_head_head",
+    MODEL_TENSOR.NEXTN_SHARED_HEAD_NORM: "blk.{bid}.nextn.shared_head_norm",
 }
 
 MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
@@ -1096,23 +1098,24 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_POST_NORM,
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,
         MODEL_TENSOR.ATTN_V,
         MODEL_TENSOR.ATTN_OUT,
         MODEL_TENSOR.ATTN_Q_NORM,
         MODEL_TENSOR.ATTN_K_NORM,
-        MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_GATE,  # dense layers
-        MODEL_TENSOR.FFN_DOWN,  # dense layers
-        MODEL_TENSOR.FFN_UP,  # dense layers
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.FFN_GATE_INP,
-        MODEL_TENSOR.FFN_GATE_EXPS,
-        MODEL_TENSOR.FFN_DOWN_EXPS,
-        MODEL_TENSOR.FFN_UP_EXPS,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
         MODEL_TENSOR.FFN_GATE_SHEXP,
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
+        MODEL_TENSOR.FFN_EXP_PROBS_B,
         # NextN/MTP tensors - preserved but unused
         MODEL_TENSOR.NEXTN_EH_PROJ,
         MODEL_TENSOR.NEXTN_EMBED_TOKENS,
@@ -1684,6 +1687,14 @@ def get_type(val: Any) -> GGUFValueType:
 KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
 KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
 KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
+
+KEY_TOKENIZER_FIM_PRE_ID = Keys.Tokenizer.FIM_PRE_ID
+KEY_TOKENIZER_FIM_SUF_ID = Keys.Tokenizer.FIM_SUF_ID
+KEY_TOKENIZER_FIM_MID_ID = Keys.Tokenizer.FIM_MID_ID
+KEY_TOKENIZER_FIM_PAD_ID = Keys.Tokenizer.FIM_PAD_ID
+KEY_TOKENIZER_FIM_REP_ID = Keys.Tokenizer.FIM_REP_ID
+KEY_TOKENIZER_FIM_SEP_ID = Keys.Tokenizer.FIM_SEP_ID
+
 KEY_TOKENIZER_PREFIX_ID = Keys.Tokenizer.PREFIX_ID
 KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
 KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID

gguf-py/gguf/gguf_writer.py

Lines changed: 3 additions & 0 deletions
@@ -677,6 +677,9 @@ def add_expert_weights_norm(self, value: bool) -> None:
     def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
         self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
 
+    def add_nextn_predict_layers(self, count: int) -> None:
+        self.add_uint32(Keys.LLM.NEXTN_PREDICT_LAYERS.format(arch=self.arch), count)
+
     def add_layer_norm_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
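
A usage sketch for the new writer method; the architecture string "glm4moe" and the hparams values are assumptions for illustration, not taken from this diff:

# Write the NextN/MTP layer count into a GGUF header.
from gguf import GGUFWriter

hparams = {"num_hidden_layers": 46, "num_nextn_predict_layers": 1}  # illustrative values

writer = GGUFWriter("model.gguf", "glm4moe")
if (n := hparams.get("num_nextn_predict_layers")) is not None:
    writer.add_nextn_predict_layers(n)  # emits uint32 "glm4moe.nextn_predict_layers"

This pairs with the convert-script change above: models whose config.json lacks num_nextn_predict_layers simply skip the key, and block_count falls back to num_hidden_layers alone.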

gguf-py/gguf/tensor_mapping.py

Lines changed: 25 additions & 0 deletions
@@ -592,6 +592,31 @@ class TensorNameMap:
         MODEL_TENSOR.ENC_OUTPUT_NORM: (
             "encoder.final_layer_norm",  # t5
         ),
+
+        # NextN/MTP tensors for GLM4_MOE
+        MODEL_TENSOR.NEXTN_EH_PROJ: (
+            "model.layers.{bid}.eh_proj",
+        ),
+
+        MODEL_TENSOR.NEXTN_EMBED_TOKENS: (
+            "model.layers.{bid}.embed_tokens",
+        ),
+
+        MODEL_TENSOR.NEXTN_ENORM: (
+            "model.layers.{bid}.enorm",
+        ),
+
+        MODEL_TENSOR.NEXTN_HNORM: (
+            "model.layers.{bid}.hnorm",
+        ),
+
+        MODEL_TENSOR.NEXTN_SHARED_HEAD_HEAD: (
+            "model.layers.{bid}.shared_head.head",
+        ),
+
+        MODEL_TENSOR.NEXTN_SHARED_HEAD_NORM: (
+            "model.layers.{bid}.shared_head.norm",
+        ),
     }
 
     # architecture-specific block mappings
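
With these source patterns plus the blk.{bid}.nextn.* output names from constants.py, NextN tensors now resolve through the ordinary TensorNameMap path instead of hand-rolled renames. A sketch, assuming this fork exposes MODEL_ARCH.GLM4_MOE and the usual get_name() semantics; the block count and index are illustrative:

# Resolve an HF NextN tensor name to its GGUF name via the new mapping.
import gguf

# e.g. 47 blocks = 46 hidden layers + 1 NextN layer (illustrative)
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.GLM4_MOE, 47)
name = tmap.get_name("model.layers.46.eh_proj.weight", try_suffixes=(".weight", ".bias"))
print(name)  # expected: "blk.46.nextn.eh_proj.weight" per the constants.py rename above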
