Skip to content

Commit 7930192

Browse files
committed
[Feat] Support TQ1_0 and TQ2_0 models.
1 parent 9365da4 commit 7930192

File tree

3 files changed

+9
-3
lines changed

3 files changed

+9
-3
lines changed

3rdparty/llama.cpp

python/t_mac/model_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@
5959
[2, 10240, 3200, 1, 1],
6060
[2, 800, 3200, 1, 1],
6161
],
62+
"trilm-3.9b": [
63+
[2, 3072, 3072, 1, -1],
64+
[2, 3072, 9216, 1, -1],
65+
[2, 9216, 3072, 1, -1],
66+
[2, 768, 3072, 1, -1],
67+
],
6268
"test": [
6369
# Add customized kernels here
6470
],

python/t_mac/weights.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def preprocess_weights(
5555

5656
# (M // bits, K, bits)
5757
w = np.stack([(w >> ib) & 1 for ib in range(bits)], axis=-1)
58-
# (M // bits, K, bits) -> (M // bits, bits, K) -> (M // bits, bits, K) -> (M // bits, bits, K // g, g)
58+
# (M // bits, K, bits) -> (M // bits, bits, K) -> (M // bits, bits, K // g, g)
5959
w = w.transpose(0, 2, 1).reshape(M // bits, bits, K // g, g)
6060
w = sum([(w[:, :, :, ig] << ig) for ig in range(g)])
6161
# 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31
@@ -65,7 +65,7 @@ def preprocess_weights(
6565
w = w.reshape(M // bits // simd_n_out, simd_n_out, bits, K // g).transpose(0, 2, 1, 3)
6666
mgroup = ngroups_per_elem * simd_n_in
6767
w = w.reshape(M // mgroup, ngroups_per_elem, simd_n_in, K // g).transpose(0, 2, 1, 3)
68-
# 0 1 2 3 4 5
68+
# 0 1 2 3 4 5
6969
w = w.reshape(M // bm, bm // mgroup, simd_n_in, ngroups_per_elem, K // g // kfactor, kfactor).transpose(0, 4, 1, 5, 2, 3)
7070
w = sum([(w[:, :, :, :, :, ng] << (ng * g)) for ng in range(ngroups_per_elem)])
7171
w = w.reshape(M // bm, K // g // kfactor, bm // mgroup, kfactor, simd_n_in)

0 commit comments

Comments
 (0)