-
Notifications
You must be signed in to change notification settings - Fork 70
Description
Running environment: raspberry pi 5 6.6.74+rpt-rpi-2712 #1 SMP PREEMPT Debian 1:6.6.74-1+rpt1 (2025-01-27) aarch64
I ran the official example and it resulted in a segmentation fault during llama.cpp inference:
huggingface-cli download ChenMnZ/Llama-3-8b-instruct-EfficientQAT-w2g128-GPTQ --local-dir ${model_dir}
python tools/run_pipeline.py -o ${model_dir} -m llama-3-8b-2bit -q int_n
I then compiled llama.cpp in debug mode and ran:
gdb ./T-MAC/3rdparty/llama.cpp/build/bin/llama-cli core
Below is the debug information from the core dump:
Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x00007ffec98e149c in ggml_tmac_transform_tensor (tensor=0x55564563c2e0) at /home/pi/T-MAC/3rdparty/llama.cpp/ggml/src/ggml-tmac.cpp:343
343 scales[i] = (tmac_float_type) i2_scales[i];
[Current thread is 1 (Thread 0x7ffec9e65ec0 (LWP 14750))]
(gdb) frame 0
#0 0x00007ffec98e149c in ggml_tmac_transform_tensor (tensor=0x55564563c2e0) at /home/pi/T-MAC/3rdparty/llama.cpp/ggml/src/ggml-tmac.cpp:343
343 scales[i] = (tmac_float_type) i2_scales[i];
(gdb) list
338 GGML_ASSERT(sizeof(tmac_float_type) <= sizeof(float));
339 qweights = (uint8_t *) tensor->data;
340 scales = (tmac_float_type *) (qweights + k * m / 8);
341 float * i2_scales = (float * )(qweights + k * m / 8);
342 for (int i = 0; i < scales_size; i++) {
343 scales[i] = (tmac_float_type) i2_scales[i];
344 }
345 }
346
347 tensor->extra = tmac_tensor_extras + tmac_tensor_extras_index;
(gdb) info locals
i = 0
i2_scales = 0x7ffea6bffe80
bits = 4
g = 4
ngroups_per_elem = 2
k = 2048
m = 8192
kcfg = {bm = 256, simd_n_in = 16, simd_n_out = 8, kfactor = 16, group_size = 128, lut_scales_size = 32, scales_size = 32768, n_tile_num = 32}
bm = 256
simd_n_in = 16
simd_n_out = 8
kfactor = 16
group_size = 128
lut_scales_size = 32
scales_size = 32768
n_tile_num = 32
mgroup = 32
qweights = 0x7ffea69ffe80 "\337\r\351z\217\375}\372h\360~\226|-\201k\327\305\354\2510\277.C\f?\021|\301O\360\224\241- \016\316L$@x\325\322\3334\264Ӹ\033"
scales = 0x7ffea6bffe80