We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 62cef26 commit 0373486 · Copy full SHA for 0373486
src/llama-graph.cpp
@@ -1376,7 +1376,7 @@ ggml_tensor * llm_graph_context::build_attn(
1376
1377
// [TAG_NO_CACHE_PAD]
1378
// TODO: if ubatch.equal_seqs() == true, we can split the three tensors below into ubatch.n_seqs_unq streams
1379
- assert(!ubatch.equal_seqs());
+ assert(!ubatch.equal_seqs() || (k_cur->ne[3] == 1 && k_cur->ne[3] == ubatch.n_seqs_unq));
1380
1381
ggml_tensor * q = q_cur;
1382
ggml_tensor * k = k_cur;
0 commit comments