Commit e39e62b

replace n_views and n_children in ggml_tensor with a hash table in the allocator
1 parent af7bd42 commit e39e62b

4 files changed: +51 −32 lines changed

ggml-alloc.c

Lines changed: 49 additions & 24 deletions
@@ -14,6 +14,35 @@
 //#define AT_PRINTF printf
 #define AT_PRINTF(...) ((void)0)
 
+struct hash_node {
+    struct ggml_tensor * t;
+    int n_children;
+    int n_views;
+};
+
+static size_t hash(void * p) {
+    return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
+}
+
+static struct hash_node * hash_get(struct hash_node hash_table[], struct ggml_tensor * t) {
+    size_t h = hash(t);
+
+    // linear probing
+    size_t i = h;
+    while (hash_table[i].t != NULL) {
+        if (hash_table[i].t == t) {
+            return &hash_table[i];
+        }
+        i = (i + 1) % GGML_GRAPH_HASHTABLE_SIZE;
+        if (i == h) {
+            // hash table is full
+            GGML_ASSERT(false);
+        }
+    }
+
+    hash_table[i].t = t;
+    return &hash_table[i];
+}
 
 // TODO: GGML_PAD ?
 static size_t aligned_offset(const void * buffer, size_t offset, size_t alignment) {
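The new bookkeeping lives entirely inside the allocator: hash_get() maps a tensor pointer to its counter slot using open addressing with linear probing, and the first lookup of a tensor claims a zeroed slot, so lookup doubles as insertion. Below is a minimal standalone sketch of the same pattern; TABLE_SIZE, struct dummy_tensor, and assert() are illustrative stand-ins for GGML_GRAPH_HASHTABLE_SIZE, struct ggml_tensor, and GGML_ASSERT, not part of the commit.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define TABLE_SIZE 97 // stand-in for GGML_GRAPH_HASHTABLE_SIZE

struct dummy_tensor { int id; };

struct hash_node {
    struct dummy_tensor * t;
    int n_children;
    int n_views;
};

static size_t hash(void * p) {
    return (size_t)p % TABLE_SIZE;
}

// open addressing with linear probing, mirroring the commit's hash_get()
static struct hash_node * hash_get(struct hash_node table[], struct dummy_tensor * t) {
    size_t h = hash(t);
    size_t i = h;
    while (table[i].t != NULL) {
        if (table[i].t == t) {
            return &table[i]; // already tracked
        }
        i = (i + 1) % TABLE_SIZE;
        assert(i != h); // table full
    }
    table[i].t = t; // first lookup inserts a zeroed entry
    return &table[i];
}

int main(void) {
    static struct hash_node table[TABLE_SIZE]; // zero-initialized
    struct dummy_tensor a = {1}, b = {2};

    hash_get(table, &a)->n_children += 1;
    hash_get(table, &a)->n_children += 1;
    hash_get(table, &b)->n_views    += 1;

    printf("a: %d children, b: %d views\n",
           hash_get(table, &a)->n_children, hash_get(table, &b)->n_views);

    memset(table, 0, sizeof(table)); // per-graph reset, as the allocator does
    return 0;
}

Because slots are claimed on first use and never released individually, the table is only valid for one allocation pass; the allocator simply memsets it before each graph, which is cheaper and safer than walking every node and leaf to reset per-tensor fields.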
@@ -35,6 +64,7 @@ struct ggml_allocator {
     size_t alignment;
     int n_free_blocks;
     struct free_block free_blocks[MAX_FREE_BLOCKS];
+    struct hash_node hash_table[GGML_GRAPH_HASHTABLE_SIZE];
     size_t max_size;
     bool measure;
 
@@ -215,6 +245,7 @@ struct ggml_allocator * ggml_allocator_new(void * data, size_t size, size_t alig
         /*.alignment     = */ alignment,
         /*.n_free_blocks = */ 0,
         /*.free_blocks   = */ {{0}},
+        /*.hash_table    = */ {{0}},
         /*.max_size      = */ 0,
         /*.measure       = */ false,
 #ifdef GGML_ALLOCATOR_DEBUG
@@ -241,6 +272,7 @@ struct ggml_allocator * ggml_allocator_new_measure(size_t alignment) {
         /*.alignment     = */ alignment,
         /*.n_free_blocks = */ 0,
         /*.free_blocks   = */ {{0}},
+        /*.hash_table    = */ {{0}},
         /*.max_size      = */ 0,
         /*.measure       = */ true,
 #ifdef GGML_ALLOCATOR_DEBUG
@@ -305,7 +337,7 @@ static struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
     return parent;
 }
 
-bool ggml_op_can_inplace(enum ggml_op op) {
+static bool ggml_op_can_inplace(enum ggml_op op) {
     switch (op) {
         case GGML_OP_SCALE:
         case GGML_OP_DIAG_MASK_ZERO:
@@ -333,6 +365,7 @@ bool ggml_op_can_inplace(enum ggml_op op) {
 }
 
 static void allocate_node(struct ggml_allocator * alloc, struct ggml_tensor * node) {
+    struct hash_node * ht = alloc->hash_table;
    if (node->data == NULL) {
        if (ggml_is_view(node)) {
            size_t offset;
@@ -360,10 +393,12 @@ static void allocate_node(struct ggml_allocator * alloc, struct ggml_tensor * no
            if (parent == NULL) {
                break;
            }
-           if (parent->data != NULL && parent->n_children == 1 && parent->n_views == 0 && ggml_are_same_layout(node, parent) && ggml_op_can_inplace(node->op)) {
+           struct hash_node * p_hn = hash_get(ht, parent);
+           if (parent->data != NULL && p_hn->n_children == 1 && p_hn->n_views == 0 && ggml_are_same_layout(node, parent) && ggml_op_can_inplace(node->op)) {
               if (ggml_is_view(parent)) {
                   struct ggml_tensor * view_src = get_view_source(parent);
-                  if (view_src->n_views == 1 && view_src->n_children == 0 && view_src->data == parent->data) {
+                  struct hash_node * view_src_hn = hash_get(ht, view_src);
+                  if (view_src_hn->n_views == 1 && view_src_hn->n_children == 0 && view_src->data == parent->data) {
                      // TODO: the offset of the view parent must be kept to ensure that the op doesn't overwrite
                      // the parent's data that it will need later (same layout requirement). the problem is that then
                      // we cannot free the tensor because the original address of the allocation is lost.
@@ -391,21 +426,9 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
     struct ggml_cgraph ** graphs, int n_graphs,
     struct ggml_tensor *** inputs, struct ggml_tensor *** outputs) {
 
-    // reset counters
-    for (int g = 0; g < n_graphs; g++) {
-        struct ggml_cgraph * gf = graphs[g];
-        for (int i = 0; i < gf->n_nodes; i++) {
-            struct ggml_tensor * node = gf->nodes[i];
-            node->n_children = 0;
-            node->n_views = 0;
-        }
-
-        for (int i = 0; i < gf->n_leafs; i++) {
-            struct ggml_tensor * leaf = gf->leafs[i];
-            leaf->n_children = 0;
-            leaf->n_views = 0;
-        }
-    }
+    // reset hash table
+    struct hash_node * ht = alloc->hash_table;
+    memset(ht, 0, sizeof(struct hash_node) * GGML_GRAPH_HASHTABLE_SIZE);
 
     // count number of children and views
     for (int g = 0; g < n_graphs; g++) {
@@ -415,15 +438,15 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
 
             if (ggml_is_view(node)) {
                 struct ggml_tensor * view_src = get_view_source(node);
-                view_src->n_views += 1;
+                hash_get(ht, view_src)->n_views += 1;
             }
 
             for (int j = 0; j < GGML_MAX_SRC; j++) {
                 struct ggml_tensor * parent = node->src[j];
                 if (parent == NULL) {
                     break;
                 }
-                parent->n_children += 1;
+                hash_get(ht, parent)->n_children += 1;
             }
         }
     }
@@ -474,16 +497,18 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
                 if (parent == NULL) {
                     break;
                 }
-                parent->n_children -= 1;
+                struct hash_node * p_hn = hash_get(ht, parent);
+                p_hn->n_children -= 1;
 
                 //AT_PRINTF("parent %s: %d children, %d views\n", parent->name, parent->n_children, parent->n_views);
 
-                if (parent->n_children == 0 && parent->n_views == 0) {
+                if (p_hn->n_children == 0 && p_hn->n_views == 0) {
                     if (ggml_is_view(parent)) {
                         struct ggml_tensor * view_src = get_view_source(parent);
-                        view_src->n_views -= 1;
+                        struct hash_node * view_src_hn = hash_get(ht, view_src);
+                        view_src_hn->n_views -= 1;
                         AT_PRINTF("view_src %s: %d children, %d views\n", view_src->name, view_src->n_children, view_src->n_views);
-                        if (view_src->n_views == 0 && view_src->n_children == 0 && view_src->data != node->data) {
+                        if (view_src_hn->n_views == 0 && view_src_hn->n_children == 0 && view_src->data != node->data) {
                             ggml_allocator_free_tensor(alloc, view_src);
                         }
                     }
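Taken together, the loops above perform graph-local reference counting: a forward pass records how many consumers (children) and views each tensor has, and as nodes are processed the parents' counters are decremented, with a tensor's buffer eligible for reuse once both hit zero. Below is a minimal runnable sketch of that lifecycle under simplifying assumptions: a flat lookup() array instead of hash_get(), a printf in place of ggml_allocator_free_tensor(), and a two-source toy tensor type.

#include <stddef.h>
#include <stdio.h>

#define MAX_SRC     2  // stand-in for GGML_MAX_SRC
#define MAX_TENSORS 16

struct tensor { const char * name; struct tensor * src[MAX_SRC]; };

// counters live beside the graph, not inside the tensors themselves
static struct counts { struct tensor * t; int n_children; int n_views; } table[MAX_TENSORS];

static struct counts * lookup(struct tensor * t) {
    for (int i = 0; i < MAX_TENSORS; i++) {
        if (table[i].t == t)    return &table[i];
        if (table[i].t == NULL) { table[i].t = t; return &table[i]; }
    }
    return NULL; // table full (the real code asserts)
}

// pass 1: every edge parent -> node bumps the parent's child count
static void count_children(struct tensor ** nodes, int n) {
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < MAX_SRC && nodes[i]->src[j]; j++) {
            lookup(nodes[i]->src[j])->n_children += 1;
        }
    }
}

// pass 2: once a node has been handled, release parents whose counters hit zero
static void release_parents(struct tensor * node) {
    for (int j = 0; j < MAX_SRC && node->src[j]; j++) {
        struct counts * c = lookup(node->src[j]);
        c->n_children -= 1;
        if (c->n_children == 0 && c->n_views == 0) {
            printf("free %s\n", node->src[j]->name); // buffer can now be reused
        }
    }
}

int main(void) {
    struct tensor a = {"a", {NULL}}, b = {"b", {NULL}};
    struct tensor c = {"c", {&a, &b}}, d = {"d", {&c, NULL}};
    struct tensor * order[] = {&c, &d};

    count_children(order, 2);
    release_parents(&c); // frees a and b: their only consumer has run
    release_parents(&d); // frees c
    return 0;
}

The view counters follow the same rule: a view pins its source tensor, so a source is only released when it has no remaining children and no remaining views, which is exactly the pair of conditions checked before ggml_allocator_free_tensor() in the diff above.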

ggml.c

Lines changed: 0 additions & 2 deletions
@@ -4612,8 +4612,6 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         /*.data         =*/ (data == NULL && !ctx->no_alloc) ? (void *)(result + 1) : data,
         /*.name         =*/ { 0 },
         /*.extra        =*/ NULL,
-        /*.n_children   =*/ 0,
-        /*.n_views      =*/ 0,
         /*.padding      =*/ { 0 },
     };
 
ggml.h

Lines changed: 1 addition & 5 deletions
@@ -451,11 +451,7 @@ extern "C" {
 
         void * extra; // extra things e.g. for ggml-cuda.cu
 
-        // temp - used by allocator
-        int n_children;
-        int n_views;
-
-        char padding[16];
+        char padding[4];
     };
 
     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
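With n_children and n_views gone from the struct, the reserved padding shrinks from 16 to 4 bytes so that sizeof(struct ggml_tensor) keeps an alignment-friendly size. A compile-time check of the kind sketched below is the usual way to keep such a padding contract honest; the struct and MEM_ALIGN here are abbreviated stand-ins, not the real ggml_tensor layout.

#include <assert.h> // static_assert (C11)
#include <stdint.h>

#define MEM_ALIGN 16 // stand-in for GGML_MEM_ALIGN

// abbreviated stand-in for a padded tensor struct
struct padded_tensor {
    int64_t ne[4];       // 32 bytes
    int32_t type;        //  4 bytes
    char    name[16];    // 16 bytes
    char    padding[12]; // retuned whenever fields are added or removed
};

// fails to compile if a field change breaks the size contract
static_assert(sizeof(struct padded_tensor) % MEM_ALIGN == 0,
              "padded_tensor size must be a multiple of MEM_ALIGN");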

llama.cpp

Lines changed: 1 addition & 1 deletion
@@ -1813,7 +1813,7 @@ static bool llama_eval_internal(
     ggml_allocator_alloc_graph_tensors(lctx.alloc, gf);
 #endif
 
-    // fprintf(stderr, "graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf.n_nodes, gf.n_leafs);
+    // fprintf(stderr, "graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs);
 
     // for big prompts, if BLAS is enabled, it is better to use only one thread
     // otherwise, the threads are spin-lock waiting for the BLAS calls and are degrading the performance
