14
14
//#define AT_PRINTF printf
15
15
// Allocator trace hook. In this configuration it is a function-like variadic
// macro that discards all of its arguments and expands to a no-op expression,
// so trace calls cost nothing and their arguments are never evaluated.
// The parameter list must follow the macro name with no whitespace in between;
// otherwise the macro becomes object-like and every call site fails to compile.
#define AT_PRINTF(...) ((void)0)
16
16
17
// Per-tensor bookkeeping entry kept in the allocator's hash table.
// An entry whose `t` is NULL is treated as an empty slot by hash_get().
struct hash_node {
    // tensor this entry describes; doubles as the lookup key
    struct ggml_tensor * t;
    // bumped once for every graph node that lists the tensor as a source,
    // and decremented again as those nodes are processed
    int n_children;
    // bumped once for every view node whose view source is this tensor,
    // and decremented when such a view is no longer needed
    int n_views;
};
22
+
23
+ static size_t hash (void * p ) {
24
+ return (size_t )p % GGML_GRAPH_HASHTABLE_SIZE ;
25
+ }
26
+
27
+ static struct hash_node * hash_get (struct hash_node hash_table [], struct ggml_tensor * t ) {
28
+ size_t h = hash (t );
29
+
30
+ // linear probing
31
+ size_t i = h ;
32
+ while (hash_table [i ].t != NULL ) {
33
+ if (hash_table [i ].t == t ) {
34
+ return & hash_table [i ];
35
+ }
36
+ i = (i + 1 ) % GGML_GRAPH_HASHTABLE_SIZE ;
37
+ if (i == h ) {
38
+ // hash table is full
39
+ GGML_ASSERT (false);
40
+ }
41
+ }
42
+
43
+ hash_table [i ].t = t ;
44
+ return & hash_table [i ];
45
+ }
17
46
18
47
// TODO: GGML_PAD ?
19
48
static size_t aligned_offset (const void * buffer , size_t offset , size_t alignment ) {
@@ -35,6 +64,7 @@ struct ggml_allocator {
35
64
size_t alignment ;
36
65
int n_free_blocks ;
37
66
struct free_block free_blocks [MAX_FREE_BLOCKS ];
67
+ struct hash_node hash_table [GGML_GRAPH_HASHTABLE_SIZE ];
38
68
size_t max_size ;
39
69
bool measure ;
40
70
@@ -215,6 +245,7 @@ struct ggml_allocator * ggml_allocator_new(void * data, size_t size, size_t alig
215
245
/*.alignment = */ alignment ,
216
246
/*.n_free_blocks = */ 0 ,
217
247
/*.free_blocks = */ {{0 }},
248
+ /*.hash_table = */ {{0 }},
218
249
/*.max_size = */ 0 ,
219
250
/*.measure = */ false,
220
251
#ifdef GGML_ALLOCATOR_DEBUG
@@ -241,6 +272,7 @@ struct ggml_allocator * ggml_allocator_new_measure(size_t alignment) {
241
272
/*.alignment = */ alignment ,
242
273
/*.n_free_blocks = */ 0 ,
243
274
/*.free_blocks = */ {{0 }},
275
+ /*.hash_table = */ {{0 }},
244
276
/*.max_size = */ 0 ,
245
277
/*.measure = */ true,
246
278
#ifdef GGML_ALLOCATOR_DEBUG
@@ -305,7 +337,7 @@ static struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
305
337
return parent ;
306
338
}
307
339
308
- bool ggml_op_can_inplace (enum ggml_op op ) {
340
+ static bool ggml_op_can_inplace (enum ggml_op op ) {
309
341
switch (op ) {
310
342
case GGML_OP_SCALE :
311
343
case GGML_OP_DIAG_MASK_ZERO :
@@ -333,6 +365,7 @@ bool ggml_op_can_inplace(enum ggml_op op) {
333
365
}
334
366
335
367
static void allocate_node (struct ggml_allocator * alloc , struct ggml_tensor * node ) {
368
+ struct hash_node * ht = alloc -> hash_table ;
336
369
if (node -> data == NULL ) {
337
370
if (ggml_is_view (node )) {
338
371
size_t offset ;
@@ -360,10 +393,12 @@ static void allocate_node(struct ggml_allocator * alloc, struct ggml_tensor * no
360
393
if (parent == NULL ) {
361
394
break ;
362
395
}
363
- if (parent -> data != NULL && parent -> n_children == 1 && parent -> n_views == 0 && ggml_are_same_layout (node , parent ) && ggml_op_can_inplace (node -> op )) {
396
+ struct hash_node * p_hn = hash_get (ht , parent );
397
+ if (parent -> data != NULL && p_hn -> n_children == 1 && p_hn -> n_views == 0 && ggml_are_same_layout (node , parent ) && ggml_op_can_inplace (node -> op )) {
364
398
if (ggml_is_view (parent )) {
365
399
struct ggml_tensor * view_src = get_view_source (parent );
366
- if (view_src -> n_views == 1 && view_src -> n_children == 0 && view_src -> data == parent -> data ) {
400
+ struct hash_node * view_src_hn = hash_get (ht , view_src );
401
+ if (view_src_hn -> n_views == 1 && view_src_hn -> n_children == 0 && view_src -> data == parent -> data ) {
367
402
// TODO: the offset of the view parent must be kept to ensure that the op doesn't overwrite
368
403
// the parent's data that it will need later (same layout requirement). the problem is that then
369
404
// we cannot free the tensor because the original address of the allocation is lost.
@@ -391,21 +426,9 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
391
426
struct ggml_cgraph * * graphs , int n_graphs ,
392
427
struct ggml_tensor * * * inputs , struct ggml_tensor * * * outputs ) {
393
428
394
- // reset counters
395
- for (int g = 0 ; g < n_graphs ; g ++ ) {
396
- struct ggml_cgraph * gf = graphs [g ];
397
- for (int i = 0 ; i < gf -> n_nodes ; i ++ ) {
398
- struct ggml_tensor * node = gf -> nodes [i ];
399
- node -> n_children = 0 ;
400
- node -> n_views = 0 ;
401
- }
402
-
403
- for (int i = 0 ; i < gf -> n_leafs ; i ++ ) {
404
- struct ggml_tensor * leaf = gf -> leafs [i ];
405
- leaf -> n_children = 0 ;
406
- leaf -> n_views = 0 ;
407
- }
408
- }
429
+ // reset hash table
430
+ struct hash_node * ht = alloc -> hash_table ;
431
+ memset (ht , 0 , sizeof (struct hash_node ) * GGML_GRAPH_HASHTABLE_SIZE );
409
432
410
433
// count number of children and views
411
434
for (int g = 0 ; g < n_graphs ; g ++ ) {
@@ -415,15 +438,15 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
415
438
416
439
if (ggml_is_view (node )) {
417
440
struct ggml_tensor * view_src = get_view_source (node );
418
- view_src -> n_views += 1 ;
441
+ hash_get ( ht , view_src ) -> n_views += 1 ;
419
442
}
420
443
421
444
for (int j = 0 ; j < GGML_MAX_SRC ; j ++ ) {
422
445
struct ggml_tensor * parent = node -> src [j ];
423
446
if (parent == NULL ) {
424
447
break ;
425
448
}
426
- parent -> n_children += 1 ;
449
+ hash_get ( ht , parent ) -> n_children += 1 ;
427
450
}
428
451
}
429
452
}
@@ -474,16 +497,18 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
474
497
if (parent == NULL ) {
475
498
break ;
476
499
}
477
- parent -> n_children -= 1 ;
500
+ struct hash_node * p_hn = hash_get (ht , parent );
501
+ p_hn -> n_children -= 1 ;
478
502
479
503
//AT_PRINTF("parent %s: %d children, %d views\n", parent->name, parent->n_children, parent->n_views);
480
504
481
- if (parent -> n_children == 0 && parent -> n_views == 0 ) {
505
+ if (p_hn -> n_children == 0 && p_hn -> n_views == 0 ) {
482
506
if (ggml_is_view (parent )) {
483
507
struct ggml_tensor * view_src = get_view_source (parent );
484
- view_src -> n_views -= 1 ;
508
+ struct hash_node * view_src_hn = hash_get (ht , view_src );
509
+ view_src_hn -> n_views -= 1 ;
485
510
AT_PRINTF ("view_src %s: %d children, %d views\n" , view_src -> name , view_src -> n_children , view_src -> n_views );
486
- if (view_src -> n_views == 0 && view_src -> n_children == 0 && view_src -> data != node -> data ) {
511
+ if (view_src_hn -> n_views == 0 && view_src_hn -> n_children == 0 && view_src -> data != node -> data ) {
487
512
ggml_allocator_free_tensor (alloc , view_src );
488
513
}
489
514
}
0 commit comments