From d4160928c972eacfd51571c5209af5f7ee7e8daa Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 29 Jan 2024 21:19:21 +0000 Subject: [PATCH 01/80] Pull in simsimd headers only if simsimd feature is enabled --- cpp/bench.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/bench.cpp b/cpp/bench.cpp index 4b3efe601..e6b23797c 100644 --- a/cpp/bench.cpp +++ b/cpp/bench.cpp @@ -42,7 +42,9 @@ #include // `omp_set_num_threads()` #endif +#if USEARCH_USE_SIMSIMD #include +#endif #include @@ -615,4 +617,4 @@ int main(int argc, char** argv) { run_punned>(dataset, args, config, limits); return 0; -} \ No newline at end of file +} From 61db8e50272525b979c71a8bb78ed5cde7ea8ed1 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 29 Jan 2024 21:50:33 +0000 Subject: [PATCH 02/80] Fix simd, openmp anf fp16 option value propagation to the compiler SIMSIMD, OPENMP and FP16 related cmake options are not properly propaged to compiler header definitions, when they are set to non-default values. This commit fixes compile definitions so those values are always propagated properly E.g., by default, simsimd usage is turned off and as we see in the commands below, correct default `#define`s(i.e. `-DUSEARCH_USE_SIMSIMD=0`) are passed to the compiler: cmake .. make VERBOSE=1 > cd /home/ngalstyan/lantern/lantern/third_party/usearch/build/cpp && /usr/bin/c++ -DUSEARCH_USE_OPENMP=0 -DUSEARCH_USE_SIMSIMD=0 ... -o CMakeFiles/bench_cpp.dir/bench.cpp.o -c .../bench.cpp But, if we try to enable simsimd via cmake for benchmarking and shared C libraries, we do not get the corresponding -DUSEARCH_USE_SIMSIMD=1 definition. cmake .. -DUSEARCH_USE_SIMSIMD=1 make VERBOSE=1 cd /home/ngalstyan/lantern/lantern/third_party/usearch/build/cpp && /usr/bin/c++ -DUSEARCH_USE_OPENMP=0 ... -o CMakeFiles/bench_cpp.dir/bench.cpp.o -c .../bench.cpp Note that no definition for `USEARCH_USE_SIMSIMD` was passed to the compiler. 
Internally, the lack simsimd config definition assumes -DUSEARCH_USE_SIMSIMD=0 value. (see [1_simsimd_logic_in_plugins]) When compiling after adding this commit, we see that we can successfully enable simsimd via cmake option cmake .. -DUSEARCH_USE_SIMSIMD=1 make VERBOSE=1 cd /home/ngalstyan/lantern/lantern/third_party/usearch/build/cpp && /usr/bin/c++ -DUSEARCH_USE_FP16LIB=1 -DUSEARCH_USE_OPENMP=0 -DUSEARCH_USE_SIMSIMD=1 -o CMakeFiles/bench_cpp.dir/bench.cpp.o -c .../bench.cpp [1_simsimd_logic_in_plugins]: https://github.com/unum-cloud/usearch/blob/4747ef42f4140a1fde16118f25f079f9af79649e/include/usearch/index_plugins.hpp#L43-L45 --- CMakeLists.txt | 21 ++++++--------------- docs/benchmarks.md | 2 ++ 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d058470a6..0a3deb6da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,18 +63,11 @@ else () endif () # Core compilation settings affecting "index.hpp" -target_compile_definitions( - ${USEARCH_TARGET_NAME} INTERFACE $<$>:USEARCH_USE_OPENMP=0> -) +target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_OPENMP=$") # Supplementary compilation settings affecting "index_plugins.hpp" -target_compile_definitions( - ${USEARCH_TARGET_NAME} INTERFACE $<$>:USEARCH_USE_FP16LIB=1> -) - -target_compile_definitions( - ${USEARCH_TARGET_NAME} INTERFACE $<$>:USEARCH_USE_SIMSIMD=0> -) +target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_FP16LIB=$") +target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_SIMSIMD=$") target_include_directories( ${USEARCH_TARGET_NAME} ${USEARCH_SYSTEM_INCLUDE} INTERFACE $ @@ -296,14 +289,12 @@ function (setup_target TARGET_NAME) endif () # Core compilation settings affecting "index.hpp" - target_compile_definitions(${TARGET_NAME} PRIVATE $<$>:USEARCH_USE_OPENMP=0>) + target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_OPENMP=$") # Supplementary compilation settings affecting 
"index_plugins.hpp" - target_compile_definitions( - ${TARGET_NAME} PRIVATE $<$>:USEARCH_USE_FP16LIB=1> - ) + target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_FP16LIB=$") + target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_SIMSIMD=$") - target_compile_definitions(${TARGET_NAME} PRIVATE $<$>:USEARCH_USE_SIMSIMD=0>) endfunction () diff --git a/docs/benchmarks.md b/docs/benchmarks.md index b14f77246..67d916fae 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -64,6 +64,8 @@ To achieve best highest results we suggest compiling locally for the target arch cmake -B ./build_release \ -DCMAKE_BUILD_TYPE=Release \ -DUSEARCH_USE_OPENMP=1 \ + -DUSEARCH_USE_SIMSIMD=1 \ + -DUSEARCH_USE_FP16LIB=0 \ -DUSEARCH_USE_JEMALLOC=1 && \ make -C ./build_release -j From 211a103618783d40ed3e51298b8d5a928dddf617 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 29 Jan 2024 23:00:52 +0000 Subject: [PATCH 03/80] Fix AVX512 detection logic Copied the logic from simsimd. Alternatively, the whole block could be dropped to offload detection to simsimd --- include/usearch/index_plugins.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/usearch/index_plugins.hpp b/include/usearch/index_plugins.hpp index 660d9d522..aac301996 100644 --- a/include/usearch/index_plugins.hpp +++ b/include/usearch/index_plugins.hpp @@ -50,8 +50,10 @@ #define SIMSIMD_NATIVE_F16 !USEARCH_USE_FP16LIB #if !defined(SIMSIMD_TARGET_X86_AVX512) && defined(USEARCH_DEFINED_LINUX) +#if defined(__AVX512F__) && defined(__AVX512FP16__) && defined(__AVX512VNNI__) && defined(__AVX512VPOPCNTDQ__) #define SIMSIMD_TARGET_X86_AVX512 1 #endif +#endif #if !defined(SIMSIMD_TARGET_ARM_SVE) && defined(USEARCH_DEFINED_LINUX) #define SIMSIMD_TARGET_ARM_SVE 1 From c4394cea647ab2bcea2e8412b6d3d5c5d2c90731 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 29 Jan 2024 23:07:57 +0000 Subject: [PATCH 04/80] Avoid including simsimd before index_plugins index_plugins configures simsimd and when 
simsimd is included before this configuration gets a chance to run during compilation, simsimd.h may be misconfigured In particular, index_plugins propagates USEARCH_FP16LIB cmake options as !SIMSIMD_NATIVE_FP16 (see [1]) and if simsimd.h is included before index_plugins, wrong value of SIMSIMD_NATIVE_FP16 may be chosen [1]: https://github.com/unum-cloud/usearch/blob/ce54b814a8a10f4c0c32fee7aad9451231b63f75/include/usearch/index_plugins.hpp#L50 --- cpp/bench.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cpp/bench.cpp b/cpp/bench.cpp index e6b23797c..fbf70f90e 100644 --- a/cpp/bench.cpp +++ b/cpp/bench.cpp @@ -42,10 +42,6 @@ #include // `omp_set_num_threads()` #endif -#if USEARCH_USE_SIMSIMD -#include -#endif - #include using namespace unum::usearch; From c71e9c6a09e2797b0581c3482e2776a84d2ec579 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 29 Jan 2024 23:21:20 +0000 Subject: [PATCH 05/80] Fix bench_cpp binary name in benchmark documentation --- docs/benchmarks.md | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 67d916fae..2840c6a85 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -55,7 +55,7 @@ Also worth noting, 8-bit quantization results in almost no quantization loss and Within this repository you will find two commonly used utilities: -- `cpp/bench.cpp` the produces the `bench` binary for broad USearch benchmarks. +- `cpp/bench.cpp` the produces the `bench_cpp` binary for broad USearch benchmarks. - `python/bench.py` and `python/bench.ipynb` for interactive charts against FAISS. To achieve best highest results we suggest compiling locally for the target architecture. @@ -69,14 +69,14 @@ cmake -B ./build_release \ -DUSEARCH_USE_JEMALLOC=1 && \ make -C ./build_release -j -./build_release/bench --help +./build_release/bench_cpp --help ``` Which would print the following instructions. 
```txt SYNOPSIS - ./build_release/bench [--vectors ] [--queries ] [--neighbors ] [-b] [-j + ./build_release/bench_cpp [--vectors ] [--queries ] [--neighbors ] [-b] [-j ] [-c ] [--expansion-add ] [--expansion-search ] [--native|--f16quant|--i8quant] [--ip|--l2sq|--cos|--haversine] [-h] @@ -117,12 +117,12 @@ OPTIONS Here is an example of running the C++ benchmark: ```sh -./build_release/bench \ +./build_release/bench_cpp \ --vectors datasets/wiki_1M/base.1M.fbin \ --queries datasets/wiki_1M/query.public.100K.fbin \ --neighbors datasets/wiki_1M/groundtruth.public.100K.ibin -./build_release/bench \ +./build_release/bench_cpp \ --vectors datasets/t2i_1B/base.1B.fbin \ --queries datasets/t2i_1B/query.public.100K.fbin \ --neighbors datasets/t2i_1B/groundtruth.public.100K.ibin \ @@ -207,17 +207,17 @@ With `perf`: ```sh # Pass environment variables with `-E`, and `-d` for details -sudo -E perf stat -d ./build_release/bench ... -sudo -E perf mem -d ./build_release/bench ... +sudo -E perf stat -d ./build_release/bench_cpp ... +sudo -E perf mem -d ./build_release/bench_cpp ... # Sample on-CPU functions for the specified command, at 1 Kilo Hertz: -sudo -E perf record -F 1000 ./build_release/bench ... -perf record -d -e arm_spe// -- ./build_release/bench .. +sudo -E perf record -F 1000 ./build_release/bench_cpp ... +perf record -d -e arm_spe// -- ./build_release/bench_cpp .. ``` ### Caches ```sh -sudo perf stat -e 'faults,dTLB-loads,dTLB-load-misses,cache-misses,cache-references' ./build_release/bench ... +sudo perf stat -e 'faults,dTLB-loads,dTLB-load-misses,cache-misses,cache-references' ./build_release/bench_cpp ... 
``` Typical output on a 1M vectors dataset is: @@ -244,4 +244,3 @@ sudo sysctl -w vm.nr_hugepages=2048 sudo reboot sudo cat /proc/sys/vm/nr_hugepages ``` - From b10c58259b8188018982eec4289f60a703556208 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 30 Jan 2024 01:37:35 +0000 Subject: [PATCH 06/80] Update simsimd to fix fp16 type inference --- simsimd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simsimd b/simsimd index f8ff727dc..a7f6e5973 160000 --- a/simsimd +++ b/simsimd @@ -1 +1 @@ -Subproject commit f8ff727dcddcd142e7e8dece33c43801af96c210 +Subproject commit a7f6e5973a126914ee4351269ac8862b6d3605c1 From 1bd48422b6bad28a535e763b7015c1bd3af909b4 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 30 Jan 2024 05:20:29 +0000 Subject: [PATCH 07/80] Update SimSIMD to v3.7.5 --- simsimd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simsimd b/simsimd index a7f6e5973..814ae4107 160000 --- a/simsimd +++ b/simsimd @@ -1 +1 @@ -Subproject commit a7f6e5973a126914ee4351269ac8862b6d3605c1 +Subproject commit 814ae4107e91f1794a7abf045301b2859d42706e From daf7ca2df05e0e5a9134a9227151bec5d5b3da29 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Wed, 27 Dec 2023 04:57:25 +0000 Subject: [PATCH 08/80] Add clang-tidy clarification. todo:: is this needed? 
--- .clang-tidy | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.clang-tidy b/.clang-tidy index 00063a7d4..13db0f0c1 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -55,6 +55,8 @@ Checks: '*, CheckOptions: - key: hicpp-special-member-functions.AllowSoleDefaultDtor value: 1 + - key: readability-braces-around-statements.ShortStatementLines + value: 10 WarningsAsErrors: '*' HeaderFilterRegex: '.*hpp$' From 2df158c9edd218392ed31332d7e883dc0507693b Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sat, 30 Dec 2023 00:43:57 +0000 Subject: [PATCH 09/80] Initial trials to move node allocation outside of index.hpp --- include/usearch/index.hpp | 196 ++++++++++++-------------------- include/usearch/index_dense.hpp | 44 ++++++- 2 files changed, 114 insertions(+), 126 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index e5616c157..5d51ed563 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1599,6 +1599,48 @@ template struct member_ref_gt { template inline std::size_t get_slot(member_ref_gt const& m) noexcept { return m.slot; } template inline key_at get_key(member_ref_gt const& m) noexcept { return m.key; } +using level_t = std::int16_t; +// todo:: this is public, but then we make assumptions which are not communicated via this interface +// clean these up later +// +/** + * @brief A loosely-structured handle for every node. One such node is created for every member. + * To minimize memory usage and maximize the number of entries per cache-line, it only + * stores to pointers. The internal tape starts with a `vector_key_t` @b key, then + * a `level_t` for the number of graph @b levels in which this member appears, + * then the { `neighbors_count_t`, `compressed_slot_t`, `compressed_slot_t` ... } sequences + * for @b each-level. + */ +template class node_t { + byte_t* tape_{}; + + /** + * @brief How many bytes of memory are needed to form the "head" of the node. 
+ */ + static constexpr std::size_t node_head_bytes_() { return sizeof(vector_key_t) + sizeof(level_t); } + + public: + using vector_key_t = key_at; + explicit node_t(byte_t* tape) noexcept : tape_(tape) {} + byte_t* tape() const noexcept { return tape_; } + byte_t* neighbors_tape() const noexcept { return tape_ + node_head_bytes_(); } + explicit operator bool() const noexcept { return tape_; } + + node_t() = default; + node_t(node_t const&) = default; + node_t& operator=(node_t const&) = default; + + misaligned_ref_gt ckey() const noexcept { return {tape_}; } + misaligned_ref_gt key() const noexcept { return {tape_}; } + misaligned_ref_gt level() const noexcept { return {tape_ + sizeof(vector_key_t)}; } + + void key(vector_key_t v) noexcept { return misaligned_store(tape_, v); } + void level(level_t v) noexcept { return misaligned_store(tape_ + sizeof(vector_key_t), v); } +}; + +static_assert(std::is_trivially_copy_constructible>::value, "Nodes must be light!"); +static_assert(std::is_trivially_destructible>::value, "Nodes must be light!"); + /** * @brief Approximate Nearest Neighbors Search @b index-structure using the * Hierarchical Navigable Small World @b (HNSW) graphs algorithm. @@ -1697,6 +1739,9 @@ class index_gt { using member_cref_t = member_cref_gt; using member_ref_t = member_ref_gt; + template using o_node_t = node_t; + using node_t = node_t; + template class member_iterator_gt { using ref_t = ref_at; using index_t = index_at; @@ -1771,7 +1816,6 @@ class index_gt { * alignment in most common cases. */ using neighbors_count_t = std::uint32_t; - using level_t = std::int16_t; /** * @brief How many bytes of memory are needed to form the "head" of the node. @@ -1799,38 +1843,6 @@ class index_gt { using top_candidates_t = sorted_buffer_gt, candidates_allocator_t>; using next_candidates_t = max_heap_gt, candidates_allocator_t>; - /** - * @brief A loosely-structured handle for every node. One such node is created for every member. 
- * To minimize memory usage and maximize the number of entries per cache-line, it only - * stores to pointers. The internal tape starts with a `vector_key_t` @b key, then - * a `level_t` for the number of graph @b levels in which this member appears, - * then the { `neighbors_count_t`, `compressed_slot_t`, `compressed_slot_t` ... } sequences - * for @b each-level. - */ - class node_t { - byte_t* tape_{}; - - public: - explicit node_t(byte_t* tape) noexcept : tape_(tape) {} - byte_t* tape() const noexcept { return tape_; } - byte_t* neighbors_tape() const noexcept { return tape_ + node_head_bytes_(); } - explicit operator bool() const noexcept { return tape_; } - - node_t() = default; - node_t(node_t const&) = default; - node_t& operator=(node_t const&) = default; - - misaligned_ref_gt ckey() const noexcept { return {tape_}; } - misaligned_ref_gt key() const noexcept { return {tape_}; } - misaligned_ref_gt level() const noexcept { return {tape_ + sizeof(vector_key_t)}; } - - void key(vector_key_t v) noexcept { return misaligned_store(tape_, v); } - void level(level_t v) noexcept { return misaligned_store(tape_ + sizeof(vector_key_t), v); } - }; - - static_assert(std::is_trivially_copy_constructible::value, "Nodes must be light!"); - static_assert(std::is_trivially_destructible::value, "Nodes must be light!"); - /** * @brief A slice of the node's tape, containing a the list of neighbors * for a node in a single graph level. It's pre-allocated to fit @@ -2275,12 +2287,14 @@ class index_gt { * @param[in] callback On-success callback, executed while the `member_ref_t` is still under lock. 
*/ template < // + typename node_proxy_at, // typename value_at, // typename metric_at, // typename callback_at = dummy_callback_t, // typename prefetch_at = dummy_prefetch_t // > add_result_t add( // + node_proxy_at&& ext_node_at_, // vector_key_t key, value_at&& value, metric_at&& metric, // index_update_config_t config = {}, // callback_at&& callback = callback_at{}, // @@ -2347,6 +2361,7 @@ class index_gt { result.visited_members = context.iteration_cycles; connect_node_across_levels_( // + ext_node_at_, // value, metric, prefetch, // new_slot, entry_idx_copy, max_level_copy, target_level, // config, context); @@ -2383,12 +2398,14 @@ class index_gt { * @param[in] callback On-success callback, executed while the `member_ref_t` is still under lock. */ template < // + typename node_proxy_at, // typename value_at, // typename metric_at, // typename callback_at = dummy_callback_t, // typename prefetch_at = dummy_prefetch_t // > add_result_t update( // + node_proxy_at&& ext_node_at_, // member_iterator_t iterator, // vector_key_t key, // value_at&& value, // @@ -2430,6 +2447,7 @@ class index_gt { result.visited_members = context.iteration_cycles; connect_node_across_levels_( // + ext_node_at_, // value, metric, prefetch, // old_slot, entry_slot_, max_level_, node_level, // config, context); @@ -2454,12 +2472,14 @@ class index_gt { * @return Smart object referencing temporary memory. Valid until next `search()`, `add()`, or `cluster()`. 
*/ template < // + typename nodes_proxy_at, // typename value_at, // typename metric_at, // typename predicate_at = dummy_predicate_t, // typename prefetch_at = dummy_prefetch_t // > search_result_t search( // + nodes_proxy_at&& nodes_proxy, // value_at&& query, // std::size_t wanted, // metric_at&& metric, // @@ -2489,7 +2509,8 @@ class index_gt { if (!top.reserve(expansion)) return result.failed("Out of memory!"); - std::size_t closest_slot = search_for_one_(query, metric, prefetch, entry_slot_, max_level_, 0, context); + std::size_t closest_slot = + search_for_one_(nodes_proxy, query, metric, prefetch, entry_slot_, max_level_, 0, context); // For bottom layer we need a more optimized procedure if (!search_to_find_in_base_(query, metric, predicate, prefetch, closest_slot, expansion, context)) @@ -2516,12 +2537,14 @@ class index_gt { * @return Smart object referencing temporary memory. Valid until next `search()`, `add()`, or `cluster()`. */ template < // + typename node_proxy_at, // typename value_at, // typename metric_at, // typename predicate_at = dummy_predicate_t, // typename prefetch_at = dummy_prefetch_t // > cluster_result_t cluster( // + node_proxy_at&& ext_node_at_, // value_at&& query, // std::size_t level, // metric_at&& metric, // @@ -2544,7 +2567,7 @@ class index_gt { return result.failed("Out of memory!"); result.cluster.member = - at(search_for_one_(query, metric, prefetch, entry_slot_, max_level_, level - 1, context)); + at(search_for_one_(ext_node_at_, query, metric, prefetch, entry_slot_, max_level_, level - 1, context)); result.cluster.distance = context.measure(query, result.cluster.member, metric); // Normalize stats @@ -2956,6 +2979,8 @@ class index_gt { #pragma endregion +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" /** * @brief Performs compaction on the whole HNSW index, purging some entries * and links to them, while also generating a more efficient mapping, @@ -2985,88 +3010,9 @@ class index_gt { 
executor_at&& executor = executor_at{}, // progress_at&& progress = progress_at{}, // prefetch_at&& prefetch = prefetch_at{}) noexcept { - - // Export all the keys, slots, and levels. - // Partition them with the predicate. - // Sort the allowed entries in descending order of their level. - // Create a new array mapping old slots to the new ones (INT_MAX for deleted items). - struct slot_level_t { - compressed_slot_t old_slot; - compressed_slot_t cluster; - level_t level; - }; - using slot_level_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt slots_and_levels(size()); - - // Progress status - std::atomic do_tasks{true}; - std::atomic processed{0}; - std::size_t const total = 3 * slots_and_levels.size(); - - // For every bottom level node, determine its parent cluster - executor.dynamic(slots_and_levels.size(), [&](std::size_t thread_idx, std::size_t old_slot) { - context_t& context = contexts_[thread_idx]; - std::size_t cluster = search_for_one_( // - values[citerator_at(old_slot)], // - metric, prefetch, // - entry_slot_, max_level_, 0, context); - slots_and_levels[old_slot] = { // - static_cast(old_slot), // - static_cast(cluster), // - node_at_(old_slot).level()}; - ++processed; - if (thread_idx == 0) - do_tasks = progress(processed.load(), total); - return do_tasks.load(); - }); - if (!do_tasks.load()) - return; - - // Where the actual permutation happens: - std::sort(slots_and_levels.begin(), slots_and_levels.end(), [](slot_level_t const& a, slot_level_t const& b) { - return a.level == b.level ? 
a.cluster < b.cluster : a.level > b.level; - }); - - using size_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt old_slot_to_new(slots_and_levels.size()); - for (std::size_t new_slot = 0; new_slot != slots_and_levels.size(); ++new_slot) - old_slot_to_new[slots_and_levels[new_slot].old_slot] = new_slot; - - // Erase all the incoming links - buffer_gt reordered_nodes(slots_and_levels.size()); - tape_allocator_t reordered_tape; - - for (std::size_t new_slot = 0; new_slot != slots_and_levels.size(); ++new_slot) { - std::size_t old_slot = slots_and_levels[new_slot].old_slot; - node_t old_node = node_at_(old_slot); - - std::size_t node_bytes = node_bytes_(old_node.level()); - byte_t* new_data = (byte_t*)reordered_tape.allocate(node_bytes); - node_t new_node{new_data}; - std::memcpy(new_data, old_node.tape(), node_bytes); - - for (level_t level = 0; level <= old_node.level(); ++level) - for (misaligned_ref_gt neighbor : neighbors_(new_node, level)) - neighbor = static_cast(old_slot_to_new[compressed_slot_t(neighbor)]); - - reordered_nodes[new_slot] = new_node; - if (!progress(++processed, total)) - return; - } - - for (std::size_t new_slot = 0; new_slot != slots_and_levels.size(); ++new_slot) { - std::size_t old_slot = slots_and_levels[new_slot].old_slot; - slot_transition(node_at_(old_slot).ckey(), // - static_cast(old_slot), // - static_cast(new_slot)); - if (!progress(++processed, total)) - return; - } - - nodes_ = std::move(reordered_nodes); - tape_allocator_ = std::move(reordered_tape); - entry_slot_ = old_slot_to_new[entry_slot_]; + return; } +#pragma GCC diagnostic pop /** * @brief Scans the whole collection, removing the links leading towards @@ -3196,15 +3142,16 @@ class index_gt { return {nodes_mutexes_, slot}; } - template - void connect_node_across_levels_( // - value_at&& value, metric_at&& metric, prefetch_at&& prefetch, // - std::size_t node_slot, std::size_t entry_slot, level_t max_level, level_t target_level, // + 
template + void connect_node_across_levels_( // + nodes_proxy_at&& ext_node_at_, value_at&& value, metric_at&& metric, prefetch_at&& prefetch, // + std::size_t node_slot, std::size_t entry_slot, level_t max_level, level_t target_level, // index_update_config_t const& config, context_t& context) usearch_noexcept_m { + using vv = typename std::decay::type::vector_key_t; // Go down the level, tracking only the closest match std::size_t closest_slot = search_for_one_( // - value, metric, prefetch, // + ext_node_at_, value, metric, prefetch, // entry_slot, max_level, target_level, context); // From `target_level` down perform proper extensive search @@ -3353,8 +3300,9 @@ class index_gt { candidates_iterator_t end() const noexcept { return {index, neighbors, visits, neighbors.size()}; } }; - template + template std::size_t search_for_one_( // + ext_node_at_at&& ext_node_at_, // value_at&& query, metric_at&& metric, prefetch_at&& prefetch, // std::size_t closest_slot, level_t begin_level, level_t end_level, context_t& context) const noexcept { @@ -3373,6 +3321,10 @@ class index_gt { node_lock_t closest_lock = node_lock_(closest_slot); neighbors_ref_t closest_neighbors = neighbors_non_base_(node_at_(closest_slot), level); + using vvv = typename std::decay::type::vector_key_t; + static_assert(std::is_same::value, "this cannot happen"); + ext_node_at_(closest_slot); + // Optional prefetching if (!is_dummy()) { candidates_range_t missing_candidates{*this, closest_neighbors, visits}; diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index e151b929d..3de378e32 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -279,6 +279,39 @@ inline index_dense_metadata_result_t index_dense_metadata_from_buffer(memory_map return result.failed("Not a dense USearch index!"); } +template class nodes_proxy_t { + using vector_key_t = nodes_proxy_key_t; + using dynamic_allocator_t = aligned_allocator_gt; + using nodes_mutexes_t = 
bitset_gt; + // index_dense_gt const* index_ = nullptr; + std::vector> node_; + /// @brief Mutex, that limits concurrent access to `nodes_`. + mutable nodes_mutexes_t nodes_mutexes_{}; + struct node_lock_t { + nodes_mutexes_t& mutexes; + std::size_t slot; + inline ~node_lock_t() noexcept { /*mutexes.atomic_reset(slot);*/ + } + }; + + public: + nodes_proxy_t() noexcept { node_ = std::move(std::vector>(1000)); } + + // warning: key_t is used in sys/types.h + inline node_t operator()(std::size_t slot) const noexcept { /*return index_->nodes_[];*/ + return node_[slot]; + } + + inline node_lock_t node_lock_(std::size_t slot) const noexcept { + // while (nodes_mutexes_.atomic_set(slot)) + // ; + return {nodes_mutexes_, slot}; + } +}; +// template // +// nodes_proxy_t make_storage(index_dense_gtindex) { return +// nodes_proxy_t(index); } + /** * @brief Oversimplified type-punned index for equidimensional vectors * with automatic @b down-casting, hardware-specific @b SIMD metrics, @@ -1757,10 +1790,11 @@ class index_dense_gt { update_config.thread = lock.thread_id; update_config.expansion = config_.expansion_add; + auto prox = nodes_proxy_t(); metric_proxy_t metric{*this}; - return reuse_node // - ? typed_->update(typed_->iterator_at(free_slot), key, vector_data, metric, update_config, on_success) - : typed_->add(key, vector_data, metric, update_config, on_success); + return reuse_node ? 
typed_->update(prox, typed_->iterator_at(free_slot), key, vector_data, metric, + update_config, on_success) + : typed_->add(prox, key, vector_data, metric, update_config, on_success); } template @@ -1784,7 +1818,9 @@ class index_dense_gt { search_config.exact = exact; auto allow = [=](member_cref_t const& member) noexcept { return member.key != free_key_; }; - return typed_->search(vector_data, wanted, metric_proxy_t{*this}, search_config, allow); + + auto prox = nodes_proxy_t(); + return typed_->search(prox, vector_data, wanted, metric_proxy_t{*this}, search_config, allow); } template From d4fb2776980c73aab5a6eccac58f179fbd53474a Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sat, 30 Dec 2023 01:21:32 +0000 Subject: [PATCH 10/80] Move storage parameter to right before metric --- include/usearch/index.hpp | 28 +++++++++++++++------------- include/usearch/index_dense.hpp | 6 +++--- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 5d51ed563..1a483e2cc 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2287,17 +2287,18 @@ class index_gt { * @param[in] callback On-success callback, executed while the `member_ref_t` is still under lock. 
*/ template < // - typename node_proxy_at, // typename value_at, // + typename node_proxy_at, // typename metric_at, // typename callback_at = dummy_callback_t, // typename prefetch_at = dummy_prefetch_t // > - add_result_t add( // - node_proxy_at&& ext_node_at_, // - vector_key_t key, value_at&& value, metric_at&& metric, // - index_update_config_t config = {}, // - callback_at&& callback = callback_at{}, // + add_result_t add( // + vector_key_t key, value_at&& value, // + node_proxy_at&& ext_node_at_, // + metric_at&& metric, // + index_update_config_t config = {}, // + callback_at&& callback = callback_at{}, // prefetch_at&& prefetch = prefetch_at{}) usearch_noexcept_m { add_result_t result; @@ -2398,17 +2399,17 @@ class index_gt { * @param[in] callback On-success callback, executed while the `member_ref_t` is still under lock. */ template < // - typename node_proxy_at, // typename value_at, // + typename node_proxy_at, // typename metric_at, // typename callback_at = dummy_callback_t, // typename prefetch_at = dummy_prefetch_t // > add_result_t update( // - node_proxy_at&& ext_node_at_, // member_iterator_t iterator, // vector_key_t key, // value_at&& value, // + node_proxy_at&& ext_node_at_, // metric_at&& metric, // index_update_config_t config = {}, // callback_at&& callback = callback_at{}, // @@ -2472,16 +2473,16 @@ class index_gt { * @return Smart object referencing temporary memory. Valid until next `search()`, `add()`, or `cluster()`. 
*/ template < // - typename nodes_proxy_at, // typename value_at, // + typename nodes_proxy_at, // typename metric_at, // typename predicate_at = dummy_predicate_t, // typename prefetch_at = dummy_prefetch_t // > search_result_t search( // - nodes_proxy_at&& nodes_proxy, // value_at&& query, // std::size_t wanted, // + nodes_proxy_at&& nodes_proxy, // metric_at&& metric, // index_search_config_t config = {}, // predicate_at&& predicate = predicate_at{}, // @@ -2537,16 +2538,16 @@ class index_gt { * @return Smart object referencing temporary memory. Valid until next `search()`, `add()`, or `cluster()`. */ template < // - typename node_proxy_at, // typename value_at, // + typename node_proxy_at, // typename metric_at, // typename predicate_at = dummy_predicate_t, // typename prefetch_at = dummy_prefetch_t // > cluster_result_t cluster( // - node_proxy_at&& ext_node_at_, // value_at&& query, // std::size_t level, // + node_proxy_at&& ext_node_at_, // metric_at&& metric, // index_cluster_config_t config = {}, // predicate_at&& predicate = predicate_at{}, // @@ -3319,11 +3320,12 @@ class index_gt { do { changed = false; node_lock_t closest_lock = node_lock_(closest_slot); + ext_node_at_.node_lock_(closest_slot); neighbors_ref_t closest_neighbors = neighbors_non_base_(node_at_(closest_slot), level); using vvv = typename std::decay::type::vector_key_t; static_assert(std::is_same::value, "this cannot happen"); - ext_node_at_(closest_slot); + node_t a = ext_node_at_(closest_slot); // Optional prefetching if (!is_dummy()) { diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 3de378e32..013d60fc8 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -1792,9 +1792,9 @@ class index_dense_gt { auto prox = nodes_proxy_t(); metric_proxy_t metric{*this}; - return reuse_node ? typed_->update(prox, typed_->iterator_at(free_slot), key, vector_data, metric, + return reuse_node ? 
typed_->update(typed_->iterator_at(free_slot), key, vector_data, prox, metric, update_config, on_success) - : typed_->add(prox, key, vector_data, metric, update_config, on_success); + : typed_->add(key, vector_data, prox, metric, update_config, on_success); } template @@ -1820,7 +1820,7 @@ class index_dense_gt { auto allow = [=](member_cref_t const& member) noexcept { return member.key != free_key_; }; auto prox = nodes_proxy_t(); - return typed_->search(prox, vector_data, wanted, metric_proxy_t{*this}, search_config, allow); + return typed_->search(vector_data, wanted, prox, metric_proxy_t{*this}, search_config, allow); } template From aac023314ded44cac45adbdf51f07233893758b4 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sat, 30 Dec 2023 02:15:28 +0000 Subject: [PATCH 11/80] Slowly moving index storage outside of index.hpp --- cpp/test.cpp | 7 +++++-- include/usearch/index_dense.hpp | 17 ++++++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index aa9b0c56a..3d29e259c 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -77,7 +77,7 @@ void test_cosine(index_at& index, std::vector> const& vec expect((index.stats(0).nodes == 3)); // Check if clustering endpoint compiles - index.cluster(vector_first, 0, args...); + // index.cluster(vector_first, 0, args...); // Try removals and replacements if constexpr (punned_ak) { @@ -163,6 +163,7 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { using slot_t = slot_at; using index_typed_t = index_gt; + using index_storage_t = nodes_proxy_t; using member_cref_t = typename index_typed_t::member_cref_t; using member_citerator_t = typename index_typed_t::member_citerator_t; @@ -198,7 +199,9 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { metric_t metric{&matrix, dimensions}; index_config_t config(connectivity); index_typed_t index_typed(config); - test_cosine(index_typed, matrix, metric); + std::vector> nodes; + index_storage_t 
storage{&nodes}; + test_cosine(index_typed, matrix, storage, metric); } // Type-punned: diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 013d60fc8..4c05b6905 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -283,8 +283,9 @@ template ; using nodes_mutexes_t = bitset_gt; + using nodes_t = std::vector>; // index_dense_gt const* index_ = nullptr; - std::vector> node_; + nodes_t* nodes_; /// @brief Mutex, that limits concurrent access to `nodes_`. mutable nodes_mutexes_t nodes_mutexes_{}; struct node_lock_t { @@ -295,11 +296,14 @@ template >(1000)); } + nodes_proxy_t(nodes_t* nodes) noexcept { nodes_ = nodes; } // warning: key_t is used in sys/types.h inline node_t operator()(std::size_t slot) const noexcept { /*return index_->nodes_[];*/ - return node_[slot]; + nodes_t v = *nodes_; + if (slot >= v.size()) + v.resize(slot + 1); + return v[slot]; } inline node_lock_t node_lock_(std::size_t slot) const noexcept { @@ -411,6 +415,9 @@ class index_dense_gt { /// @brief For every managed `compressed_slot_t` stores a pointer to the allocated vector copy. mutable std::vector vectors_lookup_; + /// @brief C-style array of `node_t` smart-pointers. + std::vector> nodes_; + /// @brief Originally forms and array of integers [0, threads], marking all mutable std::vector available_threads_; @@ -1790,7 +1797,7 @@ class index_dense_gt { update_config.thread = lock.thread_id; update_config.expansion = config_.expansion_add; - auto prox = nodes_proxy_t(); + nodes_proxy_t prox(&this->nodes_); metric_proxy_t metric{*this}; return reuse_node ? 
typed_->update(typed_->iterator_at(free_slot), key, vector_data, prox, metric, update_config, on_success) @@ -1819,7 +1826,7 @@ class index_dense_gt { auto allow = [=](member_cref_t const& member) noexcept { return member.key != free_key_; }; - auto prox = nodes_proxy_t(); + auto prox = nodes_proxy_t(&this->nodes_); return typed_->search(vector_data, wanted, prox, metric_proxy_t{*this}, search_config, allow); } From 8209b52868f574d731c7bfce0167ed6e9f1818a2 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 31 Dec 2023 04:19:31 +0000 Subject: [PATCH 12/80] Make external storage more functional by adding a vector storage which is actually used by index.hpp --- cpp/test.cpp | 5 +- include/usearch/index.hpp | 285 +++++++++++++++++--------------- include/usearch/index_dense.hpp | 81 ++++++++- 3 files changed, 229 insertions(+), 142 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index 3d29e259c..c14ff4e2a 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -5,7 +5,6 @@ #include // `assert` #include // `std::default_random_engine` #include -#include #include // for std::vector #include @@ -163,7 +162,7 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { using slot_t = slot_at; using index_typed_t = index_gt; - using index_storage_t = nodes_proxy_t; + using index_storage_t = storage_proxy_t; using member_cref_t = typename index_typed_t::member_cref_t; using member_citerator_t = typename index_typed_t::member_citerator_t; @@ -200,7 +199,7 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { index_config_t config(connectivity); index_typed_t index_typed(config); std::vector> nodes; - index_storage_t storage{&nodes}; + index_storage_t storage{&nodes, config}; test_cosine(index_typed, matrix, storage, metric); } diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 1a483e2cc..52c83d4f8 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -76,6 +76,7 @@ #include // `std::sort_heap` 
#include // `std::atomic` #include // `std::bitset` +#include #include // `CHAR_BIT` #include // `std::sqrt` #include // `std::memset` @@ -1760,8 +1761,9 @@ class index_gt { using pointer = void; using reference = ref_t; - reference operator*() const noexcept { return {index_->node_at_(slot_).key(), slot_}; } - vector_key_t key() const noexcept { return index_->node_at_(slot_).key(); } + // todo:: take care of these to use external storage + reference operator*() const noexcept { return {index_->node_at_11_(slot_).key(), slot_}; } + vector_key_t key() const noexcept { return index_->node_at_11_(slot_).key(); } friend inline std::size_t get_slot(member_iterator_gt const& it) noexcept { return it.slot_; } friend inline vector_key_t get_key(member_iterator_gt const& it) noexcept { return it.key(); } @@ -2288,14 +2290,14 @@ class index_gt { */ template < // typename value_at, // - typename node_proxy_at, // + typename storage_at, // typename metric_at, // typename callback_at = dummy_callback_t, // typename prefetch_at = dummy_prefetch_t // > add_result_t add( // vector_key_t key, value_at&& value, // - node_proxy_at&& ext_node_at_, // + storage_at&& storage, // metric_at&& metric, // index_update_config_t config = {}, // callback_at&& callback = callback_at{}, // @@ -2345,6 +2347,9 @@ class index_gt { new_level_lock.unlock(); nodes_[new_slot] = node; + storage.node_append_(key, target_level); + node = storage.node_at_(new_slot); + result.new_size = new_slot + 1; result.slot = new_slot; callback(at(new_slot)); @@ -2362,7 +2367,7 @@ class index_gt { result.visited_members = context.iteration_cycles; connect_node_across_levels_( // - ext_node_at_, // + storage, // value, metric, prefetch, // new_slot, entry_idx_copy, max_level_copy, target_level, // config, context); @@ -2400,7 +2405,7 @@ class index_gt { */ template < // typename value_at, // - typename node_proxy_at, // + typename storage_at, // typename metric_at, // typename callback_at = dummy_callback_t, // 
typename prefetch_at = dummy_prefetch_t // @@ -2409,7 +2414,7 @@ class index_gt { member_iterator_t iterator, // vector_key_t key, // value_at&& value, // - node_proxy_at&& ext_node_at_, // + storage_at&& storage, // metric_at&& metric, // index_update_config_t config = {}, // callback_at&& callback = callback_at{}, // @@ -2436,7 +2441,7 @@ class index_gt { return result.failed("Out of memory!"); node_lock_t new_lock = node_lock_(old_slot); - node_t node = node_at_(old_slot); + node_t node = storage.node_at_(old_slot); level_t node_level = node.level(); span_bytes_t node_bytes = node_bytes_(node); @@ -2448,7 +2453,7 @@ class index_gt { result.visited_members = context.iteration_cycles; connect_node_across_levels_( // - ext_node_at_, // + storage, // value, metric, prefetch, // old_slot, entry_slot_, max_level_, node_level, // config, context); @@ -2474,7 +2479,7 @@ class index_gt { */ template < // typename value_at, // - typename nodes_proxy_at, // + typename storage_at, // typename metric_at, // typename predicate_at = dummy_predicate_t, // typename prefetch_at = dummy_prefetch_t // @@ -2482,7 +2487,7 @@ class index_gt { search_result_t search( // value_at&& query, // std::size_t wanted, // - nodes_proxy_at&& nodes_proxy, // + storage_at&& storage, // metric_at&& metric, // index_search_config_t config = {}, // predicate_at&& predicate = predicate_at{}, // @@ -2511,10 +2516,10 @@ class index_gt { return result.failed("Out of memory!"); std::size_t closest_slot = - search_for_one_(nodes_proxy, query, metric, prefetch, entry_slot_, max_level_, 0, context); + search_for_one_(query, storage, metric, prefetch, entry_slot_, max_level_, 0, context); // For bottom layer we need a more optimized procedure - if (!search_to_find_in_base_(query, metric, predicate, prefetch, closest_slot, expansion, context)) + if (!search_to_find_in_base_(query, storage, metric, predicate, prefetch, closest_slot, expansion, context)) return result.failed("Out of memory!"); } @@ -2539,7 
+2544,7 @@ class index_gt { */ template < // typename value_at, // - typename node_proxy_at, // + typename storage_at, // typename metric_at, // typename predicate_at = dummy_predicate_t, // typename prefetch_at = dummy_prefetch_t // @@ -2547,7 +2552,7 @@ class index_gt { cluster_result_t cluster( // value_at&& query, // std::size_t level, // - node_proxy_at&& ext_node_at_, // + storage_at&& storage, // metric_at&& metric, // index_cluster_config_t config = {}, // predicate_at&& predicate = predicate_at{}, // @@ -2568,7 +2573,7 @@ class index_gt { return result.failed("Out of memory!"); result.cluster.member = - at(search_for_one_(ext_node_at_, query, metric, prefetch, entry_slot_, max_level_, level - 1, context)); + at(search_for_one_(query, storage, metric, prefetch, entry_slot_, max_level_, level - 1, context)); result.cluster.distance = context.measure(query, result.cluster.member, metric); // Normalize stats @@ -2593,18 +2598,21 @@ class index_gt { stats_t stats() const noexcept { stats_t result{}; - for (std::size_t i = 0; i != size(); ++i) { - node_t node = node_at_(i); - std::size_t max_edges = node.level() * config_.connectivity + config_.connectivity_base; - std::size_t edges = 0; - for (level_t level = 0; level <= node.level(); ++level) - edges += neighbors_(node, level).size(); - - ++result.nodes; - result.allocated_bytes += node_bytes_(node).size(); - result.edges += edges; - result.max_edges += max_edges; - } + assert(false); + /* + for (std::size_t i = 0; i != size(); ++i) { + node_t node = node_at_(i); + std::size_t max_edges = node.level() * config_.connectivity + config_.connectivity_base; + std::size_t edges = 0; + for (level_t level = 0; level <= node.level(); ++level) + edges += neighbors_(node, level).size(); + + ++result.nodes; + result.allocated_bytes += node_bytes_(node).size(); + result.edges += edges; + result.max_edges += max_edges; + } + */ return result; } @@ -2612,52 +2620,57 @@ class index_gt { stats_t result{}; std::size_t 
neighbors_bytes = !level ? pre_.neighbors_base_bytes : pre_.neighbors_bytes; - for (std::size_t i = 0; i != size(); ++i) { - node_t node = node_at_(i); - if (static_cast(node.level()) < level) - continue; + assert(false); + /* + for (std::size_t i = 0; i != size(); ++i) { + node_t node = node_at_(i); + if (static_cast(node.level()) < level) + continue; - ++result.nodes; - result.edges += neighbors_(node, level).size(); - result.allocated_bytes += node_head_bytes_() + neighbors_bytes; - } + ++result.nodes; + result.edges += neighbors_(node, level).size(); + result.allocated_bytes += node_head_bytes_() + neighbors_bytes; + } - std::size_t max_edges_per_node = level ? config_.connectivity_base : config_.connectivity; - result.max_edges = result.nodes * max_edges_per_node; + std::size_t max_edges_per_node = level ? config_.connectivity_base : config_.connectivity; + result.max_edges = result.nodes * max_edges_per_node; + */ return result; } stats_t stats(stats_t* stats_per_level, std::size_t max_level) const noexcept { std::size_t head_bytes = node_head_bytes_(); - for (std::size_t i = 0; i != size(); ++i) { - node_t node = node_at_(i); - - stats_per_level[0].nodes++; - stats_per_level[0].edges += neighbors_(node, 0).size(); - stats_per_level[0].allocated_bytes += pre_.neighbors_base_bytes + head_bytes; - - level_t node_level = static_cast(node.level()); - for (level_t l = 1; l <= (std::min)(node_level, static_cast(max_level)); ++l) { - stats_per_level[l].nodes++; - stats_per_level[l].edges += neighbors_(node, l).size(); - stats_per_level[l].allocated_bytes += pre_.neighbors_bytes; + assert(false); + /* + for (std::size_t i = 0; i != size(); ++i) { + node_t node = node_at_(i); + + stats_per_level[0].nodes++; + stats_per_level[0].edges += neighbors_(node, 0).size(); + stats_per_level[0].allocated_bytes += pre_.neighbors_base_bytes + head_bytes; + + level_t node_level = static_cast(node.level()); + for (level_t l = 1; l <= (std::min)(node_level, static_cast(max_level)); 
++l) { + stats_per_level[l].nodes++; + stats_per_level[l].edges += neighbors_(node, l).size(); + stats_per_level[l].allocated_bytes += pre_.neighbors_bytes; + } } - } - // The `max_edges` parameter can be inferred from `nodes` stats_per_level[0].max_edges = stats_per_level[0].nodes * config_.connectivity_base; for (std::size_t l = 1; l <= max_level; ++l) stats_per_level[l].max_edges = stats_per_level[l].nodes * config_.connectivity; // Aggregate stats across levels - stats_t result{}; for (std::size_t l = 0; l <= max_level; ++l) result.nodes += stats_per_level[l].nodes, // result.edges += stats_per_level[l].edges, // result.allocated_bytes += stats_per_level[l].allocated_bytes, // result.max_edges += stats_per_level[l].max_edges; // + */ + stats_t result{}; return result; } @@ -2694,8 +2707,12 @@ class index_gt { */ std::size_t serialized_length() const noexcept { std::size_t neighbors_length = 0; - for (std::size_t i = 0; i != size(); ++i) - neighbors_length += node_bytes_(node_at_(i).level()) + sizeof(level_t); + assert(false); + + /* + for (std::size_t i = 0; i != size(); ++i) + neighbors_length += node_bytes_(node_at_(i).level()) + sizeof(level_t); + */ return sizeof(index_serialized_header_t) + neighbors_length; } @@ -2724,24 +2741,26 @@ class index_gt { // Export the number of levels per node // That is both enough to estimate the overall memory consumption, // and to be able to estimate the offsets of every entry in the file. 
- for (std::size_t i = 0; i != header.size; ++i) { - node_t node = node_at_(i); - level_t level = node.level(); - if (!output(&level, sizeof(level))) - return result.failed("Failed to serialize into stream"); - if (!progress(++processed, total)) - return result.failed("Terminated by user"); - } + /* + for (std::size_t i = 0; i != header.size; ++i) { + node_t node = node_at_(i); + level_t level = node.level(); + if (!output(&level, sizeof(level))) + return result.failed("Failed to serialize into stream"); + if (!progress(++processed, total)) + return result.failed("Terminated by user"); + } - // After that dump the nodes themselves - for (std::size_t i = 0; i != header.size; ++i) { - span_bytes_t node_bytes = node_bytes_(node_at_(i)); - if (!output(node_bytes.data(), node_bytes.size())) - return result.failed("Failed to serialize into stream"); - if (!progress(++processed, total)) - return result.failed("Terminated by user"); - } + // After that dump the nodes themselves + for (std::size_t i = 0; i != header.size; ++i) { + span_bytes_t node_bytes = node_bytes_(node_at_(i)); + if (!output(node_bytes.data(), node_bytes.size())) + return result.failed("Failed to serialize into stream"); + if (!progress(++processed, total)) + return result.failed("Terminated by user"); + } + */ return {}; } @@ -3039,30 +3058,33 @@ class index_gt { // Progress status std::atomic do_tasks{true}; std::atomic processed{0}; - - // Erase all the incoming links - std::size_t nodes_count = size(); - executor.dynamic(nodes_count, [&](std::size_t thread_idx, std::size_t node_idx) { - node_t node = node_at_(node_idx); - for (level_t level = 0; level <= node.level(); ++level) { - neighbors_ref_t neighbors = neighbors_(node, level); - std::size_t old_size = neighbors.size(); - neighbors.clear(); - for (std::size_t i = 0; i != old_size; ++i) { - compressed_slot_t neighbor_slot = neighbors[i]; - node_t neighbor = node_at_(neighbor_slot); - if (allow_member(member_cref_t{neighbor.ckey(), 
neighbor_slot})) - neighbors.push_back(neighbor_slot); + assert(false); + + /* + // Erase all the incoming links + std::size_t nodes_count = size(); + executor.dynamic(nodes_count, [&](std::size_t thread_idx, std::size_t node_idx) { + node_t node = node_at_(node_idx); + for (level_t level = 0; level <= node.level(); ++level) { + neighbors_ref_t neighbors = neighbors_(node, level); + std::size_t old_size = neighbors.size(); + neighbors.clear(); + for (std::size_t i = 0; i != old_size; ++i) { + compressed_slot_t neighbor_slot = neighbors[i]; + node_t neighbor = node_at_(neighbor_slot); + if (allow_member(member_cref_t{neighbor.ckey(), neighbor_slot})) + neighbors.push_back(neighbor_slot); + } } - } - ++processed; - if (thread_idx == 0) - do_tasks = progress(processed.load(), nodes_count); - return do_tasks.load(); - }); + ++processed; + if (thread_idx == 0) + do_tasks = progress(processed.load(), nodes_count); + return do_tasks.load(); + }); - // At the end report the latest numbers, because the reporter thread may be finished earlier - progress(processed.load(), nodes_count); + // At the end report the latest numbers, because the reporter thread may be finished earlier + progress(processed.load(), nodes_count); + */ } private: @@ -3120,7 +3142,7 @@ class index_gt { node = node_t{}; } - inline node_t node_at_(std::size_t idx) const noexcept { return nodes_[idx]; } + inline node_t node_at_11_(std::size_t idx) const noexcept { return nodes_[idx]; } inline neighbors_ref_t neighbors_base_(node_t node) const noexcept { return {node.neighbors_tape()}; } inline neighbors_ref_t neighbors_non_base_(node_t node, level_t level) const noexcept { @@ -3142,33 +3164,36 @@ class index_gt { ; return {nodes_mutexes_, slot}; } + // ^^^ move these to storage - template - void connect_node_across_levels_( // - nodes_proxy_at&& ext_node_at_, value_at&& value, metric_at&& metric, prefetch_at&& prefetch, // - std::size_t node_slot, std::size_t entry_slot, level_t max_level, level_t 
target_level, // + template + void connect_node_across_levels_( // + storage_at&& storage, value_at&& value, metric_at&& metric, prefetch_at&& prefetch, // + std::size_t node_slot, std::size_t entry_slot, level_t max_level, level_t target_level, // index_update_config_t const& config, context_t& context) usearch_noexcept_m { using vv = typename std::decay::type::vector_key_t; // Go down the level, tracking only the closest match std::size_t closest_slot = search_for_one_( // - ext_node_at_, value, metric, prefetch, // + value, storage, metric, prefetch, // entry_slot, max_level, target_level, context); // From `target_level` down perform proper extensive search for (level_t level = (std::min)(target_level, max_level); level >= 0; --level) { // TODO: Handle out of memory conditions - search_to_insert_(value, metric, prefetch, closest_slot, node_slot, level, config.expansion, context); - closest_slot = connect_new_node_(metric, node_slot, level, context); - reconnect_neighbor_nodes_(metric, node_slot, value, level, context); + search_to_insert_(value, storage, metric, prefetch, closest_slot, node_slot, level, config.expansion, + context); + closest_slot = connect_new_node_(storage, metric, node_slot, level, context); + reconnect_neighbor_nodes_(storage, metric, node_slot, value, level, context); } } - template - std::size_t connect_new_node_( // - metric_at&& metric, std::size_t new_slot, level_t level, context_t& context) usearch_noexcept_m { + template + std::size_t connect_new_node_( // + storage_at&& storage, metric_at&& metric, // + std::size_t new_slot, level_t level, context_t& context) usearch_noexcept_m { - node_t new_node = node_at_(new_slot); + node_t new_node = storage.node_at_(new_slot); top_candidates_t& top = context.top_candidates; // Outgoing links from `new_slot`: @@ -3179,7 +3204,7 @@ class index_gt { for (std::size_t idx = 0; idx != top_view.size(); idx++) { usearch_assert_m(!new_neighbors[idx], "Possible memory corruption"); - 
usearch_assert_m(level <= node_at_(top_view[idx].slot).level(), "Linking to missing level"); + usearch_assert_m(level <= storage.node_at_(top_view[idx].slot).level(), "Linking to missing level"); new_neighbors.push_back(top_view[idx].slot); } } @@ -3187,12 +3212,12 @@ class index_gt { return new_neighbors[0]; } - template - void reconnect_neighbor_nodes_( // - metric_at&& metric, std::size_t new_slot, value_at&& value, level_t level, - context_t& context) usearch_noexcept_m { + template + void reconnect_neighbor_nodes_( // + storage_at&& storage, metric_at&& metric, // + std::size_t new_slot, value_at&& value, level_t level, context_t& context) usearch_noexcept_m { - node_t new_node = node_at_(new_slot); + node_t new_node = storage.node_at_(new_slot); top_candidates_t& top = context.top_candidates; neighbors_ref_t new_neighbors = neighbors_(new_node, level); @@ -3202,7 +3227,7 @@ class index_gt { if (close_slot == new_slot) continue; node_lock_t close_lock = node_lock_(close_slot); - node_t close_node = node_at_(close_slot); + node_t close_node = storage.node_at_(close_slot); neighbors_ref_t close_header = neighbors_(close_node, level); usearch_assert_m(close_header.size() <= connectivity_max, "Possible corruption"); @@ -3284,7 +3309,7 @@ class index_gt { bool operator==(candidates_iterator_t const& other) noexcept { return current_ == other.current_; } bool operator!=(candidates_iterator_t const& other) noexcept { return current_ != other.current_; } - vector_key_t key() const noexcept { return index_->node_at_(slot()).key(); } + // vector_key_t key() const noexcept { return index_->node_at_(slot()).key(); } compressed_slot_t slot() const noexcept { return neighbors_[current_]; } friend inline std::size_t get_slot(candidates_iterator_t const& it) noexcept { return it.slot(); } friend inline vector_key_t get_key(candidates_iterator_t const& it) noexcept { return it.key(); } @@ -3301,10 +3326,10 @@ class index_gt { candidates_iterator_t end() const noexcept { return 
{index, neighbors, visits, neighbors.size()}; } }; - template - std::size_t search_for_one_( // - ext_node_at_at&& ext_node_at_, // - value_at&& query, metric_at&& metric, prefetch_at&& prefetch, // + template + std::size_t search_for_one_( // + value_at&& query, // + storage_at&& storage, metric_at&& metric, prefetch_at&& prefetch, // std::size_t closest_slot, level_t begin_level, level_t end_level, context_t& context) const noexcept { visits_hash_set_t& visits = context.visits; @@ -3320,12 +3345,12 @@ class index_gt { do { changed = false; node_lock_t closest_lock = node_lock_(closest_slot); - ext_node_at_.node_lock_(closest_slot); - neighbors_ref_t closest_neighbors = neighbors_non_base_(node_at_(closest_slot), level); + storage.node_lock_(closest_slot); + neighbors_ref_t closest_neighbors = neighbors_non_base_(storage.node_at_(closest_slot), level); using vvv = typename std::decay::type::vector_key_t; static_assert(std::is_same::value, "this cannot happen"); - node_t a = ext_node_at_(closest_slot); + node_t a = storage(closest_slot); // Optional prefetching if (!is_dummy()) { @@ -3353,9 +3378,9 @@ class index_gt { * Locks the nodes in the process, assuming other threads are updating neighbors lists. * @return `true` if procedure succeeded, `false` if run out of memory. 
*/ - template - bool search_to_insert_( // - value_at&& query, metric_at&& metric, prefetch_at&& prefetch, // + template + bool search_to_insert_( // + value_at&& query, storage_at&& storage, metric_at&& metric, prefetch_at&& prefetch, // std::size_t start_slot, std::size_t new_slot, level_t level, std::size_t top_limit, context_t& context) noexcept { @@ -3390,7 +3415,7 @@ class index_gt { compressed_slot_t candidate_slot = candidacy.slot; if (new_slot == candidate_slot) continue; - node_t candidate_ref = node_at_(candidate_slot); + node_t candidate_ref = storage.node_at_(candidate_slot); node_lock_t candidate_lock = node_lock_(candidate_slot); neighbors_ref_t candidate_neighbors = neighbors_(candidate_ref, level); @@ -3427,9 +3452,9 @@ class index_gt { * Doesn't lock any nodes, assuming read-only simultaneous access. * @return `true` if procedure succeeded, `false` if run out of memory. */ - template - bool search_to_find_in_base_( // - value_at&& query, metric_at&& metric, predicate_at&& predicate, prefetch_at&& prefetch, // + template + bool search_to_find_in_base_( // + value_at&& query, storage_at&& storage, metric_at&& metric, predicate_at&& predicate, prefetch_at&& prefetch, // std::size_t start_slot, std::size_t expansion, context_t& context) const noexcept { visits_hash_set_t& visits = context.visits; @@ -3461,7 +3486,7 @@ class index_gt { next.pop(); context.iteration_cycles++; - neighbors_ref_t candidate_neighbors = neighbors_base_(node_at_(candidate.slot)); + neighbors_ref_t candidate_neighbors = neighbors_base_(storage.node_at_(candidate.slot)); // Optional prefetching if (!is_dummy()) { @@ -3482,7 +3507,7 @@ class index_gt { // This can substantially grow our priority queue: next.insert({-successor_dist, successor_slot}); if (!is_dummy()) - if (!predicate(member_cref_t{node_at_(successor_slot).ckey(), successor_slot})) + if (!predicate(member_cref_t{storage.node_at_(successor_slot).ckey(), successor_slot})) continue; // This will automatically evict 
poor matches: diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 4c05b6905..225b6563c 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -2,9 +2,10 @@ #include // `aligned_alloc` #include // `std::function` -#include // `std::iota` -#include // `std::thread` -#include // `std::vector` +#include +#include // `std::iota` +#include // `std::thread` +#include // `std::vector` #include #include @@ -279,11 +280,17 @@ inline index_dense_metadata_result_t index_dense_metadata_from_buffer(memory_map return result.failed("Not a dense USearch index!"); } -template class nodes_proxy_t { - using vector_key_t = nodes_proxy_key_t; +template class storage_proxy_t { + using vector_key_t = storage_proxy_key_t; using dynamic_allocator_t = aligned_allocator_gt; using nodes_mutexes_t = bitset_gt; - using nodes_t = std::vector>; + using nodes_t = std::vector>; + /** + * @brief Integer for the number of node neighbors at a specific level of the + * multi-level graph. It's selected to be `std::uint32_t` to improve the + * alignment in most common cases. + */ + using neighbors_count_t = std::uint32_t; // index_dense_gt const* index_ = nullptr; nodes_t* nodes_; /// @brief Mutex, that limits concurrent access to `nodes_`. 
@@ -294,9 +301,27 @@ template (config.connectivity)); + pre.neighbors_bytes = config.connectivity * sizeof(compressed_slot_at) + sizeof(neighbors_count_t); + pre.neighbors_base_bytes = config.connectivity_base * sizeof(compressed_slot_at) + sizeof(neighbors_count_t); + return pre; + } public: - nodes_proxy_t(nodes_t* nodes) noexcept { nodes_ = nodes; } + storage_proxy_t(nodes_t* nodes, index_config_t config) noexcept { + nodes_ = nodes; + pre_ = precompute_(config); + } // warning: key_t is used in sys/types.h inline node_t operator()(std::size_t slot) const noexcept { /*return index_->nodes_[];*/ @@ -306,6 +331,44 @@ template node_at_(std::size_t idx) const noexcept { return (*this)(idx); } + + using span_bytes_t = span_gt; + + // todo:: make these private + using node_t = node_t; + inline span_bytes_t node_bytes_(node_t node) const noexcept { return {node.tape(), node_bytes_(node.level())}; } + static constexpr std::size_t node_head_bytes_() { return sizeof(vector_key_t) + sizeof(level_t); } + inline std::size_t node_neighbors_bytes_(node_t node) const noexcept { return node_neighbors_bytes_(node.level()); } + inline std::size_t node_neighbors_bytes_(level_t level) const noexcept { + return pre_.neighbors_base_bytes + pre_.neighbors_bytes * level; + } + inline std::size_t node_bytes_(level_t level) const noexcept { + return node_head_bytes_() + node_neighbors_bytes_(level); + } + span_bytes_t node_malloc_(level_t level) noexcept { + std::size_t node_bytes = node_bytes_(level); + byte_t* data = (byte_t*)malloc(node_bytes); + return data ? 
span_bytes_t{data, node_bytes} : span_bytes_t{}; + } + + node_t node_make_(vector_key_t key, level_t level) noexcept { + span_bytes_t node_bytes = node_malloc_(level); + if (!node_bytes) + return {}; + + std::memset(node_bytes.data(), 0, node_bytes.size()); + node_t node{(byte_t*)node_bytes.data()}; + node.key(key); + node.level(level); + return node; + } + + void node_append_(vector_key_t key, level_t level) { + std::cout << "append caled\n"; + nodes_->push_back(node_make_(key, level)); + } + inline node_lock_t node_lock_(std::size_t slot) const noexcept { // while (nodes_mutexes_.atomic_set(slot)) // ; @@ -1797,7 +1860,7 @@ class index_dense_gt { update_config.thread = lock.thread_id; update_config.expansion = config_.expansion_add; - nodes_proxy_t prox(&this->nodes_); + storage_proxy_t prox(&this->nodes_, config_); metric_proxy_t metric{*this}; return reuse_node ? typed_->update(typed_->iterator_at(free_slot), key, vector_data, prox, metric, update_config, on_success) @@ -1826,7 +1889,7 @@ class index_dense_gt { auto allow = [=](member_cref_t const& member) noexcept { return member.key != free_key_; }; - auto prox = nodes_proxy_t(&this->nodes_); + auto prox = storage_proxy_t(&this->nodes_, config_); return typed_->search(vector_data, wanted, prox, metric_proxy_t{*this}, search_config, allow); } From a16e11f6b88c02bbc67f4de777e5c36195ec11ab Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 31 Dec 2023 07:30:10 +0000 Subject: [PATCH 13/80] Move storage to a separate class and revert stats back to index --- cpp/test.cpp | 4 +- include/usearch/index.hpp | 209 ++++++++++++++++---------------- include/usearch/index_dense.hpp | 31 +++-- 3 files changed, 128 insertions(+), 116 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index c14ff4e2a..623bfd1dc 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -161,8 +161,8 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { using vector_key_t = key_at; using slot_t = slot_at; - using 
index_typed_t = index_gt; using index_storage_t = storage_proxy_t; + using index_typed_t = index_gt; using member_cref_t = typename index_typed_t::member_cref_t; using member_citerator_t = typename index_typed_t::member_citerator_t; @@ -197,9 +197,9 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { std::printf("- templates with connectivity %zu \n", connectivity); metric_t metric{&matrix, dimensions}; index_config_t config(connectivity); - index_typed_t index_typed(config); std::vector> nodes; index_storage_t storage{&nodes, config}; + index_typed_t index_typed({&nodes, config}, config); test_cosine(index_typed, matrix, storage, metric); } diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 52c83d4f8..d3d875f21 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1722,7 +1722,8 @@ static_assert(std::is_trivially_destructible>::value, "Nod * - `member_gt` contains an already prefetched copy of the key. * */ -template , // @@ -1743,6 +1744,8 @@ class index_gt { template using o_node_t = node_t; using node_t = node_t; + using storage_t = storage_at1; + template class member_iterator_gt { using ref_t = ref_at; using index_t = index_at; @@ -1915,6 +1918,7 @@ class index_gt { }; index_config_t config_{}; + storage_at1 storage_; index_limits_t limits_{}; mutable dynamic_allocator_t dynamic_allocator_{}; @@ -1942,7 +1946,7 @@ class index_gt { using nodes_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; /// @brief C-style array of `node_t` smart-pointers. - buffer_gt nodes_{}; + // buffer_gt nodes_{}; /// @brief Mutex, that limits concurrent access to `nodes_`. mutable nodes_mutexes_t nodes_mutexes_{}; @@ -1965,12 +1969,13 @@ class index_gt { * @section Exceptions * Doesn't throw, unless the ::metric's and ::allocators's throw on copy-construction. 
*/ - explicit index_gt( // + explicit index_gt( // + storage_at1 storage, // index_config_t config = {}, dynamic_allocator_t dynamic_allocator = {}, tape_allocator_t tape_allocator = {}) noexcept - : config_(config), limits_(0, 0), dynamic_allocator_(std::move(dynamic_allocator)), + : storage_(storage), config_(config), limits_(0, 0), dynamic_allocator_(std::move(dynamic_allocator)), tape_allocator_(std::move(tape_allocator)), pre_(precompute_(config)), nodes_count_(0u), max_level_(-1), - entry_slot_(0u), nodes_(), nodes_mutexes_(), contexts_() {} + entry_slot_(0u), nodes_mutexes_(), contexts_() {} /** * @brief Clones the structure with the same hyper-parameters, but without contents. @@ -2006,12 +2011,14 @@ class index_gt { // Now all is left - is to allocate new `node_t` instances and populate // the `other.nodes_` array into it. - for (std::size_t i = 0; i != nodes_count_; ++i) - other.nodes_[i] = other.node_make_copy_(node_bytes_(nodes_[i])); - other.nodes_count_ = nodes_count_.load(); - other.max_level_ = max_level_; - other.entry_slot_ = entry_slot_; + assert(false); + // for (std::size_t i = 0; i != nodes_count_; ++i) + // other.nodes_[i] = other.node_make_copy_(node_bytes_(nodes_[i])); + + // other.nodes_count_ = nodes_count_.load(); + // other.max_level_ = max_level_; + // other.entry_slot_ = entry_slot_; // This controls nothing for now :) (void)config; @@ -2025,8 +2032,8 @@ class index_gt { member_iterator_t begin() noexcept { return {this, 0}; } member_iterator_t end() noexcept { return {this, size()}; } - member_ref_t at(std::size_t slot) noexcept { return {nodes_[slot].key(), slot}; } - member_cref_t at(std::size_t slot) const noexcept { return {nodes_[slot].ckey(), slot}; } + member_ref_t at(std::size_t slot) noexcept { return {storage_.node_at_(slot).key(), slot}; } + member_cref_t at(std::size_t slot) const noexcept { return {storage_.node_at_(slot).ckey(), slot}; } member_iterator_t iterator_at(std::size_t slot) noexcept { return {this, slot}; } 
member_citerator_t citerator_at(std::size_t slot) const noexcept { return {this, slot}; } @@ -2042,12 +2049,14 @@ class index_gt { * Will keep the number of available threads/contexts the same as it was. */ void clear() noexcept { - if (!has_reset()) { - std::size_t n = nodes_count_; - for (std::size_t i = 0; i != n; ++i) - node_free_(i); - } else - tape_allocator_.deallocate(nullptr, 0); + // if (!has_reset()) { + // std::size_t n = nodes_count_; + // for (std::size_t i = 0; i != n; ++i) + // node_free_(i); + // } else + // tape_allocator_.deallocate(nullptr, 0); + storage_.clear(); + nodes_count_ = 0; max_level_ = -1; entry_slot_ = 0u; @@ -2063,7 +2072,7 @@ class index_gt { void reset() noexcept { clear(); - nodes_ = {}; + storage_.reset(); contexts_ = {}; nodes_mutexes_ = {}; limits_ = index_limits_t{0, 0}; @@ -2084,7 +2093,7 @@ class index_gt { std::swap(viewed_file_, other.viewed_file_); std::swap(max_level_, other.max_level_); std::swap(entry_slot_, other.entry_slot_); - std::swap(nodes_, other.nodes_); + // std::swap(nodes_, other.nodes_); std::swap(nodes_mutexes_, other.nodes_mutexes_); std::swap(contexts_, other.contexts_); @@ -2109,18 +2118,17 @@ class index_gt { return true; nodes_mutexes_t new_mutexes(limits.members); - buffer_gt new_nodes(limits.members); + // buffer_gt new_nodes(limits.members); buffer_gt new_contexts(limits.threads()); - if (!new_nodes || !new_contexts || !new_mutexes) + if (!new_contexts || !new_mutexes) return false; // Move the nodes info, and deallocate previous buffers. 
- if (nodes_) - std::memcpy(new_nodes.data(), nodes_.data(), sizeof(node_t) * size()); + // if (nodes_) + // std::memcpy(new_nodes.data(), nodes_.data(), sizeof(node_t) * size()); limits_ = limits; nodes_capacity_ = limits.members; - nodes_ = std::move(new_nodes); contexts_ = std::move(new_contexts); nodes_mutexes_ = std::move(new_mutexes); return true; @@ -2176,12 +2184,12 @@ class index_gt { }; class search_result_t { - node_t const* nodes_{}; + storage_t const* storage_{}; top_candidates_t const* top_{}; friend class index_gt; inline search_result_t(index_gt const& index, top_candidates_t& top) noexcept - : nodes_(index.nodes_), top_(&top) {} + : storage_(&index.storage_), top_(&top) {} public: /** @brief Number of search results found. */ @@ -2217,7 +2225,8 @@ class index_gt { inline match_t at(std::size_t i) const noexcept { candidate_t const* top_ordered = top_->data(); candidate_t candidate = top_ordered[i]; - node_t node = nodes_[candidate.slot]; + // node_t node = nodes_[candidate.slot]; + node_t node = storage_->node_at_(candidate.slot); return {member_cref_t{node.ckey(), candidate.slot}, candidate.distance}; } inline std::size_t merge_into( // @@ -2346,7 +2355,7 @@ class index_gt { if (target_level <= max_level_copy) new_level_lock.unlock(); - nodes_[new_slot] = node; + // nodes_[new_slot] = node; storage.node_append_(key, target_level); node = storage.node_at_(new_slot); @@ -2598,21 +2607,18 @@ class index_gt { stats_t stats() const noexcept { stats_t result{}; - assert(false); - /* - for (std::size_t i = 0; i != size(); ++i) { - node_t node = node_at_(i); - std::size_t max_edges = node.level() * config_.connectivity + config_.connectivity_base; - std::size_t edges = 0; - for (level_t level = 0; level <= node.level(); ++level) - edges += neighbors_(node, level).size(); - - ++result.nodes; - result.allocated_bytes += node_bytes_(node).size(); - result.edges += edges; - result.max_edges += max_edges; - } - */ + for (std::size_t i = 0; i != size(); ++i) { 
+ node_t node = storage_.node_at_(i); + std::size_t max_edges = node.level() * config_.connectivity + config_.connectivity_base; + std::size_t edges = 0; + for (level_t level = 0; level <= node.level(); ++level) + edges += neighbors_(node, level).size(); + + ++result.nodes; + result.allocated_bytes += node_bytes_(node).size(); + result.edges += edges; + result.max_edges += max_edges; + } return result; } @@ -2620,48 +2626,44 @@ class index_gt { stats_t result{}; std::size_t neighbors_bytes = !level ? pre_.neighbors_base_bytes : pre_.neighbors_bytes; - assert(false); - /* - for (std::size_t i = 0; i != size(); ++i) { - node_t node = node_at_(i); - if (static_cast(node.level()) < level) - continue; + for (std::size_t i = 0; i != size(); ++i) { + node_t node = storage_.node_at_(i); + if (static_cast(node.level()) < level) + continue; - ++result.nodes; - result.edges += neighbors_(node, level).size(); - result.allocated_bytes += node_head_bytes_() + neighbors_bytes; - } + ++result.nodes; + result.edges += neighbors_(node, level).size(); + result.allocated_bytes += node_head_bytes_() + neighbors_bytes; + } - std::size_t max_edges_per_node = level ? config_.connectivity_base : config_.connectivity; - result.max_edges = result.nodes * max_edges_per_node; - */ + std::size_t max_edges_per_node = level ? 
config_.connectivity_base : config_.connectivity; + result.max_edges = result.nodes * max_edges_per_node; return result; } stats_t stats(stats_t* stats_per_level, std::size_t max_level) const noexcept { std::size_t head_bytes = node_head_bytes_(); - assert(false); - /* - for (std::size_t i = 0; i != size(); ++i) { - node_t node = node_at_(i); - - stats_per_level[0].nodes++; - stats_per_level[0].edges += neighbors_(node, 0).size(); - stats_per_level[0].allocated_bytes += pre_.neighbors_base_bytes + head_bytes; - - level_t node_level = static_cast(node.level()); - for (level_t l = 1; l <= (std::min)(node_level, static_cast(max_level)); ++l) { - stats_per_level[l].nodes++; - stats_per_level[l].edges += neighbors_(node, l).size(); - stats_per_level[l].allocated_bytes += pre_.neighbors_bytes; - } + for (std::size_t i = 0; i != size(); ++i) { + node_t node = storage_.node_at_(i); + + stats_per_level[0].nodes++; + stats_per_level[0].edges += neighbors_(node, 0).size(); + stats_per_level[0].allocated_bytes += pre_.neighbors_base_bytes + head_bytes; + + level_t node_level = static_cast(node.level()); + for (level_t l = 1; l <= (std::min)(node_level, static_cast(max_level)); ++l) { + stats_per_level[l].nodes++; + stats_per_level[l].edges += neighbors_(node, l).size(); + stats_per_level[l].allocated_bytes += pre_.neighbors_bytes; } + } // The `max_edges` parameter can be inferred from `nodes` stats_per_level[0].max_edges = stats_per_level[0].nodes * config_.connectivity_base; for (std::size_t l = 1; l <= max_level; ++l) stats_per_level[l].max_edges = stats_per_level[l].nodes * config_.connectivity; + stats_t result{}; // Aggregate stats across levels for (std::size_t l = 0; l <= max_level; ++l) result.nodes += stats_per_level[l].nodes, // @@ -2669,8 +2671,6 @@ class index_gt { result.allocated_bytes += stats_per_level[l].allocated_bytes, // result.max_edges += stats_per_level[l].max_edges; // - */ - stats_t result{}; return result; } @@ -2707,12 +2707,9 @@ class index_gt { */ 
std::size_t serialized_length() const noexcept { std::size_t neighbors_length = 0; - assert(false); - /* - for (std::size_t i = 0; i != size(); ++i) - neighbors_length += node_bytes_(node_at_(i).level()) + sizeof(level_t); - */ + for (std::size_t i = 0; i != size(); ++i) + neighbors_length += node_bytes_(storage_.node_at_(i).level()) + sizeof(level_t); return sizeof(index_serialized_header_t) + neighbors_length; } @@ -2741,26 +2738,24 @@ class index_gt { // Export the number of levels per node // That is both enough to estimate the overall memory consumption, // and to be able to estimate the offsets of every entry in the file. - /* - for (std::size_t i = 0; i != header.size; ++i) { - node_t node = node_at_(i); - level_t level = node.level(); - if (!output(&level, sizeof(level))) - return result.failed("Failed to serialize into stream"); - if (!progress(++processed, total)) - return result.failed("Terminated by user"); - } + for (std::size_t i = 0; i != header.size; ++i) { + node_t node = storage_.node_at_(i); + level_t level = node.level(); + if (!output(&level, sizeof(level))) + return result.failed("Failed to serialize into stream"); + if (!progress(++processed, total)) + return result.failed("Terminated by user"); + } - // After that dump the nodes themselves - for (std::size_t i = 0; i != header.size; ++i) { - span_bytes_t node_bytes = node_bytes_(node_at_(i)); - if (!output(node_bytes.data(), node_bytes.size())) - return result.failed("Failed to serialize into stream"); - if (!progress(++processed, total)) - return result.failed("Terminated by user"); - } + // After that dump the nodes themselves + for (std::size_t i = 0; i != header.size; ++i) { + span_bytes_t node_bytes = node_bytes_(storage_.node_at_(i)); + if (!output(node_bytes.data(), node_bytes.size())) + return result.failed("Failed to serialize into stream"); + if (!progress(++processed, total)) + return result.failed("Terminated by user"); + } - */ return {}; } @@ -2810,12 +2805,14 @@ class index_gt 
{ // Load the nodes for (std::size_t i = 0; i != header.size; ++i) { - span_bytes_t node_bytes = node_malloc_(levels[i]); + span_bytes_t node_bytes = storage_.node_malloc_(levels[i]); if (!input(node_bytes.data(), node_bytes.size())) { reset(); return result.failed("Failed to pull nodes from the stream"); } - nodes_[i] = node_t{node_bytes.data()}; + // nodes_[i] = node_t{node_bytes.data()}; + storage_.node_append_(node_t{node_bytes.data()}); + if (!progress(i + 1, header.size)) return result.failed("Terminated by user"); } @@ -2987,9 +2984,10 @@ class index_gt { max_level_ = static_cast(header.max_level); entry_slot_ = static_cast(header.entry_slot); + assert(false); // Rapidly address all the nodes for (std::size_t i = 0; i != header.size; ++i) { - nodes_[i] = node_t{(byte_t*)file.data() + offsets[i]}; + storage_.node_append_(node_t{(byte_t*)file.data() + offsets[i]}); if (!progress(i + 1, header.size)) return result.failed("Terminated by user"); } @@ -3096,6 +3094,7 @@ class index_gt { return pre; } + // move these to storage using span_bytes_t = span_gt; inline span_bytes_t node_bytes_(node_t node) const noexcept { return {node.tape(), node_bytes_(node.level())}; } @@ -3137,12 +3136,12 @@ class index_gt { if (viewed_file_) return; - node_t& node = nodes_[idx]; - tape_allocator_.deallocate(node.tape(), node_bytes_(node).size()); - node = node_t{}; + // node_t& node = nodes_[idx]; + // tape_allocator_.deallocate(node.tape(), node_bytes_(node).size()); + // node = node_t{}; } - inline node_t node_at_11_(std::size_t idx) const noexcept { return nodes_[idx]; } + inline node_t node_at_11_(std::size_t idx) const noexcept { return storage_.node_at_(idx); /* nodes_[idx]; */ } inline neighbors_ref_t neighbors_base_(node_t node) const noexcept { return {node.neighbors_tape()}; } inline neighbors_ref_t neighbors_non_base_(node_t node, level_t level) const noexcept { @@ -3171,7 +3170,6 @@ class index_gt { storage_at&& storage, value_at&& value, metric_at&& metric, 
prefetch_at&& prefetch, // std::size_t node_slot, std::size_t entry_slot, level_t max_level, level_t target_level, // index_update_config_t const& config, context_t& context) usearch_noexcept_m { - using vv = typename std::decay::type::vector_key_t; // Go down the level, tracking only the closest match std::size_t closest_slot = search_for_one_( // @@ -3350,7 +3348,6 @@ class index_gt { using vvv = typename std::decay::type::vector_key_t; static_assert(std::is_same::value, "this cannot happen"); - node_t a = storage(closest_slot); // Optional prefetching if (!is_dummy()) { diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 225b6563c..9847c30f3 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -1,4 +1,5 @@ #pragma once +#include #include // `aligned_alloc` #include // `std::function` @@ -293,6 +294,7 @@ template operator()(std::size_t slot) const noexcept { /*return index_->nodes_[];*/ nodes_t v = *nodes_; - if (slot >= v.size()) - v.resize(slot + 1); + usearch_assert_m(slot < v.size(), "Storage node index out of bounds"); return v[slot]; } inline node_t node_at_(std::size_t idx) const noexcept { return (*this)(idx); } + void clear() { nodes_->clear(); } + void reset() { + nodes_->clear(); + nodes_->shrink_to_fit(); + } + using span_bytes_t = span_gt; // todo:: make these private @@ -349,6 +359,7 @@ template push_back(node_make_(key, level)); } + void node_append_(node_t node) { nodes_->push_back(node); } + inline node_lock_t node_lock_(std::size_t slot) const noexcept { // while (nodes_mutexes_.atomic_set(slot)) // ; return {nodes_mutexes_, slot}; } + inline size_t size() { return nodes_->size(); } }; // template // // nodes_proxy_t make_storage(index_dense_gtindex) { return @@ -402,6 +416,7 @@ class index_dense_gt { using key_t = vector_key_t; using compressed_slot_t = compressed_slot_at; using distance_t = distance_punned_t; + using storage_t = storage_proxy_t; using metric_t = metric_punned_t; 
using member_ref_t = member_ref_gt; @@ -421,6 +436,7 @@ class index_dense_gt { using cast_t = std::function; /// @brief Punned index. using index_t = index_gt< // + storage_t, // distance_t, vector_key_t, compressed_slot_t, // dynamic_allocator_t, tape_allocator_t>; using index_allocator_t = aligned_allocator_gt; @@ -480,6 +496,7 @@ class index_dense_gt { /// @brief C-style array of `node_t` smart-pointers. std::vector> nodes_; + storage_t storage_{&nodes_, config_}; /// @brief Originally forms and array of integers [0, threads], marking all mutable std::vector available_threads_; @@ -616,7 +633,7 @@ class index_dense_gt { // Available since C11, but only C++17, so we use the C version. index_t* raw = index_allocator_t{}.allocate(1); - new (raw) index_t(config); + new (raw) index_t({&result.nodes_, config}, config); result.typed_ = raw; return result; } @@ -1860,11 +1877,10 @@ class index_dense_gt { update_config.thread = lock.thread_id; update_config.expansion = config_.expansion_add; - storage_proxy_t prox(&this->nodes_, config_); metric_proxy_t metric{*this}; - return reuse_node ? typed_->update(typed_->iterator_at(free_slot), key, vector_data, prox, metric, + return reuse_node ? 
typed_->update(typed_->iterator_at(free_slot), key, vector_data, storage_, metric, update_config, on_success) - : typed_->add(key, vector_data, prox, metric, update_config, on_success); + : typed_->add(key, vector_data, storage_, metric, update_config, on_success); } template @@ -1889,8 +1905,7 @@ class index_dense_gt { auto allow = [=](member_cref_t const& member) noexcept { return member.key != free_key_; }; - auto prox = storage_proxy_t(&this->nodes_, config_); - return typed_->search(vector_data, wanted, prox, metric_proxy_t{*this}, search_config, allow); + return typed_->search(vector_data, wanted, storage_, metric_proxy_t{*this}, search_config, allow); } template From 76325fda41b83a842f436481696adfcddbef345b Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 1 Jan 2024 22:31:13 +0000 Subject: [PATCH 14/80] External storage with usearch working passing all functional tests, but there are memory leaks --- cpp/test.cpp | 8 ++-- include/usearch/index.hpp | 76 +++++++++++++++----------------- include/usearch/index_dense.hpp | 78 ++++++++++++++++++++++++++------- 3 files changed, 102 insertions(+), 60 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index 623bfd1dc..e037ef49b 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -161,7 +161,7 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { using vector_key_t = key_at; using slot_t = slot_at; - using index_storage_t = storage_proxy_t; + using index_storage_t = storage_proxy_t; using index_typed_t = index_gt; using member_cref_t = typename index_typed_t::member_cref_t; using member_citerator_t = typename index_typed_t::member_citerator_t; @@ -198,8 +198,10 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { metric_t metric{&matrix, dimensions}; index_config_t config(connectivity); std::vector> nodes; - index_storage_t storage{&nodes, config}; - index_typed_t index_typed({&nodes, config}, config); + std::mutex vector_lock; + bitset_gt nodes_mutexes; + 
index_storage_t storage{&nodes, &vector_lock, &nodes_mutexes, config}; + index_typed_t index_typed(storage, config); test_cosine(index_typed, matrix, storage, metric); } diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index d3d875f21..7205867ad 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1827,8 +1827,6 @@ class index_gt { */ static constexpr std::size_t node_head_bytes_() { return sizeof(vector_key_t) + sizeof(level_t); } - using nodes_mutexes_t = bitset_gt; - using visits_hash_set_t = growing_hash_set_gt, dynamic_allocator_t>; struct precomputed_constants_t { @@ -1948,8 +1946,6 @@ class index_gt { /// @brief C-style array of `node_t` smart-pointers. // buffer_gt nodes_{}; - /// @brief Mutex, that limits concurrent access to `nodes_`. - mutable nodes_mutexes_t nodes_mutexes_{}; using contexts_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; @@ -1975,7 +1971,7 @@ class index_gt { tape_allocator_t tape_allocator = {}) noexcept : storage_(storage), config_(config), limits_(0, 0), dynamic_allocator_(std::move(dynamic_allocator)), tape_allocator_(std::move(tape_allocator)), pre_(precompute_(config)), nodes_count_(0u), max_level_(-1), - entry_slot_(0u), nodes_mutexes_(), contexts_() {} + entry_slot_(0u), contexts_() {} /** * @brief Clones the structure with the same hyper-parameters, but without contents. @@ -2074,7 +2070,6 @@ class index_gt { storage_.reset(); contexts_ = {}; - nodes_mutexes_ = {}; limits_ = index_limits_t{0, 0}; nodes_capacity_ = 0; viewed_file_ = memory_mapped_file_t{}; @@ -2094,7 +2089,7 @@ class index_gt { std::swap(max_level_, other.max_level_); std::swap(entry_slot_, other.entry_slot_); // std::swap(nodes_, other.nodes_); - std::swap(nodes_mutexes_, other.nodes_mutexes_); + // std::swap(nodes_mutexes_, other.nodes_mutexes_); std::swap(contexts_, other.contexts_); // Non-atomic parts. 
@@ -2117,10 +2112,10 @@ class index_gt { && limits.members <= limits_.members) return true; - nodes_mutexes_t new_mutexes(limits.members); + bool storage_reserved = storage_.reserve(limits.members); // buffer_gt new_nodes(limits.members); buffer_gt new_contexts(limits.threads()); - if (!new_contexts || !new_mutexes) + if (!new_contexts || !storage_reserved) return false; // Move the nodes info, and deallocate previous buffers. @@ -2130,7 +2125,6 @@ class index_gt { limits_ = limits; nodes_capacity_ = limits.members; contexts_ = std::move(new_contexts); - nodes_mutexes_ = std::move(new_mutexes); return true; } @@ -2356,13 +2350,13 @@ class index_gt { new_level_lock.unlock(); // nodes_[new_slot] = node; - storage.node_append_(key, target_level); - node = storage.node_at_(new_slot); + storage_.node_append_(new_slot, key, target_level); + node = storage_.node_at_(new_slot); result.new_size = new_slot + 1; result.slot = new_slot; callback(at(new_slot)); - node_lock_t new_lock = node_lock_(new_slot); + auto new_lock = storage_.node_lock_(new_slot); // Do nothing for the first element if (!new_slot) { @@ -2449,8 +2443,8 @@ class index_gt { if (!next.reserve(config.expansion)) return result.failed("Out of memory!"); - node_lock_t new_lock = node_lock_(old_slot); - node_t node = storage.node_at_(old_slot); + auto new_lock = storage_.node_lock_(old_slot); + node_t node = storage_.node_at_(old_slot); level_t node_level = node.level(); span_bytes_t node_bytes = node_bytes_(node); @@ -2811,7 +2805,7 @@ class index_gt { return result.failed("Failed to pull nodes from the stream"); } // nodes_[i] = node_t{node_bytes.data()}; - storage_.node_append_(node_t{node_bytes.data()}); + storage_.node_append_(i, node_t{node_bytes.data()}); if (!progress(i + 1, header.size)) return result.failed("Terminated by user"); @@ -2984,10 +2978,9 @@ class index_gt { max_level_ = static_cast(header.max_level); entry_slot_ = static_cast(header.entry_slot); - assert(false); // Rapidly address all the 
nodes for (std::size_t i = 0; i != header.size; ++i) { - storage_.node_append_(node_t{(byte_t*)file.data() + offsets[i]}); + storage_.node_append_(i, node_t{(byte_t*)file.data() + offsets[i]}); if (!progress(i + 1, header.size)) return result.failed("Terminated by user"); } @@ -3152,17 +3145,17 @@ class index_gt { return level ? neighbors_non_base_(node, level) : neighbors_base_(node); } - struct node_lock_t { - nodes_mutexes_t& mutexes; - std::size_t slot; - inline ~node_lock_t() noexcept { mutexes.atomic_reset(slot); } - }; + // struct node_lock_t { + // nodes_mutexes_t& mutexes; + // std::size_t slot; + // inline ~node_lock_t() noexcept { mutexes.atomic_reset(slot); } + // }; - inline node_lock_t node_lock_(std::size_t slot) const noexcept { - while (nodes_mutexes_.atomic_set(slot)) - ; - return {nodes_mutexes_, slot}; - } + // inline node_lock_t node_lock_(std::size_t slot) const noexcept { + // while (nodes_mutexes_.atomic_set(slot)) + // ; + // return {nodes_mutexes_, slot}; + // } // ^^^ move these to storage template @@ -3191,7 +3184,7 @@ class index_gt { storage_at&& storage, metric_at&& metric, // std::size_t new_slot, level_t level, context_t& context) usearch_noexcept_m { - node_t new_node = storage.node_at_(new_slot); + node_t new_node = storage_.node_at_(new_slot); top_candidates_t& top = context.top_candidates; // Outgoing links from `new_slot`: @@ -3202,7 +3195,7 @@ class index_gt { for (std::size_t idx = 0; idx != top_view.size(); idx++) { usearch_assert_m(!new_neighbors[idx], "Possible memory corruption"); - usearch_assert_m(level <= storage.node_at_(top_view[idx].slot).level(), "Linking to missing level"); + usearch_assert_m(level <= storage_.node_at_(top_view[idx].slot).level(), "Linking to missing level"); new_neighbors.push_back(top_view[idx].slot); } } @@ -3215,7 +3208,7 @@ class index_gt { storage_at&& storage, metric_at&& metric, // std::size_t new_slot, value_at&& value, level_t level, context_t& context) usearch_noexcept_m { - node_t 
new_node = storage.node_at_(new_slot); + node_t new_node = storage_.node_at_(new_slot); top_candidates_t& top = context.top_candidates; neighbors_ref_t new_neighbors = neighbors_(new_node, level); @@ -3224,8 +3217,12 @@ class index_gt { for (compressed_slot_t close_slot : new_neighbors) { if (close_slot == new_slot) continue; - node_lock_t close_lock = node_lock_(close_slot); - node_t close_node = storage.node_at_(close_slot); + // todo:: q:: I do not know all the idiosyncrasies of 'auto'. Is this a proper usage of this? + // I chose auto here to allow storage define its own lock smart pointer, without making assumptions + // about it here. BUt are there cases where, e.g. auto will pick up the lock in the wrong way and instantly + // drop it for example? + auto close_lock = storage_.node_lock_(close_slot); + node_t close_node = storage_.node_at_(close_slot); neighbors_ref_t close_header = neighbors_(close_node, level); usearch_assert_m(close_header.size() <= connectivity_max, "Possible corruption"); @@ -3342,9 +3339,8 @@ class index_gt { bool changed; do { changed = false; - node_lock_t closest_lock = node_lock_(closest_slot); - storage.node_lock_(closest_slot); - neighbors_ref_t closest_neighbors = neighbors_non_base_(storage.node_at_(closest_slot), level); + auto closest_lock = storage_.node_lock_(closest_slot); + neighbors_ref_t closest_neighbors = neighbors_non_base_(storage_.node_at_(closest_slot), level); using vvv = typename std::decay::type::vector_key_t; static_assert(std::is_same::value, "this cannot happen"); @@ -3412,8 +3408,8 @@ class index_gt { compressed_slot_t candidate_slot = candidacy.slot; if (new_slot == candidate_slot) continue; - node_t candidate_ref = storage.node_at_(candidate_slot); - node_lock_t candidate_lock = node_lock_(candidate_slot); + node_t candidate_ref = storage_.node_at_(candidate_slot); + auto candidate_lock = storage_.node_lock_(candidate_slot); neighbors_ref_t candidate_neighbors = neighbors_(candidate_ref, level); // 
Optional prefetching @@ -3483,7 +3479,7 @@ class index_gt { next.pop(); context.iteration_cycles++; - neighbors_ref_t candidate_neighbors = neighbors_base_(storage.node_at_(candidate.slot)); + neighbors_ref_t candidate_neighbors = neighbors_base_(storage_.node_at_(candidate.slot)); // Optional prefetching if (!is_dummy()) { @@ -3504,7 +3500,7 @@ class index_gt { // This can substantially grow our priority queue: next.insert({-successor_dist, successor_slot}); if (!is_dummy()) - if (!predicate(member_cref_t{storage.node_at_(successor_slot).ckey(), successor_slot})) + if (!predicate(member_cref_t{storage_.node_at_(successor_slot).ckey(), successor_slot})) continue; // This will automatically evict poor matches: diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 9847c30f3..4f3d15252 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -284,7 +284,8 @@ inline index_dense_metadata_result_t index_dense_metadata_from_buffer(memory_map template class storage_proxy_t { using vector_key_t = storage_proxy_key_t; using dynamic_allocator_t = aligned_allocator_gt; - using nodes_mutexes_t = bitset_gt; + // using nodes_mutexes_t = bitset_gt; + using nodes_mutexes_t = bitset_gt<>; using nodes_t = std::vector>; /** * @brief Integer for the number of node neighbors at a specific level of the @@ -295,13 +296,13 @@ template operator()(std::size_t slot) const noexcept { /*return index_->nodes_[];*/ + std::unique_lock lock(*vector_lock_); nodes_t v = *nodes_; usearch_assert_m(slot < v.size(), "Storage node index out of bounds"); return v[slot]; } inline node_t node_at_(std::size_t idx) const noexcept { return (*this)(idx); } + // todo:: reserve is not thread safe if another thread is running search or insert + bool reserve(std::size_t count) { + std::unique_lock lock(*vector_lock_); + if (count < nodes_->size()) + return true; + nodes_mutexes_t new_mutexes(count); + *nodes_mutexes_ = std::move(new_mutexes); + 
nodes_->resize(count); + return true; + } - void clear() { nodes_->clear(); } + void clear() { + std::unique_lock lock(*vector_lock_); + nodes_mutexes_->clear(); + // std::fill(nodes_->begin(), nodes_->end(), 0); + } void reset() { + std::unique_lock lock(*vector_lock_); + *nodes_mutexes_ = {}; nodes_->clear(); nodes_->shrink_to_fit(); } @@ -375,17 +397,35 @@ template push_back(node_make_(key, level)); + void node_append_(size_t slot, vector_key_t key, level_t level) { + std::unique_lock lock(*vector_lock_); + + auto count = nodes_->size(); + if (count > nodes_mutexes_->size()) { + assert(false); + nodes_mutexes_t new_mutexes(count); + *nodes_mutexes_ = std::move(new_mutexes); + } + (*nodes_)[slot] = node_make_(key, level); } - void node_append_(node_t node) { nodes_->push_back(node); } + void node_append_(size_t slot, node_t node) { + std::unique_lock lock(*vector_lock_); + + auto count = nodes_->size(); + if (count > nodes_mutexes_->size()) { + assert(false); + nodes_mutexes_t new_mutexes(count); + *nodes_mutexes_ = std::move(new_mutexes); + } + (*nodes_)[slot] = node; + } + /// -------- node locking logic inline node_lock_t node_lock_(std::size_t slot) const noexcept { - // while (nodes_mutexes_.atomic_set(slot)) - // ; - return {nodes_mutexes_, slot}; + while (nodes_mutexes_->atomic_set(slot)) + ; + return {*nodes_mutexes_, slot}; } inline size_t size() { return nodes_->size(); } }; @@ -416,7 +456,7 @@ class index_dense_gt { using key_t = vector_key_t; using compressed_slot_t = compressed_slot_at; using distance_t = distance_punned_t; - using storage_t = storage_proxy_t; + using storage_t = storage_proxy_t; using metric_t = metric_punned_t; using member_ref_t = member_ref_gt; @@ -496,7 +536,9 @@ class index_dense_gt { /// @brief C-style array of `node_t` smart-pointers. 
std::vector> nodes_; - storage_t storage_{&nodes_, config_}; + std::mutex vector_mutex_; + bitset_t nodes_mutexes_; + storage_t storage_{&nodes_, &vector_mutex_, &nodes_mutexes_, config_}; /// @brief Originally forms and array of integers [0, threads], marking all mutable std::vector available_threads_; @@ -633,7 +675,9 @@ class index_dense_gt { // Available since C11, but only C++17, so we use the C version. index_t* raw = index_allocator_t{}.allocate(1); - new (raw) index_t({&result.nodes_, config}, config); + result.storage_ = storage_proxy_t{&result.nodes_, &result.vector_mutex_, + &result.nodes_mutexes_, config}; + new (raw) index_t(result.storage_, config); result.typed_ = raw; return result; } From c36756ba11f3eb6a59fb546755ccbc45d7ca604d Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 1 Jan 2024 22:41:18 +0000 Subject: [PATCH 15/80] Add size to bitset_gt --- include/usearch/index.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 7205867ad..369c62529 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -402,6 +402,8 @@ template > class bitset_gt { static constexpr std::size_t slots(std::size_t bits) { return divide_round_up(bits); } compressed_slot_t* slots_{}; + /// @brief capacity - number of bits in the bitset + std::size_t capacity_{}; /// @brief Number of slots. std::size_t count_{}; @@ -410,6 +412,7 @@ template > class bitset_gt { ~bitset_gt() noexcept { reset(); } explicit operator bool() const noexcept { return slots_; } + std::size_t size() const noexcept { return capacity_; } void clear() noexcept { if (slots_) std::memset(slots_, 0, count_ * sizeof(compressed_slot_t)); @@ -424,18 +427,20 @@ template > class bitset_gt { bitset_gt(std::size_t capacity) noexcept : slots_((compressed_slot_t*)allocator_t{}.allocate(slots(capacity) * sizeof(compressed_slot_t))), - capacity_(slots_ ? 
capacity : 0u), count_(slots_ ? slots(capacity) : 0u) { clear(); } bitset_gt(bitset_gt&& other) noexcept { slots_ = exchange(other.slots_, nullptr); count_ = exchange(other.count_, 0); + capacity_ = exchange(other.capacity_, 0); } bitset_gt& operator=(bitset_gt&& other) noexcept { std::swap(slots_, other.slots_); std::swap(count_, other.count_); + std::swap(capacity_, other.capacity_); return *this; } From 9b30845ae3f369cdf20e30dae2c9a5a7ae5986ac Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 1 Jan 2024 22:41:42 +0000 Subject: [PATCH 16/80] amend to the one before the last one --- include/usearch/index_dense.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 4f3d15252..10fc3618e 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -345,7 +345,7 @@ template lock(*vector_lock_); - if (count < nodes_->size()) + if (count < nodes_mutexes_->size()) return true; nodes_mutexes_t new_mutexes(count); *nodes_mutexes_ = std::move(new_mutexes); From b78f4f9d756c6d41421abb1859e27a44ae19b7cc Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 1 Jan 2024 23:05:37 +0000 Subject: [PATCH 17/80] Remove per-function call storage argument in favor of global storage --- cpp/test.cpp | 2 +- include/usearch/index.hpp | 74 +++++++++++++-------------------- include/usearch/index_dense.hpp | 8 ++-- 3 files changed, 35 insertions(+), 49 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index e037ef49b..d1e502f3f 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -202,7 +202,7 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { bitset_gt nodes_mutexes; index_storage_t storage{&nodes, &vector_lock, &nodes_mutexes, config}; index_typed_t index_typed(storage, config); - test_cosine(index_typed, matrix, storage, metric); + test_cosine(index_typed, matrix, metric); } // Type-punned: diff --git a/include/usearch/index.hpp 
b/include/usearch/index.hpp index 369c62529..cc11e5e13 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1727,7 +1727,7 @@ static_assert(std::is_trivially_destructible>::value, "Nod * - `member_gt` contains an already prefetched copy of the key. * */ -template using o_node_t = node_t; using node_t = node_t; - using storage_t = storage_at1; - template class member_iterator_gt { using ref_t = ref_at; using index_t = index_at; @@ -1921,7 +1919,7 @@ class index_gt { }; index_config_t config_{}; - storage_at1 storage_; + storage_at storage_; index_limits_t limits_{}; mutable dynamic_allocator_t dynamic_allocator_{}; @@ -1951,7 +1949,6 @@ class index_gt { /// @brief C-style array of `node_t` smart-pointers. // buffer_gt nodes_{}; - using contexts_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; /// @brief Array of thread-specific buffers for temporary data. @@ -1970,8 +1967,8 @@ class index_gt { * @section Exceptions * Doesn't throw, unless the ::metric's and ::allocators's throw on copy-construction. 
*/ - explicit index_gt( // - storage_at1 storage, // + explicit index_gt( // + storage_at storage, // index_config_t config = {}, dynamic_allocator_t dynamic_allocator = {}, tape_allocator_t tape_allocator = {}) noexcept : storage_(storage), config_(config), limits_(0, 0), dynamic_allocator_(std::move(dynamic_allocator)), @@ -2298,14 +2295,12 @@ class index_gt { */ template < // typename value_at, // - typename storage_at, // typename metric_at, // typename callback_at = dummy_callback_t, // typename prefetch_at = dummy_prefetch_t // > add_result_t add( // vector_key_t key, value_at&& value, // - storage_at&& storage, // metric_at&& metric, // index_update_config_t config = {}, // callback_at&& callback = callback_at{}, // @@ -2375,7 +2370,6 @@ class index_gt { result.visited_members = context.iteration_cycles; connect_node_across_levels_( // - storage, // value, metric, prefetch, // new_slot, entry_idx_copy, max_level_copy, target_level, // config, context); @@ -2413,7 +2407,6 @@ class index_gt { */ template < // typename value_at, // - typename storage_at, // typename metric_at, // typename callback_at = dummy_callback_t, // typename prefetch_at = dummy_prefetch_t // @@ -2422,7 +2415,6 @@ class index_gt { member_iterator_t iterator, // vector_key_t key, // value_at&& value, // - storage_at&& storage, // metric_at&& metric, // index_update_config_t config = {}, // callback_at&& callback = callback_at{}, // @@ -2461,7 +2453,6 @@ class index_gt { result.visited_members = context.iteration_cycles; connect_node_across_levels_( // - storage, // value, metric, prefetch, // old_slot, entry_slot_, max_level_, node_level, // config, context); @@ -2487,7 +2478,6 @@ class index_gt { */ template < // typename value_at, // - typename storage_at, // typename metric_at, // typename predicate_at = dummy_predicate_t, // typename prefetch_at = dummy_prefetch_t // @@ -2495,7 +2485,6 @@ class index_gt { search_result_t search( // value_at&& query, // std::size_t wanted, // - 
storage_at&& storage, // metric_at&& metric, // index_search_config_t config = {}, // predicate_at&& predicate = predicate_at{}, // @@ -2523,11 +2512,10 @@ class index_gt { if (!top.reserve(expansion)) return result.failed("Out of memory!"); - std::size_t closest_slot = - search_for_one_(query, storage, metric, prefetch, entry_slot_, max_level_, 0, context); + std::size_t closest_slot = search_for_one_(query, metric, prefetch, entry_slot_, max_level_, 0, context); // For bottom layer we need a more optimized procedure - if (!search_to_find_in_base_(query, storage, metric, predicate, prefetch, closest_slot, expansion, context)) + if (!search_to_find_in_base_(query, metric, predicate, prefetch, closest_slot, expansion, context)) return result.failed("Out of memory!"); } @@ -2552,7 +2540,6 @@ class index_gt { */ template < // typename value_at, // - typename storage_at, // typename metric_at, // typename predicate_at = dummy_predicate_t, // typename prefetch_at = dummy_prefetch_t // @@ -2560,7 +2547,6 @@ class index_gt { cluster_result_t cluster( // value_at&& query, // std::size_t level, // - storage_at&& storage, // metric_at&& metric, // index_cluster_config_t config = {}, // predicate_at&& predicate = predicate_at{}, // @@ -2581,7 +2567,7 @@ class index_gt { return result.failed("Out of memory!"); result.cluster.member = - at(search_for_one_(query, storage, metric, prefetch, entry_slot_, max_level_, level - 1, context)); + at(search_for_one_(query, metric, prefetch, entry_slot_, max_level_, level - 1, context)); result.cluster.distance = context.measure(query, result.cluster.member, metric); // Normalize stats @@ -3084,6 +3070,7 @@ class index_gt { } private: + // todo:: only needed in storage inline static precomputed_constants_t precompute_(index_config_t const& config) noexcept { precomputed_constants_t pre; pre.inverse_log_connectivity = 1.0 / std::log(static_cast(config.connectivity)); @@ -3163,30 +3150,29 @@ class index_gt { // } // ^^^ move these to storage 
- template + template void connect_node_across_levels_( // - storage_at&& storage, value_at&& value, metric_at&& metric, prefetch_at&& prefetch, // + value_at&& value, metric_at&& metric, prefetch_at&& prefetch, // std::size_t node_slot, std::size_t entry_slot, level_t max_level, level_t target_level, // index_update_config_t const& config, context_t& context) usearch_noexcept_m { // Go down the level, tracking only the closest match std::size_t closest_slot = search_for_one_( // - value, storage, metric, prefetch, // + value, metric, prefetch, // entry_slot, max_level, target_level, context); // From `target_level` down perform proper extensive search for (level_t level = (std::min)(target_level, max_level); level >= 0; --level) { // TODO: Handle out of memory conditions - search_to_insert_(value, storage, metric, prefetch, closest_slot, node_slot, level, config.expansion, - context); - closest_slot = connect_new_node_(storage, metric, node_slot, level, context); - reconnect_neighbor_nodes_(storage, metric, node_slot, value, level, context); + search_to_insert_(value, metric, prefetch, closest_slot, node_slot, level, config.expansion, context); + closest_slot = connect_new_node_(metric, node_slot, level, context); + reconnect_neighbor_nodes_(metric, node_slot, value, level, context); } } - template - std::size_t connect_new_node_( // - storage_at&& storage, metric_at&& metric, // + template + std::size_t connect_new_node_( // + metric_at&& metric, // std::size_t new_slot, level_t level, context_t& context) usearch_noexcept_m { node_t new_node = storage_.node_at_(new_slot); @@ -3208,9 +3194,9 @@ class index_gt { return new_neighbors[0]; } - template - void reconnect_neighbor_nodes_( // - storage_at&& storage, metric_at&& metric, // + template + void reconnect_neighbor_nodes_( // + metric_at&& metric, // std::size_t new_slot, value_at&& value, level_t level, context_t& context) usearch_noexcept_m { node_t new_node = storage_.node_at_(new_slot); @@ -3326,10 +3312,10 
@@ class index_gt { candidates_iterator_t end() const noexcept { return {index, neighbors, visits, neighbors.size()}; } }; - template - std::size_t search_for_one_( // - value_at&& query, // - storage_at&& storage, metric_at&& metric, prefetch_at&& prefetch, // + template + std::size_t search_for_one_( // + value_at&& query, // + metric_at&& metric, prefetch_at&& prefetch, // std::size_t closest_slot, level_t begin_level, level_t end_level, context_t& context) const noexcept { visits_hash_set_t& visits = context.visits; @@ -3376,9 +3362,9 @@ class index_gt { * Locks the nodes in the process, assuming other threads are updating neighbors lists. * @return `true` if procedure succeeded, `false` if run out of memory. */ - template - bool search_to_insert_( // - value_at&& query, storage_at&& storage, metric_at&& metric, prefetch_at&& prefetch, // + template + bool search_to_insert_( // + value_at&& query, metric_at&& metric, prefetch_at&& prefetch, // std::size_t start_slot, std::size_t new_slot, level_t level, std::size_t top_limit, context_t& context) noexcept { @@ -3450,9 +3436,9 @@ class index_gt { * Doesn't lock any nodes, assuming read-only simultaneous access. * @return `true` if procedure succeeded, `false` if run out of memory. 
*/ - template - bool search_to_find_in_base_( // - value_at&& query, storage_at&& storage, metric_at&& metric, predicate_at&& predicate, prefetch_at&& prefetch, // + template + bool search_to_find_in_base_( // + value_at&& query, metric_at&& metric, predicate_at&& predicate, prefetch_at&& prefetch, // std::size_t start_slot, std::size_t expansion, context_t& context) const noexcept { visits_hash_set_t& visits = context.visits; diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 10fc3618e..ec6818e18 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -1922,9 +1922,9 @@ class index_dense_gt { update_config.expansion = config_.expansion_add; metric_proxy_t metric{*this}; - return reuse_node ? typed_->update(typed_->iterator_at(free_slot), key, vector_data, storage_, metric, - update_config, on_success) - : typed_->add(key, vector_data, storage_, metric, update_config, on_success); + return reuse_node + ? typed_->update(typed_->iterator_at(free_slot), key, vector_data, metric, update_config, on_success) + : typed_->add(key, vector_data, metric, update_config, on_success); } template @@ -1949,7 +1949,7 @@ class index_dense_gt { auto allow = [=](member_cref_t const& member) noexcept { return member.key != free_key_; }; - return typed_->search(vector_data, wanted, storage_, metric_proxy_t{*this}, search_config, allow); + return typed_->search(vector_data, wanted, metric_proxy_t{*this}, search_config, allow); } template From ec9369aca58810408a20ac2a60924f58cbf171c7 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 1 Jan 2024 23:11:55 +0000 Subject: [PATCH 18/80] Get rid of global storage lock --- cpp/test.cpp | 3 +-- include/usearch/index.hpp | 1 + include/usearch/index_dense.hpp | 21 +++++---------------- 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index d1e502f3f..c0bea4495 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -198,9 +198,8 @@ void 
test_cosine(std::size_t collection_size, std::size_t dimensions) { metric_t metric{&matrix, dimensions}; index_config_t config(connectivity); std::vector> nodes; - std::mutex vector_lock; bitset_gt nodes_mutexes; - index_storage_t storage{&nodes, &vector_lock, &nodes_mutexes, config}; + index_storage_t storage{&nodes, &nodes_mutexes, config}; index_typed_t index_typed(storage, config); test_cosine(index_typed, matrix, metric); } diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index cc11e5e13..95e5d8100 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1735,6 +1735,7 @@ template // class index_gt { public: + using storage_t = storage_at; using distance_t = distance_at; using vector_key_t = key_at; using key_t = vector_key_t; diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index ec6818e18..464b3a3b8 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -293,10 +293,9 @@ template operator()(std::size_t slot) const noexcept { /*return index_->nodes_[];*/ - std::unique_lock lock(*vector_lock_); nodes_t v = *nodes_; usearch_assert_m(slot < v.size(), "Storage node index out of bounds"); return v[slot]; @@ -344,7 +340,6 @@ template node_at_(std::size_t idx) const noexcept { return (*this)(idx); } // todo:: reserve is not thread safe if another thread is running search or insert bool reserve(std::size_t count) { - std::unique_lock lock(*vector_lock_); if (count < nodes_mutexes_->size()) return true; nodes_mutexes_t new_mutexes(count); @@ -354,12 +349,10 @@ template lock(*vector_lock_); nodes_mutexes_->clear(); // std::fill(nodes_->begin(), nodes_->end(), 0); } void reset() { - std::unique_lock lock(*vector_lock_); *nodes_mutexes_ = {}; nodes_->clear(); nodes_->shrink_to_fit(); @@ -398,8 +391,6 @@ template lock(*vector_lock_); - auto count = nodes_->size(); if (count > nodes_mutexes_->size()) { assert(false); @@ -410,8 +401,6 @@ template lock(*vector_lock_); - auto 
count = nodes_->size(); if (count > nodes_mutexes_->size()) { assert(false); @@ -538,7 +527,7 @@ class index_dense_gt { std::vector> nodes_; std::mutex vector_mutex_; bitset_t nodes_mutexes_; - storage_t storage_{&nodes_, &vector_mutex_, &nodes_mutexes_, config_}; + storage_t storage_{&nodes_, &nodes_mutexes_, config_}; /// @brief Originally forms and array of integers [0, threads], marking all mutable std::vector available_threads_; @@ -675,8 +664,8 @@ class index_dense_gt { // Available since C11, but only C++17, so we use the C version. index_t* raw = index_allocator_t{}.allocate(1); - result.storage_ = storage_proxy_t{&result.nodes_, &result.vector_mutex_, - &result.nodes_mutexes_, config}; + result.storage_ = + storage_proxy_t{&result.nodes_, &result.nodes_mutexes_, config}; new (raw) index_t(result.storage_, config); result.typed_ = raw; return result; From 7e966b54694da1ecbeaec5004f44e362cfacfb99 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 2 Jan 2024 02:42:47 +0000 Subject: [PATCH 19/80] Fix memory leaks --- include/usearch/index.hpp | 36 ++++++++++++++------------------- include/usearch/index_dense.hpp | 32 ++++++++++++++++++++++------- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 95e5d8100..f2061b538 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1920,7 +1920,7 @@ class index_gt { }; index_config_t config_{}; - storage_at storage_; + storage_t storage_{}; index_limits_t limits_{}; mutable dynamic_allocator_t dynamic_allocator_{}; @@ -2048,12 +2048,17 @@ class index_gt { * Will keep the number of available threads/contexts the same as it was. 
*/ void clear() noexcept { - // if (!has_reset()) { - // std::size_t n = nodes_count_; - // for (std::size_t i = 0; i != n; ++i) - // node_free_(i); - // } else - // tape_allocator_.deallocate(nullptr, 0); + if (!viewed_file_) { + std::size_t n = nodes_count_; + for (std::size_t i = 0; i != n; ++i) { + node_t node = storage_.node_at_(i); + // if (!has_reset()) { + storage_.node_free_(i, node); + // } else + // tape_allocator_.deallocate(nullptr, 0); + } + } + storage_.clear(); nodes_count_ = 0; @@ -2342,7 +2347,9 @@ class index_gt { } // Allocate the neighbors - node_t node = node_make_(key, target_level); + // nodes_[new_slot] = node; + storage_.node_append_(new_slot, key, target_level); + node_t node = storage_.node_at_(new_slot); if (!node) { nodes_count_.fetch_sub(1); return result.failed("Out of memory!"); @@ -2350,10 +2357,6 @@ class index_gt { if (target_level <= max_level_copy) new_level_lock.unlock(); - // nodes_[new_slot] = node; - storage_.node_append_(new_slot, key, target_level); - node = storage_.node_at_(new_slot); - result.new_size = new_slot + 1; result.slot = new_slot; callback(at(new_slot)); @@ -3118,15 +3121,6 @@ class index_gt { return node_t{data}; } - void node_free_(std::size_t idx) noexcept { - if (viewed_file_) - return; - - // node_t& node = nodes_[idx]; - // tape_allocator_.deallocate(node.tape(), node_bytes_(node).size()); - // node = node_t{}; - } - inline node_t node_at_11_(std::size_t idx) const noexcept { return storage_.node_at_(idx); /* nodes_[idx]; */ } inline neighbors_ref_t neighbors_base_(node_t node) const noexcept { return {node.neighbors_tape()}; } diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 464b3a3b8..5deb794a7 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -286,7 +286,7 @@ template ; // using nodes_mutexes_t = bitset_gt; using nodes_mutexes_t = bitset_gt<>; - using nodes_t = std::vector>; + using nodes_t = std::vector>; /** * @brief Integer 
for the number of node neighbors at a specific level of the * multi-level graph. It's selected to be `std::uint32_t` to improve the @@ -294,8 +294,8 @@ template clear(); - // std::fill(nodes_->begin(), nodes_->end(), 0); + if (nodes_->data()) + std::memset(nodes_->data(), 0, nodes_->size()); } void reset() { *nodes_mutexes_ = {}; @@ -374,9 +375,18 @@ template Date: Tue, 2 Jan 2024 04:33:46 +0000 Subject: [PATCH 20/80] Rename capacity to size in added bitset_gt size --- include/usearch/index.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index f2061b538..5e398a9bd 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -402,8 +402,8 @@ template > class bitset_gt { static constexpr std::size_t slots(std::size_t bits) { return divide_round_up(bits); } compressed_slot_t* slots_{}; - /// @brief capacitiy - number of bits in the bitset - std::size_t capacity_{}; + /// @brief size - number of bits in the bitset + std::size_t size_{}; /// @brief Number of slots. std::size_t count_{}; @@ -412,7 +412,7 @@ template > class bitset_gt { ~bitset_gt() noexcept { reset(); } explicit operator bool() const noexcept { return slots_; } - std::size_t size() const noexcept { return capacity_; } + std::size_t size() const noexcept { return size_; } void clear() noexcept { if (slots_) std::memset(slots_, 0, count_ * sizeof(compressed_slot_t)); @@ -427,20 +427,20 @@ template > class bitset_gt { bitset_gt(std::size_t capacity) noexcept : slots_((compressed_slot_t*)allocator_t{}.allocate(slots(capacity) * sizeof(compressed_slot_t))), - capacity_(slots_ ? capacity : 0u), count_(slots_ ? slots(capacity) : 0u) { + size_(slots_ ? capacity : 0u), count_(slots_ ? 
slots(capacity) : 0u) { clear(); } bitset_gt(bitset_gt&& other) noexcept { slots_ = exchange(other.slots_, nullptr); count_ = exchange(other.count_, 0); - capacity_ = exchange(other.capacity_, 0); + size_ = exchange(other.size_, 0); } bitset_gt& operator=(bitset_gt&& other) noexcept { std::swap(slots_, other.slots_); std::swap(count_, other.count_); - std::swap(capacity_, other.capacity_); + std::swap(size_, other.size_); return *this; } From c28751a55f5d3c5c0a70118f2475c4e1e7e7d31d Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 2 Jan 2024 04:38:07 +0000 Subject: [PATCH 21/80] Move node sizing functions to node_t definition --- include/usearch/index.hpp | 84 +++++++++++++-------------------- include/usearch/index_dense.hpp | 35 +++++++------- 2 files changed, 48 insertions(+), 71 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 5e398a9bd..844b32f11 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1606,6 +1606,13 @@ template inline std::size_t get_slot(member_ref_gt con template inline key_at get_key(member_ref_gt const& m) noexcept { return m.key; } using level_t = std::int16_t; + +struct precomputed_constants_t { + double inverse_log_connectivity{}; + std::size_t neighbors_bytes{}; + std::size_t neighbors_base_bytes{}; +}; + // todo:: this is public, but then we make assumptions which are not communicated via this interface // clean these up later // @@ -1627,11 +1634,26 @@ template class node_t { public: using vector_key_t = key_at; + using span_bytes_t = span_gt; explicit node_t(byte_t* tape) noexcept : tape_(tape) {} byte_t* tape() const noexcept { return tape_; } byte_t* neighbors_tape() const noexcept { return tape_ + node_head_bytes_(); } explicit operator bool() const noexcept { return tape_; } + inline span_bytes_t node_bytes_(const precomputed_constants_t& pre, node_t node) const noexcept { + return {node.tape(), node_bytes_(pre, node.level())}; + } + + inline std::size_t 
node_bytes_(const precomputed_constants_t& pre, level_t level) const noexcept { + return node_head_bytes_() + node_neighbors_bytes_(pre, level); + } + inline std::size_t node_neighbors_bytes_(const precomputed_constants_t& pre, node_t node) const noexcept { + return node_neighbors_bytes_(pre, node.level()); + } + inline std::size_t node_neighbors_bytes_(const precomputed_constants_t& pre, level_t level) const noexcept { + return pre.neighbors_base_bytes + pre.neighbors_bytes * level; + } + node_t() = default; node_t(node_t const&) = default; node_t& operator=(node_t const&) = default; @@ -1742,6 +1764,7 @@ class index_gt { using compressed_slot_t = compressed_slot_at; using dynamic_allocator_t = dynamic_allocator_at; using tape_allocator_t = tape_allocator_at; + using span_bytes_t = span_gt; static_assert(sizeof(vector_key_t) >= sizeof(compressed_slot_t), "Having tiny keys doesn't make sense."); using member_cref_t = member_cref_gt; @@ -1769,8 +1792,8 @@ class index_gt { using reference = ref_t; // todo:: take care of these to use external storage - reference operator*() const noexcept { return {index_->node_at_11_(slot_).key(), slot_}; } - vector_key_t key() const noexcept { return index_->node_at_11_(slot_).key(); } + reference operator*() const noexcept { return {index_->storage_.node_at_(slot_).key(), slot_}; } + vector_key_t key() const noexcept { return index_->storage_.node_at_(slot_).key(); } friend inline std::size_t get_slot(member_iterator_gt const& it) noexcept { return it.slot_; } friend inline vector_key_t get_key(member_iterator_gt const& it) noexcept { return it.key(); } @@ -1833,11 +1856,6 @@ class index_gt { using visits_hash_set_t = growing_hash_set_gt, dynamic_allocator_t>; - struct precomputed_constants_t { - double inverse_log_connectivity{}; - std::size_t neighbors_bytes{}; - std::size_t neighbors_base_bytes{}; - }; /// @brief A space-efficient internal data-structure used in graph traversal queues. 
struct candidate_t { distance_t distance; @@ -2448,7 +2466,7 @@ class index_gt { node_t node = storage_.node_at_(old_slot); level_t node_level = node.level(); - span_bytes_t node_bytes = node_bytes_(node); + span_bytes_t node_bytes = node.node_bytes_(pre_, node); std::memset(node_bytes.data(), 0, node_bytes.size()); node.level(node_level); @@ -2604,7 +2622,7 @@ class index_gt { edges += neighbors_(node, level).size(); ++result.nodes; - result.allocated_bytes += node_bytes_(node).size(); + result.allocated_bytes += storage_.node_size_bytes(i); result.edges += edges; result.max_edges += max_edges; } @@ -2685,7 +2703,7 @@ class index_gt { return total; } - std::size_t memory_usage_per_node(level_t level) const noexcept { return node_bytes_(level); } + std::size_t memory_usage_per_node(level_t level) const noexcept { return node_t{}.node_bytes_(pre_, level); } #pragma endregion @@ -2698,7 +2716,7 @@ class index_gt { std::size_t neighbors_length = 0; for (std::size_t i = 0; i != size(); ++i) - neighbors_length += node_bytes_(storage_.node_at_(i).level()) + sizeof(level_t); + neighbors_length += node_bytes_(pre_, storage_.node_at_(i).level()) + sizeof(level_t); return sizeof(index_serialized_header_t) + neighbors_length; } @@ -2738,7 +2756,7 @@ class index_gt { // After that dump the nodes themselves for (std::size_t i = 0; i != header.size; ++i) { - span_bytes_t node_bytes = node_bytes_(storage_.node_at_(i)); + span_bytes_t node_bytes = node_t{}.node_bytes_(pre_, storage_.node_at_(i)); if (!output(node_bytes.data(), node_bytes.size())) return result.failed("Failed to serialize into stream"); if (!progress(++processed, total)) @@ -2954,9 +2972,9 @@ class index_gt { misaligned_ptr_gt levels{(byte_t*)file.data() + offset + sizeof(header)}; offsets[0u] = offset + sizeof(header) + sizeof(level_t) * header.size; for (std::size_t i = 1; i < header.size; ++i) - offsets[i] = offsets[i - 1] + node_bytes_(levels[i - 1]); + offsets[i] = offsets[i - 1] + node_t{}.node_bytes_(pre_, 
levels[i - 1]); - std::size_t total_bytes = offsets[header.size - 1] + node_bytes_(levels[header.size - 1]); + std::size_t total_bytes = offsets[header.size - 1] + node_t{}.node_bytes_(pre_, levels[header.size - 1]); if (file.size() < total_bytes) { reset(); return result.failed("File is corrupted and can't fit all the nodes"); @@ -3083,44 +3101,6 @@ class index_gt { return pre; } - // move these to storage - using span_bytes_t = span_gt; - - inline span_bytes_t node_bytes_(node_t node) const noexcept { return {node.tape(), node_bytes_(node.level())}; } - inline std::size_t node_bytes_(level_t level) const noexcept { - return node_head_bytes_() + node_neighbors_bytes_(level); - } - inline std::size_t node_neighbors_bytes_(node_t node) const noexcept { return node_neighbors_bytes_(node.level()); } - inline std::size_t node_neighbors_bytes_(level_t level) const noexcept { - return pre_.neighbors_base_bytes + pre_.neighbors_bytes * level; - } - - span_bytes_t node_malloc_(level_t level) noexcept { - std::size_t node_bytes = node_bytes_(level); - byte_t* data = (byte_t*)tape_allocator_.allocate(node_bytes); - return data ? 
span_bytes_t{data, node_bytes} : span_bytes_t{}; - } - - node_t node_make_(vector_key_t key, level_t level) noexcept { - span_bytes_t node_bytes = node_malloc_(level); - if (!node_bytes) - return {}; - - std::memset(node_bytes.data(), 0, node_bytes.size()); - node_t node{(byte_t*)node_bytes.data()}; - node.key(key); - node.level(level); - return node; - } - - node_t node_make_copy_(span_bytes_t old_bytes) noexcept { - byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); - if (!data) - return {}; - std::memcpy(data, old_bytes.data(), old_bytes.size()); - return node_t{data}; - } - inline node_t node_at_11_(std::size_t idx) const noexcept { return storage_.node_at_(idx); /* nodes_[idx]; */ } inline neighbors_ref_t neighbors_base_(node_t node) const noexcept { return {node.neighbors_tape()}; } diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 5deb794a7..5264a3530 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -303,11 +303,6 @@ template operator()(std::size_t slot) const noexcept { /*return index_->nodes_[];*/ @@ -338,13 +330,19 @@ template node_at_(std::size_t idx) const noexcept { return (*this)(idx); } + + inline size_t node_size_bytes(std::size_t idx) const noexcept { + return node_at_(idx).node_bytes_(pre_, node_at_(idx)); + } // todo:: reserve is not thread safe if another thread is running search or insert bool reserve(std::size_t count) { + assert(nodes_mutexes_->size() == nodes_->size()); if (count < nodes_mutexes_->size()) return true; nodes_mutexes_t new_mutexes(count); *nodes_mutexes_ = std::move(new_mutexes); - nodes_->resize(count); + if (count > nodes_->size()) + nodes_->resize(count); return true; } @@ -363,17 +361,8 @@ template ; - inline span_bytes_t node_bytes_(node_t node) const noexcept { return {node.tape(), node_bytes_(node.level())}; } - static constexpr std::size_t node_head_bytes_() { return sizeof(vector_key_t) + sizeof(level_t); } - inline std::size_t 
node_neighbors_bytes_(node_t node) const noexcept { return node_neighbors_bytes_(node.level()); } - inline std::size_t node_neighbors_bytes_(level_t level) const noexcept { - return pre_.neighbors_base_bytes + pre_.neighbors_bytes * level; - } - inline std::size_t node_bytes_(level_t level) const noexcept { - return node_head_bytes_() + node_neighbors_bytes_(level); - } span_bytes_t node_malloc_(level_t level) noexcept { - std::size_t node_bytes = node_bytes_(level); + std::size_t node_bytes = node_t{}.node_bytes_(pre_, level); byte_t* data = (byte_t*)malloc(node_bytes); assert(data); @@ -400,6 +389,14 @@ template size(); if (count > nodes_mutexes_->size()) { From e2d2670f487bebe9e3716e19b823a6973bb25d86 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 2 Jan 2024 04:40:53 +0000 Subject: [PATCH 22/80] add assert and get rid of strange resizing logic --- include/usearch/index_dense.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 5264a3530..e6e1aeec5 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -341,8 +341,7 @@ template nodes_->size()) - nodes_->resize(count); + nodes_->resize(count); return true; } From d81d58d3fdee53cfcc623bcc972447bbe2b630cc Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 2 Jan 2024 05:18:00 +0000 Subject: [PATCH 23/80] Improve node sizing api --- include/usearch/index.hpp | 34 +++++++++++++++++---------------- include/usearch/index_dense.hpp | 6 ++---- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 844b32f11..cbc99db7c 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1631,6 +1631,12 @@ template class node_t { * @brief How many bytes of memory are needed to form the "head" of the node. 
*/ static constexpr std::size_t node_head_bytes_() { return sizeof(vector_key_t) + sizeof(level_t); } + inline std::size_t node_neighbors_bytes_(const precomputed_constants_t& pre, node_t node) const noexcept { + return node_neighbors_bytes_(pre, node.level()); + } + static inline std::size_t node_neighbors_bytes_(const precomputed_constants_t& pre, level_t level) noexcept { + return pre.neighbors_base_bytes + pre.neighbors_bytes * level; + } public: using vector_key_t = key_at; @@ -1640,18 +1646,14 @@ template class node_t { byte_t* neighbors_tape() const noexcept { return tape_ + node_head_bytes_(); } explicit operator bool() const noexcept { return tape_; } - inline span_bytes_t node_bytes_(const precomputed_constants_t& pre, node_t node) const noexcept { - return {node.tape(), node_bytes_(pre, node.level())}; - } - - inline std::size_t node_bytes_(const precomputed_constants_t& pre, level_t level) const noexcept { - return node_head_bytes_() + node_neighbors_bytes_(pre, level); + inline span_bytes_t node_bytes(const precomputed_constants_t& pre) const noexcept { + return {tape(), node_size_bytes(pre, level())}; } - inline std::size_t node_neighbors_bytes_(const precomputed_constants_t& pre, node_t node) const noexcept { - return node_neighbors_bytes_(pre, node.level()); + inline std::size_t node_size_bytes(const precomputed_constants_t& pre) noexcept { + return node_head_bytes_() + node_neighbors_bytes_(pre, level()); } - inline std::size_t node_neighbors_bytes_(const precomputed_constants_t& pre, level_t level) const noexcept { - return pre.neighbors_base_bytes + pre.neighbors_bytes * level; + static inline std::size_t node_size_bytes(const precomputed_constants_t& pre, level_t level) noexcept { + return node_head_bytes_() + node_neighbors_bytes_(pre, level); } node_t() = default; @@ -2466,7 +2468,7 @@ class index_gt { node_t node = storage_.node_at_(old_slot); level_t node_level = node.level(); - span_bytes_t node_bytes = node.node_bytes_(pre_, node); + 
span_bytes_t node_bytes = node.node_bytes(pre_); std::memset(node_bytes.data(), 0, node_bytes.size()); node.level(node_level); @@ -2703,7 +2705,7 @@ class index_gt { return total; } - std::size_t memory_usage_per_node(level_t level) const noexcept { return node_t{}.node_bytes_(pre_, level); } + std::size_t memory_usage_per_node(level_t level) const noexcept { return node_t::node_size_bytes(pre_, level); } #pragma endregion @@ -2716,7 +2718,7 @@ class index_gt { std::size_t neighbors_length = 0; for (std::size_t i = 0; i != size(); ++i) - neighbors_length += node_bytes_(pre_, storage_.node_at_(i).level()) + sizeof(level_t); + neighbors_length += node_t::node_size_bytes(pre_, storage_.node_at_(i).level()) + sizeof(level_t); return sizeof(index_serialized_header_t) + neighbors_length; } @@ -2756,7 +2758,7 @@ class index_gt { // After that dump the nodes themselves for (std::size_t i = 0; i != header.size; ++i) { - span_bytes_t node_bytes = node_t{}.node_bytes_(pre_, storage_.node_at_(i)); + span_bytes_t node_bytes = storage_.node_at_(i).node_bytes(pre_); if (!output(node_bytes.data(), node_bytes.size())) return result.failed("Failed to serialize into stream"); if (!progress(++processed, total)) @@ -2972,9 +2974,9 @@ class index_gt { misaligned_ptr_gt levels{(byte_t*)file.data() + offset + sizeof(header)}; offsets[0u] = offset + sizeof(header) + sizeof(level_t) * header.size; for (std::size_t i = 1; i < header.size; ++i) - offsets[i] = offsets[i - 1] + node_t{}.node_bytes_(pre_, levels[i - 1]); + offsets[i] = offsets[i - 1] + node_t::node_size_bytes(pre_, levels[i - 1]); - std::size_t total_bytes = offsets[header.size - 1] + node_t{}.node_bytes_(pre_, levels[header.size - 1]); + std::size_t total_bytes = offsets[header.size - 1] + node_t::node_size_bytes(pre_, levels[header.size - 1]); if (file.size() < total_bytes) { reset(); return result.failed("File is corrupted and can't fit all the nodes"); diff --git a/include/usearch/index_dense.hpp 
b/include/usearch/index_dense.hpp index e6e1aeec5..c90c48185 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -331,9 +331,7 @@ template node_at_(std::size_t idx) const noexcept { return (*this)(idx); } - inline size_t node_size_bytes(std::size_t idx) const noexcept { - return node_at_(idx).node_bytes_(pre_, node_at_(idx)); - } + inline size_t node_size_bytes(std::size_t idx) const noexcept { return node_at_(idx).node_size_bytes(pre_); } // todo:: reserve is not thread safe if another thread is running search or insert bool reserve(std::size_t count) { assert(nodes_mutexes_->size() == nodes_->size()); @@ -361,7 +359,7 @@ template ; span_bytes_t node_malloc_(level_t level) noexcept { - std::size_t node_bytes = node_t{}.node_bytes_(pre_, level); + std::size_t node_bytes = node_t::node_size_bytes(pre_, level); byte_t* data = (byte_t*)malloc(node_bytes); assert(data); From f1b47f3d410af142441b836a865139404d689cdd Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 2 Jan 2024 05:38:17 +0000 Subject: [PATCH 24/80] Improve node_t interface, move precompute_ inside --- cpp/test.cpp | 2 +- include/usearch/index.hpp | 37 ++++++++++++++++++++++++--------- include/usearch/index_dense.hpp | 33 ++++++++++------------------- 3 files changed, 39 insertions(+), 33 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index c0bea4495..ac8b5d3e1 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -197,7 +197,7 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { std::printf("- templates with connectivity %zu \n", connectivity); metric_t metric{&matrix, dimensions}; index_config_t config(connectivity); - std::vector> nodes; + std::vector> nodes; bitset_gt nodes_mutexes; index_storage_t storage{&nodes, &nodes_mutexes, config}; index_typed_t index_typed(storage, config); diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index cbc99db7c..bddca2f18 100644 --- a/include/usearch/index.hpp +++ 
b/include/usearch/index.hpp @@ -1624,14 +1624,14 @@ struct precomputed_constants_t { * then the { `neighbors_count_t`, `compressed_slot_t`, `compressed_slot_t` ... } sequences * for @b each-level. */ -template class node_t { +template class node_at { byte_t* tape_{}; /** * @brief How many bytes of memory are needed to form the "head" of the node. */ static constexpr std::size_t node_head_bytes_() { return sizeof(vector_key_t) + sizeof(level_t); } - inline std::size_t node_neighbors_bytes_(const precomputed_constants_t& pre, node_t node) const noexcept { + inline std::size_t node_neighbors_bytes_(const precomputed_constants_t& pre, node_at node) const noexcept { return node_neighbors_bytes_(pre, node.level()); } static inline std::size_t node_neighbors_bytes_(const precomputed_constants_t& pre, level_t level) noexcept { @@ -1640,8 +1640,15 @@ template class node_t { public: using vector_key_t = key_at; + using slot_t = slot_at; + /** + * @brief Integer for the number of node neighbors at a specific level of the + * multi-level graph. It's selected to be `std::uint32_t` to improve the + * alignment in most common cases. + */ + using neighbors_count_t = std::uint32_t; using span_bytes_t = span_gt; - explicit node_t(byte_t* tape) noexcept : tape_(tape) {} + explicit node_at(byte_t* tape) noexcept : tape_(tape) {} byte_t* tape() const noexcept { return tape_; } byte_t* neighbors_tape() const noexcept { return tape_ + node_head_bytes_(); } explicit operator bool() const noexcept { return tape_; } @@ -1656,9 +1663,19 @@ template class node_t { return node_head_bytes_() + node_neighbors_bytes_(pre, level); } - node_t() = default; - node_t(node_t const&) = default; - node_t& operator=(node_t const&) = default; + inline static precomputed_constants_t precompute_(index_config_t const& config) noexcept { + precomputed_constants_t pre; + // todo:: ask-Ashot:: inverse_log_connectivity does not really belong here, but the other two do. + // maybe we can separate these?
+ pre.inverse_log_connectivity = 1.0 / std::log(static_cast(config.connectivity)); + pre.neighbors_bytes = config.connectivity * sizeof(slot_t) + sizeof(neighbors_count_t); + pre.neighbors_base_bytes = config.connectivity_base * sizeof(slot_t) + sizeof(neighbors_count_t); + return pre; + } + + node_at() = default; + node_at(node_at const&) = default; + node_at& operator=(node_at const&) = default; misaligned_ref_gt ckey() const noexcept { return {tape_}; } misaligned_ref_gt key() const noexcept { return {tape_}; } @@ -1668,8 +1685,9 @@ template class node_t { void level(level_t v) noexcept { return misaligned_store(tape_ + sizeof(vector_key_t), v); } }; -static_assert(std::is_trivially_copy_constructible>::value, "Nodes must be light!"); -static_assert(std::is_trivially_destructible>::value, "Nodes must be light!"); +static_assert(std::is_trivially_copy_constructible>::value, + "Nodes must be light!"); +static_assert(std::is_trivially_destructible>::value, "Nodes must be light!"); /** * @brief Approximate Nearest Neighbors Search @b index-structure using the @@ -1772,8 +1790,7 @@ class index_gt { using member_cref_t = member_cref_gt; using member_ref_t = member_ref_gt; - template using o_node_t = node_t; - using node_t = node_t; + using node_t = node_at; template class member_iterator_gt { using ref_t = ref_at; diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index c90c48185..30e510fe0 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -281,18 +281,13 @@ inline index_dense_metadata_result_t index_dense_metadata_from_buffer(memory_map return result.failed("Not a dense USearch index!"); } -template class storage_proxy_t { - using vector_key_t = storage_proxy_key_t; +template class storage_proxy_t { + using vector_key_t = key_at; + using node_t = node_at; using dynamic_allocator_t = aligned_allocator_gt; // using nodes_mutexes_t = bitset_gt; using nodes_mutexes_t = bitset_gt<>; - using nodes_t = 
std::vector>; - /** - * @brief Integer for the number of node neighbors at a specific level of the - * multi-level graph. It's selected to be `std::uint32_t` to improve the - * alignment in most common cases. - */ - using neighbors_count_t = std::uint32_t; + using nodes_t = std::vector; nodes_t* nodes_{}; index_config_t config_{}; @@ -306,30 +301,22 @@ template (config.connectivity)); - pre.neighbors_bytes = config.connectivity * sizeof(compressed_slot_at) + sizeof(neighbors_count_t); - pre.neighbors_base_bytes = config.connectivity_base * sizeof(compressed_slot_at) + sizeof(neighbors_count_t); - return pre; - } - public: storage_proxy_t(nodes_t* nodes, nodes_mutexes_t* nodes_mutexes, index_config_t config) noexcept { nodes_ = nodes; nodes_mutexes_ = nodes_mutexes; - pre_ = precompute_(config); + pre_ = node_t::precompute_(config); config_ = config; } // warning: key_t is used in sys/types.h - inline node_t operator()(std::size_t slot) const noexcept { /*return index_->nodes_[];*/ + inline node_t operator()(std::size_t slot) const noexcept { /*return index_->nodes_[];*/ nodes_t v = *nodes_; usearch_assert_m(slot < v.size(), "Storage node index out of bounds"); return v[slot]; } - inline node_t node_at_(std::size_t idx) const noexcept { return (*this)(idx); } + inline node_t node_at_(std::size_t idx) const noexcept { return (*this)(idx); } inline size_t node_size_bytes(std::size_t idx) const noexcept { return node_at_(idx).node_size_bytes(pre_); } // todo:: reserve is not thread safe if another thread is running search or insert @@ -357,7 +344,6 @@ template ; // todo:: make these private - using node_t = node_t; span_bytes_t node_malloc_(level_t level) noexcept { std::size_t node_bytes = node_t::node_size_bytes(pre_, level); byte_t* data = (byte_t*)malloc(node_bytes); @@ -457,7 +443,10 @@ class index_dense_gt { using key_t = vector_key_t; using compressed_slot_t = compressed_slot_at; using distance_t = distance_punned_t; + // todo:: relationship betwen storage_t and 
node_t is strange + // have to define the type twice.. storage_proxy_ assumes storage is in node_ts using storage_t = storage_proxy_t; + using node_t = node_at; using metric_t = metric_punned_t; using member_ref_t = member_ref_gt; @@ -536,7 +525,7 @@ class index_dense_gt { mutable std::vector vectors_lookup_; /// @brief C-style array of `node_t` smart-pointers. - std::vector> nodes_; + std::vector nodes_; std::mutex vector_mutex_; bitset_t nodes_mutexes_; storage_t storage_{&nodes_, &nodes_mutexes_, config_}; From b48a6577dd2d0f17c7b1114bf1478c0554aeb4bf Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 2 Jan 2024 05:47:23 +0000 Subject: [PATCH 25/80] Clean node allocation API --- include/usearch/index.hpp | 9 ++++----- include/usearch/index_dense.hpp | 24 +----------------------- 2 files changed, 5 insertions(+), 28 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index bddca2f18..0fa3f5410 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2384,9 +2384,8 @@ class index_gt { } // Allocate the neighbors - // nodes_[new_slot] = node; - storage_.node_append_(new_slot, key, target_level); - node_t node = storage_.node_at_(new_slot); + node_t node = storage_.node_make_(key, target_level); + storage_.node_store(new_slot, node); if (!node) { nodes_count_.fetch_sub(1); return result.failed("Out of memory!"); @@ -2837,7 +2836,7 @@ class index_gt { return result.failed("Failed to pull nodes from the stream"); } // nodes_[i] = node_t{node_bytes.data()}; - storage_.node_append_(i, node_t{node_bytes.data()}); + storage_.node_store(i, node_t{node_bytes.data()}); if (!progress(i + 1, header.size)) return result.failed("Terminated by user"); @@ -3012,7 +3011,7 @@ class index_gt { // Rapidly address all the nodes for (std::size_t i = 0; i != header.size; ++i) { - storage_.node_append_(i, node_t{(byte_t*)file.data() + offsets[i]}); + storage_.node_store(i, node_t{(byte_t*)file.data() + offsets[i]}); if (!progress(i + 1, 
header.size)) return result.failed("Terminated by user"); } diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 30e510fe0..7e6842742 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -380,31 +380,9 @@ template class storage_proxy_t { // return node_t{data}; // } - void node_append_(size_t slot, vector_key_t key, level_t level) { + void node_store(size_t slot, node_t node) noexcept { auto count = nodes_->size(); - if (count > nodes_mutexes_->size()) { - assert(false); - nodes_mutexes_t new_mutexes(count); - *nodes_mutexes_ = std::move(new_mutexes); - } - node_t* slot_ref = &(*nodes_)[slot]; - if (*slot_ref) { - assert(false); - } - *slot_ref = node_make_(key, level); - } - - void node_append_(size_t slot, node_t node) { - auto count = nodes_->size(); - if (count > nodes_mutexes_->size()) { - assert(false); - nodes_mutexes_t new_mutexes(count); - *nodes_mutexes_ = std::move(new_mutexes); - } node_t* slot_ref = &(*nodes_)[slot]; - if (*slot_ref) { - assert(false); - } *slot_ref = node; } From ee9bb58ecb3f20d14d2451de2a54f2069f03dba4 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 2 Jan 2024 08:03:48 +0000 Subject: [PATCH 26/80] Make storage pass-by reference for ergonomics --- cpp/test.cpp | 6 +- include/usearch/index.hpp | 11 +++- include/usearch/index_dense.hpp | 112 ++++++++++++++++++++++++++++++-- 3 files changed, 119 insertions(+), 10 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index ac8b5d3e1..b08cc3618 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -161,7 +161,8 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { using vector_key_t = key_at; using slot_t = slot_at; - using index_storage_t = storage_proxy_t; + // using index_storage_t = storage_proxy_t; + using index_storage_t = dummy_storage_single_threaded; using index_typed_t = index_gt; using member_cref_t = typename index_typed_t::member_cref_t; using member_citerator_t = typename
index_typed_t::member_citerator_t; @@ -199,7 +200,8 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { index_config_t config(connectivity); std::vector> nodes; bitset_gt nodes_mutexes; - index_storage_t storage{&nodes, &nodes_mutexes, config}; + // index_storage_t storage{&nodes, &nodes_mutexes, config}; + index_storage_t storage{config}; index_typed_t index_typed(storage, config); test_cosine(index_typed, matrix, metric); } diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 0fa3f5410..92e46ac8d 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1699,6 +1699,11 @@ static_assert(std::is_trivially_destructible> // +class dummy_storage_single_threaded { + using node_t = node_at; + using nodes_t = std::vector; + + nodes_t nodes_{}; + precomputed_constants_t pre_{}; + tape_allocator_at tape_allocator_; + using tape_allocator_traits_t = std::allocator_traits; + static_assert( // + sizeof(typename tape_allocator_traits_t::value_type) == 1, // + "Tape allocator must allocate separate addressable bytes"); + + public: + dummy_storage_single_threaded(index_config_t config) : pre_(node_t::precompute_(config)) {} + + inline node_t node_at_(std::size_t idx) const noexcept { return nodes_[idx]; } + + inline size_t node_size_bytes(std::size_t idx) const noexcept { return node_at_(idx).node_size_bytes(pre_); } + + bool reserve(std::size_t count) { + if (count < nodes_.size()) + return true; + nodes_.resize(count); + return true; + } + + void clear() { + if (nodes_.data()) + std::memset(nodes_.data(), 0, nodes_.size()); + } + void reset() { + nodes_.clear(); + nodes_.shrink_to_fit(); + } + + using span_bytes_t = span_gt; + + span_bytes_t node_malloc_(level_t level) noexcept { + std::size_t node_size = node_t::node_size_bytes(pre_, level); + byte_t* data = (byte_t*)tape_allocator_.allocate(node_size); + return data ? 
span_bytes_t{data, node_size} : span_bytes_t{}; + } + void node_free_(size_t slot, node_t node) { + tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); + nodes_[slot] = node_t{}; + } + node_t node_make_(key_at key, level_t level) noexcept { + span_bytes_t node_bytes = node_malloc_(level); + if (!node_bytes) + return {}; + + std::memset(node_bytes.data(), 0, node_bytes.size()); + node_t node{(byte_t*)node_bytes.data()}; + node.key(key); + node.level(level); + return node; + } + + // node_t node_make_copy_(span_bytes_t old_bytes) noexcept { + // byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); + // if (!data) + // return {}; + // std::memcpy(data, old_bytes.data(), old_bytes.size()); + // return node_t{data}; + // } + + void node_store(size_t slot, node_t node) noexcept { + auto count = nodes_.size(); + nodes_[slot] = node; + } + inline size_t size() { return nodes_.size(); } + inline int node_lock_(std::size_t) const noexcept { return 0; } +}; + +template class storage_v1 { + using vector_key_t = key_at; + using node_t = node_at; + using dynamic_allocator_t = aligned_allocator_gt; + // using nodes_mutexes_t = bitset_gt; + using nodes_mutexes_t = bitset_gt<>; + using nodes_t = std::vector; + + index_config_t config_{}; + nodes_t nodes_{}; + /// @brief Mutex, that limits concurrent access to `nodes_`. + mutable nodes_mutexes_t nodes_mutexes_{}; +}; + +template class storage_v2 { + using vector_key_t = key_at; + using node_t = node_at; + using dynamic_allocator_t = aligned_allocator_gt; + // using nodes_mutexes_t = bitset_gt; + using nodes_mutexes_t = bitset_gt<>; + using nodes_t = std::vector; +}; + template class storage_proxy_t { using vector_key_t = key_at; using node_t = node_at; @@ -423,7 +522,8 @@ class index_dense_gt { using distance_t = distance_punned_t; // todo:: relationship betwen storage_t and node_t is strange // have to define the type twice.. 
storage_proxy_ assumes storage is in node_ts - using storage_t = storage_proxy_t; + // using storage_t = storage_proxy_t; + using storage2_t = dummy_storage_single_threaded; using node_t = node_at; using metric_t = metric_punned_t; @@ -444,7 +544,7 @@ class index_dense_gt { using cast_t = std::function; /// @brief Punned index. using index_t = index_gt< // - storage_t, // + storage2_t, // distance_t, vector_key_t, compressed_slot_t, // dynamic_allocator_t, tape_allocator_t>; using index_allocator_t = aligned_allocator_gt; @@ -506,7 +606,8 @@ class index_dense_gt { std::vector nodes_; std::mutex vector_mutex_; bitset_t nodes_mutexes_; - storage_t storage_{&nodes_, &nodes_mutexes_, config_}; + // storage_t storage_{&nodes_, &nodes_mutexes_, config_}; + storage2_t storage_{config_}; /// @brief Originally forms and array of integers [0, threads], marking all mutable std::vector available_threads_; @@ -643,8 +744,9 @@ class index_dense_gt { // Available since C11, but only C++17, so we use the C version. 
index_t* raw = index_allocator_t{}.allocate(1); - result.storage_ = - storage_proxy_t{&result.nodes_, &result.nodes_mutexes_, config}; + // result.storage_ = + // storage_proxy_t{&result.nodes_, &result.nodes_mutexes_, config}; + result.storage_ = dummy_storage_single_threaded(config); new (raw) index_t(result.storage_, config); result.typed_ = raw; return result; From 73cf2a69d3606f085ba15abc05aba61727508fb8 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 2 Jan 2024 08:04:10 +0000 Subject: [PATCH 27/80] Add exchange fix an Q for Ashot --- include/usearch/index_dense.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 7987c0447..0af5d7599 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -670,7 +670,11 @@ class index_dense_gt { index_dense_gt(index_dense_gt&& other) : config_(std::move(other.config_)), - typed_(exchange(other.typed_, nullptr)), // + // todo:: ask-Ashot: is the following change ok? 
why is it needed + // for some reason exchange stopped working after I added allocator to storage + // it was complaining about some ambiguity + // typed_(exchange(other.typed_, nullptr)), // + typed_(std::move(other.typed_)), // cast_buffer_(std::move(other.cast_buffer_)), // casts_(std::move(other.casts_)), // metric_(std::move(other.metric_)), // From 731dfa2d7f90c01c5267b9862e848a3a94af9ba3 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 2 Jan 2024 08:21:51 +0000 Subject: [PATCH 28/80] Get rid of underscores in storage function names --- include/usearch/index.hpp | 84 +++++++++++++++------------- include/usearch/index_dense.hpp | 15 +++--- 2 files changed, 45 insertions(+), 54 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 92e46ac8d..197c5e262 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1703,7 +1703,10 @@ static_assert(std::is_trivially_destructiblestorage_.node_at_(slot_).key(), slot_}; } - vector_key_t key() const noexcept { return index_->storage_.node_at_(slot_).key(); } + reference operator*() const noexcept { return {index_->storage_.node_at(slot_).key(), slot_}; } + vector_key_t key() const noexcept { return index_->storage_.node_at(slot_).key(); } friend inline std::size_t get_slot(member_iterator_gt const& it) noexcept { return it.slot_; } friend inline vector_key_t get_key(member_iterator_gt const& it) noexcept { return it.key(); }
} member_iterator_t iterator_at(std::size_t slot) noexcept { return {this, slot}; } member_citerator_t citerator_at(std::size_t slot) const noexcept { return {this, slot}; } @@ -2093,9 +2096,9 @@ class index_gt { if (!viewed_file_) { std::size_t n = nodes_count_; for (std::size_t i = 0; i != n; ++i) { - node_t node = storage_.node_at_(i); + node_t node = storage_.node_at(i); // if (!has_reset()) { - storage_.node_free_(i, node); + storage_.node_free(i, node); // } else // tape_allocator_.deallocate(nullptr, 0); } @@ -2155,6 +2158,7 @@ class index_gt { * @brief Increases the `capacity()` of the index to allow adding more vectors. * @return `true` on success, `false` on memory allocation errors. */ + // todo:: reserve is not thread safe if another thread is running search or insert bool reserve(index_limits_t limits) usearch_noexcept_m { if (limits.threads_add <= limits_.threads_add // @@ -2270,7 +2274,7 @@ class index_gt { candidate_t const* top_ordered = top_->data(); candidate_t candidate = top_ordered[i]; // node_t node = nodes_[candidate.slot]; - node_t node = storage_->node_at_(candidate.slot); + node_t node = storage_->node_at(candidate.slot); return {member_cref_t{node.ckey(), candidate.slot}, candidate.distance}; } inline std::size_t merge_into( // @@ -2389,7 +2393,7 @@ class index_gt { } // Allocate the neighbors - node_t node = storage_.node_make_(key, target_level); + node_t node = storage_.node_make(key, target_level); storage_.node_store(new_slot, node); if (!node) { nodes_count_.fetch_sub(1); @@ -2401,7 +2405,7 @@ class index_gt { result.new_size = new_slot + 1; result.slot = new_slot; callback(at(new_slot)); - auto new_lock = storage_.node_lock_(new_slot); + auto new_lock = storage_.node_lock(new_slot); // Do nothing for the first element if (!new_slot) { @@ -2485,8 +2489,8 @@ class index_gt { if (!next.reserve(config.expansion)) return result.failed("Out of memory!"); - auto new_lock = storage_.node_lock_(old_slot); - node_t node = 
storage_.node_at_(old_slot); + auto new_lock = storage_.node_lock(old_slot); + node_t node = storage_.node_at(old_slot); level_t node_level = node.level(); span_bytes_t node_bytes = node.node_bytes(pre_); @@ -2638,7 +2642,7 @@ class index_gt { stats_t result{}; for (std::size_t i = 0; i != size(); ++i) { - node_t node = storage_.node_at_(i); + node_t node = storage_.node_at(i); std::size_t max_edges = node.level() * config_.connectivity + config_.connectivity_base; std::size_t edges = 0; for (level_t level = 0; level <= node.level(); ++level) @@ -2657,7 +2661,7 @@ class index_gt { std::size_t neighbors_bytes = !level ? pre_.neighbors_base_bytes : pre_.neighbors_bytes; for (std::size_t i = 0; i != size(); ++i) { - node_t node = storage_.node_at_(i); + node_t node = storage_.node_at(i); if (static_cast(node.level()) < level) continue; @@ -2675,7 +2679,7 @@ class index_gt { std::size_t head_bytes = node_head_bytes_(); for (std::size_t i = 0; i != size(); ++i) { - node_t node = storage_.node_at_(i); + node_t node = storage_.node_at(i); stats_per_level[0].nodes++; stats_per_level[0].edges += neighbors_(node, 0).size(); @@ -2739,7 +2743,7 @@ class index_gt { std::size_t neighbors_length = 0; for (std::size_t i = 0; i != size(); ++i) - neighbors_length += node_t::node_size_bytes(pre_, storage_.node_at_(i).level()) + sizeof(level_t); + neighbors_length += node_t::node_size_bytes(pre_, storage_.node_at(i).level()) + sizeof(level_t); return sizeof(index_serialized_header_t) + neighbors_length; } @@ -2769,7 +2773,7 @@ class index_gt { // That is both enough to estimate the overall memory consumption, // and to be able to estimate the offsets of every entry in the file. 
for (std::size_t i = 0; i != header.size; ++i) { - node_t node = storage_.node_at_(i); + node_t node = storage_.node_at(i); level_t level = node.level(); if (!output(&level, sizeof(level))) return result.failed("Failed to serialize into stream"); @@ -2779,7 +2783,7 @@ class index_gt { // After that dump the nodes themselves for (std::size_t i = 0; i != header.size; ++i) { - span_bytes_t node_bytes = storage_.node_at_(i).node_bytes(pre_); + span_bytes_t node_bytes = storage_.node_at(i).node_bytes(pre_); if (!output(node_bytes.data(), node_bytes.size())) return result.failed("Failed to serialize into stream"); if (!progress(++processed, total)) @@ -2835,7 +2839,7 @@ class index_gt { // Load the nodes for (std::size_t i = 0; i != header.size; ++i) { - span_bytes_t node_bytes = storage_.node_malloc_(levels[i]); + span_bytes_t node_bytes = storage_.node_malloc(levels[i]); if (!input(node_bytes.data(), node_bytes.size())) { reset(); return result.failed("Failed to pull nodes from the stream"); @@ -3091,14 +3095,14 @@ class index_gt { // Erase all the incoming links std::size_t nodes_count = size(); executor.dynamic(nodes_count, [&](std::size_t thread_idx, std::size_t node_idx) { - node_t node = node_at_(node_idx); + node_t node = node_at(node_idx); for (level_t level = 0; level <= node.level(); ++level) { neighbors_ref_t neighbors = neighbors_(node, level); std::size_t old_size = neighbors.size(); neighbors.clear(); for (std::size_t i = 0; i != old_size; ++i) { compressed_slot_t neighbor_slot = neighbors[i]; - node_t neighbor = node_at_(neighbor_slot); + node_t neighbor = node_at(neighbor_slot); if (allow_member(member_cref_t{neighbor.ckey(), neighbor_slot})) neighbors.push_back(neighbor_slot); } @@ -3124,7 +3128,6 @@ class index_gt { return pre; } - inline node_t node_at_11_(std::size_t idx) const noexcept { return storage_.node_at_(idx); /* nodes_[idx]; */ } inline neighbors_ref_t neighbors_base_(node_t node) const noexcept { return {node.neighbors_tape()}; } inline 
neighbors_ref_t neighbors_non_base_(node_t node, level_t level) const noexcept { @@ -3135,19 +3138,6 @@ class index_gt { return level ? neighbors_non_base_(node, level) : neighbors_base_(node); } - // struct node_lock_t { - // nodes_mutexes_t& mutexes; - // std::size_t slot; - // inline ~node_lock_t() noexcept { mutexes.atomic_reset(slot); } - // }; - - // inline node_lock_t node_lock_(std::size_t slot) const noexcept { - // while (nodes_mutexes_.atomic_set(slot)) - // ; - // return {nodes_mutexes_, slot}; - // } - // ^^^ move these to storage - template void connect_node_across_levels_( // value_at&& value, metric_at&& metric, prefetch_at&& prefetch, // @@ -3173,7 +3163,7 @@ class index_gt { metric_at&& metric, // std::size_t new_slot, level_t level, context_t& context) usearch_noexcept_m { - node_t new_node = storage_.node_at_(new_slot); + node_t new_node = storage_.node_at(new_slot); top_candidates_t& top = context.top_candidates; // Outgoing links from `new_slot`: @@ -3184,7 +3174,7 @@ class index_gt { for (std::size_t idx = 0; idx != top_view.size(); idx++) { usearch_assert_m(!new_neighbors[idx], "Possible memory corruption"); - usearch_assert_m(level <= storage_.node_at_(top_view[idx].slot).level(), "Linking to missing level"); + usearch_assert_m(level <= storage_.node_at(top_view[idx].slot).level(), "Linking to missing level"); new_neighbors.push_back(top_view[idx].slot); } } @@ -3197,7 +3187,7 @@ class index_gt { metric_at&& metric, // std::size_t new_slot, value_at&& value, level_t level, context_t& context) usearch_noexcept_m { - node_t new_node = storage_.node_at_(new_slot); + node_t new_node = storage_.node_at(new_slot); top_candidates_t& top = context.top_candidates; neighbors_ref_t new_neighbors = neighbors_(new_node, level); @@ -3210,8 +3200,8 @@ class index_gt { // I chose auto here to allow storage define its own lock smart pointer, without making assumptions // about it here. BUt are there cases where, e.g. 
auto will pick up the lock in the wrong way and instantly // drop it for example? - auto close_lock = storage_.node_lock_(close_slot); - node_t close_node = storage_.node_at_(close_slot); + auto close_lock = storage_.node_lock(close_slot); + node_t close_node = storage_.node_at(close_slot); neighbors_ref_t close_header = neighbors_(close_node, level); usearch_assert_m(close_header.size() <= connectivity_max, "Possible corruption"); @@ -3293,7 +3283,7 @@ class index_gt { bool operator==(candidates_iterator_t const& other) noexcept { return current_ == other.current_; } bool operator!=(candidates_iterator_t const& other) noexcept { return current_ != other.current_; } - // vector_key_t key() const noexcept { return index_->node_at_(slot()).key(); } + // vector_key_t key() const noexcept { return index_->node_at(slot()).key(); } compressed_slot_t slot() const noexcept { return neighbors_[current_]; } friend inline std::size_t get_slot(candidates_iterator_t const& it) noexcept { return it.slot(); } friend inline vector_key_t get_key(candidates_iterator_t const& it) noexcept { return it.key(); } @@ -3328,8 +3318,8 @@ class index_gt { bool changed; do { changed = false; - auto closest_lock = storage_.node_lock_(closest_slot); - neighbors_ref_t closest_neighbors = neighbors_non_base_(storage_.node_at_(closest_slot), level); + auto closest_lock = storage_.node_lock(closest_slot); + neighbors_ref_t closest_neighbors = neighbors_non_base_(storage_.node_at(closest_slot), level); using vvv = typename std::decay::type::vector_key_t; static_assert(std::is_same::value, "this cannot happen"); @@ -3397,8 +3387,8 @@ class index_gt { compressed_slot_t candidate_slot = candidacy.slot; if (new_slot == candidate_slot) continue; - node_t candidate_ref = storage_.node_at_(candidate_slot); - auto candidate_lock = storage_.node_lock_(candidate_slot); + node_t candidate_ref = storage_.node_at(candidate_slot); + auto candidate_lock = storage_.node_lock(candidate_slot); neighbors_ref_t 
candidate_neighbors = neighbors_(candidate_ref, level); // Optional prefetching @@ -3468,7 +3458,7 @@ class index_gt { next.pop(); context.iteration_cycles++; - neighbors_ref_t candidate_neighbors = neighbors_base_(storage_.node_at_(candidate.slot)); + neighbors_ref_t candidate_neighbors = neighbors_base_(storage_.node_at(candidate.slot)); // Optional prefetching if (!is_dummy()) { @@ -3489,7 +3479,7 @@ class index_gt { // This can substantially grow our priority queue: next.insert({-successor_dist, successor_slot}); if (!is_dummy()) - if (!predicate(member_cref_t{storage_.node_at_(successor_slot).ckey(), successor_slot})) + if (!predicate(member_cref_t{storage_.node_at(successor_slot).ckey(), successor_slot})) continue; // This will automatically evict poor matches: diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 0af5d7599..f6f564234 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -298,9 +298,9 @@ class dummy_storage_single_threaded { public: dummy_storage_single_threaded(index_config_t config) : pre_(node_t::precompute_(config)) {} - inline node_t node_at_(std::size_t idx) const noexcept { return nodes_[idx]; } + inline node_t node_at(std::size_t idx) const noexcept { return nodes_[idx]; } - inline size_t node_size_bytes(std::size_t idx) const noexcept { return node_at_(idx).node_size_bytes(pre_); } + inline size_t node_size_bytes(std::size_t idx) const noexcept { return node_at(idx).node_size_bytes(pre_); } bool reserve(std::size_t count) { if (count < nodes_.size()) @@ -320,17 +320,17 @@ class dummy_storage_single_threaded { using span_bytes_t = span_gt; - span_bytes_t node_malloc_(level_t level) noexcept { + span_bytes_t node_malloc(level_t level) noexcept { std::size_t node_size = node_t::node_size_bytes(pre_, level); byte_t* data = (byte_t*)tape_allocator_.allocate(node_size); return data ? 
span_bytes_t{data, node_size} : span_bytes_t{}; } - void node_free_(size_t slot, node_t node) { + void node_free(size_t slot, node_t node) { tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); nodes_[slot] = node_t{}; } - node_t node_make_(key_at key, level_t level) noexcept { - span_bytes_t node_bytes = node_malloc_(level); + node_t node_make(key_at key, level_t level) noexcept { + span_bytes_t node_bytes = node_malloc(level); if (!node_bytes) return {}; @@ -354,7 +354,8 @@ class dummy_storage_single_threaded { nodes_[slot] = node; } inline size_t size() { return nodes_.size(); } - inline int node_lock_(std::size_t) const noexcept { return 0; } + // dummy lock just to satisfy the interface + constexpr inline int node_lock(std::size_t) noexcept { return 0; } }; template class storage_v1 { From a130ae8e8b7f89679cd65a3f3daffabd0dee8e7f Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 2 Jan 2024 09:21:42 +0000 Subject: [PATCH 29/80] Get rid of tape_allocator from index and improve dummy storage --- include/usearch/index.hpp | 38 ++----- include/usearch/index_dense.hpp | 186 +++++++++----------------------- 2 files changed, 64 insertions(+), 160 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 197c5e262..633ad55c6 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1723,10 +1723,6 @@ static_assert(std::is_trivially_destructible, // - typename tape_allocator_at = dynamic_allocator_at> // + typename dynamic_allocator_at = std::allocator> // class index_gt { public: using storage_t = storage_at; + using node_lock_t = typename storage_t::lock_type; using distance_t = distance_at; using vector_key_t = key_at; using key_t = vector_key_t; using compressed_slot_t = compressed_slot_at; using dynamic_allocator_t = dynamic_allocator_at; - using tape_allocator_t = tape_allocator_at; using span_bytes_t = span_gt; static_assert(sizeof(vector_key_t) >= sizeof(compressed_slot_t), "Having tiny keys 
doesn't make sense."); @@ -1863,11 +1858,6 @@ class index_gt { sizeof(byte_t) == 1, // "Primary allocator must allocate separate addressable bytes"); - using tape_allocator_traits_t = std::allocator_traits; - static_assert( // - sizeof(typename tape_allocator_traits_t::value_type) == 1, // - "Tape allocator must allocate separate addressable bytes"); - private: /** * @brief Integer for the number of node neighbors at a specific level of the @@ -1969,7 +1959,6 @@ class index_gt { index_limits_t limits_{}; mutable dynamic_allocator_t dynamic_allocator_{}; - tape_allocator_t tape_allocator_{}; precomputed_constants_t pre_{}; memory_mapped_file_t viewed_file_{}; @@ -2015,16 +2004,14 @@ class index_gt { */ explicit index_gt( // storage_at& storage, // - index_config_t config = {}, dynamic_allocator_t dynamic_allocator = {}, - tape_allocator_t tape_allocator = {}) noexcept + index_config_t config = {}, dynamic_allocator_t dynamic_allocator = {}) noexcept : storage_(storage), config_(config), limits_(0, 0), dynamic_allocator_(std::move(dynamic_allocator)), - tape_allocator_(std::move(tape_allocator)), pre_(precompute_(config)), nodes_count_(0u), max_level_(-1), - entry_slot_(0u), contexts_() {} + pre_(precompute_(config)), nodes_count_(0u), max_level_(-1), entry_slot_(0u), contexts_() {} /** * @brief Clones the structure with the same hyper-parameters, but without contents. 
*/ - index_gt fork() noexcept { return index_gt{config_, dynamic_allocator_, tape_allocator_}; } + index_gt fork() noexcept { return index_gt{config_, dynamic_allocator_}; } ~index_gt() noexcept { reset(); } @@ -2049,7 +2036,7 @@ class index_gt { copy_result_t copy(index_copy_config_t config = {}) const noexcept { copy_result_t result; index_gt& other = result.index; - other = index_gt(config_, dynamic_allocator_, tape_allocator_); + other = index_gt(config_, dynamic_allocator_); if (!other.reserve(limits_)) return result.failed("Failed to reserve the contexts"); @@ -2082,7 +2069,6 @@ class index_gt { member_citerator_t citerator_at(std::size_t slot) const noexcept { return {this, slot}; } dynamic_allocator_t const& dynamic_allocator() const noexcept { return dynamic_allocator_; } - tape_allocator_t const& tape_allocator() const noexcept { return tape_allocator_; } #pragma region Adjusting Configuration @@ -2126,7 +2112,6 @@ class index_gt { limits_ = index_limits_t{0, 0}; nodes_capacity_ = 0; viewed_file_ = memory_mapped_file_t{}; - tape_allocator_ = {}; } /** @@ -2136,7 +2121,6 @@ class index_gt { std::swap(config_, other.config_); std::swap(limits_, other.limits_); std::swap(dynamic_allocator_, other.dynamic_allocator_); - std::swap(tape_allocator_, other.tape_allocator_); std::swap(pre_, other.pre_); std::swap(viewed_file_, other.viewed_file_); std::swap(max_level_, other.max_level_); @@ -2405,7 +2389,7 @@ class index_gt { result.new_size = new_slot + 1; result.slot = new_slot; callback(at(new_slot)); - auto new_lock = storage_.node_lock(new_slot); + node_lock_t new_lock = storage_.node_lock(new_slot); // Do nothing for the first element if (!new_slot) { @@ -2489,7 +2473,7 @@ class index_gt { if (!next.reserve(config.expansion)) return result.failed("Out of memory!"); - auto new_lock = storage_.node_lock(old_slot); + node_lock_t new_lock = storage_.node_lock(old_slot); node_t node = storage_.node_at(old_slot); level_t node_level = node.level(); @@ -3200,7 
+3184,7 @@ class index_gt { // I chose auto here to allow storage define its own lock smart pointer, without making assumptions // about it here. BUt are there cases where, e.g. auto will pick up the lock in the wrong way and instantly // drop it for example? - auto close_lock = storage_.node_lock(close_slot); + node_lock_t close_lock = storage_.node_lock(close_slot); node_t close_node = storage_.node_at(close_slot); neighbors_ref_t close_header = neighbors_(close_node, level); @@ -3318,7 +3302,7 @@ class index_gt { bool changed; do { changed = false; - auto closest_lock = storage_.node_lock(closest_slot); + node_lock_t closest_lock = storage_.node_lock(closest_slot); neighbors_ref_t closest_neighbors = neighbors_non_base_(storage_.node_at(closest_slot), level); using vvv = typename std::decay::type::vector_key_t; @@ -3388,7 +3372,7 @@ class index_gt { if (new_slot == candidate_slot) continue; node_t candidate_ref = storage_.node_at(candidate_slot); - auto candidate_lock = storage_.node_lock(candidate_slot); + node_lock_t candidate_lock = storage_.node_lock(candidate_slot); neighbors_ref_t candidate_neighbors = neighbors_(candidate_ref, level); // Optional prefetching diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index f6f564234..a8cea6572 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -281,6 +281,26 @@ inline index_dense_metadata_result_t index_dense_metadata_from_buffer(memory_map return result.failed("Not a dense USearch index!"); } +/** + * @brief Storage abstraction for HNSW graph and associated vector data + * + * @tparam key_at + * The type of primary objects stored in the index. + * The values, to which those map, are not managed by the same index structure. + * + * @tparam compressed_slot_at + * The smallest unsigned integer type to address indexed elements. + * It is used internally to maximize space-efficiency and is generally + * up-casted to @b `std::size_t` in public interfaces. 
+ * Can be a built-in @b `uint32_t`, `uint64_t`, or our custom @b `uint40_t`. + * Which makes the most sense for 4B+ entry indexes. + * + * @tparam tape_allocator_at + * Potentially different memory allocator for primary allocations of nodes and vectors. + * It would never `deallocate` separate entries, and would only free all the space at once. + * The allocated buffers may be uninitialized. + * + **/ template > // class dummy_storage_single_threaded { @@ -289,19 +309,33 @@ class dummy_storage_single_threaded { nodes_t nodes_{}; precomputed_constants_t pre_{}; - tape_allocator_at tape_allocator_; + tape_allocator_at tape_allocator_{}; using tape_allocator_traits_t = std::allocator_traits; static_assert( // sizeof(typename tape_allocator_traits_t::value_type) == 1, // "Tape allocator must allocate separate addressable bytes"); public: - dummy_storage_single_threaded(index_config_t config) : pre_(node_t::precompute_(config)) {} + dummy_storage_single_threaded(index_config_t config, tape_allocator_at tape_allocator = {}) + : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} inline node_t node_at(std::size_t idx) const noexcept { return nodes_[idx]; } inline size_t node_size_bytes(std::size_t idx) const noexcept { return node_at(idx).node_size_bytes(pre_); } + // exported for client-side lock-declaration + // alternatively, could just use auto in client side + // ideally, there would be a way to make this "void", but I could not make it work + // as client side ends up declaring a void variable + // the downside of passing a primitive like "int" here is the "unused variable" compiler warning + // for the dummy lock guard variable. + struct dummy_lock { + // destructor necessary to avoid "unused variable warning" + // will this get properly optimized away? 
+ ~dummy_lock() {} + }; + using lock_type = dummy_lock; + bool reserve(std::size_t count) { if (count < nodes_.size()) return true; @@ -326,7 +360,11 @@ class dummy_storage_single_threaded { return data ? span_bytes_t{data, node_size} : span_bytes_t{}; } void node_free(size_t slot, node_t node) { - tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); + if (!has_reset()) { + tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); + } else { + tape_allocator_.deallocate(nullptr, 0); + } nodes_[slot] = node_t{}; } node_t node_make(key_at key, level_t level) noexcept { @@ -354,8 +392,9 @@ class dummy_storage_single_threaded { nodes_[slot] = node; } inline size_t size() { return nodes_.size(); } + tape_allocator_at const& node_allocator() const noexcept { return tape_allocator_; } // dummy lock just to satisfy the interface - constexpr inline int node_lock(std::size_t) noexcept { return 0; } + constexpr inline lock_type node_lock(std::size_t) noexcept { return dummy_lock{}; } }; template class storage_v1 { @@ -381,119 +420,6 @@ template class storage_v2 { using nodes_t = std::vector; }; -template class storage_proxy_t { - using vector_key_t = key_at; - using node_t = node_at; - using dynamic_allocator_t = aligned_allocator_gt; - // using nodes_mutexes_t = bitset_gt; - using nodes_mutexes_t = bitset_gt<>; - using nodes_t = std::vector; - - nodes_t* nodes_{}; - index_config_t config_{}; - /// @brief Mutex, that limits concurrent access to `nodes_`. 
- mutable nodes_mutexes_t* nodes_mutexes_{}; - struct node_lock_t { - nodes_mutexes_t& mutexes; - std::size_t slot; - inline ~node_lock_t() noexcept { mutexes.atomic_reset(slot); } - }; - - precomputed_constants_t pre_{}; - - public: - storage_proxy_t(nodes_t* nodes, nodes_mutexes_t* nodes_mutexes, index_config_t config) noexcept { - nodes_ = nodes; - nodes_mutexes_ = nodes_mutexes; - pre_ = node_t::precompute_(config); - config_ = config; - } - - // warning: key_t is used in sys/types.h - inline node_t operator()(std::size_t slot) const noexcept { /*return index_->nodes_[];*/ - nodes_t v = *nodes_; - usearch_assert_m(slot < v.size(), "Storage node index out of bounds"); - return v[slot]; - } - - inline node_t node_at_(std::size_t idx) const noexcept { return (*this)(idx); } - - inline size_t node_size_bytes(std::size_t idx) const noexcept { return node_at_(idx).node_size_bytes(pre_); } - // todo:: reserve is not thread safe if another thread is running search or insert - bool reserve(std::size_t count) { - assert(nodes_mutexes_->size() == nodes_->size()); - if (count < nodes_mutexes_->size()) - return true; - nodes_mutexes_t new_mutexes(count); - *nodes_mutexes_ = std::move(new_mutexes); - nodes_->resize(count); - return true; - } - - void clear() { - nodes_mutexes_->clear(); - if (nodes_->data()) - std::memset(nodes_->data(), 0, nodes_->size()); - } - void reset() { - *nodes_mutexes_ = {}; - nodes_->clear(); - nodes_->shrink_to_fit(); - } - - using span_bytes_t = span_gt; - - // todo:: make these private - span_bytes_t node_malloc_(level_t level) noexcept { - std::size_t node_bytes = node_t::node_size_bytes(pre_, level); - byte_t* data = (byte_t*)malloc(node_bytes); - assert(data); - - std::memset(data, 0, node_bytes); - return data ? 
span_bytes_t{data, node_bytes} : span_bytes_t{}; - } - void node_free_(size_t slot, node_t node) { - free(node.tape()); - (*nodes_)[slot] = node_t{}; - // assert(false); - // tape_allocator_.deallocate(node.tape(), node_bytes_(node).size()); - // node = node_t{}; - } - - node_t node_make_(vector_key_t key, level_t level) noexcept { - span_bytes_t node_bytes = node_malloc_(level); - if (!node_bytes) - return {}; - - std::memset(node_bytes.data(), 0, node_bytes.size()); - node_t node{(byte_t*)node_bytes.data()}; - node.key(key); - node.level(level); - return node; - } - - // node_t node_make_copy_(span_bytes_t old_bytes) noexcept { - // byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); - // if (!data) - // return {}; - // std::memcpy(data, old_bytes.data(), old_bytes.size()); - // return node_t{data}; - // } - - void node_store(size_t slot, node_t node) noexcept { - auto count = nodes_->size(); - node_t* slot_ref = &(*nodes_)[slot]; - *slot_ref = node; - } - - /// -------- node locking logic - inline node_lock_t node_lock_(std::size_t slot) const noexcept { - while (nodes_mutexes_->atomic_set(slot)) - ; - return {*nodes_mutexes_, slot}; - } - inline size_t size() { return nodes_->size(); } -}; // template // // nodes_proxy_t make_storage(index_dense_gtindex) { return // nodes_proxy_t(index); } @@ -521,10 +447,6 @@ class index_dense_gt { using key_t = vector_key_t; using compressed_slot_t = compressed_slot_at; using distance_t = distance_punned_t; - // todo:: relationship betwen storage_t and node_t is strange - // have to define the type twice.. 
storage_proxy_ assumes storage is in node_ts - // using storage_t = storage_proxy_t; - using storage2_t = dummy_storage_single_threaded; using node_t = node_at; using metric_t = metric_punned_t; @@ -539,15 +461,16 @@ class index_dense_gt { using dynamic_allocator_t = aligned_allocator_gt; using tape_allocator_t = memory_mapping_allocator_gt<64>; + using storage_t = dummy_storage_single_threaded; private: /// @brief Schema: input buffer, bytes in input buffer, output buffer. using cast_t = std::function; /// @brief Punned index. using index_t = index_gt< // - storage2_t, // + storage_t, // distance_t, vector_key_t, compressed_slot_t, // - dynamic_allocator_t, tape_allocator_t>; + dynamic_allocator_t>; using index_allocator_t = aligned_allocator_gt; using member_iterator_t = typename index_t::member_iterator_t; @@ -608,7 +531,7 @@ class index_dense_gt { std::mutex vector_mutex_; bitset_t nodes_mutexes_; // storage_t storage_{&nodes_, &nodes_mutexes_, config_}; - storage2_t storage_{config_}; + storage_t storage_{config_}; /// @brief Originally forms and array of integers [0, threads], marking all mutable std::vector available_threads_; @@ -749,9 +672,7 @@ class index_dense_gt { // Available since C11, but only C++17, so we use the C version. index_t* raw = index_allocator_t{}.allocate(1); - // result.storage_ = - // storage_proxy_t{&result.nodes_, &result.nodes_mutexes_, config}; - result.storage_ = dummy_storage_single_threaded(config); + result.storage_ = storage_t(config); new (raw) index_t(result.storage_, config); result.typed_ = raw; return result; @@ -819,10 +740,10 @@ class index_dense_gt { * @see `serialized_length` for the length of the binary serialized representation. 
*/ std::size_t memory_usage() const { - return // - typed_->memory_usage(0) + // - typed_->tape_allocator().total_wasted() + // - typed_->tape_allocator().total_reserved() + // + return // + typed_->memory_usage(0) + // + storage_.node_allocator().total_wasted() + // + storage_.node_allocator().total_reserved() + // vectors_tape_allocator_.total_allocated(); } @@ -1997,7 +1918,7 @@ class index_dense_gt { update_config.expansion = config_.expansion_add; metric_proxy_t metric{*this}; - return reuse_node + return reuse_node // ? typed_->update(typed_->iterator_at(free_slot), key, vector_data, metric, update_config, on_success) : typed_->add(key, vector_data, metric, update_config, on_success); } @@ -2023,7 +1944,6 @@ class index_dense_gt { search_config.exact = exact; auto allow = [=](member_cref_t const& member) noexcept { return member.key != free_key_; }; - return typed_->search(vector_data, wanted, metric_proxy_t{*this}, search_config, allow); } From 204535c2b546df73c1085acd627647cebf7651f9 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 2 Jan 2024 09:42:00 +0000 Subject: [PATCH 30/80] Add wip storage_v2 --- cpp/test.cpp | 3 +- include/usearch/index_dense.hpp | 111 ++++++++++++++++++++++++++++---- 2 files changed, 102 insertions(+), 12 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index b08cc3618..55df8a24a 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -162,7 +162,8 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { using slot_t = slot_at; // using index_storage_t = storage_proxy_t; - using index_storage_t = dummy_storage_single_threaded; + // using index_storage_t = dummy_storage_single_threaded; + using index_storage_t = storage_v2; using index_typed_t = index_gt; using member_cref_t = typename index_typed_t::member_cref_t; using member_citerator_t = typename index_typed_t::member_citerator_t; diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index a8cea6572..5285f85e8 100644 --- 
a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -411,18 +411,106 @@ template class storage_v1 { mutable nodes_mutexes_t nodes_mutexes_{}; }; -template class storage_v2 { - using vector_key_t = key_at; - using node_t = node_at; - using dynamic_allocator_t = aligned_allocator_gt; - // using nodes_mutexes_t = bitset_gt; - using nodes_mutexes_t = bitset_gt<>; +template > // +class storage_v2 { + using node_t = node_at; using nodes_t = std::vector; -}; + using nodes_mutexes_t = bitset_gt<>; + + nodes_t nodes_{}; + /// @brief Mutex, that limits concurrent access to `nodes_`. + mutable nodes_mutexes_t nodes_mutexes_{}; + precomputed_constants_t pre_{}; + tape_allocator_at tape_allocator_{}; + using tape_allocator_traits_t = std::allocator_traits; + static_assert( // + sizeof(typename tape_allocator_traits_t::value_type) == 1, // + "Tape allocator must allocate separate addressable bytes"); + + struct node_lock_t { + nodes_mutexes_t& mutexes; + std::size_t slot; + inline ~node_lock_t() noexcept { mutexes.atomic_reset(slot); } + }; + + public: + storage_v2(index_config_t config, tape_allocator_at tape_allocator = {}) + : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} + + inline node_t node_at(std::size_t idx) const noexcept { return nodes_[idx]; } + + inline size_t node_size_bytes(std::size_t idx) const noexcept { return node_at(idx).node_size_bytes(pre_); } + + using lock_type = node_lock_t; + + bool reserve(std::size_t count) { + if (count < nodes_.size()) + return true; + nodes_mutexes_t new_mutexes = nodes_mutexes_t(count); + nodes_mutexes_ = std::move(new_mutexes); + nodes_.resize(count); + return true; + } + + void clear() { + if (nodes_.data()) + std::memset(nodes_.data(), 0, nodes_.size()); + } + void reset() { + nodes_.clear(); + nodes_mutexes_ = {}; + nodes_.shrink_to_fit(); + } + + using span_bytes_t = span_gt; -// template // -// nodes_proxy_t make_storage(index_dense_gtindex) { return -// nodes_proxy_t(index); } 
+ span_bytes_t node_malloc(level_t level) noexcept { + std::size_t node_size = node_t::node_size_bytes(pre_, level); + byte_t* data = (byte_t*)tape_allocator_.allocate(node_size); + return data ? span_bytes_t{data, node_size} : span_bytes_t{}; + } + void node_free(size_t slot, node_t node) { + if (!has_reset()) { + tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); + } else { + tape_allocator_.deallocate(nullptr, 0); + } + nodes_[slot] = node_t{}; + } + node_t node_make(key_at key, level_t level) noexcept { + span_bytes_t node_bytes = node_malloc(level); + if (!node_bytes) + return {}; + + std::memset(node_bytes.data(), 0, node_bytes.size()); + node_t node{(byte_t*)node_bytes.data()}; + node.key(key); + node.level(level); + return node; + } + + // node_t node_make_copy_(span_bytes_t old_bytes) noexcept { + // byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); + // if (!data) + // return {}; + // std::memcpy(data, old_bytes.data(), old_bytes.size()); + // return node_t{data}; + // } + + void node_store(size_t slot, node_t node) noexcept { + auto count = nodes_.size(); + nodes_[slot] = node; + } + inline size_t size() { return nodes_.size(); } + tape_allocator_at const& node_allocator() const noexcept { return tape_allocator_; } + // dummy lock just to satisfy the interface + constexpr inline lock_type node_lock(std::size_t slot) const noexcept { + while (nodes_mutexes_.atomic_set(slot)) + ; + return {nodes_mutexes_, slot}; + } +}; /** * @brief Oversimplified type-punned index for equidimensional vectors @@ -461,7 +549,8 @@ class index_dense_gt { using dynamic_allocator_t = aligned_allocator_gt; using tape_allocator_t = memory_mapping_allocator_gt<64>; - using storage_t = dummy_storage_single_threaded; + // using storage_t = dummy_storage_single_threaded; + using storage_t = storage_v2; private: /// @brief Schema: input buffer, bytes in input buffer, output buffer. 
From 5fdee31275f4deb9364b7108501a580d7d4c6fe1 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Fri, 5 Jan 2024 01:50:24 +0000 Subject: [PATCH 31/80] Move vector storage to storage_ class --- include/usearch/index.hpp | 6 +++--- include/usearch/index_dense.hpp | 23 ++++++++++++++++------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 633ad55c6..bd2252a2f 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1954,7 +1954,7 @@ class index_gt { } }; - const storage_t& storage_; + storage_t& storage_; index_config_t config_{}; index_limits_t limits_{}; @@ -2002,8 +2002,8 @@ class index_gt { * @section Exceptions * Doesn't throw, unless the ::metric's and ::allocators's throw on copy-construction. */ - explicit index_gt( // - storage_at& storage, // + explicit index_gt( // + storage_t& storage, // index_config_t config = {}, dynamic_allocator_t dynamic_allocator = {}) noexcept : storage_(storage), config_(config), limits_(0, 0), dynamic_allocator_(std::move(dynamic_allocator)), pre_(precompute_(config)), nodes_count_(0u), max_level_(-1), entry_slot_(0u), contexts_() {} diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 5285f85e8..fc4bf36b5 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -332,7 +332,7 @@ class dummy_storage_single_threaded { struct dummy_lock { // destructor necessary to avoid "unused variable warning" // will this get properly optimized away? 
- ~dummy_lock() {} + ~dummy_lock() = default; }; using lock_type = dummy_lock; @@ -394,7 +394,7 @@ class dummy_storage_single_threaded { inline size_t size() { return nodes_.size(); } tape_allocator_at const& node_allocator() const noexcept { return tape_allocator_; } // dummy lock just to satisfy the interface - constexpr inline lock_type node_lock(std::size_t) noexcept { return dummy_lock{}; } + constexpr inline lock_type node_lock(std::size_t) const noexcept { return dummy_lock{}; } }; template class storage_v1 { @@ -411,11 +411,13 @@ template class storage_v1 { mutable nodes_mutexes_t nodes_mutexes_{}; }; -template > // +template , // + typename vectors_allocator_at = tape_allocator_at> // class storage_v2 { using node_t = node_at; using nodes_t = std::vector; + using vectors_t = std::vector; using nodes_mutexes_t = bitset_gt<>; nodes_t nodes_{}; @@ -423,6 +425,7 @@ class storage_v2 { mutable nodes_mutexes_t nodes_mutexes_{}; precomputed_constants_t pre_{}; tape_allocator_at tape_allocator_{}; + vectors_allocator_at vectors_allocator_{}; using tape_allocator_traits_t = std::allocator_traits; static_assert( // sizeof(typename tape_allocator_traits_t::value_type) == 1, // @@ -443,6 +446,7 @@ class storage_v2 { inline size_t node_size_bytes(std::size_t idx) const noexcept { return node_at(idx).node_size_bytes(pre_); } using lock_type = node_lock_t; + vectors_t vectors_lookup_{}; bool reserve(std::size_t count) { if (count < nodes_.size()) @@ -582,8 +586,10 @@ class index_dense_gt { inline distance_t operator()(byte_t const* a, byte_t const* b) const noexcept { return f(a, b); } - inline byte_t const* v(member_cref_t m) const noexcept { return index_->vectors_lookup_[get_slot(m)]; } - inline byte_t const* v(member_citerator_t m) const noexcept { return index_->vectors_lookup_[get_slot(m)]; } + inline byte_t const* v(member_cref_t m) const noexcept { return index_->storage_.vectors_lookup_[get_slot(m)]; } + inline byte_t const* v(member_citerator_t m) const noexcept 
{ + return index_->storage_.vectors_lookup_[get_slot(m)]; + } inline distance_t f(byte_t const* a, byte_t const* b) const noexcept { return index_->metric_(a, b); } }; @@ -613,7 +619,7 @@ class index_dense_gt { vectors_tape_allocator_t vectors_tape_allocator_; /// @brief For every managed `compressed_slot_t` stores a pointer to the allocated vector copy. - mutable std::vector vectors_lookup_; + // ask-Ashot: why is this mutable? /// @brief C-style array of `node_t` smart-pointers. std::vector nodes_; @@ -979,6 +985,9 @@ class index_dense_gt { * @return `true` if the memory reservation was successful, `false` otherwise. */ bool reserve(index_limits_t limits) { + // this seems to allow search() and add() on the dense index, concurrent to this reserve + // But that is not safe on typed_ as typed_->reserve() reallocates the lock buffer, discarding the old one + // without checking if anything is locked { unique_lock_t lock(slot_lookup_mutex_); slot_lookup_.reserve(limits.members); From 6dab7ab544e262787162776b58dd1eff678a9ddc Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Fri, 5 Jan 2024 03:59:50 +0000 Subject: [PATCH 32/80] Move vector storage to storage_v2 and fix compile warnings - rename node_at -> get_node_at since node_at is now an abstract type - make sure I use byte_t after defining it in index_gt and not relying on previous definition earlier in the file - make vector storage non const byte_t* because when updates are allowed, usearch actually reuses the allocated space so it cannot be const - change memset -> std::fill for zeroing out vectors --- .clang-tidy | 28 ++++++-- CMakeLists.txt | 1 + include/usearch/index.hpp | 62 +++++++++-------- include/usearch/index_dense.hpp | 116 ++++++++++++++++++-------------- 4 files changed, 121 insertions(+), 86 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 13db0f0c1..990b9503d 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,4 +1,9 @@ -Checks: '*, +Checks: '-*, + clang-diagnostic-*, + clang-analyzer-*, + 
cppcoreguidelines-*, + performance-move-constructor-init, + bugprone-*, -altera-id-dependent-backward-branch, -altera-struct-pack-align, -altera-unroll-loops, @@ -7,6 +12,7 @@ Checks: '*, -cert-err58-cpp, -concurrency-mt-unsafe, -cppcoreguidelines-avoid-const-or-ref-data-members, + cppcoreguidelines-pro-type-member-init, -cppcoreguidelines-avoid-do-while, -cppcoreguidelines-avoid-goto, -cppcoreguidelines-avoid-magic-numbers, @@ -23,10 +29,13 @@ Checks: '*, -fuchsia-default-arguments-declarations, -fuchsia-overloaded-operator, -google-explicit-constructor, - -google-readability-function-size, + -google-readability-braces-around-statements, + -google-readability-casting, + # -google-readability-function-size, -google-runtime-int, -google-runtime-references, -hicpp-avoid-goto, + -hicpp-braces-around-statements, -hicpp-explicit-conversions, -hicpp-function-size, -hicpp-no-array-decay, @@ -35,7 +44,6 @@ Checks: '*, -hicpp-uppercase-literal-suffix, -llvm-header-guard, -llvm-include-order, - -llvmlibc-*, -misc-use-anonymous-namespace, -misc-confusable-identifiers, -misc-no-recursion, @@ -44,19 +52,25 @@ Checks: '*, -modernize-type-traits, -modernize-use-nodiscard, -modernize-use-trailing-return-type, + -readability-isolate-declaration, + -readability-braces-around-statements, -readability-function-cognitive-complexity, -readability-function-size, -readability-identifier-length, -readability-magic-numbers, -readability-redundant-access-specifiers, -readability-simplify-boolean-expr, - -readability-uppercase-literal-suffix' + -readability-braces-around-statements, + -readability-uppercase-literal-suffix, + -llvmlibc-*' CheckOptions: - key: hicpp-special-member-functions.AllowSoleDefaultDtor value: 1 + # -google-readability-braces-around-statements does not require this to suppress, for some reason the others do + - key: hicpp-braces-around-statements.ShortStatementLines + value: 20 - key: readability-braces-around-statements.ShortStatementLines - value: 10 + value: 20 + 
-WarningsAsErrors: '*' HeaderFilterRegex: '.*hpp$' diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a3deb6da..a5e664757 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -153,6 +153,7 @@ function (setup_target TARGET_NAME) > -ffast-math -fPIC + -fpermissive -Wall -Wextra -Wno-conversion diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index bd2252a2f..4a11fb481 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -77,9 +77,10 @@ #include // `std::atomic` #include // `std::bitset` #include -#include // `CHAR_BIT` -#include // `std::sqrt` -#include // `std::memset` +#include // `CHAR_BIT` +#include // `std::sqrt` +#include // `std::memset` +#include #include // `std::reverse_iterator` #include // `std::unique_lock` - replacement candidate #include // `std::default_random_engine` - replacement candidate @@ -1787,7 +1788,6 @@ class index_gt { using key_t = vector_key_t; using compressed_slot_t = compressed_slot_at; using dynamic_allocator_t = dynamic_allocator_at; - using span_bytes_t = span_gt; static_assert(sizeof(vector_key_t) >= sizeof(compressed_slot_t), "Having tiny keys doesn't make sense."); using member_cref_t = member_cref_gt; @@ -1814,8 +1814,8 @@ class index_gt { using reference = ref_t; // todo:: take care of these to use external storage - reference operator*() const noexcept { return {index_->storage_.node_at(slot_).key(), slot_}; } - vector_key_t key() const noexcept { return index_->storage_.node_at(slot_).key(); } + reference operator*() const noexcept { return {index_->storage_.get_node_at(slot_).key(), slot_}; } + vector_key_t key() const noexcept { return index_->storage_.get_node_at(slot_).key(); } friend inline std::size_t get_slot(member_iterator_gt const& it) noexcept { return it.slot_; } friend inline vector_key_t get_key(member_iterator_gt const& it) noexcept { return it.key(); } @@ -1858,6 +1858,8 @@ class index_gt { sizeof(byte_t) == 1, // "Primary allocator must allocate separate addressable 
bytes"); + using span_bytes_t = span_gt; + private: /** * @brief Integer for the number of node neighbors at a specific level of the @@ -1866,6 +1868,7 @@ class index_gt { */ using neighbors_count_t = std::uint32_t; + // todo:: move near the rest of these functions /** * @brief How many bytes of memory are needed to form the "head" of the node. */ @@ -1954,6 +1957,7 @@ class index_gt { } }; + // todo:: do I have to init this? storage_t& storage_; index_config_t config_{}; index_limits_t limits_{}; @@ -2063,8 +2067,8 @@ class index_gt { member_iterator_t begin() noexcept { return {this, 0}; } member_iterator_t end() noexcept { return {this, size()}; } - member_ref_t at(std::size_t slot) noexcept { return {storage_.node_at(slot).key(), slot}; } - member_cref_t at(std::size_t slot) const noexcept { return {storage_.node_at(slot).ckey(), slot}; } + member_ref_t at(std::size_t slot) noexcept { return {storage_.get_node_at(slot).key(), slot}; } + member_cref_t at(std::size_t slot) const noexcept { return {storage_.get_node_at(slot).ckey(), slot}; } member_iterator_t iterator_at(std::size_t slot) noexcept { return {this, slot}; } member_citerator_t citerator_at(std::size_t slot) const noexcept { return {this, slot}; } @@ -2082,7 +2086,7 @@ class index_gt { if (!viewed_file_) { std::size_t n = nodes_count_; for (std::size_t i = 0; i != n; ++i) { - node_t node = storage_.node_at(i); + node_t node = storage_.get_node_at(i); // if (!has_reset()) { storage_.node_free(i, node); // } else @@ -2125,6 +2129,7 @@ class index_gt { std::swap(viewed_file_, other.viewed_file_); std::swap(max_level_, other.max_level_); std::swap(entry_slot_, other.entry_slot_); + assert(false); // std::swap(nodes_, other.nodes_); // std::swap(nodes_mutexes_, other.nodes_mutexes_); std::swap(contexts_, other.contexts_); @@ -2258,7 +2263,7 @@ class index_gt { candidate_t const* top_ordered = top_->data(); candidate_t candidate = top_ordered[i]; // node_t node = nodes_[candidate.slot]; - node_t node = 
storage_->node_at(candidate.slot); + node_t node = storage_->get_node_at(candidate.slot); return {member_cref_t{node.ckey(), candidate.slot}, candidate.distance}; } inline std::size_t merge_into( // @@ -2474,7 +2479,7 @@ class index_gt { return result.failed("Out of memory!"); node_lock_t new_lock = storage_.node_lock(old_slot); - node_t node = storage_.node_at(old_slot); + node_t node = storage_.get_node_at(old_slot); level_t node_level = node.level(); span_bytes_t node_bytes = node.node_bytes(pre_); @@ -2626,7 +2631,7 @@ class index_gt { stats_t result{}; for (std::size_t i = 0; i != size(); ++i) { - node_t node = storage_.node_at(i); + node_t node = storage_.get_node_at(i); std::size_t max_edges = node.level() * config_.connectivity + config_.connectivity_base; std::size_t edges = 0; for (level_t level = 0; level <= node.level(); ++level) @@ -2645,7 +2650,7 @@ class index_gt { std::size_t neighbors_bytes = !level ? pre_.neighbors_base_bytes : pre_.neighbors_bytes; for (std::size_t i = 0; i != size(); ++i) { - node_t node = storage_.node_at(i); + node_t node = storage_.get_node_at(i); if (static_cast(node.level()) < level) continue; @@ -2663,7 +2668,7 @@ class index_gt { std::size_t head_bytes = node_head_bytes_(); for (std::size_t i = 0; i != size(); ++i) { - node_t node = storage_.node_at(i); + node_t node = storage_.get_node_at(i); stats_per_level[0].nodes++; stats_per_level[0].edges += neighbors_(node, 0).size(); @@ -2727,7 +2732,7 @@ class index_gt { std::size_t neighbors_length = 0; for (std::size_t i = 0; i != size(); ++i) - neighbors_length += node_t::node_size_bytes(pre_, storage_.node_at(i).level()) + sizeof(level_t); + neighbors_length += node_t::node_size_bytes(pre_, storage_.get_node_at(i).level()) + sizeof(level_t); return sizeof(index_serialized_header_t) + neighbors_length; } @@ -2757,7 +2762,7 @@ class index_gt { // That is both enough to estimate the overall memory consumption, // and to be able to estimate the offsets of every entry in the 
file. for (std::size_t i = 0; i != header.size; ++i) { - node_t node = storage_.node_at(i); + node_t node = storage_.get_node_at(i); level_t level = node.level(); if (!output(&level, sizeof(level))) return result.failed("Failed to serialize into stream"); @@ -2767,7 +2772,7 @@ class index_gt { // After that dump the nodes themselves for (std::size_t i = 0; i != header.size; ++i) { - span_bytes_t node_bytes = storage_.node_at(i).node_bytes(pre_); + span_bytes_t node_bytes = storage_.get_node_at(i).node_bytes(pre_); if (!output(node_bytes.data(), node_bytes.size())) return result.failed("Failed to serialize into stream"); if (!progress(++processed, total)) @@ -3079,14 +3084,14 @@ class index_gt { // Erase all the incoming links std::size_t nodes_count = size(); executor.dynamic(nodes_count, [&](std::size_t thread_idx, std::size_t node_idx) { - node_t node = node_at(node_idx); + node_t node = get_node_at(node_idx); for (level_t level = 0; level <= node.level(); ++level) { neighbors_ref_t neighbors = neighbors_(node, level); std::size_t old_size = neighbors.size(); neighbors.clear(); for (std::size_t i = 0; i != old_size; ++i) { compressed_slot_t neighbor_slot = neighbors[i]; - node_t neighbor = node_at(neighbor_slot); + node_t neighbor = get_node_at(neighbor_slot); if (allow_member(member_cref_t{neighbor.ckey(), neighbor_slot})) neighbors.push_back(neighbor_slot); } @@ -3112,6 +3117,7 @@ class index_gt { return pre; } + // todo:: these can also be moved to node_at, along with class neighbors_ref_t definition inline neighbors_ref_t neighbors_base_(node_t node) const noexcept { return {node.neighbors_tape()}; } inline neighbors_ref_t neighbors_non_base_(node_t node, level_t level) const noexcept { @@ -3147,7 +3153,7 @@ class index_gt { metric_at&& metric, // std::size_t new_slot, level_t level, context_t& context) usearch_noexcept_m { - node_t new_node = storage_.node_at(new_slot); + node_t new_node = storage_.get_node_at(new_slot); top_candidates_t& top = 
context.top_candidates; // Outgoing links from `new_slot`: @@ -3158,7 +3164,7 @@ class index_gt { for (std::size_t idx = 0; idx != top_view.size(); idx++) { usearch_assert_m(!new_neighbors[idx], "Possible memory corruption"); - usearch_assert_m(level <= storage_.node_at(top_view[idx].slot).level(), "Linking to missing level"); + usearch_assert_m(level <= storage_.get_node_at(top_view[idx].slot).level(), "Linking to missing level"); new_neighbors.push_back(top_view[idx].slot); } } @@ -3171,7 +3177,7 @@ class index_gt { metric_at&& metric, // std::size_t new_slot, value_at&& value, level_t level, context_t& context) usearch_noexcept_m { - node_t new_node = storage_.node_at(new_slot); + node_t new_node = storage_.get_node_at(new_slot); top_candidates_t& top = context.top_candidates; neighbors_ref_t new_neighbors = neighbors_(new_node, level); @@ -3185,7 +3191,7 @@ class index_gt { // about it here. BUt are there cases where, e.g. auto will pick up the lock in the wrong way and instantly // drop it for example? 
node_lock_t close_lock = storage_.node_lock(close_slot); - node_t close_node = storage_.node_at(close_slot); + node_t close_node = storage_.get_node_at(close_slot); neighbors_ref_t close_header = neighbors_(close_node, level); usearch_assert_m(close_header.size() <= connectivity_max, "Possible corruption"); @@ -3267,7 +3273,7 @@ class index_gt { bool operator==(candidates_iterator_t const& other) noexcept { return current_ == other.current_; } bool operator!=(candidates_iterator_t const& other) noexcept { return current_ != other.current_; } - // vector_key_t key() const noexcept { return index_->node_at(slot()).key(); } + // vector_key_t key() const noexcept { return index_->get_node_at(slot()).key(); } compressed_slot_t slot() const noexcept { return neighbors_[current_]; } friend inline std::size_t get_slot(candidates_iterator_t const& it) noexcept { return it.slot(); } friend inline vector_key_t get_key(candidates_iterator_t const& it) noexcept { return it.key(); } @@ -3303,7 +3309,7 @@ class index_gt { do { changed = false; node_lock_t closest_lock = storage_.node_lock(closest_slot); - neighbors_ref_t closest_neighbors = neighbors_non_base_(storage_.node_at(closest_slot), level); + neighbors_ref_t closest_neighbors = neighbors_non_base_(storage_.get_node_at(closest_slot), level); using vvv = typename std::decay::type::vector_key_t; static_assert(std::is_same::value, "this cannot happen"); @@ -3371,7 +3377,7 @@ class index_gt { compressed_slot_t candidate_slot = candidacy.slot; if (new_slot == candidate_slot) continue; - node_t candidate_ref = storage_.node_at(candidate_slot); + node_t candidate_ref = storage_.get_node_at(candidate_slot); node_lock_t candidate_lock = storage_.node_lock(candidate_slot); neighbors_ref_t candidate_neighbors = neighbors_(candidate_ref, level); @@ -3442,7 +3448,7 @@ class index_gt { next.pop(); context.iteration_cycles++; - neighbors_ref_t candidate_neighbors = neighbors_base_(storage_.node_at(candidate.slot)); + neighbors_ref_t 
candidate_neighbors = neighbors_base_(storage_.get_node_at(candidate.slot)); // Optional prefetching if (!is_dummy()) { @@ -3463,7 +3469,7 @@ class index_gt { // This can substantially grow our priority queue: next.insert({-successor_dist, successor_slot}); if (!is_dummy()) - if (!predicate(member_cref_t{storage_.node_at(successor_slot).ckey(), successor_slot})) + if (!predicate(member_cref_t{storage_.get_node_at(successor_slot).ckey(), successor_slot})) continue; // This will automatically evict poor matches: diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index fc4bf36b5..a12dae606 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -319,9 +319,9 @@ class dummy_storage_single_threaded { dummy_storage_single_threaded(index_config_t config, tape_allocator_at tape_allocator = {}) : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} - inline node_t node_at(std::size_t idx) const noexcept { return nodes_[idx]; } + inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } - inline size_t node_size_bytes(std::size_t idx) const noexcept { return node_at(idx).node_size_bytes(pre_); } + inline size_t node_size_bytes(std::size_t idx) const noexcept { return get_node_at(idx).node_size_bytes(pre_); } // exported for client-side lock-declaration // alternatively, could just use auto in client side @@ -332,7 +332,7 @@ class dummy_storage_single_threaded { struct dummy_lock { // destructor necessary to avoid "unused variable warning" // will this get properly optimized away? 
- ~dummy_lock() = default; + ~dummy_lock() {} }; using lock_type = dummy_lock; @@ -345,7 +345,7 @@ class dummy_storage_single_threaded { void clear() { if (nodes_.data()) - std::memset(nodes_.data(), 0, nodes_.size()); + std::fill(nodes_.begin(), nodes_.end(), node_t{}); } void reset() { nodes_.clear(); @@ -417,7 +417,7 @@ template ; using nodes_t = std::vector; - using vectors_t = std::vector; + using vectors_t = std::vector; using nodes_mutexes_t = bitset_gt<>; nodes_t nodes_{}; @@ -441,9 +441,9 @@ class storage_v2 { storage_v2(index_config_t config, tape_allocator_at tape_allocator = {}) : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} - inline node_t node_at(std::size_t idx) const noexcept { return nodes_[idx]; } + inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } - inline size_t node_size_bytes(std::size_t idx) const noexcept { return node_at(idx).node_size_bytes(pre_); } + inline size_t node_size_bytes(std::size_t idx) const noexcept { return get_node_at(idx).node_size_bytes(pre_); } using lock_type = node_lock_t; vectors_t vectors_lookup_{}; @@ -459,7 +459,7 @@ class storage_v2 { void clear() { if (nodes_.data()) - std::memset(nodes_.data(), 0, nodes_.size()); + std::fill(nodes_.begin(), nodes_.end(), node_t{}); } void reset() { nodes_.clear(); @@ -502,10 +502,7 @@ class storage_v2 { // return node_t{data}; // } - void node_store(size_t slot, node_t node) noexcept { - auto count = nodes_.size(); - nodes_[slot] = node; - } + void node_store(size_t slot, node_t node) noexcept { nodes_[slot] = node; } inline size_t size() { return nodes_.size(); } tape_allocator_at const& node_allocator() const noexcept { return tape_allocator_; } // dummy lock just to satisfy the interface @@ -620,6 +617,7 @@ class index_dense_gt { /// @brief For every managed `compressed_slot_t` stores a pointer to the allocated vector copy. // ask-Ashot: why is this mutable? 
+ // mutable std::vector vectors_lookup_; /// @brief C-style array of `node_t` smart-pointers. std::vector nodes_; @@ -699,12 +697,15 @@ class index_dense_gt { metric_(std::move(other.metric_)), // vectors_tape_allocator_(std::move(other.vectors_tape_allocator_)), // - vectors_lookup_(std::move(other.vectors_lookup_)), // + // vectors_lookup_(std::move(other.vectors_lookup_)), // available_threads_(std::move(other.available_threads_)), // slot_lookup_(std::move(other.slot_lookup_)), // free_keys_(std::move(other.free_keys_)), // - free_key_(std::move(other.free_key_)) {} // + free_key_(std::move(other.free_key_)) { + + assert(false); + } // index_dense_gt& operator=(index_dense_gt&& other) { swap(other); @@ -724,12 +725,13 @@ class index_dense_gt { std::swap(metric_, other.metric_); std::swap(vectors_tape_allocator_, other.vectors_tape_allocator_); - std::swap(vectors_lookup_, other.vectors_lookup_); + // std::swap(vectors_lookup_, other.vectors_lookup_); std::swap(available_threads_, other.available_threads_); std::swap(slot_lookup_, other.slot_lookup_); std::swap(free_keys_, other.free_keys_); std::swap(free_key_, other.free_key_); + assert(false); } ~index_dense_gt() { @@ -901,9 +903,9 @@ class index_dense_gt { return result; key_and_slot_t a_key_and_slot = *a_it; - byte_t const* a_vector = vectors_lookup_[a_key_and_slot.slot]; + byte_t const* a_vector = storage_.vectors_lookup_[a_key_and_slot.slot]; key_and_slot_t b_key_and_slot = *b_it; - byte_t const* b_vector = vectors_lookup_[b_key_and_slot.slot]; + byte_t const* b_vector = storage_.vectors_lookup_[b_key_and_slot.slot]; distance_t a_b_distance = metric_(a_vector, b_vector); result.mean = result.min = result.max = a_b_distance; @@ -925,10 +927,10 @@ class index_dense_gt { while (a_range.first != a_range.second) { key_and_slot_t a_key_and_slot = *a_range.first; - byte_t const* a_vector = vectors_lookup_[a_key_and_slot.slot]; + byte_t const* a_vector = storage_.vectors_lookup_[a_key_and_slot.slot]; while 
(b_range.first != b_range.second) { key_and_slot_t b_key_and_slot = *b_range.first; - byte_t const* b_vector = vectors_lookup_[b_key_and_slot.slot]; + byte_t const* b_vector = storage_.vectors_lookup_[b_key_and_slot.slot]; distance_t a_b_distance = metric_(a_vector, b_vector); result.mean += a_b_distance; @@ -968,7 +970,7 @@ class index_dense_gt { // Find the closest cluster for any vector under that key. while (key_range.first != key_range.second) { key_and_slot_t key_and_slot = *key_range.first; - byte_t const* vector_data = vectors_lookup_[key_and_slot.slot]; + byte_t const* vector_data = storage_.vectors_lookup_[key_and_slot.slot]; cluster_result_t new_result = typed_->cluster(vector_data, level, metric, cluster_config, allow); if (!new_result) return new_result; @@ -991,7 +993,8 @@ class index_dense_gt { { unique_lock_t lock(slot_lookup_mutex_); slot_lookup_.reserve(limits.members); - vectors_lookup_.resize(limits.members); + // done by typed_ + storage_.vectors_lookup_.resize(limits.members); } return typed_->reserve(limits); } @@ -1008,7 +1011,9 @@ class index_dense_gt { std::unique_lock free_lock(free_keys_mutex_); typed_->clear(); slot_lookup_.clear(); - vectors_lookup_.clear(); + + // should by run by storage_->clear which is run by typed_->clear() + // storage_.vectors_lookup_.clear(); free_keys_.clear(); vectors_tape_allocator_.reset(); } @@ -1027,7 +1032,8 @@ class index_dense_gt { std::unique_lock available_threads_lock(available_threads_mutex_); typed_->reset(); slot_lookup_.clear(); - vectors_lookup_.clear(); + // // run by typed_->reset(); + // vectors_lookup_.clear(); free_keys_.clear(); vectors_tape_allocator_.reset(); @@ -1071,7 +1077,7 @@ class index_dense_gt { // Dump the vectors one after another for (std::uint64_t i = 0; i != matrix_rows; ++i) { - byte_t* vector = vectors_lookup_[i]; + byte_t* vector = storage_.vectors_lookup_[i]; if (!output(vector, matrix_cols)) return result.failed("Failed to serialize into stream"); } @@ -1158,14 
+1164,16 @@ class index_dense_gt { matrix_cols = dimensions[1]; } // Load the vectors one after another - vectors_lookup_.resize(matrix_rows); + // most of this logic should move within storage class + storage_.vectors_lookup_.resize(matrix_rows); for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) { byte_t* vector = vectors_tape_allocator_.allocate(matrix_cols); if (!input(vector, matrix_cols)) return result.failed("Failed to read vectors"); - vectors_lookup_[slot] = vector; + storage_.vectors_lookup_[slot] = vector; } } + // assert(false && "serialization and deserialization of streams must be moved to storage"); // Load metadata and choose the right metric { @@ -1284,10 +1292,10 @@ class index_dense_gt { return result.failed("Index size and the number of vectors doesn't match"); // Address the vectors - vectors_lookup_.resize(matrix_rows); + storage_.vectors_lookup_.resize(matrix_rows); if (!config.exclude_vectors) for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) - vectors_lookup_[slot] = (byte_t*)vectors_buffer.data() + matrix_cols * slot; + storage_.vectors_lookup_[slot] = (byte_t*)vectors_buffer.data() + matrix_cols * slot; reindex_keys_(); return result; @@ -1611,17 +1619,18 @@ class index_dense_gt { copy.free_keys_.push(free_keys_[i]); // Allocate buffers and move the vectors themselves - if (!config.force_vector_copy && copy.config_.exclude_vectors) - copy.vectors_lookup_ = vectors_lookup_; - else { - copy.vectors_lookup_.resize(vectors_lookup_.size()); - for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) - copy.vectors_lookup_[slot] = copy.vectors_tape_allocator_.allocate(copy.metric_.bytes_per_vector()); - if (std::count(copy.vectors_lookup_.begin(), copy.vectors_lookup_.end(), nullptr)) - return result.failed("Out of memory!"); - for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) - std::memcpy(copy.vectors_lookup_[slot], vectors_lookup_[slot], metric_.bytes_per_vector()); - } + // if 
(!config.force_vector_copy && copy.config_.exclude_vectors) + // copy.vectors_lookup_ = vectors_lookup_; + // else { + // copy.vectors_lookup_.resize(vectors_lookup_.size()); + // for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) + // copy.vectors_lookup_[slot] = copy.vectors_tape_allocator_.allocate(copy.metric_.bytes_per_vector()); + // if (std::count(copy.vectors_lookup_.begin(), copy.vectors_lookup_.end(), nullptr)) + // return result.failed("Out of memory!"); + // for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) + // std::memcpy(copy.vectors_lookup_[slot], vectors_lookup_[slot], metric_.bytes_per_vector()); + // } + assert(false); copy.slot_lookup_ = slot_lookup_; *copy.typed_ = std::move(typed_result.index); @@ -1691,8 +1700,12 @@ class index_dense_gt { public: values_proxy_t(index_dense_gt const& index) noexcept : index_(&index) {} - byte_t const* operator[](compressed_slot_t slot) const noexcept { return index_->vectors_lookup_[slot]; } - byte_t const* operator[](member_citerator_t it) const noexcept { return index_->vectors_lookup_[get_slot(it)]; } + byte_t const* operator[](compressed_slot_t slot) const noexcept { + return index_->storage_.vectors_lookup_[slot]; + } + byte_t const* operator[](member_citerator_t it) const noexcept { + return index_->storage_.vectors_lookup_[get_slot(it)]; + } }; /** @@ -1707,18 +1720,18 @@ class index_dense_gt { compaction_result_t compact(executor_at&& executor = executor_at{}, progress_at&& progress = progress_at{}) { compaction_result_t result; - std::vector new_vectors_lookup(vectors_lookup_.size()); + std::vector new_vectors_lookup(storage_.vectors_lookup_.size()); vectors_tape_allocator_t new_vectors_allocator; auto track_slot_change = [&](vector_key_t, compressed_slot_t old_slot, compressed_slot_t new_slot) { byte_t* new_vector = new_vectors_allocator.allocate(metric_.bytes_per_vector()); - byte_t* old_vector = vectors_lookup_[old_slot]; + byte_t* old_vector = 
storage_.vectors_lookup_[old_slot]; std::memcpy(new_vector, old_vector, metric_.bytes_per_vector()); new_vectors_lookup[new_slot] = new_vector; }; typed_->compact(values_proxy_t{*this}, metric_proxy_t{*this}, track_slot_change, std::forward(executor), std::forward(progress)); - vectors_lookup_ = std::move(new_vectors_lookup); + storage_.vectors_lookup_ = std::move(new_vectors_lookup); vectors_tape_allocator_ = std::move(new_vectors_allocator); return result; } @@ -1840,7 +1853,7 @@ class index_dense_gt { // Export in case we need to refine afterwards clusters[query_idx].centroid = result.cluster.member.key; - clusters[query_idx].vector = vectors_lookup_[result.cluster.member.slot]; + clusters[query_idx].vector = storage_.vectors_lookup_[result.cluster.member.slot]; clusters[query_idx].merged_into = free_key(); clusters[query_idx].popularity = 1; @@ -2005,10 +2018,11 @@ class index_dense_gt { slot_lookup_.try_emplace(key_and_slot_t{key, static_cast(member.slot)}); if (copy_vector) { if (!reuse_node) - vectors_lookup_[member.slot] = vectors_tape_allocator_.allocate(metric_.bytes_per_vector()); - std::memcpy(vectors_lookup_[member.slot], vector_data, metric_.bytes_per_vector()); + storage_.vectors_lookup_[member.slot] = + vectors_tape_allocator_.allocate(metric_.bytes_per_vector()); + std::memcpy(storage_.vectors_lookup_[member.slot], vector_data, metric_.bytes_per_vector()); } else - vectors_lookup_[member.slot] = (byte_t*)vector_data; + storage_.vectors_lookup_[member.slot] = (byte_t*)vector_data; }; index_update_config_t update_config; @@ -2097,7 +2111,7 @@ class index_dense_gt { while (key_range.first != key_range.second) { key_and_slot_t key_and_slot = *key_range.first; - byte_t const* a_vector = vectors_lookup_[key_and_slot.slot]; + byte_t const* a_vector = storage_.vectors_lookup_[key_and_slot.slot]; byte_t const* b_vector = vector_data; distance_t a_b_distance = metric_(a_vector, b_vector); @@ -2158,7 +2172,7 @@ class index_dense_gt { slot = (*it).slot; } // 
Export the entry - byte_t const* punned_vector = reinterpret_cast(vectors_lookup_[slot]); + byte_t const* punned_vector = reinterpret_cast(storage_.vectors_lookup_[slot]); bool casted = cast(punned_vector, dimensions(), (byte_t*)reconstructed); if (!casted) std::memcpy(reconstructed, punned_vector, metric_.bytes_per_vector()); @@ -2171,7 +2185,7 @@ class index_dense_gt { begin != equal_range_pair.second && count_exported != vectors_limit; ++begin, ++count_exported) { // compressed_slot_t slot = (*begin).slot; - byte_t const* punned_vector = reinterpret_cast(vectors_lookup_[slot]); + byte_t const* punned_vector = reinterpret_cast(storage_.vectors_lookup_[slot]); byte_t* reconstructed_vector = (byte_t*)reconstructed + metric_.bytes_per_vector() * count_exported; bool casted = cast(punned_vector, dimensions(), reconstructed_vector); if (!casted) From 3c1134077e8a4cede6826ad9dae70f8006f32675 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Fri, 5 Jan 2024 06:17:14 +0000 Subject: [PATCH 33/80] Move vectors to external storage, keep similar interface --- include/usearch/index_dense.hpp | 364 +++++--------------------------- 1 file changed, 53 insertions(+), 311 deletions(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index a12dae606..552cc956d 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -421,10 +421,14 @@ class storage_v2 { using nodes_mutexes_t = bitset_gt<>; nodes_t nodes_{}; + + /// @brief For every managed `compressed_slot_t` stores a pointer to the allocated vector copy. + vectors_t vectors_lookup_{}; /// @brief Mutex, that limits concurrent access to `nodes_`. mutable nodes_mutexes_t nodes_mutexes_{}; precomputed_constants_t pre_{}; tape_allocator_at tape_allocator_{}; + /// @brief Allocator for the copied vectors, aligned to widest double-precision scalars. 
vectors_allocator_at vectors_allocator_{}; using tape_allocator_traits_t = std::allocator_traits; static_assert( // @@ -442,11 +446,23 @@ class storage_v2 { : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } + // todo:: most of the time this is called for const* vector, maybe add a separate interface for const? + inline byte_t* get_vector_at(std::size_t idx) const noexcept { return vectors_lookup_[idx]; } + inline void set_vector_at(std::size_t idx, const byte_t* vector_data, std::size_t bytes_per_vector, + bool copy_vector, bool reuse_node) { + usearch_assert_m(!(reuse_node && !copy_vector), + "Cannot reuse node when not copying as there is no allocation needed"); + if (copy_vector) { + if (!reuse_node) + vectors_lookup_[idx] = vectors_allocator_.allocate(bytes_per_vector); + std::memcpy(vectors_lookup_[idx], vector_data, bytes_per_vector); + } else + vectors_lookup_[idx] = (byte_t*)vector_data; + } inline size_t node_size_bytes(std::size_t idx) const noexcept { return get_node_at(idx).node_size_bytes(pre_); } using lock_type = node_lock_t; - vectors_t vectors_lookup_{}; bool reserve(std::size_t count) { if (count < nodes_.size()) @@ -454,16 +470,18 @@ class storage_v2 { nodes_mutexes_t new_mutexes = nodes_mutexes_t(count); nodes_mutexes_ = std::move(new_mutexes); nodes_.resize(count); + vectors_lookup_.resize(count); return true; } void clear() { - if (nodes_.data()) + if (nodes_.data()) { std::fill(nodes_.begin(), nodes_.end(), node_t{}); + } } void reset() { - nodes_.clear(); nodes_mutexes_ = {}; + nodes_.clear(); nodes_.shrink_to_fit(); } @@ -550,10 +568,10 @@ class index_dense_gt { using dynamic_allocator_t = aligned_allocator_gt; using tape_allocator_t = memory_mapping_allocator_gt<64>; - // using storage_t = dummy_storage_single_threaded; - using storage_t = storage_v2; private: + using vectors_tape_allocator_t = memory_mapping_allocator_gt<8>; + using 
storage_t = storage_v2; /// @brief Schema: input buffer, bytes in input buffer, output buffer. using cast_t = std::function; /// @brief Punned index. @@ -583,9 +601,9 @@ class index_dense_gt { inline distance_t operator()(byte_t const* a, byte_t const* b) const noexcept { return f(a, b); } - inline byte_t const* v(member_cref_t m) const noexcept { return index_->storage_.vectors_lookup_[get_slot(m)]; } + inline byte_t const* v(member_cref_t m) const noexcept { return index_->storage_.get_vector_at(get_slot(m)); } inline byte_t const* v(member_citerator_t m) const noexcept { - return index_->storage_.vectors_lookup_[get_slot(m)]; + return index_->storage_.get_vector_at(get_slot(m)); } inline distance_t f(byte_t const* a, byte_t const* b) const noexcept { return index_->metric_(a, b); } }; @@ -611,14 +629,6 @@ class index_dense_gt { /// @brief An instance of a potentially stateful `metric_t` used to initialize copies and forks. metric_t metric_; - using vectors_tape_allocator_t = memory_mapping_allocator_gt<8>; - /// @brief Allocator for the copied vectors, aligned to widest double-precision scalars. - vectors_tape_allocator_t vectors_tape_allocator_; - - /// @brief For every managed `compressed_slot_t` stores a pointer to the allocated vector copy. - // ask-Ashot: why is this mutable? - // mutable std::vector vectors_lookup_; - /// @brief C-style array of `node_t` smart-pointers. 
std::vector nodes_; std::mutex vector_mutex_; @@ -696,8 +706,8 @@ class index_dense_gt { casts_(std::move(other.casts_)), // metric_(std::move(other.metric_)), // - vectors_tape_allocator_(std::move(other.vectors_tape_allocator_)), // - // vectors_lookup_(std::move(other.vectors_lookup_)), // + // vectors_tape_allocator_(std::move(other.vectors_tape_allocator_)), // + // vectors_lookup_(std::move(other.vectors_lookup_)), // available_threads_(std::move(other.available_threads_)), // slot_lookup_(std::move(other.slot_lookup_)), // @@ -724,8 +734,8 @@ class index_dense_gt { std::swap(casts_, other.casts_); std::swap(metric_, other.metric_); - std::swap(vectors_tape_allocator_, other.vectors_tape_allocator_); - // std::swap(vectors_lookup_, other.vectors_lookup_); + // std::swap(vectors_tape_allocator_, other.vectors_tape_allocator_); + // std::swap(vectors_lookup_, other.vectors_lookup_); std::swap(available_threads_, other.available_threads_); std::swap(slot_lookup_, other.slot_lookup_); @@ -837,11 +847,10 @@ class index_dense_gt { * @see `serialized_length` for the length of the binary serialized representation. 
*/ std::size_t memory_usage() const { - return // - typed_->memory_usage(0) + // - storage_.node_allocator().total_wasted() + // - storage_.node_allocator().total_reserved() + // - vectors_tape_allocator_.total_allocated(); + return // + typed_->memory_usage(0) + // + storage_.node_allocator().total_wasted() + // + storage_.node_allocator().total_reserved(); // } static constexpr std::size_t any_thread() { return std::numeric_limits::max(); } @@ -903,9 +912,9 @@ class index_dense_gt { return result; key_and_slot_t a_key_and_slot = *a_it; - byte_t const* a_vector = storage_.vectors_lookup_[a_key_and_slot.slot]; + byte_t const* a_vector = storage_.get_vector_at(a_key_and_slot.slot); key_and_slot_t b_key_and_slot = *b_it; - byte_t const* b_vector = storage_.vectors_lookup_[b_key_and_slot.slot]; + byte_t const* b_vector = storage_.get_vector_at(b_key_and_slot.slot); distance_t a_b_distance = metric_(a_vector, b_vector); result.mean = result.min = result.max = a_b_distance; @@ -927,10 +936,10 @@ class index_dense_gt { while (a_range.first != a_range.second) { key_and_slot_t a_key_and_slot = *a_range.first; - byte_t const* a_vector = storage_.vectors_lookup_[a_key_and_slot.slot]; + byte_t const* a_vector = storage_.get_vector_at(a_key_and_slot.slot); while (b_range.first != b_range.second) { key_and_slot_t b_key_and_slot = *b_range.first; - byte_t const* b_vector = storage_.vectors_lookup_[b_key_and_slot.slot]; + byte_t const* b_vector = storage_.get_vector_at(b_key_and_slot.slot); distance_t a_b_distance = metric_(a_vector, b_vector); result.mean += a_b_distance; @@ -970,7 +979,7 @@ class index_dense_gt { // Find the closest cluster for any vector under that key. 
while (key_range.first != key_range.second) { key_and_slot_t key_and_slot = *key_range.first; - byte_t const* vector_data = storage_.vectors_lookup_[key_and_slot.slot]; + byte_t const* vector_data = storage_.get_vector_at(key_and_slot.slot); cluster_result_t new_result = typed_->cluster(vector_data, level, metric, cluster_config, allow); if (!new_result) return new_result; @@ -993,8 +1002,6 @@ class index_dense_gt { { unique_lock_t lock(slot_lookup_mutex_); slot_lookup_.reserve(limits.members); - // done by typed_ - storage_.vectors_lookup_.resize(limits.members); } return typed_->reserve(limits); } @@ -1015,7 +1022,7 @@ class index_dense_gt { // should by run by storage_->clear which is run by typed_->clear() // storage_.vectors_lookup_.clear(); free_keys_.clear(); - vectors_tape_allocator_.reset(); + // vectors_tape_allocator_.reset(); } /** @@ -1035,7 +1042,7 @@ class index_dense_gt { // // run by typed_->reset(); // vectors_lookup_.clear(); free_keys_.clear(); - vectors_tape_allocator_.reset(); + // vectors_tape_allocator_.reset(); // Reset the thread IDs. 
available_threads_.resize(std::thread::hardware_concurrency()); @@ -1077,7 +1084,7 @@ class index_dense_gt { // Dump the vectors one after another for (std::uint64_t i = 0; i != matrix_rows; ++i) { - byte_t* vector = storage_.vectors_lookup_[i]; + const byte_t* vector = storage_.get_vector_at(i); if (!output(vector, matrix_cols)) return result.failed("Failed to serialize into stream"); } @@ -1165,12 +1172,12 @@ class index_dense_gt { } // Load the vectors one after another // most of this logic should move within storage class - storage_.vectors_lookup_.resize(matrix_rows); + storage_.reserve(matrix_rows); + byte_t vector[matrix_cols]; for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) { - byte_t* vector = vectors_tape_allocator_.allocate(matrix_cols); if (!input(vector, matrix_cols)) return result.failed("Failed to read vectors"); - storage_.vectors_lookup_[slot] = vector; + storage_.set_vector_at(slot, vector, matrix_cols, true, false); } } // assert(false && "serialization and deserialization of streams must be moved to storage"); @@ -1292,10 +1299,11 @@ class index_dense_gt { return result.failed("Index size and the number of vectors doesn't match"); // Address the vectors - storage_.vectors_lookup_.resize(matrix_rows); + storage_.reserve(matrix_rows); if (!config.exclude_vectors) for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) - storage_.vectors_lookup_[slot] = (byte_t*)vectors_buffer.data() + matrix_cols * slot; + storage_.set_vector_at(slot, vectors_buffer.data() + matrix_cols * slot, matrix_cols, // + false, false); reindex_keys_(); return result; @@ -1624,7 +1632,8 @@ class index_dense_gt { // else { // copy.vectors_lookup_.resize(vectors_lookup_.size()); // for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) - // copy.vectors_lookup_[slot] = copy.vectors_tape_allocator_.allocate(copy.metric_.bytes_per_vector()); + // copy.vectors_lookup_[slot] = + // copy.vectors_tape_allocator_.allocate(copy.metric_.bytes_per_vector()); // 
if (std::count(copy.vectors_lookup_.begin(), copy.vectors_lookup_.end(), nullptr)) // return result.failed("Out of memory!"); // for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) @@ -1695,267 +1704,6 @@ class index_dense_gt { return result; } - class values_proxy_t { - index_dense_gt const* index_; - - public: - values_proxy_t(index_dense_gt const& index) noexcept : index_(&index) {} - byte_t const* operator[](compressed_slot_t slot) const noexcept { - return index_->storage_.vectors_lookup_[slot]; - } - byte_t const* operator[](member_citerator_t it) const noexcept { - return index_->storage_.vectors_lookup_[get_slot(it)]; - } - }; - - /** - * @brief Performs compaction on the index, pruning links to removed entries. - * @param executor The executor parallel processing. Default ::dummy_executor_t single-threaded. - * @param progress The progress tracker instance to use. Default ::dummy_progress_t reports nothing. - * @return The ::compaction_result_t indicating the result of the compaction operation. - * `result.pruned_edges` will contain the number of edges that were removed. - * `result.error` will contain an error message if an error occurred during the compaction operation. 
- */ - template - compaction_result_t compact(executor_at&& executor = executor_at{}, progress_at&& progress = progress_at{}) { - compaction_result_t result; - - std::vector new_vectors_lookup(storage_.vectors_lookup_.size()); - vectors_tape_allocator_t new_vectors_allocator; - - auto track_slot_change = [&](vector_key_t, compressed_slot_t old_slot, compressed_slot_t new_slot) { - byte_t* new_vector = new_vectors_allocator.allocate(metric_.bytes_per_vector()); - byte_t* old_vector = storage_.vectors_lookup_[old_slot]; - std::memcpy(new_vector, old_vector, metric_.bytes_per_vector()); - new_vectors_lookup[new_slot] = new_vector; - }; - typed_->compact(values_proxy_t{*this}, metric_proxy_t{*this}, track_slot_change, - std::forward(executor), std::forward(progress)); - storage_.vectors_lookup_ = std::move(new_vectors_lookup); - vectors_tape_allocator_ = std::move(new_vectors_allocator); - return result; - } - - template < // - typename man_to_woman_at = dummy_key_to_key_mapping_t, // - typename woman_to_man_at = dummy_key_to_key_mapping_t, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // - > - join_result_t join( // - index_dense_gt const& women, // - index_join_config_t config = {}, // - man_to_woman_at&& man_to_woman = man_to_woman_at{}, // - woman_to_man_at&& woman_to_man = woman_to_man_at{}, // - executor_at&& executor = executor_at{}, // - progress_at&& progress = progress_at{}) const { - - index_dense_gt const& men = *this; - return unum::usearch::join( // - *men.typed_, *women.typed_, // - values_proxy_t{men}, values_proxy_t{women}, // - metric_proxy_t{men}, metric_proxy_t{women}, // - config, // - std::forward(man_to_woman), // - std::forward(woman_to_man), // - std::forward(executor), // - std::forward(progress)); - } - - struct clustering_result_t { - error_t error{}; - std::size_t clusters{}; - std::size_t visited_members{}; - std::size_t computed_distances{}; - - explicit operator bool() const noexcept { return 
!error; } - clustering_result_t failed(error_t message) noexcept { - error = std::move(message); - return std::move(*this); - } - }; - - /** - * @brief Implements clustering, classifying the given objects (vectors of member keys) - * into a given number of clusters. - * - * @param[in] queries_begin Iterator pointing to the first query. - * @param[in] queries_end Iterator pointing to the last query. - * @param[in] executor Thread-pool to execute the job in parallel. - * @param[in] progress Callback to report the execution progress. - * @param[in] config Configuration parameters for clustering. - * - * @param[out] cluster_keys Pointer to the array where the cluster keys will be exported. - * @param[out] cluster_distances Pointer to the array where the distances to those centroids will be exported. - */ - template < // - typename queries_iterator_at, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // - > - clustering_result_t cluster( // - queries_iterator_at queries_begin, // - queries_iterator_at queries_end, // - index_dense_clustering_config_t config, // - vector_key_t* cluster_keys, // - distance_t* cluster_distances, // - executor_at&& executor = executor_at{}, // - progress_at&& progress = progress_at{}) { - - std::size_t const queries_count = queries_end - queries_begin; - - // Find the first level (top -> down) that has enough nodes to exceed `config.min_clusters`. 
- std::size_t level = max_level(); - if (config.min_clusters) { - for (; level > 1; --level) { - if (stats(level).nodes > config.min_clusters) - break; - } - } else - level = 1, config.max_clusters = stats(1).nodes, config.min_clusters = 2; - - clustering_result_t result; - if (max_level() < 2) - return result.failed("Index too small to cluster!"); - - // A structure used to track the popularity of a specific cluster - struct cluster_t { - vector_key_t centroid; - vector_key_t merged_into; - std::size_t popularity; - byte_t* vector; - }; - - auto centroid_id = [](cluster_t const& a, cluster_t const& b) { return a.centroid < b.centroid; }; - auto higher_popularity = [](cluster_t const& a, cluster_t const& b) { return a.popularity > b.popularity; }; - - std::atomic visited_members(0); - std::atomic computed_distances(0); - std::atomic atomic_error{nullptr}; - - using dynamic_allocator_traits_t = std::allocator_traits; - using clusters_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt clusters(queries_count); - if (!clusters) - return result.failed("Out of memory!"); - - map_to_clusters: - // Concurrently perform search until a certain depth - executor.dynamic(queries_count, [&](std::size_t thread_idx, std::size_t query_idx) { - auto result = cluster(queries_begin[query_idx], level, thread_idx); - if (!result) { - atomic_error = result.error.release(); - return false; - } - - cluster_keys[query_idx] = result.cluster.member.key; - cluster_distances[query_idx] = result.cluster.distance; - - // Export in case we need to refine afterwards - clusters[query_idx].centroid = result.cluster.member.key; - clusters[query_idx].vector = storage_.vectors_lookup_[result.cluster.member.slot]; - clusters[query_idx].merged_into = free_key(); - clusters[query_idx].popularity = 1; - - visited_members += result.visited_members; - computed_distances += result.computed_distances; - return true; - }); - - if (atomic_error) - return 
result.failed(atomic_error.load()); - - // Now once we have identified the closest clusters, - // we can try reducing their quantity, refining - std::sort(clusters.begin(), clusters.end(), centroid_id); - - // Transform into run-length encoding, computing the number of unique clusters - std::size_t unique_clusters = 0; - { - std::size_t last_idx = 0; - for (std::size_t current_idx = 1; current_idx != clusters.size(); ++current_idx) { - if (clusters[last_idx].centroid == clusters[current_idx].centroid) { - clusters[last_idx].popularity++; - } else { - last_idx++; - clusters[last_idx] = clusters[current_idx]; - } - } - unique_clusters = last_idx + 1; - } - - // In some cases the queries may be co-located, all mapping into the same cluster on that - // level. In that case we refine the granularity and dive deeper into clusters: - if (unique_clusters < config.min_clusters && level > 1) { - level--; - goto map_to_clusters; - } - - std::sort(clusters.data(), clusters.data() + unique_clusters, higher_popularity); - - // If clusters are too numerous, merge the ones that are too close to each other. - std::size_t merge_cycles = 0; - merge_nearby_clusters: - if (unique_clusters > config.max_clusters) { - - cluster_t& merge_source = clusters[unique_clusters - 1]; - std::size_t merge_target_idx = 0; - distance_t merge_distance = std::numeric_limits::max(); - - for (std::size_t candidate_idx = 0; candidate_idx + 1 < unique_clusters; ++candidate_idx) { - distance_t distance = metric_(merge_source.vector, clusters[candidate_idx].vector); - if (distance < merge_distance) { - merge_distance = distance; - merge_target_idx = candidate_idx; - } - } - - merge_source.merged_into = clusters[merge_target_idx].centroid; - clusters[merge_target_idx].popularity += exchange(merge_source.popularity, 0); - - // The target object may have to be swapped a few times to get to optimal position. 
- while (merge_target_idx && - clusters[merge_target_idx - 1].popularity < clusters[merge_target_idx].popularity) - std::swap(clusters[merge_target_idx - 1], clusters[merge_target_idx]), --merge_target_idx; - - unique_clusters--; - merge_cycles++; - goto merge_nearby_clusters; - } - - // Replace evicted clusters - if (merge_cycles) { - // Sort dropped clusters by name to accelerate future lookups - auto clusters_end = clusters.data() + config.max_clusters + merge_cycles; - std::sort(clusters.data(), clusters_end, centroid_id); - - executor.dynamic(queries_count, [&](std::size_t thread_idx, std::size_t query_idx) { - vector_key_t& cluster_key = cluster_keys[query_idx]; - distance_t& cluster_distance = cluster_distances[query_idx]; - - // Recursively trace replacements of that cluster - while (true) { - // To avoid implementing heterogeneous comparisons, lets wrap the `cluster_key` - cluster_t updated_cluster; - updated_cluster.centroid = cluster_key; - updated_cluster = *std::lower_bound(clusters.data(), clusters_end, updated_cluster, centroid_id); - if (updated_cluster.merged_into == free_key()) - break; - cluster_key = updated_cluster.merged_into; - } - - cluster_distance = distance_between(cluster_key, queries_begin[query_idx], thread_idx).mean; - return true; - }); - } - - result.computed_distances = computed_distances; - result.visited_members = visited_members; - - (void)progress; - return result; - } - private: struct thread_lock_t { index_dense_gt const& parent; @@ -2016,13 +1764,7 @@ class index_dense_gt { auto on_success = [&](member_ref_t member) { unique_lock_t slot_lock(slot_lookup_mutex_); slot_lookup_.try_emplace(key_and_slot_t{key, static_cast(member.slot)}); - if (copy_vector) { - if (!reuse_node) - storage_.vectors_lookup_[member.slot] = - vectors_tape_allocator_.allocate(metric_.bytes_per_vector()); - std::memcpy(storage_.vectors_lookup_[member.slot], vector_data, metric_.bytes_per_vector()); - } else - storage_.vectors_lookup_[member.slot] = 
(byte_t*)vector_data; + storage_.set_vector_at(member.slot, vector_data, metric_.bytes_per_vector(), copy_vector, reuse_node); }; index_update_config_t update_config; @@ -2111,7 +1853,7 @@ class index_dense_gt { while (key_range.first != key_range.second) { key_and_slot_t key_and_slot = *key_range.first; - byte_t const* a_vector = storage_.vectors_lookup_[key_and_slot.slot]; + byte_t const* a_vector = storage_.get_vector_at(key_and_slot.slot); byte_t const* b_vector = vector_data; distance_t a_b_distance = metric_(a_vector, b_vector); @@ -2172,7 +1914,7 @@ class index_dense_gt { slot = (*it).slot; } // Export the entry - byte_t const* punned_vector = reinterpret_cast(storage_.vectors_lookup_[slot]); + byte_t const* punned_vector = reinterpret_cast(storage_.get_vector_at(slot)); bool casted = cast(punned_vector, dimensions(), (byte_t*)reconstructed); if (!casted) std::memcpy(reconstructed, punned_vector, metric_.bytes_per_vector()); @@ -2185,7 +1927,7 @@ class index_dense_gt { begin != equal_range_pair.second && count_exported != vectors_limit; ++begin, ++count_exported) { // compressed_slot_t slot = (*begin).slot; - byte_t const* punned_vector = reinterpret_cast(storage_.vectors_lookup_[slot]); + byte_t const* punned_vector = reinterpret_cast(storage_.get_vector_at(slot)); byte_t* reconstructed_vector = (byte_t*)reconstructed + metric_.bytes_per_vector() * count_exported; bool casted = cast(punned_vector, dimensions(), reconstructed_vector); if (!casted) From d303be17c83794b6578f9b8aa4161000121315f7 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Fri, 5 Jan 2024 06:28:24 +0000 Subject: [PATCH 34/80] Move storage providers to separate files --- include/usearch/dummy_stor.hpp | 141 ++++++++++++++++++ include/usearch/index_dense.hpp | 254 +------------------------------- include/usearch/storage.hpp | 147 ++++++++++++++++++ 3 files changed, 291 insertions(+), 251 deletions(-) create mode 100644 include/usearch/dummy_stor.hpp create mode 100644 
include/usearch/storage.hpp diff --git a/include/usearch/dummy_stor.hpp b/include/usearch/dummy_stor.hpp new file mode 100644 index 000000000..a1ce19fd5 --- /dev/null +++ b/include/usearch/dummy_stor.hpp @@ -0,0 +1,141 @@ + +#pragma once + +#include +#include + +namespace unum { +namespace usearch { + +/** + * @brief Storage abstraction for HNSW graph and associated vector data + * + * @tparam key_at + * The type of primary objects stored in the index. + * The values, to which those map, are not managed by the same index structure. + * + * @tparam compressed_slot_at + * The smallest unsigned integer type to address indexed elements. + * It is used internally to maximize space-efficiency and is generally + * up-casted to @b `std::size_t` in public interfaces. + * Can be a built-in @b `uint32_t`, `uint64_t`, or our custom @b `uint40_t`. + * Which makes the most sense for 4B+ entry indexes. + * + * @tparam tape_allocator_at + * Potentially different memory allocator for primary allocations of nodes and vectors. + * It would never `deallocate` separate entries, and would only free all the space at once. + * The allocated buffers may be uninitialized. 
+ * + **/ +template > // +class dummy_storage_single_threaded { + using node_t = node_at; + using nodes_t = std::vector; + + nodes_t nodes_{}; + precomputed_constants_t pre_{}; + tape_allocator_at tape_allocator_{}; + using tape_allocator_traits_t = std::allocator_traits; + static_assert( // + sizeof(typename tape_allocator_traits_t::value_type) == 1, // + "Tape allocator must allocate separate addressable bytes"); + + public: + dummy_storage_single_threaded(index_config_t config, tape_allocator_at tape_allocator = {}) + : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} + + inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } + + inline size_t node_size_bytes(std::size_t idx) const noexcept { return get_node_at(idx).node_size_bytes(pre_); } + + // exported for client-side lock-declaration + // alternatively, could just use auto in client side + // ideally, there would be a way to make this "void", but I could not make it work + // as client side ends up declaring a void variable + // the downside of passing a primitive like "int" here is the "unused variable" compiler warning + // for the dummy lock guard variable. + struct dummy_lock { + // destructor necessary to avoid "unused variable warning" + // will this get properly optimized away? + ~dummy_lock() {} + }; + using lock_type = dummy_lock; + + bool reserve(std::size_t count) { + if (count < nodes_.size()) + return true; + nodes_.resize(count); + return true; + } + + void clear() { + if (nodes_.data()) + std::fill(nodes_.begin(), nodes_.end(), node_t{}); + } + void reset() { + nodes_.clear(); + nodes_.shrink_to_fit(); + } + + using span_bytes_t = span_gt; + + span_bytes_t node_malloc(level_t level) noexcept { + std::size_t node_size = node_t::node_size_bytes(pre_, level); + byte_t* data = (byte_t*)tape_allocator_.allocate(node_size); + return data ? 
span_bytes_t{data, node_size} : span_bytes_t{}; + } + void node_free(size_t slot, node_t node) { + if (!has_reset()) { + tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); + } else { + tape_allocator_.deallocate(nullptr, 0); + } + nodes_[slot] = node_t{}; + } + node_t node_make(key_at key, level_t level) noexcept { + span_bytes_t node_bytes = node_malloc(level); + if (!node_bytes) + return {}; + + std::memset(node_bytes.data(), 0, node_bytes.size()); + node_t node{(byte_t*)node_bytes.data()}; + node.key(key); + node.level(level); + return node; + } + + // node_t node_make_copy_(span_bytes_t old_bytes) noexcept { + // byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); + // if (!data) + // return {}; + // std::memcpy(data, old_bytes.data(), old_bytes.size()); + // return node_t{data}; + // } + + void node_store(size_t slot, node_t node) noexcept { + auto count = nodes_.size(); + nodes_[slot] = node; + } + inline size_t size() { return nodes_.size(); } + tape_allocator_at const& node_allocator() const noexcept { return tape_allocator_; } + // dummy lock just to satisfy the interface + constexpr inline lock_type node_lock(std::size_t) const noexcept { return dummy_lock{}; } +}; + +template class storage_v1 { + using vector_key_t = key_at; + using node_t = node_at; + using dynamic_allocator_t = aligned_allocator_gt; + // using nodes_mutexes_t = bitset_gt; + using nodes_mutexes_t = bitset_gt<>; + using nodes_t = std::vector; + + index_config_t config_{}; + nodes_t nodes_{}; + /// @brief Mutex, that limits concurrent access to `nodes_`. 
+ mutable nodes_mutexes_t nodes_mutexes_{}; +}; + +} // namespace usearch +} // namespace unum diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 552cc956d..479aff425 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -10,6 +10,7 @@ #include #include +#include #if defined(USEARCH_DEFINED_CPP17) #include // `std::shared_mutex` @@ -280,257 +281,6 @@ inline index_dense_metadata_result_t index_dense_metadata_from_buffer(memory_map return result.failed("Not a dense USearch index!"); } - -/** - * @brief Storage abstraction for HNSW graph and associated vector data - * - * @tparam key_at - * The type of primary objects stored in the index. - * The values, to which those map, are not managed by the same index structure. - * - * @tparam compressed_slot_at - * The smallest unsigned integer type to address indexed elements. - * It is used internally to maximize space-efficiency and is generally - * up-casted to @b `std::size_t` in public interfaces. - * Can be a built-in @b `uint32_t`, `uint64_t`, or our custom @b `uint40_t`. - * Which makes the most sense for 4B+ entry indexes. - * - * @tparam tape_allocator_at - * Potentially different memory allocator for primary allocations of nodes and vectors. - * It would never `deallocate` separate entries, and would only free all the space at once. - * The allocated buffers may be uninitialized. 
- * - **/ -template > // -class dummy_storage_single_threaded { - using node_t = node_at; - using nodes_t = std::vector; - - nodes_t nodes_{}; - precomputed_constants_t pre_{}; - tape_allocator_at tape_allocator_{}; - using tape_allocator_traits_t = std::allocator_traits; - static_assert( // - sizeof(typename tape_allocator_traits_t::value_type) == 1, // - "Tape allocator must allocate separate addressable bytes"); - - public: - dummy_storage_single_threaded(index_config_t config, tape_allocator_at tape_allocator = {}) - : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} - - inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } - - inline size_t node_size_bytes(std::size_t idx) const noexcept { return get_node_at(idx).node_size_bytes(pre_); } - - // exported for client-side lock-declaration - // alternatively, could just use auto in client side - // ideally, there would be a way to make this "void", but I could not make it work - // as client side ends up declaring a void variable - // the downside of passing a primitive like "int" here is the "unused variable" compiler warning - // for the dummy lock guard variable. - struct dummy_lock { - // destructor necessary to avoid "unused variable warning" - // will this get properly optimized away? - ~dummy_lock() {} - }; - using lock_type = dummy_lock; - - bool reserve(std::size_t count) { - if (count < nodes_.size()) - return true; - nodes_.resize(count); - return true; - } - - void clear() { - if (nodes_.data()) - std::fill(nodes_.begin(), nodes_.end(), node_t{}); - } - void reset() { - nodes_.clear(); - nodes_.shrink_to_fit(); - } - - using span_bytes_t = span_gt; - - span_bytes_t node_malloc(level_t level) noexcept { - std::size_t node_size = node_t::node_size_bytes(pre_, level); - byte_t* data = (byte_t*)tape_allocator_.allocate(node_size); - return data ? 
span_bytes_t{data, node_size} : span_bytes_t{}; - } - void node_free(size_t slot, node_t node) { - if (!has_reset()) { - tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); - } else { - tape_allocator_.deallocate(nullptr, 0); - } - nodes_[slot] = node_t{}; - } - node_t node_make(key_at key, level_t level) noexcept { - span_bytes_t node_bytes = node_malloc(level); - if (!node_bytes) - return {}; - - std::memset(node_bytes.data(), 0, node_bytes.size()); - node_t node{(byte_t*)node_bytes.data()}; - node.key(key); - node.level(level); - return node; - } - - // node_t node_make_copy_(span_bytes_t old_bytes) noexcept { - // byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); - // if (!data) - // return {}; - // std::memcpy(data, old_bytes.data(), old_bytes.size()); - // return node_t{data}; - // } - - void node_store(size_t slot, node_t node) noexcept { - auto count = nodes_.size(); - nodes_[slot] = node; - } - inline size_t size() { return nodes_.size(); } - tape_allocator_at const& node_allocator() const noexcept { return tape_allocator_; } - // dummy lock just to satisfy the interface - constexpr inline lock_type node_lock(std::size_t) const noexcept { return dummy_lock{}; } -}; - -template class storage_v1 { - using vector_key_t = key_at; - using node_t = node_at; - using dynamic_allocator_t = aligned_allocator_gt; - // using nodes_mutexes_t = bitset_gt; - using nodes_mutexes_t = bitset_gt<>; - using nodes_t = std::vector; - - index_config_t config_{}; - nodes_t nodes_{}; - /// @brief Mutex, that limits concurrent access to `nodes_`. - mutable nodes_mutexes_t nodes_mutexes_{}; -}; - -template , // - typename vectors_allocator_at = tape_allocator_at> // -class storage_v2 { - using node_t = node_at; - using nodes_t = std::vector; - using vectors_t = std::vector; - using nodes_mutexes_t = bitset_gt<>; - - nodes_t nodes_{}; - - /// @brief For every managed `compressed_slot_t` stores a pointer to the allocated vector copy. 
- vectors_t vectors_lookup_{}; - /// @brief Mutex, that limits concurrent access to `nodes_`. - mutable nodes_mutexes_t nodes_mutexes_{}; - precomputed_constants_t pre_{}; - tape_allocator_at tape_allocator_{}; - /// @brief Allocator for the copied vectors, aligned to widest double-precision scalars. - vectors_allocator_at vectors_allocator_{}; - using tape_allocator_traits_t = std::allocator_traits; - static_assert( // - sizeof(typename tape_allocator_traits_t::value_type) == 1, // - "Tape allocator must allocate separate addressable bytes"); - - struct node_lock_t { - nodes_mutexes_t& mutexes; - std::size_t slot; - inline ~node_lock_t() noexcept { mutexes.atomic_reset(slot); } - }; - - public: - storage_v2(index_config_t config, tape_allocator_at tape_allocator = {}) - : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} - - inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } - // todo:: most of the time this is called for const* vector, maybe add a separate interface for const? 
- inline byte_t* get_vector_at(std::size_t idx) const noexcept { return vectors_lookup_[idx]; } - inline void set_vector_at(std::size_t idx, const byte_t* vector_data, std::size_t bytes_per_vector, - bool copy_vector, bool reuse_node) { - usearch_assert_m(!(reuse_node && !copy_vector), - "Cannot reuse node when not copying as there is no allocation needed"); - if (copy_vector) { - if (!reuse_node) - vectors_lookup_[idx] = vectors_allocator_.allocate(bytes_per_vector); - std::memcpy(vectors_lookup_[idx], vector_data, bytes_per_vector); - } else - vectors_lookup_[idx] = (byte_t*)vector_data; - } - - inline size_t node_size_bytes(std::size_t idx) const noexcept { return get_node_at(idx).node_size_bytes(pre_); } - - using lock_type = node_lock_t; - - bool reserve(std::size_t count) { - if (count < nodes_.size()) - return true; - nodes_mutexes_t new_mutexes = nodes_mutexes_t(count); - nodes_mutexes_ = std::move(new_mutexes); - nodes_.resize(count); - vectors_lookup_.resize(count); - return true; - } - - void clear() { - if (nodes_.data()) { - std::fill(nodes_.begin(), nodes_.end(), node_t{}); - } - } - void reset() { - nodes_mutexes_ = {}; - nodes_.clear(); - nodes_.shrink_to_fit(); - } - - using span_bytes_t = span_gt; - - span_bytes_t node_malloc(level_t level) noexcept { - std::size_t node_size = node_t::node_size_bytes(pre_, level); - byte_t* data = (byte_t*)tape_allocator_.allocate(node_size); - return data ? 
span_bytes_t{data, node_size} : span_bytes_t{}; - } - void node_free(size_t slot, node_t node) { - if (!has_reset()) { - tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); - } else { - tape_allocator_.deallocate(nullptr, 0); - } - nodes_[slot] = node_t{}; - } - node_t node_make(key_at key, level_t level) noexcept { - span_bytes_t node_bytes = node_malloc(level); - if (!node_bytes) - return {}; - - std::memset(node_bytes.data(), 0, node_bytes.size()); - node_t node{(byte_t*)node_bytes.data()}; - node.key(key); - node.level(level); - return node; - } - - // node_t node_make_copy_(span_bytes_t old_bytes) noexcept { - // byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); - // if (!data) - // return {}; - // std::memcpy(data, old_bytes.data(), old_bytes.size()); - // return node_t{data}; - // } - - void node_store(size_t slot, node_t node) noexcept { nodes_[slot] = node; } - inline size_t size() { return nodes_.size(); } - tape_allocator_at const& node_allocator() const noexcept { return tape_allocator_; } - // dummy lock just to satisfy the interface - constexpr inline lock_type node_lock(std::size_t slot) const noexcept { - while (nodes_mutexes_.atomic_set(slot)) - ; - return {nodes_mutexes_, slot}; - } -}; - /** * @brief Oversimplified type-punned index for equidimensional vectors * with automatic @b down-casting, hardware-specific @b SIMD metrics, @@ -851,6 +601,8 @@ class index_dense_gt { typed_->memory_usage(0) + // storage_.node_allocator().total_wasted() + // storage_.node_allocator().total_reserved(); // + + // vectors_tape_allocator_.total_allocated(); } static constexpr std::size_t any_thread() { return std::numeric_limits::max(); } diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp new file mode 100644 index 000000000..338bf468c --- /dev/null +++ b/include/usearch/storage.hpp @@ -0,0 +1,147 @@ +#pragma once + +#include +#include + +namespace unum { +namespace usearch { + +template , // + typename 
vectors_allocator_at = tape_allocator_at> // +class storage_v2 { + using node_t = node_at; + using nodes_t = std::vector; + using vectors_t = std::vector; + using nodes_mutexes_t = bitset_gt<>; + + nodes_t nodes_{}; + + /// @brief For every managed `compressed_slot_t` stores a pointer to the allocated vector copy. + vectors_t vectors_lookup_{}; + /// @brief Mutex, that limits concurrent access to `nodes_`. + mutable nodes_mutexes_t nodes_mutexes_{}; + precomputed_constants_t pre_{}; + tape_allocator_at tape_allocator_{}; + /// @brief Allocator for the copied vectors, aligned to widest double-precision scalars. + vectors_allocator_at vectors_allocator_{}; + using tape_allocator_traits_t = std::allocator_traits; + static_assert( // + sizeof(typename tape_allocator_traits_t::value_type) == 1, // + "Tape allocator must allocate separate addressable bytes"); + + struct node_lock_t { + nodes_mutexes_t& mutexes; + std::size_t slot; + inline ~node_lock_t() noexcept { mutexes.atomic_reset(slot); } + }; + + public: + storage_v2(index_config_t config, tape_allocator_at tape_allocator = {}) + : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} + + inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } + // todo:: most of the time this is called for const* vector, maybe add a separate interface for const? 
+ inline byte_t* get_vector_at(std::size_t idx) const noexcept { return vectors_lookup_[idx]; } + inline void set_vector_at(std::size_t idx, const byte_t* vector_data, std::size_t bytes_per_vector, + bool copy_vector, bool reuse_node) { + usearch_assert_m(!(reuse_node && !copy_vector), + "Cannot reuse node when not copying as there is no allocation needed"); + if (copy_vector) { + if (!reuse_node) + vectors_lookup_[idx] = vectors_allocator_.allocate(bytes_per_vector); + std::memcpy(vectors_lookup_[idx], vector_data, bytes_per_vector); + } else + vectors_lookup_[idx] = (byte_t*)vector_data; + } + + inline size_t node_size_bytes(std::size_t idx) const noexcept { return get_node_at(idx).node_size_bytes(pre_); } + + using lock_type = node_lock_t; + + bool reserve(std::size_t count) { + if (count < nodes_.size()) + return true; + nodes_mutexes_t new_mutexes = nodes_mutexes_t(count); + nodes_mutexes_ = std::move(new_mutexes); + nodes_.resize(count); + vectors_lookup_.resize(count); + return true; + } + + /* + void clear() noexcept { + if (!has_reset()) { + std::size_t n = nodes_count_; + for (std::size_t i = 0; i != n; ++i) + node_free_(i); + } else + tape_allocator_.deallocate(nullptr, 0); + nodes_count_ = 0; + max_level_ = -1; + entry_slot_ = 0u; + } + ****/ + void clear() { + if (nodes_.data()) { + std::fill(nodes_.begin(), nodes_.end(), node_t{}); + // std::fill(vectors_lookup_.begin(), vectors_lookup_.end(), nullptr); + } + } + void reset() { + nodes_mutexes_ = {}; + nodes_.clear(); + nodes_.shrink_to_fit(); + + // vectors_lookup_.clear(); + // vectors_lookup_.shrink_to_fit(); + } + + using span_bytes_t = span_gt; + + span_bytes_t node_malloc(level_t level) noexcept { + std::size_t node_size = node_t::node_size_bytes(pre_, level); + byte_t* data = (byte_t*)tape_allocator_.allocate(node_size); + return data ? 
span_bytes_t{data, node_size} : span_bytes_t{}; + } + void node_free(size_t slot, node_t node) { + if (!has_reset()) { + tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); + } else { + tape_allocator_.deallocate(nullptr, 0); + } + nodes_[slot] = node_t{}; + } + node_t node_make(key_at key, level_t level) noexcept { + span_bytes_t node_bytes = node_malloc(level); + if (!node_bytes) + return {}; + + std::memset(node_bytes.data(), 0, node_bytes.size()); + node_t node{(byte_t*)node_bytes.data()}; + node.key(key); + node.level(level); + return node; + } + + // node_t node_make_copy_(span_bytes_t old_bytes) noexcept { + // byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); + // if (!data) + // return {}; + // std::memcpy(data, old_bytes.data(), old_bytes.size()); + // return node_t{data}; + // } + + void node_store(size_t slot, node_t node) noexcept { nodes_[slot] = node; } + inline size_t size() { return nodes_.size(); } + tape_allocator_at const& node_allocator() const noexcept { return tape_allocator_; } + // dummy lock just to satisfy the interface + constexpr inline lock_type node_lock(std::size_t slot) const noexcept { + while (nodes_mutexes_.atomic_set(slot)) + ; + return {nodes_mutexes_, slot}; + } +}; + +} // namespace usearch +} // namespace unum From 0dcf79148217cee4a56760c2c147643c732a2470 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Fri, 5 Jan 2024 22:10:14 +0000 Subject: [PATCH 35/80] Finish storage interface for loading from file --- include/usearch/index.hpp | 49 +++------- include/usearch/index_dense.hpp | 43 +-------- include/usearch/storage.hpp | 158 +++++++++++++++++++++++++++++++- 3 files changed, 171 insertions(+), 79 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 4a11fb481..f90292c8e 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2784,61 +2784,30 @@ class index_gt { /** * @brief Symmetric to `save_from_stream`, pulls data from a stream. 
+ * Note: assumes storage is properly reset and ready for loading the hnsw graph */ template serialization_result_t load_from_stream(input_callback_at&& input, progress_at&& progress = {}) noexcept { serialization_result_t result; - // Remove previously stored objects - reset(); - // Pull basic metadata index_serialized_header_t header; - if (!input(&header, sizeof(header))) - return result.failed("Failed to pull the header from the stream"); - - // We are loading an empty index, no more work to do - if (!header.size) { - reset(); - return result; - } - - // Allocate some dynamic memory to read all the levels - using levels_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt levels(header.size); - if (!levels) - return result.failed("Out of memory"); - if (!input(levels, header.size * sizeof(level_t))) - return result.failed("Failed to pull nodes levels from the stream"); - + storage_.load_nodes_from_stream(input, header, progress); // Submit metadata config_.connectivity = header.connectivity; config_.connectivity_base = header.connectivity_base; pre_ = precompute_(config_); + nodes_count_ = header.size; + max_level_ = static_cast(header.max_level); + entry_slot_ = static_cast(header.entry_slot); + // allocate dynamic contexts for queries (storage has already been allocated for the deserialization process) index_limits_t limits; limits.members = header.size; if (!reserve(limits)) { reset(); return result.failed("Out of memory"); } - nodes_count_ = header.size; - max_level_ = static_cast(header.max_level); - entry_slot_ = static_cast(header.entry_slot); - - // Load the nodes - for (std::size_t i = 0; i != header.size; ++i) { - span_bytes_t node_bytes = storage_.node_malloc(levels[i]); - if (!input(node_bytes.data(), node_bytes.size())) { - reset(); - return result.failed("Failed to pull nodes from the stream"); - } - // nodes_[i] = node_t{node_bytes.data()}; - storage_.node_store(i, node_t{node_bytes.data()}); - - if (!progress(i + 
1, header.size)) - return result.failed("Terminated by user"); - } return {}; } @@ -2911,6 +2880,9 @@ class index_gt { if (!io_result) return io_result; + // Remove previously stored objects + reset(); + serialization_result_t stream_result = load_from_stream( [&](void* buffer, std::size_t length) { io_result = file.read(buffer, length); @@ -2936,6 +2908,9 @@ class index_gt { if (!io_result) return io_result; + // Remove previously stored objects + reset(); + serialization_result_t stream_result = load_from_stream( [&](void* buffer, std::size_t length) { if (offset + length > file.size()) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 479aff425..65d1ffe72 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -126,11 +126,6 @@ struct index_dense_clustering_config_t { } mode = merge_smallest_k; }; -struct index_dense_serialization_config_t { - bool exclude_vectors = false; - bool use_64_bit_dimensions = false; -}; - struct index_dense_copy_config_t : public index_copy_config_t { bool force_vector_copy = true; @@ -903,43 +898,11 @@ class index_dense_gt { // Infer the new index size serialization_result_t result; - std::uint64_t matrix_rows = 0; - std::uint64_t matrix_cols = 0; - - // We may not want to load the vectors from the same file, or allow attaching them afterwards - if (!config.exclude_vectors) { - // Save the matrix size - if (!config.use_64_bit_dimensions) { - std::uint32_t dimensions[2]; - if (!input(&dimensions, sizeof(dimensions))) - return result.failed("Failed to read 32-bit dimensions of the matrix"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } else { - std::uint64_t dimensions[2]; - if (!input(&dimensions, sizeof(dimensions))) - return result.failed("Failed to read 64-bit dimensions of the matrix"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } - // Load the vectors one after another - // most of this logic should move within storage class 
- storage_.reserve(matrix_rows); - byte_t vector[matrix_cols]; - for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) { - if (!input(vector, matrix_cols)) - return result.failed("Failed to read vectors"); - storage_.set_vector_at(slot, vector, matrix_cols, true, false); - } - } - // assert(false && "serialization and deserialization of streams must be moved to storage"); + index_dense_head_buffer_t buffer; + storage_.load_vectors_from_stream(input, buffer, config); // Load metadata and choose the right metric { - index_dense_head_buffer_t buffer; - if (!input(buffer, sizeof(buffer))) - return result.failed("Failed to read the index "); - index_dense_head_t head{buffer}; if (std::memcmp(buffer, default_magic(), std::strlen(default_magic())) != 0) return result.failed("Magic header mismatch - the file isn't an index"); @@ -962,8 +925,6 @@ class index_dense_gt { result = typed_->load_from_stream(std::forward(input), std::forward(progress)); if (!result) return result; - if (typed_->size() != static_cast(matrix_rows)) - return result.failed("Index size and the number of vectors doesn't match"); reindex_keys_(); return result; diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index 338bf468c..6a594c438 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -9,7 +9,49 @@ namespace usearch { template , // typename vectors_allocator_at = tape_allocator_at> // -class storage_v2 { +class storage_interface { + public: + using node_t = node_at; + // storage_interface(index_config_t conig, tape_allocator_at allocator = {}); + + struct lock_type; + + // q:: can I enforce this interface function in inherited storages somehow? 
+ constexpr inline lock_type node_lock(std::size_t slot) const noexcept; + + virtual inline node_t get_node_at(std::size_t idx) const noexcept = 0; + virtual inline std::size_t node_size_bytes(std::size_t idx) const noexcept = 0; + virtual inline byte_t* get_vector_at(std::size_t idx) const noexcept = 0; + + inline void set_at(std::size_t idx, node_t node, byte_t* vector_data, std::size_t vector_size, bool reuse_node); + + // virtual void load_vectors_from_stream() = 0; + // virtual void load_nodes_from_stream() = 0; + + void store_vectors_to_stream(); + void store_nodes_to_stream(); + + std::size_t size(); + bool reserve(std::size_t count); + void clear(); + void reset(); + + std::size_t memory_usage(); +}; + +struct index_dense_serialization_config_t { + // We may not want to fetch the vectors from the same file, or allow attaching them afterwards + bool exclude_vectors = false; + bool use_64_bit_dimensions = false; +}; +using index_dense_head_buffer_t = byte_t[64]; +static_assert(sizeof(index_dense_head_buffer_t) == 64, "File header should be exactly 64 bytes"); +using serialization_config_t = index_dense_serialization_config_t; + +template , // + typename vectors_allocator_at = tape_allocator_at> // +class storage_v2 : public storage_interface { using node_t = node_at; using nodes_t = std::vector; using vectors_t = std::vector; @@ -25,6 +67,10 @@ class storage_v2 { tape_allocator_at tape_allocator_{}; /// @brief Allocator for the copied vectors, aligned to widest double-precision scalars. vectors_allocator_at vectors_allocator_{}; + + std::uint64_t matrix_rows_ = 0; + std::uint64_t matrix_cols_ = 0; + bool vectors_loaded_{}; using tape_allocator_traits_t = std::allocator_traits; static_assert( // sizeof(typename tape_allocator_traits_t::value_type) == 1, // @@ -141,6 +187,116 @@ class storage_v2 { ; return {nodes_mutexes_, slot}; } + +#pragma region Storage Serialization and Deserialization + + /** + * @brief Parses the index from file to RAM. 
+ * @param[in] path The path to the file. + * @param[in] config Configuration parameters for imports. + * @return Outcome descriptor explicitly convertible to boolean. + */ + template + serialization_result_t load_vectors_from_stream(input_callback_at& input, // + vectors_metadata_at& metadata_buffer, + serialization_config_t config = {}) { + + reset(); + + // Infer the new index size + serialization_result_t result; + std::uint64_t matrix_rows = 0; + std::uint64_t matrix_cols = 0; + + // We may not want to load the vectors from the same file, or allow attaching them afterwards + if (!config.exclude_vectors) { + // Save the matrix size + if (!config.use_64_bit_dimensions) { + std::uint32_t dimensions[2]; + if (!input(&dimensions, sizeof(dimensions))) + return result.failed("Failed to read 32-bit dimensions of the matrix"); + matrix_rows = dimensions[0]; + matrix_cols = dimensions[1]; + } else { + std::uint64_t dimensions[2]; + if (!input(&dimensions, sizeof(dimensions))) + return result.failed("Failed to read 64-bit dimensions of the matrix"); + matrix_rows = dimensions[0]; + matrix_cols = dimensions[1]; + } + // Load the vectors one after another + // most of this logic should move within storage class + reserve(matrix_rows); + for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) { + byte_t* vector = vectors_allocator_.allocate(matrix_cols); + if (!input(vector, matrix_cols)) + return result.failed("Failed to read vectors"); + vectors_lookup_[slot] = vector; + } + vectors_loaded_ = true; + } + matrix_rows_ = matrix_rows; + matrix_cols_ = matrix_cols; + + if (!input(metadata_buffer, sizeof(metadata_buffer))) + return result.failed("Failed to read the index vector metadata"); + + return result; + } + + /** + * @brief Symmetric to `save_from_stream`, pulls data from a stream. 
+ */ + template + serialization_result_t load_nodes_from_stream(input_callback_at& input, index_serialized_header_t& header, + progress_at&& progress = {}) noexcept { + + using dynamic_allocator_traits_t = std::allocator_traits; + serialization_result_t result; + + // Pull basic metadata directly into the return paramter + if (!input(&header, sizeof(header))) + return result.failed("Failed to pull the header from the stream"); + + // We are loading an empty index, no more work to do + if (!header.size) { + reset(); + return result; + } + + // Allocate some dynamic memory to read all the levels + // using levels_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + // // todo:: fix the allocator above + buffer_gt levels(header.size); + if (!levels) + return result.failed("Out of memory"); + if (!input(levels, header.size * sizeof(level_t))) + return result.failed("Failed to pull nodes levels from the stream"); + + if (!reserve(header.size)) { + reset(); + return result.failed("Out of memory"); + } + + // Load the nodes + for (std::size_t i = 0; i != header.size; ++i) { + span_bytes_t node_bytes = node_malloc(levels[i]); + if (!input(node_bytes.data(), node_bytes.size())) { + reset(); + return result.failed("Failed to pull nodes from the stream"); + } + node_store(i, node_t{node_bytes.data()}); + + if (!progress(i + 1, header.size)) + return result.failed("Terminated by user"); + } + + if (vectors_loaded_ && header.size != static_cast(matrix_rows_)) + return result.failed("Index size and the number of vectors doesn't match"); + return {}; + } + +#pragma endregion }; } // namespace usearch From 854a11c7561ec9909d38cc10e9be03321583c161 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Fri, 5 Jan 2024 23:05:22 +0000 Subject: [PATCH 36/80] Fix dynamic allocation issue in storage --- include/usearch/index_dense.hpp | 3 ++- include/usearch/storage.hpp | 34 +++++++++++++++++++++------------ 2 files changed, 24 insertions(+), 13 deletions(-) diff 
--git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 65d1ffe72..2056f35aa 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -316,7 +316,8 @@ class index_dense_gt { private: using vectors_tape_allocator_t = memory_mapping_allocator_gt<8>; - using storage_t = storage_v2; + using storage_t = + storage_v2; /// @brief Schema: input buffer, bytes in input buffer, output buffer. using cast_t = std::function; /// @brief Punned index. diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index 6a594c438..5104e35a6 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -6,9 +6,10 @@ namespace unum { namespace usearch { -template , // - typename vectors_allocator_at = tape_allocator_at> // +template // class storage_interface { public: using node_t = node_at; @@ -48,14 +49,24 @@ using index_dense_head_buffer_t = byte_t[64]; static_assert(sizeof(index_dense_head_buffer_t) == 64, "File header should be exactly 64 bytes"); using serialization_config_t = index_dense_serialization_config_t; -template , // - typename vectors_allocator_at = tape_allocator_at> // -class storage_v2 : public storage_interface { +template , // + typename vectors_allocator_at = tape_allocator_at, // + typename dynamic_allocator_at = std::allocator> // +class storage_v2 : public storage_interface { using node_t = node_at; + // todo:: ask-Ashot: why can I not use dynamic_allocator_at in std::vector ? 
+ // Getting the following error: + // /usr/include/c++/10/bits/stl_vector.h:285:16: error: no matching function for call to + // ‘unum::usearch::aligned_allocator_gt<>::aligned_allocator_gt(const _Tp_alloc_type&)’ + // 285 | { return allocator_type(_M_get_Tp_allocator()); } + using nodes_t = std::vector; using vectors_t = std::vector; using nodes_mutexes_t = bitset_gt<>; + using dynamic_allocator_traits_t = std::allocator_traits; + using levels_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; nodes_t nodes_{}; @@ -192,7 +203,9 @@ class storage_v2 : public storage_interface; serialization_result_t result; // Pull basic metadata directly into the return paramter @@ -265,9 +277,7 @@ class storage_v2 : public storage_interface; - // // todo:: fix the allocator above - buffer_gt levels(header.size); + buffer_gt levels(header.size); if (!levels) return result.failed("Out of memory"); if (!input(levels, header.size * sizeof(level_t))) From 28c9a047621f55c92b50bc50c1c41a03fcc9de45 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sat, 6 Jan 2024 02:42:25 +0000 Subject: [PATCH 37/80] Clean up Existing diff without adding any new features --- include/usearch/index.hpp | 13 +------- include/usearch/index_dense.hpp | 28 +++++------------ include/usearch/storage.hpp | 53 +++++++++++++++++++-------------- 3 files changed, 38 insertions(+), 56 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index f90292c8e..1caa4284a 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1813,7 +1813,6 @@ class index_gt { using pointer = void; using reference = ref_t; - // todo:: take care of these to use external storage reference operator*() const noexcept { return {index_->storage_.get_node_at(slot_).key(), slot_}; } vector_key_t key() const noexcept { return index_->storage_.get_node_at(slot_).key(); } @@ -2083,17 +2082,6 @@ class index_gt { * Will keep the number of available threads/contexts the same as it was. 
*/ void clear() noexcept { - if (!viewed_file_) { - std::size_t n = nodes_count_; - for (std::size_t i = 0; i != n; ++i) { - node_t node = storage_.get_node_at(i); - // if (!has_reset()) { - storage_.node_free(i, node); - // } else - // tape_allocator_.deallocate(nullptr, 0); - } - } - storage_.clear(); nodes_count_ = 0; @@ -2989,6 +2977,7 @@ class index_gt { return result.failed("Terminated by user"); } viewed_file_ = std::move(file); + storage_.view_file_ = true; return {}; } diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 2056f35aa..d5794e17e 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -375,14 +375,12 @@ class index_dense_gt { /// @brief An instance of a potentially stateful `metric_t` used to initialize copies and forks. metric_t metric_; - /// @brief C-style array of `node_t` smart-pointers. - std::vector nodes_; - std::mutex vector_mutex_; - bitset_t nodes_mutexes_; - // storage_t storage_{&nodes_, &nodes_mutexes_, config_}; + /// @brief The underlying storage provider for this index that determines file storage layout, + /// implements serialization/deserialization routines, and provides an API to add, update and + /// retrieve vectors and hnsw graph nodes. storage_t storage_{config_}; - /// @brief Originally forms and array of integers [0, threads], marking all + /// @brief Originally forms and array of integers [0, threads], marking all. mutable std::vector available_threads_; /// @brief Mutex, controlling concurrent access to `available_threads_`. 
@@ -452,9 +450,6 @@ class index_dense_gt { casts_(std::move(other.casts_)), // metric_(std::move(other.metric_)), // - // vectors_tape_allocator_(std::move(other.vectors_tape_allocator_)), // - // vectors_lookup_(std::move(other.vectors_lookup_)), // - available_threads_(std::move(other.available_threads_)), // slot_lookup_(std::move(other.slot_lookup_)), // free_keys_(std::move(other.free_keys_)), // @@ -480,9 +475,6 @@ class index_dense_gt { std::swap(casts_, other.casts_); std::swap(metric_, other.metric_); - // std::swap(vectors_tape_allocator_, other.vectors_tape_allocator_); - // std::swap(vectors_lookup_, other.vectors_lookup_); - std::swap(available_threads_, other.available_threads_); std::swap(slot_lookup_, other.slot_lookup_); std::swap(free_keys_, other.free_keys_); @@ -764,13 +756,10 @@ class index_dense_gt { unique_lock_t lookup_lock(slot_lookup_mutex_); std::unique_lock free_lock(free_keys_mutex_); + // storage_ cleared by typed_ todo:: is this confusing? typed_->clear(); slot_lookup_.clear(); - - // should by run by storage_->clear which is run by typed_->clear() - // storage_.vectors_lookup_.clear(); free_keys_.clear(); - // vectors_tape_allocator_.reset(); } /** @@ -787,10 +776,7 @@ class index_dense_gt { std::unique_lock available_threads_lock(available_threads_mutex_); typed_->reset(); slot_lookup_.clear(); - // // run by typed_->reset(); - // vectors_lookup_.clear(); free_keys_.clear(); - // vectors_tape_allocator_.reset(); // Reset the thread IDs. 
available_threads_.resize(std::thread::hardware_concurrency()); @@ -894,7 +880,7 @@ class index_dense_gt { serialization_config_t config = {}, // progress_at&& progress = {}) { - // Discard all previous memory allocations of `vectors_tape_allocator_` + // Discard all previous memory allocations of reset(); // Infer the new index size @@ -942,7 +928,7 @@ class index_dense_gt { std::size_t offset = 0, serialization_config_t config = {}, // progress_at&& progress = {}) { - // Discard all previous memory allocations of `vectors_tape_allocator_` + // Discard all previous memory allocations. reset(); serialization_result_t result = file.open_if_not(); diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index 5104e35a6..53de8544d 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -67,6 +67,10 @@ class storage_v2 : public storage_interface; using dynamic_allocator_traits_t = std::allocator_traits; using levels_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + using nodes_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + + /// @brief C-style array of `node_t` smart-pointers. 
+ // buffer_gt nodes_{}; nodes_t nodes_{}; @@ -97,6 +101,8 @@ class storage_v2 : public storage_interface()) { - std::size_t n = nodes_count_; - for (std::size_t i = 0; i != n; ++i) - node_free_(i); - } else - tape_allocator_.deallocate(nullptr, 0); - nodes_count_ = 0; - max_level_ = -1; - entry_slot_ = 0u; - } - ****/ void clear() { - if (nodes_.data()) { - std::fill(nodes_.begin(), nodes_.end(), node_t{}); - // std::fill(vectors_lookup_.begin(), vectors_lookup_.end(), nullptr); + if (!view_file_) { + if (!has_reset()) { + std::size_t n = nodes_.size(); + for (std::size_t i = 0; i != n; ++i) { + // we do not know which slots have been filled and which ones - no + // so we iterate over full reserved space + if (nodes_[i]) + node_free(i, nodes_[i]); + } + } else + tape_allocator_.deallocate(nullptr, 0); + + if (!has_reset()) { + std::size_t n = vectors_lookup_.size(); + for (std::size_t i = 0; i != n; ++i) { + if (vectors_lookup_[i]) + vectors_allocator_.deallocate(vectors_lookup_[i], matrix_cols_); + } + } else + tape_allocator_.deallocate(nullptr, 0); } + std::fill(nodes_.begin(), nodes_.end(), node_t{}); } void reset() { nodes_mutexes_ = {}; nodes_.clear(); nodes_.shrink_to_fit(); - // vectors_lookup_.clear(); - // vectors_lookup_.shrink_to_fit(); + vectors_lookup_.clear(); + vectors_lookup_.shrink_to_fit(); } using span_bytes_t = span_gt; @@ -162,11 +173,7 @@ class storage_v2 : public storage_interface()) { - tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); - } else { - tape_allocator_.deallocate(nullptr, 0); - } + tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); nodes_[slot] = node_t{}; } node_t node_make(key_at key, level_t level) noexcept { From 098f87574d8bf34e95843fac17583b503e8ab8c1 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sat, 6 Jan 2024 04:32:29 +0000 Subject: [PATCH 38/80] Fix: Make sure index to stream saving callbacks take const arguments --- include/usearch/index.hpp | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 1caa4284a..c2a05d899 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2820,7 +2820,7 @@ class index_gt { return io_result; serialization_result_t stream_result = save_to_stream( - [&](void* buffer, std::size_t length) { + [&](const void* buffer, std::size_t length) { io_result = file.write(buffer, length); return !!io_result; }, @@ -2844,7 +2844,7 @@ class index_gt { return io_result; serialization_result_t stream_result = save_to_stream( - [&](void* buffer, std::size_t length) { + [&](const void* buffer, std::size_t length) { if (offset + length > file.size()) return false; std::memcpy(file.data() + offset, buffer, length); From 84734d6acde5b79b6d507d25239165a63a99fe8e Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sat, 6 Jan 2024 04:33:01 +0000 Subject: [PATCH 39/80] More high level cleanup --- include/usearch/index.hpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index c2a05d899..de3ec80fd 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1982,11 +1982,6 @@ class index_gt { /// @brief The slot in which the only node of the top-level graph is stored. std::size_t entry_slot_{}; - using nodes_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - - /// @brief C-style array of `node_t` smart-pointers. - // buffer_gt nodes_{}; - using contexts_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; /// @brief Array of thread-specific buffers for temporary data. 
From 75a3cc43c768b9d12cfe9aa4535da5b4431d1d30 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sat, 6 Jan 2024 04:34:31 +0000 Subject: [PATCH 40/80] Move index load to under storage API --- include/usearch/index.hpp | 36 +++---------- include/usearch/index_dense.hpp | 42 ++------------- include/usearch/storage.hpp | 93 ++++++++++++++++++++++++++++++++- 3 files changed, 104 insertions(+), 67 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index de3ec80fd..71a1e287e 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2734,35 +2734,8 @@ class index_gt { header.connectivity_base = config_.connectivity_base; header.max_level = max_level_; header.entry_slot = entry_slot_; - if (!output(&header, sizeof(header))) - return result.failed("Failed to serialize the header into stream"); - // Progress status - std::size_t processed = 0; - std::size_t const total = 2 * header.size; - - // Export the number of levels per node - // That is both enough to estimate the overall memory consumption, - // and to be able to estimate the offsets of every entry in the file. 
- for (std::size_t i = 0; i != header.size; ++i) { - node_t node = storage_.get_node_at(i); - level_t level = node.level(); - if (!output(&level, sizeof(level))) - return result.failed("Failed to serialize into stream"); - if (!progress(++processed, total)) - return result.failed("Terminated by user"); - } - - // After that dump the nodes themselves - for (std::size_t i = 0; i != header.size; ++i) { - span_bytes_t node_bytes = storage_.get_node_at(i).node_bytes(pre_); - if (!output(node_bytes.data(), node_bytes.size())) - return result.failed("Failed to serialize into stream"); - if (!progress(++processed, total)) - return result.failed("Terminated by user"); - } - - return {}; + return storage_.save_nodes_to_stream(output, header, progress); } /** @@ -2776,7 +2749,12 @@ class index_gt { // Pull basic metadata index_serialized_header_t header; - storage_.load_nodes_from_stream(input, header, progress); + result = storage_.load_nodes_from_stream(input, header, progress); + if (!result) { + reset(); + return result; + } + // Submit metadata config_.connectivity = header.connectivity; config_.connectivity_base = header.connectivity_base; diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index d5794e17e..11f0e9f42 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -792,41 +792,9 @@ class index_dense_gt { progress_at&& progress = {}) const { serialization_result_t result; - std::uint64_t matrix_rows = 0; - std::uint64_t matrix_cols = 0; - - // We may not want to put the vectors into the same file - if (!config.exclude_vectors) { - // Save the matrix size - if (!config.use_64_bit_dimensions) { - std::uint32_t dimensions[2]; - dimensions[0] = static_cast(typed_->size()); - dimensions[1] = static_cast(metric_.bytes_per_vector()); - if (!output(&dimensions, sizeof(dimensions))) - return result.failed("Failed to serialize into stream"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } else { - 
std::uint64_t dimensions[2]; - dimensions[0] = static_cast(typed_->size()); - dimensions[1] = static_cast(metric_.bytes_per_vector()); - if (!output(&dimensions, sizeof(dimensions))) - return result.failed("Failed to serialize into stream"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } - - // Dump the vectors one after another - for (std::uint64_t i = 0; i != matrix_rows; ++i) { - const byte_t* vector = storage_.get_vector_at(i); - if (!output(vector, matrix_cols)) - return result.failed("Failed to serialize into stream"); - } - } - - // Augment metadata + index_dense_head_buffer_t buffer; + // Prepare opaque header for Storage { - index_dense_head_buffer_t buffer; std::memset(buffer, 0, sizeof(buffer)); index_dense_head_t head{buffer}; std::memcpy(buffer, default_magic(), std::strlen(default_magic())); @@ -847,11 +815,11 @@ class index_dense_gt { head.count_deleted = typed_->size() - size(); head.dimensions = dimensions(); head.multi = multi(); - - if (!output(&buffer, sizeof(buffer))) - return result.failed("Failed to serialize into stream"); } + // save vectors and metadata to storage + storage_.save_vectors_to_stream(output, metric_.bytes_per_vector(), typed_->size(), buffer, config); + // Save the actual proximity graph return typed_->save_to_stream(std::forward(output), std::forward(progress)); } diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index 53de8544d..d8fcc8665 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -208,6 +208,97 @@ class storage_v2 : public storage_interface + serialization_result_t save_vectors_to_stream(output_callback_at& output, std::uint64_t vector_size_bytes, + std::uint64_t node_count, // + const vectors_metadata_at& metadata_buffer, + serialization_config_t config = {}) { + + serialization_result_t result; + std::uint64_t matrix_rows = 0; + std::uint64_t matrix_cols = 0; + + // We may not want to put the vectors into the same file + if 
(!config.exclude_vectors) { + // Save the matrix size + if (!config.use_64_bit_dimensions) { + std::uint32_t dimensions[2]; + dimensions[0] = static_cast(node_count); + dimensions[1] = static_cast(vector_size_bytes); + if (!output(&dimensions, sizeof(dimensions))) + return result.failed("Failed to serialize into stream"); + matrix_rows = dimensions[0]; + matrix_cols = dimensions[1]; + } else { + std::uint64_t dimensions[2]; + dimensions[0] = static_cast(node_count); + dimensions[1] = static_cast(vector_size_bytes); + if (!output(&dimensions, sizeof(dimensions))) + return result.failed("Failed to serialize into stream"); + matrix_rows = dimensions[0]; + matrix_cols = dimensions[1]; + } + + // Dump the vectors one after another + for (std::uint64_t i = 0; i != matrix_rows; ++i) { + const byte_t* vector = get_vector_at(i); + if (!output(vector, matrix_cols)) + return result.failed("Failed to serialize into stream"); + } + } + + if (!output(&metadata_buffer, sizeof(metadata_buffer))) + return result.failed("Failed to read the index vector metadata"); + + return result; + } + + /** + * @brief Symmetric to `save_from_stream`, pulls data from a stream. + */ + template + serialization_result_t save_nodes_to_stream(output_callback_at& output, const index_serialized_header_t& header, + progress_at& progress = {}) noexcept { + + serialization_result_t result; + + if (!output(&header, sizeof(header))) + return result.failed("Failed to serialize the header into stream"); + + // Progress status + std::size_t processed = 0; + std::size_t const total = 2 * header.size; + + // Export the number of levels per node + // That is both enough to estimate the overall memory consumption, + // and to be able to estimate the offsets of every entry in the file. 
+ for (std::size_t i = 0; i != header.size; ++i) { + node_t node = get_node_at(i); + level_t level = node.level(); + if (!output(&level, sizeof(level))) + return result.failed("Failed to serialize into stream"); + if (!progress(++processed, total)) + return result.failed("Terminated by user"); + } + + // After that dump the nodes themselves + for (std::size_t i = 0; i != header.size; ++i) { + span_bytes_t node_bytes = get_node_at(i).node_bytes(pre_); + if (!output(node_bytes.data(), node_bytes.size())) + return result.failed("Failed to serialize into stream"); + if (!progress(++processed, total)) + return result.failed("Terminated by user"); + } + return result; + } + /** * @brief Parses the index from file to RAM. * @param[in] input Input stream from which vectors will be loaded according to this storage format. @@ -269,7 +360,7 @@ class storage_v2 : public storage_interface serialization_result_t load_nodes_from_stream(input_callback_at& input, index_serialized_header_t& header, - progress_at&& progress = {}) noexcept { + progress_at& progress = {}) noexcept { serialization_result_t result; From b5d0a5019bcea2def2624ee2ce3ad9e6f58d8118 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 7 Jan 2024 01:12:49 +0000 Subject: [PATCH 41/80] Add view() support from storage --- include/usearch/index.hpp | 48 +++-------- include/usearch/index_dense.hpp | 55 ++----------- include/usearch/storage.hpp | 142 +++++++++++++++++++++++++++++++- 3 files changed, 155 insertions(+), 90 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 71a1e287e..c4921aee7 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2893,44 +2893,23 @@ class index_gt { serialization_result_t view(memory_mapped_file_t file, std::size_t offset = 0, progress_at&& progress = {}) noexcept { - // Remove previously stored objects reset(); - - serialization_result_t result = file.open_if_not(); - if (!result) - return result; - - // Pull basic metadata + 
return view_internal(std::move(file), offset, progress); + } + template + serialization_result_t view_internal(memory_mapped_file_t file, std::size_t offset = 0, + progress_at&& progress = {}) noexcept { + // shall not call reset() + // storage_ may already have some relevant stuff... + serialization_result_t result; index_serialized_header_t header; - if (file.size() - offset < sizeof(header)) - return result.failed("File is corrupted and lacks a header"); - std::memcpy(&header, file.data() + offset, sizeof(header)); - - if (!header.size) { - reset(); + result = storage_.view_nodes_from_stream(file, header, offset, progress); + if (!result) return result; - } - - // Precompute offsets of every node, but before that we need to update the configs - // This could have been done with `std::exclusive_scan`, but it's only available from C++17. - using offsets_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt offsets(header.size); - if (!offsets) - return result.failed("Out of memory"); config_.connectivity = header.connectivity; config_.connectivity_base = header.connectivity_base; pre_ = precompute_(config_); - misaligned_ptr_gt levels{(byte_t*)file.data() + offset + sizeof(header)}; - offsets[0u] = offset + sizeof(header) + sizeof(level_t) * header.size; - for (std::size_t i = 1; i < header.size; ++i) - offsets[i] = offsets[i - 1] + node_t::node_size_bytes(pre_, levels[i - 1]); - - std::size_t total_bytes = offsets[header.size - 1] + node_t::node_size_bytes(pre_, levels[header.size - 1]); - if (file.size() < total_bytes) { - reset(); - return result.failed("File is corrupted and can't fit all the nodes"); - } // Submit metadata and reserve memory index_limits_t limits; @@ -2943,14 +2922,7 @@ class index_gt { max_level_ = static_cast(header.max_level); entry_slot_ = static_cast(header.entry_slot); - // Rapidly address all the nodes - for (std::size_t i = 0; i != header.size; ++i) { - storage_.node_store(i, node_t{(byte_t*)file.data() 
+ offsets[i]}); - if (!progress(i + 1, header.size)) - return result.failed("Terminated by user"); - } viewed_file_ = std::move(file); - storage_.view_file_ = true; return {}; } diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 11f0e9f42..472ef83c3 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -878,6 +878,7 @@ class index_dense_gt { // Pull the actual proximity graph result = typed_->load_from_stream(std::forward(input), std::forward(progress)); + if (!result) return result; @@ -898,49 +899,15 @@ class index_dense_gt { // Discard all previous memory allocations. reset(); - - serialization_result_t result = file.open_if_not(); + serialization_result_t result; + index_dense_head_buffer_t buffer; + result = storage_.view_vectors_from_stream(file, buffer, offset, config); if (!result) return result; - - // Infer the new index size - std::uint64_t matrix_rows = 0; - std::uint64_t matrix_cols = 0; - span_punned_t vectors_buffer; - - // We may not want to fetch the vectors from the same file, or allow attaching them afterwards - if (!config.exclude_vectors) { - // Save the matrix size - if (!config.use_64_bit_dimensions) { - std::uint32_t dimensions[2]; - if (file.size() - offset < sizeof(dimensions)) - return result.failed("File is corrupted and lacks matrix dimensions"); - std::memcpy(&dimensions, file.data() + offset, sizeof(dimensions)); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - offset += sizeof(dimensions); - } else { - std::uint64_t dimensions[2]; - if (file.size() - offset < sizeof(dimensions)) - return result.failed("File is corrupted and lacks matrix dimensions"); - std::memcpy(&dimensions, file.data() + offset, sizeof(dimensions)); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - offset += sizeof(dimensions); - } - vectors_buffer = {file.data() + offset, static_cast(matrix_rows * matrix_cols)}; - offset += vectors_buffer.size(); - } - // Load metadata 
and choose the right metric { - index_dense_head_buffer_t buffer; - if (file.size() - offset < sizeof(buffer)) - return result.failed("File is corrupted and lacks a header"); - - std::memcpy(buffer, file.data() + offset, sizeof(buffer)); - index_dense_head_t head{buffer}; + if (std::memcmp(buffer, default_magic(), std::strlen(default_magic())) != 0) return result.failed("Magic header mismatch - the file isn't an index"); @@ -956,22 +923,12 @@ class index_dense_gt { metric_ = metric_t(head.dimensions, head.kind_metric, head.kind_scalar); config_.multi = head.multi; - offset += sizeof(buffer); } // Pull the actual proximity graph - result = typed_->view(std::move(file), offset, std::forward(progress)); + result = typed_->view_internal(std::move(file), offset, std::forward(progress)); if (!result) return result; - if (typed_->size() != static_cast(matrix_rows)) - return result.failed("Index size and the number of vectors doesn't match"); - - // Address the vectors - storage_.reserve(matrix_rows); - if (!config.exclude_vectors) - for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) - storage_.set_vector_at(slot, vectors_buffer.data() + matrix_cols * slot, matrix_cols, // - false, false); reindex_keys_(); return result; diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index d8fcc8665..19c7c32fd 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -68,6 +68,7 @@ class storage_v2 : public storage_interface; using levels_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; using nodes_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + using offsets_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; /// @brief C-style array of `node_t` smart-pointers. 
// buffer_gt nodes_{}; @@ -123,7 +124,7 @@ class storage_v2 : public storage_interface serialization_result_t save_nodes_to_stream(output_callback_at& output, const index_serialized_header_t& header, - progress_at& progress = {}) noexcept { + progress_at& progress = {}) const { serialization_result_t result; @@ -404,6 +405,141 @@ class storage_v2 : public storage_interface + serialization_result_t view_vectors_from_stream( + memory_mapped_file_t& file, // + //// todo!! document that offset is a reference, or better - do not do it this way + vectors_metadata_at& metadata_buffer, std::size_t& offset, serialization_config_t config = {}) { + + reset(); + + serialization_result_t result = file.open_if_not(); + if (!result) + return result; + + // Infer the new index size + std::uint64_t matrix_rows = 0; + std::uint64_t matrix_cols = 0; + span_punned_t vectors_buffer; + + // We may not want to fetch the vectors from the same file, or allow attaching them afterwards + if (!config.exclude_vectors) { + // Save the matrix size + if (!config.use_64_bit_dimensions) { + std::uint32_t dimensions[2]; + if (file.size() - offset < sizeof(dimensions)) + return result.failed("File is corrupted and lacks matrix dimensions"); + std::memcpy(&dimensions, file.data() + offset, sizeof(dimensions)); + matrix_rows = dimensions[0]; + matrix_cols = dimensions[1]; + offset += sizeof(dimensions); + } else { + std::uint64_t dimensions[2]; + if (file.size() - offset < sizeof(dimensions)) + return result.failed("File is corrupted and lacks matrix dimensions"); + std::memcpy(&dimensions, file.data() + offset, sizeof(dimensions)); + matrix_rows = dimensions[0]; + matrix_cols = dimensions[1]; + offset += sizeof(dimensions); + } + vectors_buffer = {file.data() + offset, static_cast(matrix_rows * matrix_cols)}; + offset += vectors_buffer.size(); + vectors_loaded_ = true; + } + matrix_rows_ = matrix_rows; + matrix_cols_ = matrix_cols; + // q:: how does this work when vectors are excluded? 
+ // Address the vectors + reserve(matrix_rows); + if (!config.exclude_vectors) + for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) + set_vector_at(slot, vectors_buffer.data() + matrix_cols * slot, matrix_cols, // + false, false); + + if (file.size() - offset < sizeof(metadata_buffer)) + return result.failed("File is corrupted and lacks a header"); + + std::memcpy(metadata_buffer, file.data() + offset, sizeof(metadata_buffer)); + offset += sizeof(metadata_buffer); + + return result; + } + + /** + * @brief Symmetric to `save_from_stream`, pulls data from a stream. + */ + template + serialization_result_t view_nodes_from_stream(memory_mapped_file_t& file, index_serialized_header_t& header, + std::size_t offset = 0, progress_at& progress = {}) noexcept { + + serialization_result_t result = file.open_if_not(); + if (!result) + return result; + + // Pull basic metadata + if (file.size() - offset < sizeof(header)) + return result.failed("File is corrupted and lacks a header"); + std::memcpy(&header, file.data() + offset, sizeof(header)); + + if (!header.size) { + reset(); + return result; + } + + // update config_ and pre_ for correct node_t size calculations below + index_config_t config; + config.connectivity = header.connectivity; + config.connectivity_base = header.connectivity_base; + pre_ = node_t::precompute_(config); + + buffer_gt offsets(header.size); + + if (!offsets) + return result.failed("Out of memory"); + + // before mapping levels[] from file, let's make sure the file is large enough + if (file.size() - offset - sizeof(header) - header.size * sizeof(level_t) < 0) + return result.failed("File is corrupted. 
Unable to parse node levels from file"); + + misaligned_ptr_gt levels{(byte_t*)file.data() + offset + sizeof(header)}; + offsets[0u] = offset + sizeof(header) + sizeof(level_t) * header.size; + + for (std::size_t i = 1; i < header.size; ++i) + offsets[i] = offsets[i - 1] + node_t::node_size_bytes(pre_, levels[i - 1]); + + std::size_t total_bytes = offsets[header.size - 1] + node_t::node_size_bytes(pre_, levels[header.size - 1]); + if (file.size() < total_bytes) { + reset(); + return result.failed("File is corrupted and can't fit all the nodes"); + } + + if (!reserve(header.size)) { + reset(); + return result.failed("Out of memory"); + } + + // Rapidly address all the nodes + for (std::size_t i = 0; i != header.size; ++i) { + node_store(i, node_t{(byte_t*)file.data() + offsets[i]}); + if (!progress(i + 1, header.size)) + return result.failed("Terminated by user"); + } + view_file_ = true; + + if (vectors_loaded_ && header.size != static_cast(matrix_rows_)) + return result.failed("Index size and the number of vectors doesn't match"); + + return {}; + } + #pragma endregion }; From e36bd1cc2e4ac7fd4ba3b8bc85d89d119a4eb656 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 7 Jan 2024 01:51:42 +0000 Subject: [PATCH 42/80] Get rid of the duplicate precompute_ and use the one from node_t everywhere --- include/usearch/index.hpp | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index c4921aee7..c940d0a58 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2004,7 +2004,7 @@ class index_gt { storage_t& storage, // index_config_t config = {}, dynamic_allocator_t dynamic_allocator = {}) noexcept : storage_(storage), config_(config), limits_(0, 0), dynamic_allocator_(std::move(dynamic_allocator)), - pre_(precompute_(config)), nodes_count_(0u), max_level_(-1), entry_slot_(0u), contexts_() {} + pre_(node_t::precompute_(config)), nodes_count_(0u), max_level_(-1), 
entry_slot_(0u), contexts_() {} /** * @brief Clones the structure with the same hyper-parameters, but without contents. @@ -2758,7 +2758,7 @@ class index_gt { // Submit metadata config_.connectivity = header.connectivity; config_.connectivity_base = header.connectivity_base; - pre_ = precompute_(config_); + pre_ = node_t::precompute_(config_); nodes_count_ = header.size; max_level_ = static_cast(header.max_level); entry_slot_ = static_cast(header.entry_slot); @@ -2909,7 +2909,7 @@ class index_gt { config_.connectivity = header.connectivity; config_.connectivity_base = header.connectivity_base; - pre_ = precompute_(config_); + pre_ = node_t::precompute_(config_); // Submit metadata and reserve memory index_limits_t limits; @@ -3017,15 +3017,6 @@ class index_gt { } private: - // todo:: only needed in storage - inline static precomputed_constants_t precompute_(index_config_t const& config) noexcept { - precomputed_constants_t pre; - pre.inverse_log_connectivity = 1.0 / std::log(static_cast(config.connectivity)); - pre.neighbors_bytes = config.connectivity * sizeof(compressed_slot_t) + sizeof(neighbors_count_t); - pre.neighbors_base_bytes = config.connectivity_base * sizeof(compressed_slot_t) + sizeof(neighbors_count_t); - return pre; - } - // todo:: these can also be moved to node_at, along with class neighbors_ref_t definition inline neighbors_ref_t neighbors_base_(node_t node) const noexcept { return {node.neighbors_tape()}; } From 754d9b77474b19d7e919723010f792dbb320c55e Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 7 Jan 2024 01:52:48 +0000 Subject: [PATCH 43/80] Get rid of global node_head_bytes and use the equivalent node_t::head_size_bytes() everywhere --- include/usearch/index.hpp | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index c940d0a58..5b3dae5f8 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1628,10 +1628,6 @@ struct 
precomputed_constants_t { template class node_at { byte_t* tape_{}; - /** - * @brief How many bytes of memory are needed to form the "head" of the node. - */ - static constexpr std::size_t node_head_bytes_() { return sizeof(vector_key_t) + sizeof(level_t); } inline std::size_t node_neighbors_bytes_(const precomputed_constants_t& pre, node_at node) const noexcept { return node_neighbors_bytes_(pre, node.level()); } @@ -1651,17 +1647,21 @@ template class node_at { using span_bytes_t = span_gt; explicit node_at(byte_t* tape) noexcept : tape_(tape) {} byte_t* tape() const noexcept { return tape_; } - byte_t* neighbors_tape() const noexcept { return tape_ + node_head_bytes_(); } + /** + * @brief How many bytes of memory are needed to form the "head" of the node. + */ + static constexpr std::size_t head_size_bytes() { return sizeof(vector_key_t) + sizeof(level_t); } + byte_t* neighbors_tape() const noexcept { return tape_ + head_size_bytes(); } explicit operator bool() const noexcept { return tape_; } inline span_bytes_t node_bytes(const precomputed_constants_t& pre) const noexcept { return {tape(), node_size_bytes(pre, level())}; } inline std::size_t node_size_bytes(const precomputed_constants_t& pre) noexcept { - return node_head_bytes_() + node_neighbors_bytes_(pre, level()); + return head_size_bytes() + node_neighbors_bytes_(pre, level()); } static inline std::size_t node_size_bytes(const precomputed_constants_t& pre, level_t level) noexcept { - return node_head_bytes_() + node_neighbors_bytes_(pre, level); + return head_size_bytes() + node_neighbors_bytes_(pre, level); } inline static precomputed_constants_t precompute_(index_config_t const& config) noexcept { @@ -1867,12 +1867,6 @@ class index_gt { */ using neighbors_count_t = std::uint32_t; - // todo:: move near the rest of these functions - /** - * @brief How many bytes of memory are needed to form the "head" of the node. 
- */ - static constexpr std::size_t node_head_bytes_() { return sizeof(vector_key_t) + sizeof(level_t); } - using visits_hash_set_t = growing_hash_set_gt, dynamic_allocator_t>; /// @brief A space-efficient internal data-structure used in graph traversal queues. @@ -2639,7 +2633,7 @@ class index_gt { ++result.nodes; result.edges += neighbors_(node, level).size(); - result.allocated_bytes += node_head_bytes_() + neighbors_bytes; + result.allocated_bytes += node_t::head_size_bytes() + neighbors_bytes; } std::size_t max_edges_per_node = level ? config_.connectivity_base : config_.connectivity; @@ -2649,7 +2643,7 @@ class index_gt { stats_t stats(stats_t* stats_per_level, std::size_t max_level) const noexcept { - std::size_t head_bytes = node_head_bytes_(); + std::size_t head_bytes = node_t::head_size_bytes(); for (std::size_t i = 0; i != size(); ++i) { node_t node = storage_.get_node_at(i); From 02ecba431ad216909b8b7dab81bd471f97645f41 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 7 Jan 2024 02:15:20 +0000 Subject: [PATCH 44/80] Remove unnecessary code changes (comments and formatting) --- include/usearch/index.hpp | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 5b3dae5f8..133aaac0a 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2133,15 +2133,10 @@ class index_gt { return true; bool storage_reserved = storage_.reserve(limits.members); - // buffer_gt new_nodes(limits.members); buffer_gt new_contexts(limits.threads()); if (!new_contexts || !storage_reserved) return false; - // Move the nodes info, and deallocate previous buffers. 
- // if (nodes_) - // std::memcpy(new_nodes.data(), nodes_.data(), sizeof(node_t) * size()); - limits_ = limits; nodes_capacity_ = limits.members; contexts_ = std::move(new_contexts); @@ -2239,7 +2234,6 @@ class index_gt { inline match_t at(std::size_t i) const noexcept { candidate_t const* top_ordered = top_->data(); candidate_t candidate = top_ordered[i]; - // node_t node = nodes_[candidate.slot]; node_t node = storage_->get_node_at(candidate.slot); return {member_cref_t{node.ckey(), candidate.slot}, candidate.distance}; } @@ -2317,11 +2311,10 @@ class index_gt { typename callback_at = dummy_callback_t, // typename prefetch_at = dummy_prefetch_t // > - add_result_t add( // - vector_key_t key, value_at&& value, // - metric_at&& metric, // - index_update_config_t config = {}, // - callback_at&& callback = callback_at{}, // + add_result_t add( // + vector_key_t key, value_at&& value, metric_at&& metric, // + index_update_config_t config = {}, // + callback_at&& callback = callback_at{}, // prefetch_at&& prefetch = prefetch_at{}) usearch_noexcept_m { add_result_t result; @@ -2360,7 +2353,6 @@ class index_gt { // Allocate the neighbors node_t node = storage_.node_make(key, target_level); - storage_.node_store(new_slot, node); if (!node) { nodes_count_.fetch_sub(1); return result.failed("Out of memory!"); @@ -2368,6 +2360,7 @@ class index_gt { if (target_level <= max_level_copy) new_level_lock.unlock(); + storage_.node_store(new_slot, node); result.new_size = new_slot + 1; result.slot = new_slot; callback(at(new_slot)); @@ -2663,8 +2656,8 @@ class index_gt { for (std::size_t l = 1; l <= max_level; ++l) stats_per_level[l].max_edges = stats_per_level[l].nodes * config_.connectivity; - stats_t result{}; // Aggregate stats across levels + stats_t result{}; for (std::size_t l = 0; l <= max_level; ++l) result.nodes += stats_per_level[l].nodes, // result.edges += stats_per_level[l].edges, // @@ -2707,7 +2700,6 @@ class index_gt { */ std::size_t serialized_length() const 
noexcept { std::size_t neighbors_length = 0; - for (std::size_t i = 0; i != size(); ++i) neighbors_length += node_t::node_size_bytes(pre_, storage_.get_node_at(i).level()) + sizeof(level_t); return sizeof(index_serialized_header_t) + neighbors_length; @@ -2887,6 +2879,7 @@ class index_gt { serialization_result_t view(memory_mapped_file_t file, std::size_t offset = 0, progress_at&& progress = {}) noexcept { + // Remove previously stored objects reset(); return view_internal(std::move(file), offset, progress); } From f26cde97d261589c17ea158379d4c545e7e7fa39 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 7 Jan 2024 02:18:00 +0000 Subject: [PATCH 45/80] Remove compaction API for initial storage PR --- cpp/test.cpp | 4 ++-- include/usearch/index.hpp | 35 ----------------------------------- 2 files changed, 2 insertions(+), 37 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index 55df8a24a..59c1a457a 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -140,8 +140,8 @@ void test_cosine(index_at& index, std::vector> const& vec index.get(key_second, vec_recovered_from_view.data()); expect(std::equal(vector_second, vector_second + dimensions, vec_recovered_from_view.data())); - auto compaction_result = index.compact(); - expect(bool(compaction_result)); + // auto compaction_result = index.compact(); + // expect(bool(compaction_result)); } expect(index.memory_usage() > 0); diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 133aaac0a..0db7a42fe 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2915,41 +2915,6 @@ class index_gt { #pragma endregion -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-parameter" - /** - * @brief Performs compaction on the whole HNSW index, purging some entries - * and links to them, while also generating a more efficient mapping, - * putting the more frequently used entries closer together. 
- * - * - * Scans the whole collection, removing the links leading towards - * banned entries. This essentially isolates some nodes from the rest - * of the graph, while keeping their outgoing links, in case the node - * is structurally relevant and has a crucial role in the index. - * It won't reclaim the memory. - * - * @param[in] allow_member Predicate to mark nodes for isolation. - * @param[in] executor Thread-pool to execute the job in parallel. - * @param[in] progress Callback to report the execution progress. - */ - template - void compact( // - values_at&& values, // - metric_at&& metric, // - slot_transition_at&& slot_transition, // - - executor_at&& executor = executor_at{}, // - progress_at&& progress = progress_at{}, // - prefetch_at&& prefetch = prefetch_at{}) noexcept { - return; - } -#pragma GCC diagnostic pop - /** * @brief Scans the whole collection, removing the links leading towards * banned entries. This essentially isolates some nodes from the rest From 846355ca2951ecee71b25227cfc4228c0ca2be4e Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 7 Jan 2024 02:30:56 +0000 Subject: [PATCH 46/80] Remove more useless changes --- include/usearch/index.hpp | 74 ++++----------------------------------- 1 file changed, 6 insertions(+), 68 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 0db7a42fe..8549bc8a7 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2915,59 +2915,6 @@ class index_gt { #pragma endregion - /** - * @brief Scans the whole collection, removing the links leading towards - * banned entries. This essentially isolates some nodes from the rest - * of the graph, while keeping their outgoing links, in case the node - * is structurally relevant and has a crucial role in the index. - * It won't reclaim the memory. - * - * @param[in] allow_member Predicate to mark nodes for isolation. - * @param[in] executor Thread-pool to execute the job in parallel. 
- * @param[in] progress Callback to report the execution progress. - */ - template < // - typename allow_member_at = dummy_predicate_t, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // - > - void isolate( // - allow_member_at&& allow_member, // - executor_at&& executor = executor_at{}, // - progress_at&& progress = progress_at{}) noexcept { - - // Progress status - std::atomic do_tasks{true}; - std::atomic processed{0}; - assert(false); - - /* - // Erase all the incoming links - std::size_t nodes_count = size(); - executor.dynamic(nodes_count, [&](std::size_t thread_idx, std::size_t node_idx) { - node_t node = get_node_at(node_idx); - for (level_t level = 0; level <= node.level(); ++level) { - neighbors_ref_t neighbors = neighbors_(node, level); - std::size_t old_size = neighbors.size(); - neighbors.clear(); - for (std::size_t i = 0; i != old_size; ++i) { - compressed_slot_t neighbor_slot = neighbors[i]; - node_t neighbor = get_node_at(neighbor_slot); - if (allow_member(member_cref_t{neighbor.ckey(), neighbor_slot})) - neighbors.push_back(neighbor_slot); - } - } - ++processed; - if (thread_idx == 0) - do_tasks = progress(processed.load(), nodes_count); - return do_tasks.load(); - }); - - // At the end report the latest numbers, because the reporter thread may be finished earlier - progress(processed.load(), nodes_count); - */ - } - private: // todo:: these can also be moved to node_at, along with class neighbors_ref_t definition inline neighbors_ref_t neighbors_base_(node_t node) const noexcept { return {node.neighbors_tape()}; } @@ -3002,8 +2949,7 @@ class index_gt { template std::size_t connect_new_node_( // - metric_at&& metric, // - std::size_t new_slot, level_t level, context_t& context) usearch_noexcept_m { + metric_at&& metric, std::size_t new_slot, level_t level, context_t& context) usearch_noexcept_m { node_t new_node = storage_.get_node_at(new_slot); top_candidates_t& top = context.top_candidates; @@ -3026,8 
+2972,8 @@ class index_gt { template void reconnect_neighbor_nodes_( // - metric_at&& metric, // - std::size_t new_slot, value_at&& value, level_t level, context_t& context) usearch_noexcept_m { + metric_at&& metric, std::size_t new_slot, value_at&& value, level_t level, + context_t& context) usearch_noexcept_m { node_t new_node = storage_.get_node_at(new_slot); top_candidates_t& top = context.top_candidates; @@ -3038,10 +2984,6 @@ class index_gt { for (compressed_slot_t close_slot : new_neighbors) { if (close_slot == new_slot) continue; - // todo:: q:: I do not know all the idiosyncrasies of 'auto'. Is this a proper usage of this? - // I chose auto here to allow storage define its own lock smart pointer, without making assumptions - // about it here. BUt are there cases where, e.g. auto will pick up the lock in the wrong way and instantly - // drop it for example? node_lock_t close_lock = storage_.node_lock(close_slot); node_t close_node = storage_.get_node_at(close_slot); @@ -3125,7 +3067,7 @@ class index_gt { bool operator==(candidates_iterator_t const& other) noexcept { return current_ == other.current_; } bool operator!=(candidates_iterator_t const& other) noexcept { return current_ != other.current_; } - // vector_key_t key() const noexcept { return index_->get_node_at(slot()).key(); } + vector_key_t key() const noexcept { return index_->get_node_at(slot()).key(); } compressed_slot_t slot() const noexcept { return neighbors_[current_]; } friend inline std::size_t get_slot(candidates_iterator_t const& it) noexcept { return it.slot(); } friend inline vector_key_t get_key(candidates_iterator_t const& it) noexcept { return it.key(); } @@ -3143,9 +3085,8 @@ class index_gt { }; template - std::size_t search_for_one_( // - value_at&& query, // - metric_at&& metric, prefetch_at&& prefetch, // + std::size_t search_for_one_( // + value_at&& query, metric_at&& metric, prefetch_at&& prefetch, // std::size_t closest_slot, level_t begin_level, level_t end_level, context_t& 
context) const noexcept { visits_hash_set_t& visits = context.visits; @@ -3163,9 +3104,6 @@ class index_gt { node_lock_t closest_lock = storage_.node_lock(closest_slot); neighbors_ref_t closest_neighbors = neighbors_non_base_(storage_.get_node_at(closest_slot), level); - using vvv = typename std::decay::type::vector_key_t; - static_assert(std::is_same::value, "this cannot happen"); - // Optional prefetching if (!is_dummy()) { candidates_range_t missing_candidates{*this, closest_neighbors, visits}; From c42c2f7fa65aa72d87a668ead0dceeb4a3a207b9 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 7 Jan 2024 02:34:00 +0000 Subject: [PATCH 47/80] Remove unused header --- include/usearch/index.hpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 8549bc8a7..fe7d3e647 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -77,10 +77,9 @@ #include // `std::atomic` #include // `std::bitset` #include -#include // `CHAR_BIT` -#include // `std::sqrt` -#include // `std::memset` -#include +#include // `CHAR_BIT` +#include // `std::sqrt` +#include // `std::memset` #include // `std::reverse_iterator` #include // `std::unique_lock` - replacement candidate #include // `std::default_random_engine` - replacement candidate From f969d4ea0f00e132852511076e4c6626df1bbba9 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 7 Jan 2024 02:50:46 +0000 Subject: [PATCH 48/80] Remove useless changes --- include/usearch/index.hpp | 1 - include/usearch/index_dense.hpp | 20 +++++++------------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index fe7d3e647..059402359 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2123,7 +2123,6 @@ class index_gt { * @brief Increases the `capacity()` of the index to allow adding more vectors. * @return `true` on success, `false` on memory allocation errors. 
*/ - // todo:: reserve is not thread safe if another thread is running search or insert bool reserve(index_limits_t limits) usearch_noexcept_m { if (limits.threads_add <= limits_.threads_add // diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 472ef83c3..32d4a98fc 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -1,12 +1,10 @@ #pragma once -#include #include // `aligned_alloc` #include // `std::function` -#include -#include // `std::iota` -#include // `std::thread` -#include // `std::vector` +#include // `std::iota` +#include // `std::thread` +#include // `std::vector` #include #include @@ -441,11 +439,7 @@ class index_dense_gt { index_dense_gt(index_dense_gt&& other) : config_(std::move(other.config_)), - // todo:: ask-Ashot: is the following change ok? why is it needed - // for some reason exchange stopped working after I added allocator to strage - // it was complaining about some ambiguity - // typed_(exchange(other.typed_, nullptr)), // - typed_(std::move(other.typed_)), // + typed_(exchange(other.typed_, nullptr)), // cast_buffer_(std::move(other.cast_buffer_)), // casts_(std::move(other.casts_)), // metric_(std::move(other.metric_)), // @@ -736,7 +730,7 @@ class index_dense_gt { * @return `true` if the memory reservation was successful, `false` otherwise. 
*/ bool reserve(index_limits_t limits) { - // this seems to allow search() and add() on the dense index, concurrent to this reserve + // todo:: ask-Ashot this seems to allow search() and add() on the dense index, concurrent to this reserve // But that is not safe on typed_ as typed_->reserve() reallocates the lock buffer, discarding the old one // without checking if anything is locked { @@ -774,6 +768,7 @@ class index_dense_gt { std::unique_lock free_lock(free_keys_mutex_); std::unique_lock available_threads_lock(available_threads_mutex_); + // storage is reset by typed_ typed_->reset(); slot_lookup_.clear(); free_keys_.clear(); @@ -878,7 +873,6 @@ class index_dense_gt { // Pull the actual proximity graph result = typed_->load_from_stream(std::forward(input), std::forward(progress)); - if (!result) return result; @@ -900,6 +894,7 @@ class index_dense_gt { // Discard all previous memory allocations. reset(); serialization_result_t result; + // Note that buffer and offset are passed by reference index_dense_head_buffer_t buffer; result = storage_.view_vectors_from_stream(file, buffer, offset, config); if (!result) @@ -907,7 +902,6 @@ class index_dense_gt { // Load metadata and choose the right metric { index_dense_head_t head{buffer}; - if (std::memcmp(buffer, default_magic(), std::strlen(default_magic())) != 0) return result.failed("Magic header mismatch - the file isn't an index"); From 4af55fd0511ad2e70d49550918506d2c2098c208 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 7 Jan 2024 07:18:31 +0000 Subject: [PATCH 49/80] Attempt adding storage provider typechecking --- include/usearch/storage.hpp | 54 ++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index 19c7c32fd..6f27a1864 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -6,6 +6,48 @@ namespace unum { namespace usearch { +// taken from has_reset_gt +// but added a C 
macro to make it generic for other function names +// Can I do this in C++? +#define HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + template struct has_##NAME_AK##_gt { \ + static_assert(std::integral_constant::value, \ + "Second template parameter needs to be of function type."); \ + }; \ + \ + template \ + struct has_##NAME_AK##_gt { \ + private: \ + template \ + static constexpr auto check(at*) -> \ + typename std::is_same().NAME_AK(std::declval()...)), return_at>::type; \ + template static constexpr std::false_type check(...); \ + \ + typedef decltype(check(0)) type; \ + \ + public: \ + static constexpr bool value = type::value; \ + }; + +// note:: adding CHECK_AT based namespace so if the template can be used for multiple types +#define ASSERT_HAS_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + namespace CHECK_AT##__##NAME_AK { \ + HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + } \ + static_assert(CHECK_AT##__##NAME_AK::has_##NAME_AK##_gt::value, " nope") + +#define HAS_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) has_##NAME_AK##_gt::value + +// todo:: enforce const-ness +#define ASSERT_VALID_STORAGE(CHECK_AT) \ + ASSERT_HAS_FUNCTION(CHECK_AT, node_lock, CHECK_AT::lock_type(std::size_t idx)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, get_node_at, CHECK_AT::node_t(std::size_t idx)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, get_vector_at, byte_t*(std::size_t idx)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, node_size_bytes, std::size_t(std::size_t idx)); \ + ASSERT_HAS_FUNCTION( \ + CHECK_AT, set_at, \ + void(std::size_t idx, CHECK_AT::node_t node, byte_t * vector_data, std::size_t vector_size, bool reuse_node)); + template > // class storage_v2 : public storage_interface { - using node_t = node_at; // todo:: ask-Ashot: why can I not use dynamic_allocator_at in std::vector ? 
+ public: + using node_t = node_at; + + private: // Getting the following error: // /usr/include/c++/10/bits/stl_vector.h:285:16: error: no matching function for call to // ‘unum::usearch::aligned_allocator_gt<>::aligned_allocator_gt(const _Tp_alloc_type&)’ @@ -121,6 +166,8 @@ class storage_v2 : public storage_interface; + +static_assert(dummy_storage::typecheck()); +ASSERT_VALID_STORAGE(dummy_storage); + } // namespace usearch } // namespace unum From 14480966fe8ef7dbb446211c459ab193a47bff22 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 02:38:50 +0000 Subject: [PATCH 50/80] Add const-ness and noexcept enforcement to HAS_FUNCTION macro --- include/usearch/storage.hpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index 6f27a1864..db3f4475e 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -9,7 +9,13 @@ namespace usearch { // taken from has_reset_gt // but added a C macro to make it generic for other function names // Can I do this in C++? -#define HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT) \ +// Changes from the above: +// 1. Replace declval with declval to enforce function const-ness +// method: https://stackoverflow.com/questions/30407754/how-to-test-if-a-method-is-const +// 2. Replace .reset with dynamic NAME_AK to support methods with other names +// 3. 
Add option to enforce noexcept +// method: https://stackoverflow.com/questions/56510130/unit-test-to-check-for-noexcept-property-for-a-c-method +#define HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT, CONST_AK, NOEXCEPT_AK) \ template struct has_##NAME_AK##_gt { \ static_assert(std::integral_constant::value, \ "Second template parameter needs to be of function type."); \ @@ -20,13 +26,18 @@ namespace usearch { private: \ template \ static constexpr auto check(at*) -> \ - typename std::is_same().NAME_AK(std::declval()...)), return_at>::type; \ + typename std::is_same().NAME_AK(std::declval()...)), \ + return_at>::type; \ template static constexpr std::false_type check(...); \ \ + template static constexpr bool f_is_noexcept(at*) { \ + return noexcept(std::declval().NAME_AK(std::declval()...)); \ + } \ + \ typedef decltype(check(0)) type; \ \ - public: \ - static constexpr bool value = type::value; \ + public: /* if NOEXCEPT_AK then f_is_noexcept(0) */ \ + static constexpr bool value = type::value && (!NOEXCEPT_AK || f_is_noexcept(0)); \ }; // note:: adding CHECK_AT based namespace so if the template can be used for multiple types From b13269e68a8b51927a98c1c3efbac107f47c5c05 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 02:44:20 +0000 Subject: [PATCH 51/80] Add helper macros for various signature assertions --- include/usearch/storage.hpp | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index db3f4475e..9e87f08c2 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -41,11 +41,38 @@ namespace usearch { }; // note:: adding CHECK_AT based namespace so if the template can be used for multiple types -#define ASSERT_HAS_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ +#define ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, CONST_AK, NOEXCEPT_AK) \ + /************ check function signature without const or 
noexcept*/ \ namespace CHECK_AT##__##NAME_AK { \ - HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT, , false) \ + } \ + static_assert(CHECK_AT##__##NAME_AK::has_##NAME_AK##_gt::value, \ + " Function \"" #CHECK_AT "::" #NAME_AK \ + "\" does not exist or does not satisfy storage API signature"); \ + /************ check function signature with const requirement but without noexcept*/ \ + namespace CHECK_AT##__##NAME_AK##_const { \ + HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT, CONST_AK, false) \ + } \ + static_assert(CHECK_AT##__##NAME_AK##_const::has_##NAME_AK##_gt::value, \ + " Function \"" #CHECK_AT "::" #NAME_AK \ + "\" exists but does not satisfy const-requirement of storage API"); \ + /************ check function signature with const and noexcept requirements */ \ + namespace CHECK_AT##__##NAME_AK##_const_noexcept { \ + HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT, CONST_AK, NOEXCEPT_AK) \ } \ - static_assert(CHECK_AT##__##NAME_AK::has_##NAME_AK##_gt::value, " nope") + static_assert( \ + !NOEXCEPT_AK || CHECK_AT##__##NAME_AK##_const_noexcept::has_##NAME_AK##_gt::value, \ + " Function \"" #CHECK_AT "::" #NAME_AK "\" exists but does not satisfy noexcept requirement of storage API") + +/* NOCONST in comments indicates intentional lack of const qualifier*/ +#define ASSERT_HAS_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, /*NOCONST*/, false) +#define ASSERT_HAS_CONST_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, const, false) +#define ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, /*NOCONST*/, true) +#define ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, const, true) #define HAS_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) 
has_##NAME_AK##_gt::value From 7717c29f97d4a0c599165cec543928a0ffe13a08 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 02:45:08 +0000 Subject: [PATCH 52/80] Add more functions for storage API enforcement --- include/usearch/storage.hpp | 48 ++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index 9e87f08c2..29059e200 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -76,15 +76,23 @@ namespace usearch { #define HAS_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) has_##NAME_AK##_gt::value -// todo:: enforce const-ness +// N.B: the validation does notenforce reference argument types properly +// Validation succeeds even when in the sertions below an interface is required to take a reference type +// but the actual implementation takes a copy #define ASSERT_VALID_STORAGE(CHECK_AT) \ - ASSERT_HAS_FUNCTION(CHECK_AT, node_lock, CHECK_AT::lock_type(std::size_t idx)); \ - ASSERT_HAS_FUNCTION(CHECK_AT, get_node_at, CHECK_AT::node_t(std::size_t idx)); \ - ASSERT_HAS_FUNCTION(CHECK_AT, get_vector_at, byte_t*(std::size_t idx)); \ - ASSERT_HAS_FUNCTION(CHECK_AT, node_size_bytes, std::size_t(std::size_t idx)); \ + ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, node_lock, CHECK_AT::lock_type(std::size_t idx)); \ + ASSERT_HAS_CONST_FUNCTION(CHECK_AT, get_node_at, CHECK_AT::node_t(std::size_t idx)); \ + ASSERT_HAS_CONST_FUNCTION(CHECK_AT, get_vector_at, byte_t*(std::size_t idx)); \ + ASSERT_HAS_CONST_FUNCTION(CHECK_AT, node_size_bytes, std::size_t(std::size_t idx)); \ + ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, size, std::size_t()); \ + \ + ASSERT_HAS_FUNCTION(CHECK_AT, reserve, bool(std::size_t count)); \ + ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, clear, void()); \ + ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, reset, void()); \ ASSERT_HAS_FUNCTION( \ CHECK_AT, set_at, \ - void(std::size_t idx, CHECK_AT::node_t node, byte_t * vector_data, 
std::size_t vector_size, bool reuse_node)); + void(std::size_t idx, CHECK_AT::node_t node, byte_t * vector_data, std::size_t vector_size, bool reuse_node)); \ + static_assert(true, "this is to require a semicolon at the end of macro call") template ()) { std::size_t n = nodes_.size(); @@ -242,7 +258,7 @@ class storage_v2 : public storage_interface Date: Mon, 8 Jan 2024 02:46:21 +0000 Subject: [PATCH 53/80] Get rid of the old approach for API enforcement --- include/usearch/storage.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index 29059e200..8299b1a29 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -220,8 +220,6 @@ class storage_v2 : public storage_interface Date: Mon, 8 Jan 2024 03:17:45 +0000 Subject: [PATCH 54/80] remove last remnant of class typechecking with the old approach --- include/usearch/storage.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index 8299b1a29..33c0fa5a2 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -644,7 +644,6 @@ class storage_v2 : public storage_interface; -static_assert(dummy_storage::typecheck()); ASSERT_VALID_STORAGE(dummy_storage); } // namespace usearch From 21f6b76b7b9f2d3221e858ecc51d47800b8914e8 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 03:29:01 +0000 Subject: [PATCH 55/80] Add Storage type enforcement API comments --- include/usearch/storage.hpp | 85 ++++++++++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 5 deletions(-) diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index 33c0fa5a2..98ede9abb 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -15,6 +15,65 @@ namespace usearch { // 2. Replace .reset with dynamic NAME_AK to support methods with other names // 3. 
Add option to enforce noexcept // method: https://stackoverflow.com/questions/56510130/unit-test-to-check-for-noexcept-property-for-a-c-method + +/** + * @brief This macro, `HAS_FUNCTION_TEMPLATE`, is a utility to heck at + * compile-time whether a given type (CHECK_AT) has a member function with a specific name (NAME_AK), signature + * (SIGNATURE_AT=return_at(args_at...)), constness (CONST_AK=const|[empty]), and exception specification + * (NOEXCEPT_AK=true|false). + * + * @param[in] CHECK_AT Placeholder type used within the template instantiation to denote the type to be checked. + * @param[in] NAME_AK Name of the member function to be checked for. This name is incorporated in the generated + * structure's name and used in the check. + * @param[in] SIGNATURE_AT Placeholder for the function signature, employed in specializing the template for function + * types. + * @param[in] CONST_AK Indicates if the member function should be a const function. This forms part of the function + * call signature within the check. + * @param[in] NOEXCEPT_AK Indicates if the member function should be noexcept. This affects the check, particularly + * important for ensuring exception safety in certain contexts. + * + * generates a structure structure named `has_##NAME_AK##_gt` with a static constexpr boolean member `value`. This + * member is true if the specified type has a member function that matches the name, signature, constness, and noexcept + * status provided in the macro's arguments. Otherwise, it is false. + * + * @example + * Suppose you have a class `Foo` with that has an interface requirement of a const noexcept member function `bar` that + * returns an `int` and takes a `const double`. 
To enforce the interface requirement, if this function exists, is const, + * and noexcept, you would instantiate the generated template like so: + * ```cpp + * struct Foo { + * // CHECK CATCHES: expected double, got double* + * // int bar(const double*) const noexcept { return 42; } + * // CHECK CATCHES: wrong const-ness + * // int bar(const double) noexcept { return 42; } + * // CHECK CATCHES: wrong excempt-ness + * // int bar(const double) const { return 42; } + * // CHECK CATCHES because required int can be cast to double + * // double bar(const double) const noexcept { return 42; } + * // CHECK CATHCES wrong returned value + * // int* bar(const double) const noexcept { return nullptr; } + * // CHECK CATHCES wrong signature + * // int bar(const double, int) const { return 42; } + * // + * // SUCCESS! the invariant we wanted + * + * int bar(const double) const noexcept { return 42; } + * + * // + * // Some PROBLEMS + * // CHECK **DOES NOT** CATCH. assertion succeeds + * // int bar(const double&) const noexcept { return 42; } + * // CHECK **DOES NOT** CATCH. assertion succeeds + * // int bar(const double&&) const noexcept { return 42; } + * }; + * + * HAS_FUNCTION_TEMPLATE(Foo, bar, int(const double), const, true); + * static_assert(has_bar_gt::value); + * ``` + * If `Foo` indeed has a const noexcept member function `bar` matching this signature, the static assertion succeeds + * Otherwise, it will cause a compile failure + */ + #define HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT, CONST_AK, NOEXCEPT_AK) \ template struct has_##NAME_AK##_gt { \ static_assert(std::integral_constant::value, \ @@ -40,7 +99,18 @@ namespace usearch { static constexpr bool value = type::value && (!NOEXCEPT_AK || f_is_noexcept(0)); \ }; -// note:: adding CHECK_AT based namespace so if the template can be used for multiple types +/** + * This is a wrapper around the macro above that allows getting less cryptic error messages + * in particular, it: + * 1. 
Wraps the defined template in a unique namespace to avoid collisions. If this ends up being used elsewhere, + * probably it would be worth it to add a __FILE__ prefix to the namespace name as well + * 2. Regarless of the requrement, it runs signature check without taking into account const-ness and exception + * requirement. + * 3. Only after the initial signature check succeeds, it takes into acount const and noexcept and runs relevant checks, + * printing descriptive error messages is the constraints are not satisfied + * + * The macro takes the same parameters as the one above + **/ #define ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, CONST_AK, NOEXCEPT_AK) \ /************ check function signature without const or noexcept*/ \ namespace CHECK_AT##__##NAME_AK { \ @@ -64,7 +134,9 @@ namespace usearch { !NOEXCEPT_AK || CHECK_AT##__##NAME_AK##_const_noexcept::has_##NAME_AK##_gt::value, \ " Function \"" #CHECK_AT "::" #NAME_AK "\" exists but does not satisfy noexcept requirement of storage API") -/* NOCONST in comments indicates intentional lack of const qualifier*/ +/** Various commonly used shortcusts for the assertion macro above + * Note: NOCONST in comments indicates intentional lack of const qualifier + **/ #define ASSERT_HAS_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, /*NOCONST*/, false) #define ASSERT_HAS_CONST_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ @@ -76,9 +148,12 @@ namespace usearch { #define HAS_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) has_##NAME_AK##_gt::value -// N.B: the validation does notenforce reference argument types properly -// Validation succeeds even when in the sertions below an interface is required to take a reference type -// but the actual implementation takes a copy +/** + * The macro takes in a usearch Storage-provider type, and makes sure the type provides the necessary interface assumed + *in usearch internals N.B: the validation does notenforce reference 
argument types properly Validation succeeds even + *when in the sertions below an interface is required to take a reference type but the actual implementation takes a + *copy + **/ #define ASSERT_VALID_STORAGE(CHECK_AT) \ ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, node_lock, CHECK_AT::lock_type(std::size_t idx)); \ ASSERT_HAS_CONST_FUNCTION(CHECK_AT, get_node_at, CHECK_AT::node_t(std::size_t idx)); \ From 33220d5fd6e0f470867572afb2a29db2767ae0de Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 04:40:43 +0000 Subject: [PATCH 56/80] Add comments and add the rest of Storage interface enforcement --- include/usearch/storage.hpp | 127 +++++++++++++++++++++++++----------- 1 file changed, 90 insertions(+), 37 deletions(-) diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index 98ede9abb..6f25b8aa5 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -6,40 +6,37 @@ namespace unum { namespace usearch { -// taken from has_reset_gt -// but added a C macro to make it generic for other function names -// Can I do this in C++? -// Changes from the above: -// 1. Replace declval with declval to enforce function const-ness -// method: https://stackoverflow.com/questions/30407754/how-to-test-if-a-method-is-const -// 2. Replace .reset with dynamic NAME_AK to support methods with other names -// 3. Add option to enforce noexcept -// method: https://stackoverflow.com/questions/56510130/unit-test-to-check-for-noexcept-property-for-a-c-method - /** - * @brief This macro, `HAS_FUNCTION_TEMPLATE`, is a utility to heck at - * compile-time whether a given type (CHECK_AT) has a member function with a specific name (NAME_AK), signature - * (SIGNATURE_AT=return_at(args_at...)), constness (CONST_AK=const|[empty]), and exception specification - * (NOEXCEPT_AK=true|false). 
+ * @brief This macro, `HAS_FUNCTION_TEMPLATE`, is a utility to check at + * compile-time whether a given type (CHECK_AT) has a member function with a specific name (NAME_AK), signature + * (SIGNATURE_AT=return_at(args_at...)), constness (CONST_AK=const|[empty]), and exception specification + * (NOEXCEPT_AK=true|false). + * + * It is based on has_reset_gt template: + * 1. Replace declval with declval to enforce function const-ness + * method: https://stackoverflow.com/questions/30407754/how-to-test-if-a-method-is-const + * 2. Replace .reset with dynamic NAME_AK to support methods with other names + * 3. Add option to enforce noexcept + * method: https://stackoverflow.com/questions/56510130/unit-test-to-check-for-noexcept-property-for-a-c-method * * @param[in] CHECK_AT Placeholder type used within the template instantiation to denote the type to be checked. * @param[in] NAME_AK Name of the member function to be checked for. This name is incorporated in the generated - * structure's name and used in the check. + * structure's name and used in the check. * @param[in] SIGNATURE_AT Placeholder for the function signature, employed in specializing the template for function - * types. + * types. * @param[in] CONST_AK Indicates if the member function should be a const function. This forms part of the function - * call signature within the check. + * call signature within the check. * @param[in] NOEXCEPT_AK Indicates if the member function should be noexcept. This affects the check, particularly - * important for ensuring exception safety in certain contexts. + * important for ensuring exception safety in certain contexts. * * generates a structure structure named `has_##NAME_AK##_gt` with a static constexpr boolean member `value`. This - * member is true if the specified type has a member function that matches the name, signature, constness, and noexcept - * status provided in the macro's arguments. Otherwise, it is false. 
+ * member is true if the specified type has a member function that matches the name, signature, constness, and noexcept + * status provided in the macro's arguments. Otherwise, it is false. * * @example * Suppose you have a class `Foo` with that has an interface requirement of a const noexcept member function `bar` that - * returns an `int` and takes a `const double`. To enforce the interface requirement, if this function exists, is const, - * and noexcept, you would instantiate the generated template like so: + * returns an `int` and takes a `const double`. To enforce the interface requirement, if this function exists, is + * const, and noexcept, you would instantiate the generated template like so: * ```cpp * struct Foo { * // CHECK CATCHES: expected double, got double* @@ -70,10 +67,9 @@ namespace usearch { * HAS_FUNCTION_TEMPLATE(Foo, bar, int(const double), const, true); * static_assert(has_bar_gt::value); * ``` - * If `Foo` indeed has a const noexcept member function `bar` matching this signature, the static assertion succeeds + * If `Foo` indeed has a const noexcept member function `bar` matching this signature, the static assertion succeeds * Otherwise, it will cause a compile failure */ - #define HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT, CONST_AK, NOEXCEPT_AK) \ template struct has_##NAME_AK##_gt { \ static_assert(std::integral_constant::value, \ @@ -149,10 +145,46 @@ namespace usearch { #define HAS_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) has_##NAME_AK##_gt::value /** - * The macro takes in a usearch Storage-provider type, and makes sure the type provides the necessary interface assumed - *in usearch internals N.B: the validation does notenforce reference argument types properly Validation succeeds even - *when in the sertions below an interface is required to take a reference type but the actual implementation takes a - *copy + * @brief An example of what a USearch-Storage-compatible output callback should look like. 
+ * The callback is called to store arbitrarily serialized usearch index data in the underlying + * storage medium managed in the callback implementation + * + */ +struct dummy_output_callback_t { + inline bool operator()(const void* /*source memory*/, std::size_t /*size of the source*/) { return true; } +}; + +/** + * @brief An example of what a USearch-Storage-compatible input callback should look like. + * The callback is called to read arbitrarily serialized usearch index data from the underlying + * storage medium managed in the callback implementation + * + */ +struct dummy_input_callback_t { + inline bool operator()(void* /*destination memory*/, std::size_t /*size of the destination*/) { return true; } +}; + +/** + * @brief A dummy metadata buffer used in serialization/deserialization API checks below + * An actual index implementation might need to keep some app-level constants in here to be serialized on the + * stored index binary, but we do not need its structure for type-checking + * + */ +struct dummy_vectors_metadata_buffer_t {}; + +struct index_dense_serialization_config_t { + // We may not want to fetch the vectors from the same file, or allow attaching them afterwards + bool exclude_vectors = false; + bool use_64_bit_dimensions = false; +}; + +using serialization_config_t = index_dense_serialization_config_t; + +/** + * @brief The macro takes in a usearch Storage-provider type, and makes sure the type provides the necessary interface + * assumed in usearch internals N.B: the validation does notenforce reference argument types properly Validation + *succeeds even when in the sertions below an interface is required to take a reference type but the actual + *implementation takes a copy **/ #define ASSERT_VALID_STORAGE(CHECK_AT) \ ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, node_lock, CHECK_AT::lock_type(std::size_t idx)); \ @@ -167,8 +199,37 @@ namespace usearch { ASSERT_HAS_FUNCTION( \ CHECK_AT, set_at, \ void(std::size_t idx, CHECK_AT::node_t node, 
byte_t * vector_data, std::size_t vector_size, bool reuse_node)); \ + /*Save/Restore API enforcement*/ \ + ASSERT_HAS_FUNCTION(CHECK_AT, save_vectors_to_stream, \ + serialization_result_t( \ + dummy_output_callback_t& cb, std::size_t vector_size_bytes, std::uint64_t node_count, \ + const dummy_vectors_metadata_buffer_t& metadata_buffer, serialization_config_t config)); \ + ASSERT_HAS_CONST_FUNCTION(CHECK_AT, save_nodes_to_stream, \ + serialization_result_t(dummy_output_callback_t& cb, \ + const index_serialized_header_t& header, \ + dummy_progress_t& progress)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, load_vectors_from_stream, \ + serialization_result_t(dummy_input_callback_t& cb, \ + const dummy_vectors_metadata_buffer_t& metadata_buffer, \ + serialization_config_t config)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, load_nodes_from_stream, \ + serialization_result_t(dummy_input_callback_t& cb, index_serialized_header_t& header, \ + dummy_progress_t& progress)); \ + \ + /* View from file API*/ \ + ASSERT_HAS_FUNCTION(CHECK_AT, view_vectors_from_stream, \ + serialization_result_t(memory_mapped_file_t& file, \ + dummy_vectors_metadata_buffer_t& metadata_buffer, std::size_t& offset, \ + serialization_config_t config)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, view_nodes_from_stream, \ + serialization_result_t(memory_mapped_file_t& file, index_serialized_header_t& metadata_buffer, \ + std::size_t& offset, dummy_progress_t& progress)); \ static_assert(true, "this is to require a semicolon at the end of macro call") +/** I initially used this abstract class as a way to enforce storage API but ran into several limitations mentioned + * below I switched to macro+template based approach in the end, but left this around, in case there are ways to work + * around the issues below that I am not aware of. 
+ **/ template , // typename vectors_allocator_at = tape_allocator_at, // From f37bb7af46d0c18e00ffff9647473a81077cc82e Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 07:40:16 +0000 Subject: [PATCH 57/80] Move viewed_file_ state to storage_ --- include/usearch/index.hpp | 10 +++------- include/usearch/storage.hpp | 15 +++++++++------ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 059402359..ab2f9405e 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1957,7 +1957,6 @@ class index_gt { mutable dynamic_allocator_t dynamic_allocator_{}; precomputed_constants_t pre_{}; - memory_mapped_file_t viewed_file_{}; /// @brief Number of "slots" available for `node_t` objects. Equals to @b `limits_.members`. usearch_align_m mutable std::atomic nodes_capacity_{}; @@ -1987,7 +1986,7 @@ class index_gt { std::size_t max_level() const noexcept { return nodes_count_ ? static_cast(max_level_) : 0; } index_config_t const& config() const noexcept { return config_; } index_limits_t const& limits() const noexcept { return limits_; } - bool is_immutable() const noexcept { return bool(viewed_file_); } + bool is_immutable() const noexcept { return storage_.is_immutable(); } /** * @section Exceptions @@ -2091,7 +2090,6 @@ class index_gt { contexts_ = {}; limits_ = index_limits_t{0, 0}; nodes_capacity_ = 0; - viewed_file_ = memory_mapped_file_t{}; } /** @@ -2102,7 +2100,6 @@ class index_gt { std::swap(limits_, other.limits_); std::swap(dynamic_allocator_, other.dynamic_allocator_); std::swap(pre_, other.pre_); - std::swap(viewed_file_, other.viewed_file_); std::swap(max_level_, other.max_level_); std::swap(entry_slot_, other.entry_slot_); assert(false); @@ -2673,7 +2670,7 @@ class index_gt { */ std::size_t memory_usage(std::size_t allocator_entry_bytes = default_allocator_entry_bytes()) const noexcept { std::size_t total = 0; - if (!viewed_file_) { + if 
(!storage_.is_immutable()) { stats_t s = stats(); total += s.allocated_bytes; total += s.nodes * allocator_entry_bytes; @@ -2888,7 +2885,7 @@ class index_gt { // storage_ may already have some relevant stuff... serialization_result_t result; index_serialized_header_t header; - result = storage_.view_nodes_from_stream(file, header, offset, progress); + result = storage_.view_nodes_from_stream(std::move(file), header, offset, progress); if (!result) return result; @@ -2907,7 +2904,6 @@ class index_gt { max_level_ = static_cast(header.max_level); entry_slot_ = static_cast(header.entry_slot); - viewed_file_ = std::move(file); return {}; } diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index 6f25b8aa5..b7031d6b9 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -192,6 +192,7 @@ using serialization_config_t = index_dense_serialization_config_t; ASSERT_HAS_CONST_FUNCTION(CHECK_AT, get_vector_at, byte_t*(std::size_t idx)); \ ASSERT_HAS_CONST_FUNCTION(CHECK_AT, node_size_bytes, std::size_t(std::size_t idx)); \ ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, size, std::size_t()); \ + ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, is_immutable, bool()); \ \ ASSERT_HAS_FUNCTION(CHECK_AT, reserve, bool(std::size_t count)); \ ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, clear, void()); \ @@ -222,7 +223,7 @@ using serialization_config_t = index_dense_serialization_config_t; dummy_vectors_metadata_buffer_t& metadata_buffer, std::size_t& offset, \ serialization_config_t config)); \ ASSERT_HAS_FUNCTION(CHECK_AT, view_nodes_from_stream, \ - serialization_result_t(memory_mapped_file_t& file, index_serialized_header_t& metadata_buffer, \ + serialization_result_t(memory_mapped_file_t file, index_serialized_header_t& metadata_buffer, \ std::size_t& offset, dummy_progress_t& progress)); \ static_assert(true, "this is to require a semicolon at the end of macro call") @@ -314,6 +315,7 @@ class storage_v2 : public storage_interface; static_assert( // 
sizeof(typename tape_allocator_traits_t::value_type) == 1, // @@ -329,8 +331,6 @@ class storage_v2 : public storage_interface()) { std::size_t n = nodes_.size(); for (std::size_t i = 0; i != n; ++i) { @@ -383,6 +384,7 @@ class storage_v2 : public storage_interface; @@ -704,7 +707,7 @@ class storage_v2 : public storage_interface - serialization_result_t view_nodes_from_stream(memory_mapped_file_t& file, index_serialized_header_t& header, + serialization_result_t view_nodes_from_stream(memory_mapped_file_t file, index_serialized_header_t& header, std::size_t offset = 0, progress_at& progress = {}) noexcept { serialization_result_t result = file.open_if_not(); @@ -759,7 +762,7 @@ class storage_v2 : public storage_interface(matrix_rows_)) return result.failed("Index size and the number of vectors doesn't match"); From 42910718006607fde76ddedb3ee84a5bfef56346 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 07:49:56 +0000 Subject: [PATCH 58/80] Fix first bug wound by typechecker --- include/usearch/storage.hpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index b7031d6b9..a3cb999e5 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -197,9 +197,6 @@ using serialization_config_t = index_dense_serialization_config_t; ASSERT_HAS_FUNCTION(CHECK_AT, reserve, bool(std::size_t count)); \ ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, clear, void()); \ ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, reset, void()); \ - ASSERT_HAS_FUNCTION( \ - CHECK_AT, set_at, \ - void(std::size_t idx, CHECK_AT::node_t node, byte_t * vector_data, std::size_t vector_size, bool reuse_node)); \ /*Save/Restore API enforcement*/ \ ASSERT_HAS_FUNCTION(CHECK_AT, save_vectors_to_stream, \ serialization_result_t( \ @@ -274,12 +271,19 @@ class storage_interface { std::size_t memory_usage(); }; +/** + * NOTE: + * The class below used to inherit from storage_interface via: + * class 
storage_v2 : public storage_interface + * I disabled inheritence for now as interface compatibility is more + * thoroughly enforced via the macros at the beginning of this file + **/ template , // typename vectors_allocator_at = tape_allocator_at, // typename dynamic_allocator_at = std::allocator> // -class storage_v2 : public storage_interface { +class storage_v2 { // todo:: ask-Ashot: why can I not use dynamic_allocator_at in std::vector ? public: using node_t = node_at; From 08db9ec94481f90ffd60ea2f763650908439ccac Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 08:18:22 +0000 Subject: [PATCH 59/80] Fix nodes_ allocator to be what in original storage_v2 it was --- include/usearch/storage.hpp | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index a3cb999e5..3be1dab48 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -284,23 +284,21 @@ template > // class storage_v2 { - // todo:: ask-Ashot: why can I not use dynamic_allocator_at in std::vector ? 
public: using node_t = node_at; private: - // Getting the following error: - // /usr/include/c++/10/bits/stl_vector.h:285:16: error: no matching function for call to - // ‘unum::usearch::aligned_allocator_gt<>::aligned_allocator_gt(const _Tp_alloc_type&)’ - // 285 | { return allocator_type(_M_get_Tp_allocator()); } - - using nodes_t = std::vector; - using vectors_t = std::vector; - using nodes_mutexes_t = bitset_gt<>; + using nodes_mutexes_t = bitset_gt; using dynamic_allocator_traits_t = std::allocator_traits; using levels_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; using nodes_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; using offsets_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + using vectors_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + using nodes_t = buffer_gt; + // todo:: ask-Ashot: in the older version vectors_lookup_ was using the default vector allocator, + // and not the dynamic_allocator_at that was passed it. + // Can remove this if the previous approach was intentional + using vectors_t = std::vector; /// @brief C-style array of `node_t` smart-pointers. 
// buffer_gt nodes_{}; @@ -358,9 +356,17 @@ class storage_v2 { bool reserve(std::size_t count) { if (count < nodes_.size() && count < nodes_mutexes_.size()) return true; - nodes_mutexes_t new_mutexes = nodes_mutexes_t(count); + nodes_mutexes_t new_mutexes(count); + nodes_t new_nodes(count); + if (!new_mutexes || !new_nodes) + return false; + if (nodes_) + std::memcpy(new_nodes.data(), nodes_.data(), sizeof(node_t) * size()); + nodes_mutexes_ = std::move(new_mutexes); - nodes_.resize(count); + nodes_ = std::move(new_nodes); + // todo:: make sure to only reserve this if vectors are not stored externally + // will probably need to pass the fact as storage config parameter vectors_lookup_.resize(count); return true; } @@ -390,10 +396,10 @@ class storage_v2 { std::fill(nodes_.begin(), nodes_.end(), node_t{}); viewed_file_ = {}; } + void reset() noexcept { nodes_mutexes_ = {}; - nodes_.clear(); - nodes_.shrink_to_fit(); + nodes_ = {}; vectors_lookup_.clear(); vectors_lookup_.shrink_to_fit(); From 338e2fe17fd92371d8ca20feb3f8722725f45738 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 09:39:24 +0000 Subject: [PATCH 60/80] Add a simple storage interface that uses std containers --- include/usearch/dummy_stor.hpp | 141 ---------------- include/usearch/simple_storage.hpp | 260 +++++++++++++++++++++++++++++ include/usearch/storage.hpp | 19 ++- 3 files changed, 278 insertions(+), 142 deletions(-) delete mode 100644 include/usearch/dummy_stor.hpp create mode 100644 include/usearch/simple_storage.hpp diff --git a/include/usearch/dummy_stor.hpp b/include/usearch/dummy_stor.hpp deleted file mode 100644 index a1ce19fd5..000000000 --- a/include/usearch/dummy_stor.hpp +++ /dev/null @@ -1,141 +0,0 @@ - -#pragma once - -#include -#include - -namespace unum { -namespace usearch { - -/** - * @brief Storage abstraction for HNSW graph and associated vector data - * - * @tparam key_at - * The type of primary objects stored in the index. 
- * The values, to which those map, are not managed by the same index structure. - * - * @tparam compressed_slot_at - * The smallest unsigned integer type to address indexed elements. - * It is used internally to maximize space-efficiency and is generally - * up-casted to @b `std::size_t` in public interfaces. - * Can be a built-in @b `uint32_t`, `uint64_t`, or our custom @b `uint40_t`. - * Which makes the most sense for 4B+ entry indexes. - * - * @tparam tape_allocator_at - * Potentially different memory allocator for primary allocations of nodes and vectors. - * It would never `deallocate` separate entries, and would only free all the space at once. - * The allocated buffers may be uninitialized. - * - **/ -template > // -class dummy_storage_single_threaded { - using node_t = node_at; - using nodes_t = std::vector; - - nodes_t nodes_{}; - precomputed_constants_t pre_{}; - tape_allocator_at tape_allocator_{}; - using tape_allocator_traits_t = std::allocator_traits; - static_assert( // - sizeof(typename tape_allocator_traits_t::value_type) == 1, // - "Tape allocator must allocate separate addressable bytes"); - - public: - dummy_storage_single_threaded(index_config_t config, tape_allocator_at tape_allocator = {}) - : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} - - inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } - - inline size_t node_size_bytes(std::size_t idx) const noexcept { return get_node_at(idx).node_size_bytes(pre_); } - - // exported for client-side lock-declaration - // alternatively, could just use auto in client side - // ideally, there would be a way to make this "void", but I could not make it work - // as client side ends up declaring a void variable - // the downside of passing a primitive like "int" here is the "unused variable" compiler warning - // for the dummy lock guard variable. 
- struct dummy_lock { - // destructor necessary to avoid "unused variable warning" - // will this get properly optimized away? - ~dummy_lock() {} - }; - using lock_type = dummy_lock; - - bool reserve(std::size_t count) { - if (count < nodes_.size()) - return true; - nodes_.resize(count); - return true; - } - - void clear() { - if (nodes_.data()) - std::fill(nodes_.begin(), nodes_.end(), node_t{}); - } - void reset() { - nodes_.clear(); - nodes_.shrink_to_fit(); - } - - using span_bytes_t = span_gt; - - span_bytes_t node_malloc(level_t level) noexcept { - std::size_t node_size = node_t::node_size_bytes(pre_, level); - byte_t* data = (byte_t*)tape_allocator_.allocate(node_size); - return data ? span_bytes_t{data, node_size} : span_bytes_t{}; - } - void node_free(size_t slot, node_t node) { - if (!has_reset()) { - tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); - } else { - tape_allocator_.deallocate(nullptr, 0); - } - nodes_[slot] = node_t{}; - } - node_t node_make(key_at key, level_t level) noexcept { - span_bytes_t node_bytes = node_malloc(level); - if (!node_bytes) - return {}; - - std::memset(node_bytes.data(), 0, node_bytes.size()); - node_t node{(byte_t*)node_bytes.data()}; - node.key(key); - node.level(level); - return node; - } - - // node_t node_make_copy_(span_bytes_t old_bytes) noexcept { - // byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); - // if (!data) - // return {}; - // std::memcpy(data, old_bytes.data(), old_bytes.size()); - // return node_t{data}; - // } - - void node_store(size_t slot, node_t node) noexcept { - auto count = nodes_.size(); - nodes_[slot] = node; - } - inline size_t size() { return nodes_.size(); } - tape_allocator_at const& node_allocator() const noexcept { return tape_allocator_; } - // dummy lock just to satisfy the interface - constexpr inline lock_type node_lock(std::size_t) const noexcept { return dummy_lock{}; } -}; - -template class storage_v1 { - using vector_key_t = key_at; - using 
node_t = node_at; - using dynamic_allocator_t = aligned_allocator_gt; - // using nodes_mutexes_t = bitset_gt; - using nodes_mutexes_t = bitset_gt<>; - using nodes_t = std::vector; - - index_config_t config_{}; - nodes_t nodes_{}; - /// @brief Mutex, that limits concurrent access to `nodes_`. - mutable nodes_mutexes_t nodes_mutexes_{}; -}; - -} // namespace usearch -} // namespace unum diff --git a/include/usearch/simple_storage.hpp b/include/usearch/simple_storage.hpp new file mode 100644 index 000000000..d4fa59c41 --- /dev/null +++ b/include/usearch/simple_storage.hpp @@ -0,0 +1,260 @@ + +#pragma once + +#include +#include +#include +#include +#include + +namespace unum { +namespace usearch { + +/** + * @brief Storage abstraction for HNSW graph and associated vector data + * + * @tparam key_at + * The type of primary objects stored in the index. + * The values, to which those map, are not managed by the same index structure. + * + * @tparam compressed_slot_at + * The smallest unsigned integer type to address indexed elements. + * It is used internally to maximize space-efficiency and is generally + * up-casted to @b `std::size_t` in public interfaces. + * Can be a built-in @b `uint32_t`, `uint64_t`, or our custom @b `uint40_t`. + * Which makes the most sense for 4B+ entry indexes. + * + * @tparam tape_allocator_at + * Potentially different memory allocator for primary allocations of nodes and vectors. + * It would never `deallocate` separate entries, and would only free all the space at once. + * The allocated buffers may be uninitialized. 
+ * + **/ +template > // +class dummy_storage_single_threaded { + public: + using node_t = node_at; + + private: + using nodes_t = std::vector; + + nodes_t nodes_{}; + precomputed_constants_t pre_{}; + tape_allocator_at tape_allocator_{}; + memory_mapped_file_t viewed_file_{}; + mutable std::deque locks_{}; + using tape_allocator_traits_t = std::allocator_traits; + static_assert( // + sizeof(typename tape_allocator_traits_t::value_type) == 1, // + "Tape allocator must allocate separate addressable bytes"); + + public: + dummy_storage_single_threaded(index_config_t config, tape_allocator_at tape_allocator = {}) + : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} + + inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } + inline byte_t* get_vector_at(std::size_t idx) const noexcept { return nullptr; } + inline size_t node_size_bytes(std::size_t idx) const noexcept { return get_node_at(idx).node_size_bytes(pre_); } + bool is_immutable() const noexcept { return bool(viewed_file_); } + + // exported for client-side lock-declaration + // alternatively, could just use auto in client side + // ideally, there would be a way to make this "void", but I could not make it work + // as client side ends up declaring a void variable + // the downside of passing a primitive like "int" here is the "unused variable" compiler warning + // for the dummy lock guard variable. + struct dummy_lock { + // destructor necessary to avoid "unused variable warning" + // will this get properly optimized away? 
+ ~dummy_lock() {} + }; + using lock_type = std::unique_lock; + + bool reserve(std::size_t count) { + if (count < nodes_.size()) + return true; + nodes_.resize(count); + locks_.resize(count); + return true; + } + void clear() noexcept { + if (!is_immutable()) { + std::size_t n = nodes_.size(); + for (std::size_t i = 0; i != n; ++i) { + // we do not know which slots have been filled and which ones - no + // so we iterate over full reserved space + if (nodes_[i]) + node_free(i, nodes_[i]); + } + } + if (nodes_.data()) + std::fill(nodes_.begin(), nodes_.end(), node_t{}); + } + void reset() noexcept { clear(); } + + using span_bytes_t = span_gt; + + span_bytes_t node_malloc(level_t level) noexcept { + std::size_t node_size = node_t::node_size_bytes(pre_, level); + byte_t* data = (byte_t*)tape_allocator_.allocate(node_size); + return data ? span_bytes_t{data, node_size} : span_bytes_t{}; + } + void node_free(size_t slot, node_t node) { + if (!has_reset()) { + tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); + } else { + tape_allocator_.deallocate(nullptr, 0); + } + nodes_[slot] = node_t{}; + } + node_t node_make(key_at key, level_t level) noexcept { + span_bytes_t node_bytes = node_malloc(level); + if (!node_bytes) + return {}; + + std::memset(node_bytes.data(), 0, node_bytes.size()); + node_t node{(byte_t*)node_bytes.data()}; + node.key(key); + node.level(level); + return node; + } + + // node_t node_make_copy_(span_bytes_t old_bytes) noexcept { + // byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); + // if (!data) + // return {}; + // std::memcpy(data, old_bytes.data(), old_bytes.size()); + // return node_t{data}; + // } + + void node_store(size_t slot, node_t node) noexcept { nodes_[slot] = node; } + tape_allocator_at const& node_allocator() const noexcept { return tape_allocator_; } + // dummy lock just to satisfy the interface + inline lock_type node_lock(std::size_t i) const noexcept { return std::unique_lock(locks_[i]); } + + 
// serialization + + template + serialization_result_t save_vectors_to_stream(output_callback_at& output, std::uint64_t, + std::uint64_t, // + const vectors_metadata_at& metadata_buffer, + serialization_config_t config = {}) const { + assert(config.exclude_vectors); + assert(!config.use_64_bit_dimensions); + bool ok = output(metadata_buffer, sizeof(metadata_buffer)); + assert(ok); + return {}; + } + template + serialization_result_t save_nodes_to_stream(output_callback_at& output, const index_serialized_header_t& header, + progress_at& = {}) const { + bool ok = output(&header, sizeof(header)); + assert(ok); + for (std::size_t i = 0; i != header.size; ++i) { + node_t node = get_node_at(i); + level_t level = node.level(); + ok = output(&level, sizeof(level)); + assert(ok); + } + + // After that dump the nodes themselves + for (std::size_t i = 0; i != header.size; ++i) { + span_bytes_t node_bytes = get_node_at(i).node_bytes(pre_); + ok = output(node_bytes.data(), node_bytes.size()); + assert(ok); + } + return {}; + } + template + serialization_result_t load_vectors_from_stream(input_callback_at& input, // + vectors_metadata_at& metadata_buffer, + serialization_config_t config = {}) { + assert(config.exclude_vectors); + assert(!config.use_64_bit_dimensions); + bool ok = input(metadata_buffer, sizeof(metadata_buffer)); + assert(ok); + return {}; + } + template + serialization_result_t load_nodes_from_stream(input_callback_at& input, index_serialized_header_t& header, + progress_at& = {}) noexcept { + + bool ok = input(&header, sizeof(header)); + assert(ok); + if (!header.size) { + reset(); + return {}; + } + buffer_gt levels(header.size); + assert(levels); + ok = input(levels, header.size * sizeof(level_t)); + assert(ok); + + ok = reserve(header.size); + assert(ok); + + // Load the nodes + for (std::size_t i = 0; i != header.size; ++i) { + span_bytes_t node_bytes = node_malloc(levels[i]); + ok = input(node_bytes.data(), node_bytes.size()); + assert(ok); + node_store(i, 
node_t{node_bytes.data()}); + } + return {}; + } + template + serialization_result_t view_vectors_from_stream( + memory_mapped_file_t& file, // + //// todo!! document that offset is a reference, or better - do not do it this way + vectors_metadata_at& metadata_buffer, std::size_t& offset, serialization_config_t config = {}) { + reset(); + assert(config.exclude_vectors); + assert(!config.use_64_bit_dimensions); + + serialization_result_t result = file.open_if_not(); + assert(result); + std::memcpy(metadata_buffer, file.data() + offset, sizeof(metadata_buffer)); + offset += sizeof(metadata_buffer); + return {}; + } + template + serialization_result_t view_nodes_from_stream(memory_mapped_file_t file, index_serialized_header_t& header, + std::size_t offset = 0, progress_at& progress = {}) noexcept { + serialization_result_t result = file.open_if_not(); + std::memcpy(&header, file.data() + offset, sizeof(header)); + if (!header.size) { + reset(); + return result; + } + index_config_t config; + config.connectivity = header.connectivity; + config.connectivity_base = header.connectivity_base; + pre_ = node_t::precompute_(config); + buffer_gt offsets(header.size); + assert(offsets); + misaligned_ptr_gt levels{(byte_t*)file.data() + offset + sizeof(header)}; + offsets[0u] = offset + sizeof(header) + sizeof(level_t) * header.size; + for (std::size_t i = 1; i < header.size; ++i) + offsets[i] = offsets[i - 1] + node_t::node_size_bytes(pre_, levels[i - 1]); + if (!reserve(header.size)) { + reset(); + return result.failed("Out of memory"); + } + + // Rapidly address all the nodes + for (std::size_t i = 0; i != header.size; ++i) { + node_store(i, node_t{(byte_t*)file.data() + offsets[i]}); + if (!progress(i + 1, header.size)) + return result.failed("Terminated by user"); + } + viewed_file_ = std::move(file); + return {}; + } +}; + +using dummy_dummy_storage = dummy_storage_single_threaded; +ASSERT_VALID_STORAGE(dummy_dummy_storage); + +} // namespace usearch +} // namespace unum 
diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index 3be1dab48..3b87d4f65 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -191,7 +191,6 @@ using serialization_config_t = index_dense_serialization_config_t; ASSERT_HAS_CONST_FUNCTION(CHECK_AT, get_node_at, CHECK_AT::node_t(std::size_t idx)); \ ASSERT_HAS_CONST_FUNCTION(CHECK_AT, get_vector_at, byte_t*(std::size_t idx)); \ ASSERT_HAS_CONST_FUNCTION(CHECK_AT, node_size_bytes, std::size_t(std::size_t idx)); \ - ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, size, std::size_t()); \ ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, is_immutable, bool()); \ \ ASSERT_HAS_FUNCTION(CHECK_AT, reserve, bool(std::size_t count)); \ @@ -272,6 +271,24 @@ class storage_interface { }; /** + * @brief Storage abstraction for HNSW graph and associated vector data + * + * @tparam key_at + * The type of primary objects stored in the index. + * The values, to which those map, are not managed by the same index structure. + * + * @tparam compressed_slot_at + * The smallest unsigned integer type to address indexed elements. + * It is used internally to maximize space-efficiency and is generally + * up-casted to @b `std::size_t` in public interfaces. + * Can be a built-in @b `uint32_t`, `uint64_t`, or our custom @b `uint40_t`. + * Which makes the most sense for 4B+ entry indexes. + * + * @tparam tape_allocator_at + * Potentially different memory allocator for primary allocations of nodes and vectors. + * It would never `deallocate` separate entries, and would only free all the space at once. + * The allocated buffers may be uninitialized. 
+ * * NOTE: * The class below used to inherit from storage_interface via: * class storage_v2 : public storage_interface Date: Mon, 8 Jan 2024 09:56:39 +0000 Subject: [PATCH 61/80] Add storage choice to the tests --- cpp/test.cpp | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index 59c1a457a..e2001d9cb 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -10,6 +10,7 @@ #include #include #include +#include using namespace unum::usearch; using namespace unum; @@ -154,17 +155,16 @@ void test_cosine(index_at& index, std::vector> const& vec } } -template // +template // void test_cosine(std::size_t collection_size, std::size_t dimensions) { + using storage_t = storage_at; using scalar_t = scalar_at; using vector_key_t = key_at; using slot_t = slot_at; // using index_storage_t = storage_proxy_t; - // using index_storage_t = dummy_storage_single_threaded; - using index_storage_t = storage_v2; - using index_typed_t = index_gt; + using index_typed_t = index_gt; using member_cref_t = typename index_typed_t::member_cref_t; using member_citerator_t = typename index_typed_t::member_citerator_t; @@ -202,7 +202,7 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { std::vector> nodes; bitset_gt nodes_mutexes; // index_storage_t storage{&nodes, &nodes_mutexes, config}; - index_storage_t storage{config}; + storage_t storage{config}; index_typed_t index_typed(storage, config); test_cosine(index_typed, matrix, metric); } @@ -316,9 +316,24 @@ int main(int, char**) { for (std::size_t collection_size : {10, 500}) for (std::size_t dimensions : {97, 256}) { std::printf("Indexing %zu vectors with cos: \n", collection_size); - test_cosine(collection_size, dimensions); - std::printf("Indexing %zu vectors with cos: \n", collection_size); - test_cosine(collection_size, dimensions); + using key_t = std::int64_t; + { + using slot_t = std::uint32_t; + using v2 = storage_v2; + using ss = simple_storage; 
+ + test_cosine(collection_size, dimensions); + test_cosine(collection_size, dimensions); + } + { + using slot_t = uint40_t; + using v2 = storage_v2; + using ss = simple_storage; + + std::printf("Indexing %zu vectors with cos: \n", collection_size); + test_cosine(collection_size, dimensions); + test_cosine(collection_size, dimensions); + } } for (std::size_t connectivity : {3, 13, 50}) From 25c7ca4c4c66bcc8e57fc24d6e1206d3564df1b3 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 18:20:00 +0000 Subject: [PATCH 62/80] Cleanup and rename std storage --- cpp/test.cpp | 6 ++-- .../{simple_storage.hpp => std_storage.hpp} | 33 +++++++++---------- 2 files changed, 19 insertions(+), 20 deletions(-) rename include/usearch/{simple_storage.hpp => std_storage.hpp} (91%) diff --git a/cpp/test.cpp b/cpp/test.cpp index e2001d9cb..9335b29f5 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include using namespace unum::usearch; using namespace unum; @@ -320,7 +320,7 @@ int main(int, char**) { { using slot_t = std::uint32_t; using v2 = storage_v2; - using ss = simple_storage; + using ss = std_storage_at; test_cosine(collection_size, dimensions); test_cosine(collection_size, dimensions); @@ -328,7 +328,7 @@ int main(int, char**) { { using slot_t = uint40_t; using v2 = storage_v2; - using ss = simple_storage; + using ss = std_storage_at; std::printf("Indexing %zu vectors with cos: \n", collection_size); test_cosine(collection_size, dimensions); diff --git a/include/usearch/simple_storage.hpp b/include/usearch/std_storage.hpp similarity index 91% rename from include/usearch/simple_storage.hpp rename to include/usearch/std_storage.hpp index d4fa59c41..44983f4a6 100644 --- a/include/usearch/simple_storage.hpp +++ b/include/usearch/std_storage.hpp @@ -11,7 +11,8 @@ namespace unum { namespace usearch { /** - * @brief Storage abstraction for HNSW graph and associated vector data + * @brief A simple Storage implementation 
that uses standard cpp containers and complies with the usearch storage + *abstraction for HNSW graph and associated vector data * * @tparam key_at * The type of primary objects stored in the index. @@ -32,7 +33,7 @@ namespace usearch { **/ template > // -class dummy_storage_single_threaded { +class std_storage_at { public: using node_t = node_at; @@ -50,7 +51,7 @@ class dummy_storage_single_threaded { "Tape allocator must allocate separate addressable bytes"); public: - dummy_storage_single_threaded(index_config_t config, tape_allocator_at tape_allocator = {}) + std_storage_at(index_config_t config, tape_allocator_at tape_allocator = {}) : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } @@ -58,17 +59,15 @@ class dummy_storage_single_threaded { inline size_t node_size_bytes(std::size_t idx) const noexcept { return get_node_at(idx).node_size_bytes(pre_); } bool is_immutable() const noexcept { return bool(viewed_file_); } - // exported for client-side lock-declaration - // alternatively, could just use auto in client side - // ideally, there would be a way to make this "void", but I could not make it work - // as client side ends up declaring a void variable - // the downside of passing a primitive like "int" here is the "unused variable" compiler warning - // for the dummy lock guard variable. - struct dummy_lock { - // destructor necessary to avoid "unused variable warning" - // will this get properly optimized away? 
- ~dummy_lock() {} - }; + /* To get a single-threaded implementation of storage with no locking, replace lock_type + * with the following and return dummy_lock{} from node_lock() + * struct dummy_lock { + * // destructor necessary to avoid "unused variable warning" + * // at callcites of node_lock() + * ~dummy_lock() = default; + * }; + * using lock_type = dummy_lock; + */ using lock_type = std::unique_lock; bool reserve(std::size_t count) { @@ -130,7 +129,7 @@ class dummy_storage_single_threaded { void node_store(size_t slot, node_t node) noexcept { nodes_[slot] = node; } tape_allocator_at const& node_allocator() const noexcept { return tape_allocator_; } - // dummy lock just to satisfy the interface + inline lock_type node_lock(std::size_t i) const noexcept { return std::unique_lock(locks_[i]); } // serialization @@ -253,8 +252,8 @@ class dummy_storage_single_threaded { } }; -using dummy_dummy_storage = dummy_storage_single_threaded; -ASSERT_VALID_STORAGE(dummy_dummy_storage); +using dummy_std_storage_t = std_storage_at; +ASSERT_VALID_STORAGE(dummy_std_storage_t); } // namespace usearch } // namespace unum From 6ff2c8ffd94690b039d464566ca9368210f5470b Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 22:53:35 +0000 Subject: [PATCH 63/80] Improve std storage code --- cpp/test.cpp | 12 ++- include/usearch/std_storage.hpp | 176 +++++++++++++++++++------------- 2 files changed, 111 insertions(+), 77 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index 9335b29f5..07e759170 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -211,7 +211,7 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { for (bool multi : {false, true}) { for (std::size_t connectivity : {3, 13, 50}) { std::printf("- punned with connectivity %zu \n", connectivity); - using index_t = index_dense_gt; + using index_t = index_dense_gt; metric_punned_t metric(dimensions, metric_kind_t::cos_k, scalar_kind()); index_dense_config_t config(connectivity); config.multi 
= multi; @@ -319,15 +319,17 @@ int main(int, char**) { using key_t = std::int64_t; { using slot_t = std::uint32_t; - using v2 = storage_v2; - using ss = std_storage_at; + using v2 = + storage_v2_at; + using std_storage_t = + std_storage_at; test_cosine(collection_size, dimensions); - test_cosine(collection_size, dimensions); + test_cosine(collection_size, dimensions); } { using slot_t = uint40_t; - using v2 = storage_v2; + using v2 = storage_v2_at; using ss = std_storage_at; std::printf("Indexing %zu vectors with cos: \n", collection_size); diff --git a/include/usearch/std_storage.hpp b/include/usearch/std_storage.hpp index 44983f4a6..75ade41bf 100644 --- a/include/usearch/std_storage.hpp +++ b/include/usearch/std_storage.hpp @@ -31,31 +31,43 @@ namespace usearch { * The allocated buffers may be uninitialized. * **/ -template > // +template > // class std_storage_at { public: using node_t = node_at; private: using nodes_t = std::vector; + using span_bytes_t = span_gt; + using vectors_t = std::vector; nodes_t nodes_{}; + vectors_t vectors_{}; precomputed_constants_t pre_{}; - tape_allocator_at tape_allocator_{}; + allocator_at allocator_{}; + static_assert(!has_reset(), "reset()-able memory allocators not supported for this storage provider"); memory_mapped_file_t viewed_file_{}; mutable std::deque locks_{}; - using tape_allocator_traits_t = std::allocator_traits; - static_assert( // - sizeof(typename tape_allocator_traits_t::value_type) == 1, // - "Tape allocator must allocate separate addressable bytes"); + // the next three are used only in serialization/deserialization routines to know how to serialize vectors + // since this is only for serde/vars are marked mutable to still allow const-ness of saving method interface on + // storage instance + mutable size_t node_count_{}; + mutable size_t vector_size_{}; + // defaulted to true because that is what test.cpp assumes when using this storage directly + mutable bool exclude_vectors_ = true; + + // used in place of 
error handling throughout the class + static void expect(bool must_be_true) { + if (!must_be_true) + throw std::runtime_error("Failed!"); + } public: - std_storage_at(index_config_t config, tape_allocator_at tape_allocator = {}) - : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} + std_storage_at(index_config_t config, allocator_at allocator = {}) + : pre_(node_t::precompute_(config)), allocator_(allocator) {} inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } - inline byte_t* get_vector_at(std::size_t idx) const noexcept { return nullptr; } + inline byte_t* get_vector_at(std::size_t idx) const noexcept { return vectors_[idx].data(); } inline size_t node_size_bytes(std::size_t idx) const noexcept { return get_node_at(idx).node_size_bytes(pre_); } bool is_immutable() const noexcept { return bool(viewed_file_); } @@ -68,12 +80,13 @@ class std_storage_at { * }; * using lock_type = dummy_lock; */ - using lock_type = std::unique_lock; + using lock_type = std::unique_lock; bool reserve(std::size_t count) { if (count < nodes_.size()) return true; nodes_.resize(count); + vectors_.resize(count); locks_.resize(count); return true; } @@ -86,25 +99,28 @@ class std_storage_at { if (nodes_[i]) node_free(i, nodes_[i]); } + n = vectors_.size(); + for (std::size_t i = 0; i != n; ++i) { + span_bytes_t v = vectors_[i]; + if (v.data()) { + allocator_.deallocate(v.data(), v.size()); + } + } } + if (vectors_.data()) + std::fill(vectors_.begin(), vectors_.end(), span_bytes_t{}); if (nodes_.data()) std::fill(nodes_.begin(), nodes_.end(), node_t{}); } void reset() noexcept { clear(); } - using span_bytes_t = span_gt; - span_bytes_t node_malloc(level_t level) noexcept { std::size_t node_size = node_t::node_size_bytes(pre_, level); - byte_t* data = (byte_t*)tape_allocator_.allocate(node_size); + byte_t* data = (byte_t*)allocator_.allocate(node_size); return data ? 
span_bytes_t{data, node_size} : span_bytes_t{}; } void node_free(size_t slot, node_t node) { - if (!has_reset()) { - tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); - } else { - tape_allocator_.deallocate(nullptr, 0); - } + allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); nodes_[slot] = node_t{}; } node_t node_make(key_at key, level_t level) noexcept { @@ -118,110 +134,127 @@ class std_storage_at { node.level(level); return node; } + void node_store(size_t slot, node_t node) noexcept { nodes_[slot] = node; } + void set_vector_at(size_t slot, const byte_t* vector_data, size_t vector_size, bool copy_vector, bool reuse_node) { - // node_t node_make_copy_(span_bytes_t old_bytes) noexcept { - // byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); - // if (!data) - // return {}; - // std::memcpy(data, old_bytes.data(), old_bytes.size()); - // return node_t{data}; - // } + usearch_assert_m(!(reuse_node && !copy_vector), + "Cannot reuse node when not copying as there is no allocation needed"); + if (copy_vector) { + if (!reuse_node) + vectors_[slot] = span_bytes_t{allocator_.allocate(vector_size), vector_size}; + std::memcpy(vectors_[slot].data(), vector_data, vector_size); + } else + vectors_[slot] = span_bytes_t{(byte_t*)vector_data, vector_size}; + } - void node_store(size_t slot, node_t node) noexcept { nodes_[slot] = node; } - tape_allocator_at const& node_allocator() const noexcept { return tape_allocator_; } + allocator_at const& node_allocator() const noexcept { return allocator_; } inline lock_type node_lock(std::size_t i) const noexcept { return std::unique_lock(locks_[i]); } // serialization template - serialization_result_t save_vectors_to_stream(output_callback_at& output, std::uint64_t, - std::uint64_t, // + serialization_result_t save_vectors_to_stream(output_callback_at& output, std::uint64_t vector_size_bytes, + std::uint64_t node_count, // const vectors_metadata_at& metadata_buffer, 
serialization_config_t config = {}) const { - assert(config.exclude_vectors); - assert(!config.use_64_bit_dimensions); - bool ok = output(metadata_buffer, sizeof(metadata_buffer)); - assert(ok); + expect(!config.use_64_bit_dimensions); + expect(output(metadata_buffer, sizeof(metadata_buffer))); + + vector_size_ = vector_size_bytes; + node_count_ = node_count; + exclude_vectors_ = config.exclude_vectors; return {}; } + template serialization_result_t save_nodes_to_stream(output_callback_at& output, const index_serialized_header_t& header, progress_at& = {}) const { - bool ok = output(&header, sizeof(header)); - assert(ok); + expect(output(&header, sizeof(header))); + expect(output(&vector_size_, sizeof(vector_size_))); + expect(output(&node_count_, sizeof(node_count_))); for (std::size_t i = 0; i != header.size; ++i) { node_t node = get_node_at(i); level_t level = node.level(); - ok = output(&level, sizeof(level)); - assert(ok); + expect(output(&level, sizeof(level))); } // After that dump the nodes themselves for (std::size_t i = 0; i != header.size; ++i) { span_bytes_t node_bytes = get_node_at(i).node_bytes(pre_); - ok = output(node_bytes.data(), node_bytes.size()); - assert(ok); + expect(output(node_bytes.data(), node_bytes.size())); + if (!exclude_vectors_) { + byte_t* vector_bytes = get_vector_at(i); + expect(output(vector_bytes, vector_size_)); + } } return {}; } + template serialization_result_t load_vectors_from_stream(input_callback_at& input, // vectors_metadata_at& metadata_buffer, serialization_config_t config = {}) { - assert(config.exclude_vectors); - assert(!config.use_64_bit_dimensions); - bool ok = input(metadata_buffer, sizeof(metadata_buffer)); - assert(ok); + expect(!config.use_64_bit_dimensions); + expect(input(metadata_buffer, sizeof(metadata_buffer))); + exclude_vectors_ = config.exclude_vectors; return {}; } + template serialization_result_t load_nodes_from_stream(input_callback_at& input, index_serialized_header_t& header, progress_at& = {}) 
noexcept { - - bool ok = input(&header, sizeof(header)); - assert(ok); + expect(input(&header, sizeof(header))); + expect(input(&vector_size_, sizeof(vector_size_))); + expect(input(&node_count_, sizeof(node_count_))); if (!header.size) { reset(); return {}; } buffer_gt levels(header.size); - assert(levels); - ok = input(levels, header.size * sizeof(level_t)); - assert(ok); - - ok = reserve(header.size); - assert(ok); + expect(levels); + expect(input(levels, header.size * sizeof(level_t))); + expect(reserve(header.size)); // Load the nodes for (std::size_t i = 0; i != header.size; ++i) { span_bytes_t node_bytes = node_malloc(levels[i]); - ok = input(node_bytes.data(), node_bytes.size()); - assert(ok); + expect(input(node_bytes.data(), node_bytes.size())); node_store(i, node_t{node_bytes.data()}); + if (!exclude_vectors_) { + byte_t* vector_bytes = allocator_.allocate(vector_size_); + expect(input(vector_bytes, vector_size_)); + set_vector_at(i, vector_bytes, vector_size_, false, false); + } } return {}; } + template serialization_result_t view_vectors_from_stream( memory_mapped_file_t& file, // //// todo!! 
document that offset is a reference, or better - do not do it this way vectors_metadata_at& metadata_buffer, std::size_t& offset, serialization_config_t config = {}) { reset(); - assert(config.exclude_vectors); - assert(!config.use_64_bit_dimensions); + exclude_vectors_ = config.exclude_vectors; + expect(!config.use_64_bit_dimensions); - serialization_result_t result = file.open_if_not(); - assert(result); + expect(bool(file.open_if_not())); std::memcpy(metadata_buffer, file.data() + offset, sizeof(metadata_buffer)); offset += sizeof(metadata_buffer); return {}; } + template serialization_result_t view_nodes_from_stream(memory_mapped_file_t file, index_serialized_header_t& header, - std::size_t offset = 0, progress_at& progress = {}) noexcept { + std::size_t offset = 0, progress_at& = {}) noexcept { serialization_result_t result = file.open_if_not(); std::memcpy(&header, file.data() + offset, sizeof(header)); + offset += sizeof(header); + std::memcpy(&vector_size_, file.data() + offset, sizeof(vector_size_)); + offset += sizeof(vector_size_); + std::memcpy(&node_count_, file.data() + offset, sizeof(node_count_)); + offset += sizeof(node_count_); if (!header.size) { reset(); return result; @@ -231,29 +264,28 @@ class std_storage_at { config.connectivity_base = header.connectivity_base; pre_ = node_t::precompute_(config); buffer_gt offsets(header.size); - assert(offsets); - misaligned_ptr_gt levels{(byte_t*)file.data() + offset + sizeof(header)}; - offsets[0u] = offset + sizeof(header) + sizeof(level_t) * header.size; + expect(offsets); + misaligned_ptr_gt levels{(byte_t*)file.data() + offset}; + offset += sizeof(level_t) * header.size; + offsets[0u] = offset; for (std::size_t i = 1; i < header.size; ++i) - offsets[i] = offsets[i - 1] + node_t::node_size_bytes(pre_, levels[i - 1]); - if (!reserve(header.size)) { - reset(); - return result.failed("Out of memory"); - } + offsets[i] = offsets[i - 1] + node_t::node_size_bytes(pre_, levels[i - 1]) + vector_size_; + 
expect(reserve(header.size)); // Rapidly address all the nodes for (std::size_t i = 0; i != header.size; ++i) { node_store(i, node_t{(byte_t*)file.data() + offsets[i]}); - if (!progress(i + 1, header.size)) - return result.failed("Terminated by user"); + set_vector_at(i, (byte_t*)file.data() + offsets[i] + node_size_bytes(i), vector_size_, false, false); } viewed_file_ = std::move(file); return {}; } }; -using dummy_std_storage_t = std_storage_at; -ASSERT_VALID_STORAGE(dummy_std_storage_t); +using default_std_storage_t = std_storage_at; + +template using default_allocator_std_storage_at = std_storage_at; +ASSERT_VALID_STORAGE(default_std_storage_t); } // namespace usearch } // namespace unum From a7cc87ccf34d162b0c93ed3157646b742c33fac9 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 22:55:12 +0000 Subject: [PATCH 64/80] Add storage argument to index_dense_gt as well --- cpp/test.cpp | 3 +-- include/usearch/index_dense.hpp | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index 07e759170..e678c81b1 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -321,8 +321,7 @@ int main(int, char**) { using slot_t = std::uint32_t; using v2 = storage_v2_at; - using std_storage_t = - std_storage_at; + using std_storage_t = default_allocator_std_storage_at; test_cosine(collection_size, dimensions); test_cosine(collection_size, dimensions); diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 32d4a98fc..7ecf1f0e5 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -17,7 +17,7 @@ namespace unum { namespace usearch { -template class index_dense_gt; +template class index_dense_gt; /** * @brief The "magic" sequence helps infer the type of the file. 
@@ -274,6 +274,10 @@ inline index_dense_metadata_result_t index_dense_metadata_from_buffer(memory_map return result.failed("Not a dense USearch index!"); } + +using dynamic_allocator_t = aligned_allocator_gt; +using tape_allocator_t = memory_mapping_allocator_gt<64>; +using vectors_tape_allocator_t = memory_mapping_allocator_gt<8>; /** * @brief Oversimplified type-punned index for equidimensional vectors * with automatic @b down-casting, hardware-specific @b SIMD metrics, @@ -290,7 +294,11 @@ inline index_dense_metadata_result_t index_dense_metadata_from_buffer(memory_map * The second (2.) starts with @b "usearch"-magic-string, used to infer the file type on open. * The third (3.) is implemented by the underlying `index_gt` class. */ -template // +template > // class index_dense_gt { public: using vector_key_t = key_at; @@ -308,14 +316,9 @@ class index_dense_gt { using head_result_t = index_dense_head_result_t; using serialization_config_t = index_dense_serialization_config_t; - - using dynamic_allocator_t = aligned_allocator_gt; - using tape_allocator_t = memory_mapping_allocator_gt<64>; + using storage_t = storage_at; private: - using vectors_tape_allocator_t = memory_mapping_allocator_gt<8>; - using storage_t = - storage_v2; /// @brief Schema: input buffer, bytes in input buffer, output buffer. using cast_t = std::function; /// @brief Punned index. 
From d74263a13b323b9cb3be605a271b672e2041b5fa Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 23:11:50 +0000 Subject: [PATCH 65/80] Add a note on later using node_t from storage_, instead of re-including it in index_* classes --- include/usearch/index.hpp | 1 + include/usearch/index_dense.hpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index ab2f9405e..20dedf8f2 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1793,6 +1793,7 @@ class index_gt { using member_ref_t = member_ref_gt; using node_t = node_at; + // using node_t = typename storage_t::node_t; template class member_iterator_gt { using ref_t = ref_at; diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 7ecf1f0e5..024cb1ff0 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -305,6 +305,7 @@ class index_dense_gt { using key_t = vector_key_t; using compressed_slot_t = compressed_slot_at; using distance_t = distance_punned_t; + // using node_t = typename storage_at::node_t; using node_t = node_at; using metric_t = metric_punned_t; @@ -442,6 +443,7 @@ class index_dense_gt { index_dense_gt(index_dense_gt&& other) : config_(std::move(other.config_)), + // exchange does not work for typed_ when one of its template allocator types is typed_(exchange(other.typed_, nullptr)), // cast_buffer_(std::move(other.cast_buffer_)), // casts_(std::move(other.casts_)), // From 69cdbdb4b33c3df2ef95c01c49bb74e6d05fa859 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 23:17:35 +0000 Subject: [PATCH 66/80] Add note on exchange not working with std::allocator template argument --- include/usearch/index_dense.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 024cb1ff0..604f48f4a 100644 --- a/include/usearch/index_dense.hpp +++ 
b/include/usearch/index_dense.hpp @@ -444,7 +444,9 @@ class index_dense_gt { : config_(std::move(other.config_)), // exchange does not work for typed_ when one of its template allocator types is - typed_(exchange(other.typed_, nullptr)), // + // the std::allocator + // todo:: ask-Ashot: not sure why, but this seems to fix it + typed_(std::move(other.typed_)), // cast_buffer_(std::move(other.cast_buffer_)), // casts_(std::move(other.casts_)), // metric_(std::move(other.metric_)), // @@ -584,7 +586,6 @@ class index_dense_gt { * @see `serialized_length` for the length of the binary serialized representation. */ std::size_t memory_usage() const { - return // typed_->memory_usage(0) + // storage_.node_allocator().total_wasted() + // storage_.node_allocator().total_reserved(); // From 96d9c5379bfb4bf0a22e5b2197be2046ccb997a4 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 23:27:48 +0000 Subject: [PATCH 67/80] Add a todo on storage::memory_usage --- include/usearch/index_dense.hpp | 8 +++----- include/usearch/std_storage.hpp | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 604f48f4a..5900c1804 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -586,11 +586,9 @@ class index_dense_gt { * @see `serialized_length` for the length of the binary serialized representation. 
*/ std::size_t memory_usage() const { - typed_->memory_usage(0) + // - storage_.node_allocator().total_wasted() + // - storage_.node_allocator().total_reserved(); // - - // vectors_tape_allocator_.total_allocated(); + size_t res = typed_->memory_usage(0); + // todo:: add some memory_usage() interface to storage_ + return res; } static constexpr std::size_t any_thread() { return std::numeric_limits::max(); } diff --git a/include/usearch/std_storage.hpp b/include/usearch/std_storage.hpp index 75ade41bf..7f6dfb04f 100644 --- a/include/usearch/std_storage.hpp +++ b/include/usearch/std_storage.hpp @@ -80,7 +80,7 @@ class std_storage_at { * }; * using lock_type = dummy_lock; */ - using lock_type = std::unique_lock; + using lock_type = std::unique_lock; bool reserve(std::size_t count) { if (count < nodes_.size()) From e024fcae8d8a6e5aefd02aabf055054ac3f4fcdc Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Mon, 8 Jan 2024 23:28:22 +0000 Subject: [PATCH 68/80] Rename storage_v2 -> storage_v2_at --- include/usearch/storage.hpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index 3b87d4f65..a835e4736 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -193,6 +193,11 @@ using serialization_config_t = index_dense_serialization_config_t; ASSERT_HAS_CONST_FUNCTION(CHECK_AT, node_size_bytes, std::size_t(std::size_t idx)); \ ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, is_immutable, bool()); \ \ + /*Setters*/ \ + ASSERT_HAS_FUNCTION(CHECK_AT, set_vector_at, \ + void(std::size_t idx, const byte_t* vector_data, std::size_t vector_bytes, bool copy_vector, \ + bool reuse_node)); \ + /*Container methods */ \ ASSERT_HAS_FUNCTION(CHECK_AT, reserve, bool(std::size_t count)); \ ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, clear, void()); \ ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, reset, void()); \ @@ -300,7 +305,7 @@ template , // typename vectors_allocator_at = 
tape_allocator_at, // typename dynamic_allocator_at = std::allocator> // -class storage_v2 { +class storage_v2_at { public: using node_t = node_at; @@ -347,7 +352,7 @@ class storage_v2 { }; public: - storage_v2(index_config_t config, tape_allocator_at tape_allocator = {}) + storage_v2_at(index_config_t config, tape_allocator_at tape_allocator = {}) : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } @@ -378,7 +383,7 @@ class storage_v2 { if (!new_mutexes || !new_nodes) return false; if (nodes_) - std::memcpy(new_nodes.data(), nodes_.data(), sizeof(node_t) * size()); + std::memcpy(new_nodes.data(), nodes_.data(), sizeof(node_t) * nodes_.size()); nodes_mutexes_ = std::move(new_mutexes); nodes_ = std::move(new_nodes); @@ -455,7 +460,6 @@ class storage_v2 { // } void node_store(size_t slot, node_t node) noexcept { nodes_[slot] = node; } - inline size_t size() const noexcept { return nodes_.size(); } tape_allocator_at const& node_allocator() const noexcept { return tape_allocator_; } // dummy lock just to satisfy the interface constexpr inline lock_type node_lock(std::size_t slot) const noexcept { @@ -800,7 +804,7 @@ class storage_v2 { #pragma endregion }; -using dummy_storage = storage_v2; +using dummy_storage = storage_v2_at; ASSERT_VALID_STORAGE(dummy_storage); From 9b979bd3325b7b07723dc7b5e3b18fd07e3c226f Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 9 Jan 2024 00:05:46 +0000 Subject: [PATCH 69/80] Add a note on index_* not being movable --- include/usearch/index.hpp | 5 ++--- include/usearch/index_dense.hpp | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 20dedf8f2..9ea0d2c4a 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2103,10 +2103,9 @@ class index_gt { std::swap(pre_, other.pre_); std::swap(max_level_, other.max_level_); std::swap(entry_slot_, 
other.entry_slot_); - assert(false); - // std::swap(nodes_, other.nodes_); - // std::swap(nodes_mutexes_, other.nodes_mutexes_); std::swap(contexts_, other.contexts_); + // not movable because of storage_t& reference-member + assert(false); // Non-atomic parts. std::size_t capacity_copy = nodes_capacity_; diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 5900c1804..fa5ccf2fd 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -480,6 +480,7 @@ class index_dense_gt { std::swap(slot_lookup_, other.slot_lookup_); std::swap(free_keys_, other.free_keys_); std::swap(free_key_, other.free_key_); + // not movable because of storage_t& reference-member assert(false); } From 6e112587e448e85aafde4ed5eb37fb9fa2d46035 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 9 Jan 2024 00:11:36 +0000 Subject: [PATCH 70/80] Rename view sub-interfaces --- include/usearch/index.hpp | 2 +- include/usearch/index_dense.hpp | 2 +- include/usearch/std_storage.hpp | 6 +++--- include/usearch/storage.hpp | 10 +++++----- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 9ea0d2c4a..93362c92c 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2885,7 +2885,7 @@ class index_gt { // storage_ may already have some relevant stuff... 
serialization_result_t result; index_serialized_header_t header; - result = storage_.view_nodes_from_stream(std::move(file), header, offset, progress); + result = storage_.view_nodes_from_file(std::move(file), header, offset, progress); if (!result) return result; diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index fa5ccf2fd..43c1bc5ce 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -901,7 +901,7 @@ class index_dense_gt { serialization_result_t result; // Note that buffer and offset are passed by reference index_dense_head_buffer_t buffer; - result = storage_.view_vectors_from_stream(file, buffer, offset, config); + result = storage_.view_vectors_from_file(file, buffer, offset, config); if (!result) return result; // Load metadata and choose the right metric diff --git a/include/usearch/std_storage.hpp b/include/usearch/std_storage.hpp index 7f6dfb04f..6093ca8ae 100644 --- a/include/usearch/std_storage.hpp +++ b/include/usearch/std_storage.hpp @@ -231,7 +231,7 @@ class std_storage_at { } template - serialization_result_t view_vectors_from_stream( + serialization_result_t view_vectors_from_file( memory_mapped_file_t& file, // //// todo!! 
document that offset is a reference, or better - do not do it this way vectors_metadata_at& metadata_buffer, std::size_t& offset, serialization_config_t config = {}) { @@ -246,8 +246,8 @@ class std_storage_at { } template - serialization_result_t view_nodes_from_stream(memory_mapped_file_t file, index_serialized_header_t& header, - std::size_t offset = 0, progress_at& = {}) noexcept { + serialization_result_t view_nodes_from_file(memory_mapped_file_t file, index_serialized_header_t& header, + std::size_t offset = 0, progress_at& = {}) noexcept { serialization_result_t result = file.open_if_not(); std::memcpy(&header, file.data() + offset, sizeof(header)); offset += sizeof(header); diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index a835e4736..f091c8948 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -219,11 +219,11 @@ using serialization_config_t = index_dense_serialization_config_t; dummy_progress_t& progress)); \ \ /* View from file API*/ \ - ASSERT_HAS_FUNCTION(CHECK_AT, view_vectors_from_stream, \ + ASSERT_HAS_FUNCTION(CHECK_AT, view_vectors_from_file, \ serialization_result_t(memory_mapped_file_t& file, \ dummy_vectors_metadata_buffer_t& metadata_buffer, std::size_t& offset, \ serialization_config_t config)); \ - ASSERT_HAS_FUNCTION(CHECK_AT, view_nodes_from_stream, \ + ASSERT_HAS_FUNCTION(CHECK_AT, view_nodes_from_file, \ serialization_result_t(memory_mapped_file_t file, index_serialized_header_t& metadata_buffer, \ std::size_t& offset, dummy_progress_t& progress)); \ static_assert(true, "this is to require a semicolon at the end of macro call") @@ -675,7 +675,7 @@ class storage_v2_at { * @return Outcome descriptor explicitly convertible to boolean. */ template - serialization_result_t view_vectors_from_stream( + serialization_result_t view_vectors_from_file( memory_mapped_file_t& file, // //// todo!! 
document that offset is a reference, or better - do not do it this way vectors_metadata_at& metadata_buffer, std::size_t& offset, serialization_config_t config = {}) { @@ -738,8 +738,8 @@ class storage_v2_at { * @brief Symmetric to `save_from_stream`, pulls data from a stream. */ template - serialization_result_t view_nodes_from_stream(memory_mapped_file_t file, index_serialized_header_t& header, - std::size_t offset = 0, progress_at& progress = {}) noexcept { + serialization_result_t view_nodes_from_file(memory_mapped_file_t file, index_serialized_header_t& header, + std::size_t offset = 0, progress_at& progress = {}) noexcept { serialization_result_t result = file.open_if_not(); if (!result) From ea5cdbbeae5416095f0c4fe448732812362d1af7 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 9 Jan 2024 00:29:46 +0000 Subject: [PATCH 71/80] Add setters to the enforced storage API --- include/usearch/std_storage.hpp | 5 +++-- include/usearch/storage.hpp | 21 ++++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/include/usearch/std_storage.hpp b/include/usearch/std_storage.hpp index 6093ca8ae..bcd996774 100644 --- a/include/usearch/std_storage.hpp +++ b/include/usearch/std_storage.hpp @@ -34,11 +34,12 @@ namespace usearch { template > // class std_storage_at { public: - using node_t = node_at; + using key_t = key_at; + using node_t = node_at; + using span_bytes_t = span_gt; private: using nodes_t = std::vector; - using span_bytes_t = span_gt; using vectors_t = std::vector; nodes_t nodes_{}; diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index f091c8948..b4454e26c 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -187,20 +187,25 @@ using serialization_config_t = index_dense_serialization_config_t; *implementation takes a copy **/ #define ASSERT_VALID_STORAGE(CHECK_AT) \ - ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, node_lock, CHECK_AT::lock_type(std::size_t idx)); \ 
ASSERT_HAS_CONST_FUNCTION(CHECK_AT, get_node_at, CHECK_AT::node_t(std::size_t idx)); \ ASSERT_HAS_CONST_FUNCTION(CHECK_AT, get_vector_at, byte_t*(std::size_t idx)); \ ASSERT_HAS_CONST_FUNCTION(CHECK_AT, node_size_bytes, std::size_t(std::size_t idx)); \ ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, is_immutable, bool()); \ \ - /*Setters*/ \ - ASSERT_HAS_FUNCTION(CHECK_AT, set_vector_at, \ - void(std::size_t idx, const byte_t* vector_data, std::size_t vector_bytes, bool copy_vector, \ - bool reuse_node)); \ /*Container methods */ \ ASSERT_HAS_FUNCTION(CHECK_AT, reserve, bool(std::size_t count)); \ ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, clear, void()); \ ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, reset, void()); \ + /*Setters*/ \ + ASSERT_HAS_FUNCTION(CHECK_AT, node_malloc, CHECK_AT::span_bytes_t(level_t level)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, node_free, void(std::size_t slot, CHECK_AT::node_t node)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, node_make, CHECK_AT::node_t(CHECK_AT::key_t key, level_t level)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, node_store, void(std::size_t slot, CHECK_AT::node_t node)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, set_vector_at, \ + void(std::size_t idx, const byte_t* vector_data, std::size_t vector_bytes, bool copy_vector, \ + bool reuse_node)); \ + /*Locking*/ \ + ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, node_lock, CHECK_AT::lock_type(std::size_t idx)); \ /*Save/Restore API enforcement*/ \ ASSERT_HAS_FUNCTION(CHECK_AT, save_vectors_to_stream, \ serialization_result_t( \ @@ -238,7 +243,8 @@ template // class storage_interface { public: - using node_t = node_at; + using key_t = key_at; + using node_t = node_at; // storage_interface(index_config_t conig, tape_allocator_at allocator = {}); struct lock_type; @@ -307,7 +313,8 @@ template > // class storage_v2_at { public: - using node_t = node_at; + using key_t = key_at; + using node_t = node_at; private: using nodes_mutexes_t = bitset_gt; From a74543994501a9f19c012b8328efeadab714a1aa Mon Sep 17 00:00:00 2001 
From: Narek Galstyan Date: Tue, 9 Jan 2024 01:29:45 +0000 Subject: [PATCH 72/80] Fix std storage commentdoc --- include/usearch/std_storage.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/usearch/std_storage.hpp b/include/usearch/std_storage.hpp index bcd996774..8e4476f01 100644 --- a/include/usearch/std_storage.hpp +++ b/include/usearch/std_storage.hpp @@ -25,9 +25,8 @@ namespace usearch { * Can be a built-in @b `uint32_t`, `uint64_t`, or our custom @b `uint40_t`. * Which makes the most sense for 4B+ entry indexes. * - * @tparam tape_allocator_at + * @tparam allocator_at * Potentially different memory allocator for primary allocations of nodes and vectors. - * It would never `deallocate` separate entries, and would only free all the space at once. * The allocated buffers may be uninitialized. * **/ From de89cef7bfdaea1a45753a8047d0047219faf428 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 9 Jan 2024 02:38:55 +0000 Subject: [PATCH 73/80] Cleanup and improve storage naming in test.cpp --- cpp/test.cpp | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index e678c81b1..61049c7b4 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -163,7 +163,6 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { using vector_key_t = key_at; using slot_t = slot_at; - // using index_storage_t = storage_proxy_t; using index_typed_t = index_gt; using member_cref_t = typename index_typed_t::member_cref_t; using member_citerator_t = typename index_typed_t::member_citerator_t; @@ -199,9 +198,6 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { std::printf("- templates with connectivity %zu \n", connectivity); metric_t metric{&matrix, dimensions}; index_config_t config(connectivity); - std::vector> nodes; - bitset_gt nodes_mutexes; - // index_storage_t storage{&nodes, &nodes_mutexes, config}; storage_t storage{config}; index_typed_t 
index_typed(storage, config); test_cosine(index_typed, matrix, metric); @@ -319,21 +315,20 @@ int main(int, char**) { using key_t = std::int64_t; { using slot_t = std::uint32_t; - using v2 = - storage_v2_at; - using std_storage_t = default_allocator_std_storage_at; + using storage_v2_t = storage_v2_at; + using std_storage_t = std_storage_at; - test_cosine(collection_size, dimensions); + test_cosine(collection_size, dimensions); test_cosine(collection_size, dimensions); } { using slot_t = uint40_t; - using v2 = storage_v2_at; - using ss = std_storage_at; + using storage_v2_t = storage_v2_at; + using std_storage_t = std_storage_at; std::printf("Indexing %zu vectors with cos: \n", collection_size); - test_cosine(collection_size, dimensions); - test_cosine(collection_size, dimensions); + test_cosine(collection_size, dimensions); + test_cosine(collection_size, dimensions); } } From 72d68a01b30ba7de8c2da8ca960de4c444a99f5e Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 9 Jan 2024 02:39:51 +0000 Subject: [PATCH 74/80] Bugfix: make sure default storage types allocate memory with proper alignment --- include/usearch/index_dense.hpp | 11 +++-------- include/usearch/std_storage.hpp | 6 +++--- include/usearch/storage.hpp | 12 ++++++++---- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 43c1bc5ce..1d43dd9de 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -275,9 +275,6 @@ inline index_dense_metadata_result_t index_dense_metadata_from_buffer(memory_map return result.failed("Not a dense USearch index!"); } -using dynamic_allocator_t = aligned_allocator_gt; -using tape_allocator_t = memory_mapping_allocator_gt<64>; -using vectors_tape_allocator_t = memory_mapping_allocator_gt<8>; /** * @brief Oversimplified type-punned index for equidimensional vectors * with automatic @b down-casting, hardware-specific @b SIMD metrics, @@ -294,11 +291,9 @@ using 
vectors_tape_allocator_t = memory_mapping_allocator_gt<8>; * The second (2.) starts with @b "usearch"-magic-string, used to infer the file type on open. * The third (3.) is implemented by the underlying `index_gt` class. */ -template > // +template > // class index_dense_gt { public: using vector_key_t = key_at; diff --git a/include/usearch/std_storage.hpp b/include/usearch/std_storage.hpp index 8e4476f01..fefd1aca4 100644 --- a/include/usearch/std_storage.hpp +++ b/include/usearch/std_storage.hpp @@ -28,9 +28,11 @@ namespace usearch { * @tparam allocator_at * Potentially different memory allocator for primary allocations of nodes and vectors. * The allocated buffers may be uninitialized. + * Note that we are using a memory aaligned allocator in place of std::allocator + * Because of scalar_t memory requirements in index_* * **/ -template > // +template > // class std_storage_at { public: using key_t = key_at; @@ -283,8 +285,6 @@ class std_storage_at { }; using default_std_storage_t = std_storage_at; - -template using default_allocator_std_storage_at = std_storage_at; ASSERT_VALID_STORAGE(default_std_storage_t); } // namespace usearch diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index b4454e26c..910b26a0e 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -281,6 +281,10 @@ class storage_interface { std::size_t memory_usage(); }; +/*Default allocators for storage_v2 */ +using dynamic_allocator_t = aligned_allocator_gt; +using tape_allocator_t = memory_mapping_allocator_gt<64>; +using vectors_tape_allocator_t = memory_mapping_allocator_gt<8>; /** * @brief Storage abstraction for HNSW graph and associated vector data * @@ -307,10 +311,10 @@ class storage_interface { * I disabled inheritence for now as interface compatibility is more * thoroughly enforced via the macros at the beginning of this file **/ -template , // - typename vectors_allocator_at = tape_allocator_at, // - typename dynamic_allocator_at = 
std::allocator> // +template // class storage_v2_at { public: using key_t = key_at; From 87ab0731c737f3f89fd68ae5a7a051c39be03122 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 9 Jan 2024 05:23:19 +0000 Subject: [PATCH 75/80] Add proper alignment for std_storage so UBsan will not complain from view() --- include/usearch/std_storage.hpp | 60 ++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/include/usearch/std_storage.hpp b/include/usearch/std_storage.hpp index fefd1aca4..a1f7c0443 100644 --- a/include/usearch/std_storage.hpp +++ b/include/usearch/std_storage.hpp @@ -57,12 +57,25 @@ class std_storage_at { mutable size_t vector_size_{}; // defaulted to true because that is what test.cpp assumes when using this storage directly mutable bool exclude_vectors_ = true; + // used to maintain proper alignment in stored indexes to make sure view() does not result in misaligned accesses + mutable size_t file_offset_{}; // used in place of error handling throughout the class static void expect(bool must_be_true) { if (!must_be_true) throw std::runtime_error("Failed!"); } + // padding buffer, some prefix of which will be used every time we need padding in the serialization + // of the index. 
+ // Rest of the array will be zeros but we will also never need paddings that large + // The pattern is to help in debugging + constexpr static byte_t padding_buffer[64] = {0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42}; + + template size_t align(T v) const { + return (sizeof(A) - (size_t)v % sizeof(A)) % sizeof(A); + } + + template size_t align4(T v) const { return align(v); } public: std_storage_at(index_config_t config, allocator_at allocator = {}) @@ -163,6 +176,7 @@ class std_storage_at { expect(!config.use_64_bit_dimensions); expect(output(metadata_buffer, sizeof(metadata_buffer))); + file_offset_ = sizeof(metadata_buffer); vector_size_ = vector_size_bytes; node_count_ = node_count; exclude_vectors_ = config.exclude_vectors; @@ -175,19 +189,29 @@ class std_storage_at { expect(output(&header, sizeof(header))); expect(output(&vector_size_, sizeof(vector_size_))); expect(output(&node_count_, sizeof(node_count_))); + file_offset_ += sizeof(header) + sizeof(vector_size_) + sizeof(node_count_); + // Save node levels, for offset calculation for (std::size_t i = 0; i != header.size; ++i) { node_t node = get_node_at(i); level_t level = node.level(); expect(output(&level, sizeof(level))); } + file_offset_ += header.size * sizeof(level_t); + // After that dump the nodes themselves for (std::size_t i = 0; i != header.size; ++i) { span_bytes_t node_bytes = get_node_at(i).node_bytes(pre_); expect(output(node_bytes.data(), node_bytes.size())); + file_offset_ += node_bytes.size(); if (!exclude_vectors_) { + // add padding for proper alignment + int16_t padding_size = align4(file_offset_); + expect(output(&padding_buffer, padding_size)); + file_offset_ += padding_size; byte_t* vector_bytes = get_vector_at(i); expect(output(vector_bytes, vector_size_)); + file_offset_ += vector_size_; } } return {}; @@ -199,6 +223,7 @@ class std_storage_at { serialization_config_t config = {}) { expect(!config.use_64_bit_dimensions); expect(input(metadata_buffer, sizeof(metadata_buffer))); + 
file_offset_ = sizeof(metadata_buffer); exclude_vectors_ = config.exclude_vectors; return {}; } @@ -206,9 +231,11 @@ class std_storage_at { template serialization_result_t load_nodes_from_stream(input_callback_at& input, index_serialized_header_t& header, progress_at& = {}) noexcept { + byte_t in_padding_buffer[64] = {0}; expect(input(&header, sizeof(header))); expect(input(&vector_size_, sizeof(vector_size_))); expect(input(&node_count_, sizeof(node_count_))); + file_offset_ += sizeof(header) + sizeof(vector_size_) + sizeof(node_count_); if (!header.size) { reset(); return {}; @@ -218,14 +245,21 @@ class std_storage_at { expect(input(levels, header.size * sizeof(level_t))); expect(reserve(header.size)); + file_offset_ += header.size * sizeof(level_t); // Load the nodes for (std::size_t i = 0; i != header.size; ++i) { span_bytes_t node_bytes = node_malloc(levels[i]); expect(input(node_bytes.data(), node_bytes.size())); + file_offset_ += node_bytes.size(); node_store(i, node_t{node_bytes.data()}); if (!exclude_vectors_) { + int16_t padding_size = align4(file_offset_); + expect(input(&in_padding_buffer, padding_size)); + file_offset_ += padding_size; + expect(std::memcmp(in_padding_buffer, padding_buffer, padding_size) == 0); byte_t* vector_bytes = allocator_.allocate(vector_size_); expect(input(vector_bytes, vector_size_)); + file_offset_ += vector_size_; set_vector_at(i, vector_bytes, vector_size_, false, false); } } @@ -243,6 +277,7 @@ class std_storage_at { expect(bool(file.open_if_not())); std::memcpy(metadata_buffer, file.data() + offset, sizeof(metadata_buffer)); + file_offset_ = sizeof(metadata_buffer); offset += sizeof(metadata_buffer); return {}; } @@ -270,14 +305,31 @@ class std_storage_at { misaligned_ptr_gt levels{(byte_t*)file.data() + offset}; offset += sizeof(level_t) * header.size; offsets[0u] = offset; - for (std::size_t i = 1; i < header.size; ++i) - offsets[i] = offsets[i - 1] + node_t::node_size_bytes(pre_, levels[i - 1]) + vector_size_; + for 
(std::size_t i = 1; i < header.size; ++i) { + offsets[i] = offsets[i - 1] + node_t::node_size_bytes(pre_, levels[i - 1]); + if (!exclude_vectors_) { + // add room for vector alignment + offsets[i] += align4(offsets[i]); + offsets[i] += vector_size_; + } + } expect(reserve(header.size)); - // Rapidly address all the nodes + // Rapidly address all the nodes and vectors for (std::size_t i = 0; i != header.size; ++i) { node_store(i, node_t{(byte_t*)file.data() + offsets[i]}); - set_vector_at(i, (byte_t*)file.data() + offsets[i] + node_size_bytes(i), vector_size_, false, false); + expect(node_size_bytes(i) == node_t::node_size_bytes(pre_, levels[i])); + + if (!exclude_vectors_) { + size_t vector_offset = offsets[i] + node_size_bytes(i); + expect(std::memcmp((byte_t*)file.data() + vector_offset, padding_buffer, align4(vector_offset)) == 0); + vector_offset += align4(vector_offset); + + // expect proper alignment + expect(align4(vector_offset) == 0); + expect(align4((byte_t*)file.data() + vector_offset) == 0); + set_vector_at(i, (byte_t*)file.data() + vector_offset, vector_size_, false, false); + } } viewed_file_ = std::move(file); return {}; From b2a3d3adec972536d52b2e36bdcf86192d8fdcbd Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 9 Jan 2024 05:32:31 +0000 Subject: [PATCH 76/80] Bring back original clang-tidy --- .clang-tidy | 28 ++++++---------------------- CMakeLists.txt | 1 - 2 files changed, 6 insertions(+), 23 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 990b9503d..00063a7d4 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,9 +1,4 @@ -Checks: '-*, - clang-diagnostic-*, - clang-analyzer-*, - cppcoreguidelines-*, - performance-move-constructor-init, - pugprone-*, +Checks: '*, -altera-id-dependent-backward-branch, -altera-struct-pack-align, -altera-unroll-loops, @@ -12,7 +7,6 @@ Checks: '-*, -cert-err58-cpp, -concurrency-mt-unsafe, -cppcoreguidelines-avoid-const-or-ref-data-members, - cppcoreguidelines-pro-type-member-init, 
-cppcoreguidelines-avoid-do-while, -cppcoreguidelines-avoid-goto, -cppcoreguidelines-avoid-magic-numbers, @@ -29,13 +23,10 @@ Checks: '-*, -fuchsia-default-arguments-declarations, -fuchsia-overloaded-operator, -google-explicit-constructor, - -google-readability-braces-around-statements, - -google-readability-casting, - # -google-readability-function-size, + -google-readability-function-size, -google-runtime-int, -google-runtime-references, -hicpp-avoid-goto, - -hicpp-braces-around-statements, -hicpp-explicit-conversions, -hicpp-function-size, -hicpp-no-array-decay, @@ -44,6 +35,7 @@ Checks: '-*, -hicpp-uppercase-literal-suffix, -llvm-header-guard, -llvm-include-order, + -llvmlibc-*, -misc-use-anonymous-namespace, -misc-confusable-identifiers, -misc-no-recursion, @@ -52,25 +44,17 @@ Checks: '-*, -modernize-type-traits, -modernize-use-nodiscard, -modernize-use-trailing-return-type, - -readability-isolate-declaration, - -readability-braces-around-statements, -readability-function-cognitive-complexity, -readability-function-size, -readability-identifier-length, -readability-magic-numbers, -readability-redundant-access-specifiers, -readability-simplify-boolean-expr, - -readability-braces-around-statements, - -readability-uppercase-literal-suffix, - -llvmlibc-*' + -readability-uppercase-literal-suffix' + CheckOptions: - key: hicpp-special-member-functions.AllowSoleDefaultDtor value: 1 - # -google-readability-braces-around-statements does not require this to suppress, for some reason the others do - - key: hicpp-braces-around-statements.ShortStatementLines - value: 20 - - key: readability-braces-around-statements.ShortStatementLines - value: 20 - +WarningsAsErrors: '*' HeaderFilterRegex: '.*hpp$' diff --git a/CMakeLists.txt b/CMakeLists.txt index a5e664757..0a3deb6da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -153,7 +153,6 @@ function (setup_target TARGET_NAME) > -ffast-math -fPIC - -fpermissive -Wall -Wextra -Wno-conversion From 
c504e18105d3c7330b784298b0e82382e261ae7b Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Thu, 11 Jan 2024 05:09:54 +0000 Subject: [PATCH 77/80] Update vectors allocator back to default since custom one has compile issues under some conditions --- include/usearch/storage.hpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp index 910b26a0e..30f82f35f 100644 --- a/include/usearch/storage.hpp +++ b/include/usearch/storage.hpp @@ -331,7 +331,10 @@ class storage_v2_at { // todo:: ask-Ashot: in the older version vectors_lookup_ was using the default vector allocator, // and not the dynamic_allocator_at that was passed it. // Can remove this if the previous approach was intentional - using vectors_t = std::vector; + // Update (Jan 10): It seems giving vectors_allocator_t as vectors_t + // allocator below only works when CMAKE_HAVE_LIBC_PTHREAD is false + // otherwise, I get a compile error + using vectors_t = std::vector; /// @brief C-style array of `node_t` smart-pointers. 
// buffer_gt nodes_{}; @@ -815,9 +818,9 @@ class storage_v2_at { #pragma endregion }; -using dummy_storage = storage_v2_at; +using default_storage_v2_t = storage_v2_at; -ASSERT_VALID_STORAGE(dummy_storage); +ASSERT_VALID_STORAGE(default_storage_v2_t); } // namespace usearch } // namespace unum From 8dccd68689d80ea898183bf3019ca5dc999c68c2 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 14 Jan 2024 03:27:59 +0000 Subject: [PATCH 78/80] Store storage pointer instead of storage reference in typed index to allow move --- include/usearch/index.hpp | 81 ++++++++++++++++++--------------- include/usearch/index_dense.hpp | 2 +- 2 files changed, 45 insertions(+), 38 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 93362c92c..dcda651aa 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1813,8 +1813,8 @@ class index_gt { using pointer = void; using reference = ref_t; - reference operator*() const noexcept { return {index_->storage_.get_node_at(slot_).key(), slot_}; } - vector_key_t key() const noexcept { return index_->storage_.get_node_at(slot_).key(); } + reference operator*() const noexcept { return {index_->storage_->get_node_at(slot_).key(), slot_}; } + vector_key_t key() const noexcept { return index_->storage_->get_node_at(slot_).key(); } friend inline std::size_t get_slot(member_iterator_gt const& it) noexcept { return it.slot_; } friend inline vector_key_t get_key(member_iterator_gt const& it) noexcept { return it.key(); } @@ -1951,7 +1951,8 @@ class index_gt { }; // todo:: do I have to init this? - storage_t& storage_; + // A: Yes! matters a lot in move constructors!! + storage_t* storage_{}; index_config_t config_{}; index_limits_t limits_{}; @@ -1987,14 +1988,14 @@ class index_gt { std::size_t max_level() const noexcept { return nodes_count_ ? 
static_cast(max_level_) : 0; } index_config_t const& config() const noexcept { return config_; } index_limits_t const& limits() const noexcept { return limits_; } - bool is_immutable() const noexcept { return storage_.is_immutable(); } + bool is_immutable() const noexcept { return storage_->is_immutable(); } /** * @section Exceptions * Doesn't throw, unless the ::metric's and ::allocators's throw on copy-construction. */ explicit index_gt( // - storage_t& storage, // + storage_t* storage, // index_config_t config = {}, dynamic_allocator_t dynamic_allocator = {}) noexcept : storage_(storage), config_(config), limits_(0, 0), dynamic_allocator_(std::move(dynamic_allocator)), pre_(node_t::precompute_(config)), nodes_count_(0u), max_level_(-1), entry_slot_(0u), contexts_() {} @@ -2004,7 +2005,10 @@ class index_gt { */ index_gt fork() noexcept { return index_gt{config_, dynamic_allocator_}; } - ~index_gt() noexcept { reset(); } + ~index_gt() noexcept { + reset(); + storage_ = nullptr; + } index_gt(index_gt&& other) noexcept { swap(other); } @@ -2054,8 +2058,8 @@ class index_gt { member_iterator_t begin() noexcept { return {this, 0}; } member_iterator_t end() noexcept { return {this, size()}; } - member_ref_t at(std::size_t slot) noexcept { return {storage_.get_node_at(slot).key(), slot}; } - member_cref_t at(std::size_t slot) const noexcept { return {storage_.get_node_at(slot).ckey(), slot}; } + member_ref_t at(std::size_t slot) noexcept { return {storage_->get_node_at(slot).key(), slot}; } + member_cref_t at(std::size_t slot) const noexcept { return {storage_->get_node_at(slot).ckey(), slot}; } member_iterator_t iterator_at(std::size_t slot) noexcept { return {this, slot}; } member_citerator_t citerator_at(std::size_t slot) const noexcept { return {this, slot}; } @@ -2070,7 +2074,8 @@ class index_gt { * Will keep the number of available threads/contexts the same as it was. 
*/ void clear() noexcept { - storage_.clear(); + if (storage_) + storage_->clear(); nodes_count_ = 0; max_level_ = -1; @@ -2087,7 +2092,8 @@ class index_gt { void reset() noexcept { clear(); - storage_.reset(); + if (storage_) + storage_->reset(); contexts_ = {}; limits_ = index_limits_t{0, 0}; nodes_capacity_ = 0; @@ -2127,7 +2133,7 @@ class index_gt { && limits.members <= limits_.members) return true; - bool storage_reserved = storage_.reserve(limits.members); + bool storage_reserved = storage_->reserve(limits.members); buffer_gt new_contexts(limits.threads()); if (!new_contexts || !storage_reserved) return false; @@ -2193,7 +2199,7 @@ class index_gt { friend class index_gt; inline search_result_t(index_gt const& index, top_candidates_t& top) noexcept - : storage_(&index.storage_), top_(&top) {} + : storage_(index.storage_), top_(&top) {} public: /** @brief Number of search results found. */ @@ -2347,7 +2353,7 @@ class index_gt { } // Allocate the neighbors - node_t node = storage_.node_make(key, target_level); + node_t node = storage_->node_make(key, target_level); if (!node) { nodes_count_.fetch_sub(1); return result.failed("Out of memory!"); @@ -2355,11 +2361,11 @@ class index_gt { if (target_level <= max_level_copy) new_level_lock.unlock(); - storage_.node_store(new_slot, node); + storage_->node_store(new_slot, node); result.new_size = new_slot + 1; result.slot = new_slot; callback(at(new_slot)); - node_lock_t new_lock = storage_.node_lock(new_slot); + node_lock_t new_lock = storage_->node_lock(new_slot); // Do nothing for the first element if (!new_slot) { @@ -2443,8 +2449,8 @@ class index_gt { if (!next.reserve(config.expansion)) return result.failed("Out of memory!"); - node_lock_t new_lock = storage_.node_lock(old_slot); - node_t node = storage_.get_node_at(old_slot); + node_lock_t new_lock = storage_->node_lock(old_slot); + node_t node = storage_->get_node_at(old_slot); level_t node_level = node.level(); span_bytes_t node_bytes = node.node_bytes(pre_); 
@@ -2596,14 +2602,14 @@ class index_gt { stats_t result{}; for (std::size_t i = 0; i != size(); ++i) { - node_t node = storage_.get_node_at(i); + node_t node = storage_->get_node_at(i); std::size_t max_edges = node.level() * config_.connectivity + config_.connectivity_base; std::size_t edges = 0; for (level_t level = 0; level <= node.level(); ++level) edges += neighbors_(node, level).size(); ++result.nodes; - result.allocated_bytes += storage_.node_size_bytes(i); + result.allocated_bytes += storage_->node_size_bytes(i); result.edges += edges; result.max_edges += max_edges; } @@ -2615,7 +2621,7 @@ class index_gt { std::size_t neighbors_bytes = !level ? pre_.neighbors_base_bytes : pre_.neighbors_bytes; for (std::size_t i = 0; i != size(); ++i) { - node_t node = storage_.get_node_at(i); + node_t node = storage_->get_node_at(i); if (static_cast(node.level()) < level) continue; @@ -2633,7 +2639,7 @@ class index_gt { std::size_t head_bytes = node_t::head_size_bytes(); for (std::size_t i = 0; i != size(); ++i) { - node_t node = storage_.get_node_at(i); + node_t node = storage_->get_node_at(i); stats_per_level[0].nodes++; stats_per_level[0].edges += neighbors_(node, 0).size(); @@ -2670,7 +2676,7 @@ class index_gt { */ std::size_t memory_usage(std::size_t allocator_entry_bytes = default_allocator_entry_bytes()) const noexcept { std::size_t total = 0; - if (!storage_.is_immutable()) { + if (!storage_->is_immutable()) { stats_t s = stats(); total += s.allocated_bytes; total += s.nodes * allocator_entry_bytes; @@ -2696,7 +2702,7 @@ class index_gt { std::size_t serialized_length() const noexcept { std::size_t neighbors_length = 0; for (std::size_t i = 0; i != size(); ++i) - neighbors_length += node_t::node_size_bytes(pre_, storage_.get_node_at(i).level()) + sizeof(level_t); + neighbors_length += node_t::node_size_bytes(pre_, storage_->get_node_at(i).level()) + sizeof(level_t); return sizeof(index_serialized_header_t) + neighbors_length; } @@ -2716,7 +2722,7 @@ class index_gt { 
header.max_level = max_level_; header.entry_slot = entry_slot_; - return storage_.save_nodes_to_stream(output, header, progress); + return storage_->save_nodes_to_stream(output, header, progress); } /** @@ -2730,7 +2736,7 @@ class index_gt { // Pull basic metadata index_serialized_header_t header; - result = storage_.load_nodes_from_stream(input, header, progress); + result = storage_->load_nodes_from_stream(input, header, progress); if (!result) { reset(); return result; @@ -2885,7 +2891,7 @@ class index_gt { // storage_ may already have some relevant stuff... serialization_result_t result; index_serialized_header_t header; - result = storage_.view_nodes_from_file(std::move(file), header, offset, progress); + result = storage_->view_nodes_from_file(std::move(file), header, offset, progress); if (!result) return result; @@ -2945,7 +2951,7 @@ class index_gt { std::size_t connect_new_node_( // metric_at&& metric, std::size_t new_slot, level_t level, context_t& context) usearch_noexcept_m { - node_t new_node = storage_.get_node_at(new_slot); + node_t new_node = storage_->get_node_at(new_slot); top_candidates_t& top = context.top_candidates; // Outgoing links from `new_slot`: @@ -2956,7 +2962,8 @@ class index_gt { for (std::size_t idx = 0; idx != top_view.size(); idx++) { usearch_assert_m(!new_neighbors[idx], "Possible memory corruption"); - usearch_assert_m(level <= storage_.get_node_at(top_view[idx].slot).level(), "Linking to missing level"); + usearch_assert_m(level <= storage_->get_node_at(top_view[idx].slot).level(), + "Linking to missing level"); new_neighbors.push_back(top_view[idx].slot); } } @@ -2969,7 +2976,7 @@ class index_gt { metric_at&& metric, std::size_t new_slot, value_at&& value, level_t level, context_t& context) usearch_noexcept_m { - node_t new_node = storage_.get_node_at(new_slot); + node_t new_node = storage_->get_node_at(new_slot); top_candidates_t& top = context.top_candidates; neighbors_ref_t new_neighbors = neighbors_(new_node, level); @@ 
-2978,8 +2985,8 @@ class index_gt { for (compressed_slot_t close_slot : new_neighbors) { if (close_slot == new_slot) continue; - node_lock_t close_lock = storage_.node_lock(close_slot); - node_t close_node = storage_.get_node_at(close_slot); + node_lock_t close_lock = storage_->node_lock(close_slot); + node_t close_node = storage_->get_node_at(close_slot); neighbors_ref_t close_header = neighbors_(close_node, level); usearch_assert_m(close_header.size() <= connectivity_max, "Possible corruption"); @@ -3095,8 +3102,8 @@ class index_gt { bool changed; do { changed = false; - node_lock_t closest_lock = storage_.node_lock(closest_slot); - neighbors_ref_t closest_neighbors = neighbors_non_base_(storage_.get_node_at(closest_slot), level); + node_lock_t closest_lock = storage_->node_lock(closest_slot); + neighbors_ref_t closest_neighbors = neighbors_non_base_(storage_->get_node_at(closest_slot), level); // Optional prefetching if (!is_dummy()) { @@ -3161,8 +3168,8 @@ class index_gt { compressed_slot_t candidate_slot = candidacy.slot; if (new_slot == candidate_slot) continue; - node_t candidate_ref = storage_.get_node_at(candidate_slot); - node_lock_t candidate_lock = storage_.node_lock(candidate_slot); + node_t candidate_ref = storage_->get_node_at(candidate_slot); + node_lock_t candidate_lock = storage_->node_lock(candidate_slot); neighbors_ref_t candidate_neighbors = neighbors_(candidate_ref, level); // Optional prefetching @@ -3232,7 +3239,7 @@ class index_gt { next.pop(); context.iteration_cycles++; - neighbors_ref_t candidate_neighbors = neighbors_base_(storage_.get_node_at(candidate.slot)); + neighbors_ref_t candidate_neighbors = neighbors_base_(storage_->get_node_at(candidate.slot)); // Optional prefetching if (!is_dummy()) { @@ -3253,7 +3260,7 @@ class index_gt { // This can substantially grow our priority queue: next.insert({-successor_dist, successor_slot}); if (!is_dummy()) - if (!predicate(member_cref_t{storage_.get_node_at(successor_slot).ckey(), 
successor_slot})) + if (!predicate(member_cref_t{storage_->get_node_at(successor_slot).ckey(), successor_slot})) continue; // This will automatically evict poor matches: diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 1d43dd9de..71f2de4d1 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -515,7 +515,7 @@ class index_dense_gt { // Available since C11, but only C++17, so we use the C version. index_t* raw = index_allocator_t{}.allocate(1); result.storage_ = storage_t(config); - new (raw) index_t(result.storage_, config); + new (raw) index_t(&result.storage_, config); result.typed_ = raw; return result; } From cae72103f03c14367036ecb61ff523fcc160c964 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 14 Jan 2024 03:38:53 +0000 Subject: [PATCH 79/80] Add (quite questionable) move constructors --- cpp/test.cpp | 3 ++- include/usearch/index.hpp | 8 ++++++-- include/usearch/index_dense.hpp | 22 ++++++++++++---------- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index 61049c7b4..7a93cbfc4 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -199,7 +199,8 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { metric_t metric{&matrix, dimensions}; index_config_t config(connectivity); storage_t storage{config}; - index_typed_t index_typed(storage, config); + index_typed_t index_typed_tmp(&storage, config); + index_typed_t index_typed = std::move(index_typed_tmp); test_cosine(index_typed, matrix, metric); } diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index dcda651aa..7817fbe09 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2099,10 +2099,16 @@ class index_gt { nodes_capacity_ = 0; } + /** + * @brief replace internal storage pointer with the new one + */ + void reset_storage(storage_t* storage) { storage_ = storage; } + /** * @brief Swaps the underlying memory buffers and thread contexts. 
 */ void swap(index_gt& other) noexcept { + std::swap(storage_, other.storage_); std::swap(config_, other.config_); std::swap(limits_, other.limits_); std::swap(dynamic_allocator_, other.dynamic_allocator_); @@ -2110,8 +2116,6 @@ class index_gt { std::swap(max_level_, other.max_level_); std::swap(entry_slot_, other.entry_slot_); std::swap(contexts_, other.contexts_); - // not movable because of storage_t& reference-member - assert(false); // Non-atomic parts. std::size_t capacity_copy = nodes_capacity_; diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 71f2de4d1..2f8e266ad 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -436,22 +436,20 @@ class index_dense_gt { index_dense_gt() = default; index_dense_gt(index_dense_gt&& other) - : config_(std::move(other.config_)), - - // exchange does not work for typed_ when one of its template allocator types is - // the std::allocator - // todo:: ask-Ashot: not sure why, but this seems to fix it - typed_(std::move(other.typed_)), // + : config_(std::move(other.config_)), // + typed_(exchange(other.typed_, nullptr)), // cast_buffer_(std::move(other.cast_buffer_)), // casts_(std::move(other.casts_)), // metric_(std::move(other.metric_)), // + storage_(std::move(other.storage_)), // available_threads_(std::move(other.available_threads_)), // slot_lookup_(std::move(other.slot_lookup_)), // free_keys_(std::move(other.free_keys_)), // free_key_(std::move(other.free_key_)) { - - assert(false); + // Could do this in the _proxy pattern to avoid this + // The problem will also go away if/when we make typed_ not do any allocations + typed_->reset_storage(&storage_); } // index_dense_gt& operator=(index_dense_gt&& other) { @@ -470,13 +468,17 @@ class index_dense_gt { std::swap(cast_buffer_, other.cast_buffer_); std::swap(casts_, other.casts_); std::swap(metric_, other.metric_); + std::swap(storage_, other.storage_); std::swap(available_threads_, 
other.available_threads_); std::swap(slot_lookup_, other.slot_lookup_); std::swap(free_keys_, other.free_keys_); std::swap(free_key_, other.free_key_); - // not movable because of storage_t& reference-member - assert(false); + // Could do this in the _proxy pattern to avoid this + // The problem will also go away if/when we make typed_ not do any allocations + typed_->reset_storage(&storage_); + if (other.typed_) + other.typed_->reset_storage(&other.storage_); } ~index_dense_gt() { From ec3ed826be9904d06da60bd2d052e452b1b49d69 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 14 Jan 2024 04:27:00 +0000 Subject: [PATCH 80/80] Initialize skipped usearch opt opts.multi --- c/test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/c/test.c b/c/test.c index 96e08212a..d704b1621 100644 --- a/c/test.c +++ b/c/test.c @@ -32,6 +32,7 @@ usearch_init_options_t create_options(size_t const dimensions) { opts.dimensions = dimensions; opts.expansion_add = 40; // 40 in faiss opts.expansion_search = 16; // 10 in faiss + opts.multi = false; opts.metric_kind = usearch_metric_ip_k; opts.metric = NULL; opts.quantization = usearch_scalar_f32_k;