diff --git a/CMakeLists.txt b/CMakeLists.txt
index d058470a..0a3deb6d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -63,18 +63,11 @@ else ()
 endif ()

 # Core compilation settings affecting "index.hpp"
-target_compile_definitions(
-    ${USEARCH_TARGET_NAME} INTERFACE $<$>:USEARCH_USE_OPENMP=0>
-)
+target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_OPENMP=$<BOOL:${USEARCH_USE_OPENMP}>")

 # Supplementary compilation settings affecting "index_plugins.hpp"
-target_compile_definitions(
-    ${USEARCH_TARGET_NAME} INTERFACE $<$>:USEARCH_USE_FP16LIB=1>
-)
-
-target_compile_definitions(
-    ${USEARCH_TARGET_NAME} INTERFACE $<$>:USEARCH_USE_SIMSIMD=0>
-)
+target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_FP16LIB=$<BOOL:${USEARCH_USE_FP16LIB}>")
+target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_SIMSIMD=$<BOOL:${USEARCH_USE_SIMSIMD}>")

 target_include_directories(
     ${USEARCH_TARGET_NAME} ${USEARCH_SYSTEM_INCLUDE} INTERFACE $
@@ -296,14 +289,12 @@ function (setup_target TARGET_NAME)
     endif ()

     # Core compilation settings affecting "index.hpp"
-    target_compile_definitions(${TARGET_NAME} PRIVATE $<$>:USEARCH_USE_OPENMP=0>)
+    target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_OPENMP=$<BOOL:${USEARCH_USE_OPENMP}>")

     # Supplementary compilation settings affecting "index_plugins.hpp"
-    target_compile_definitions(
-        ${TARGET_NAME} PRIVATE $<$>:USEARCH_USE_FP16LIB=1>
-    )
+    target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_FP16LIB=$<BOOL:${USEARCH_USE_FP16LIB}>")
+    target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_SIMSIMD=$<BOOL:${USEARCH_USE_SIMSIMD}>")

-    target_compile_definitions(${TARGET_NAME} PRIVATE $<$>:USEARCH_USE_SIMSIMD=0>)
 endfunction ()
diff --git a/c/test.c b/c/test.c
index 96e08212..d704b162 100644
--- a/c/test.c
+++ b/c/test.c
@@ -32,6 +32,7 @@ usearch_init_options_t create_options(size_t const dimensions) {
     opts.dimensions = dimensions;
     opts.expansion_add = 40;    // 40 in faiss
     opts.expansion_search = 16; // 10 in faiss
+    opts.multi = false;
     opts.metric_kind = usearch_metric_ip_k;
     opts.metric = NULL;
     opts.quantization = usearch_scalar_f32_k;
diff --git a/cpp/bench.cpp b/cpp/bench.cpp
index 4b3efe60..fbf70f90 100644
--- a/cpp/bench.cpp
+++ b/cpp/bench.cpp
@@ -42,8 +42,6 @@
 #include  // `omp_set_num_threads()`
 #endif

-#include 
-
 #include 

 using namespace unum::usearch;
@@ -615,4 +613,4 @@ int main(int argc, char** argv) {
     run_punned>(dataset, args, config, limits);

     return 0;
-}
\ No newline at end of file
+}
diff --git a/cpp/test.cpp b/cpp/test.cpp
index aa9b0c56..7a93cbfc 100644
--- a/cpp/test.cpp
+++ b/cpp/test.cpp
@@ -5,12 +5,12 @@
 #include  // `assert`
 #include  // `std::default_random_engine`
 #include 
-#include 
 #include  // for std::vector

 #include 
 #include 
 #include 
+#include 

 using namespace unum::usearch;
 using namespace unum;
@@ -77,7 +77,7 @@ void test_cosine(index_at& index, std::vector> const& vec
     expect((index.stats(0).nodes == 3));

     // Check if clustering endpoint compiles
-    index.cluster(vector_first, 0, args...);
+    // index.cluster(vector_first, 0, args...);

     // Try removals and replacements
     if constexpr (punned_ak) {
@@ -141,8 +141,8 @@ void test_cosine(index_at& index, std::vector> const& vec
         index.get(key_second, vec_recovered_from_view.data());
         expect(std::equal(vector_second, vector_second + dimensions, vec_recovered_from_view.data()));

-        auto compaction_result = index.compact();
-        expect(bool(compaction_result));
+        // auto compaction_result = index.compact();
+        // expect(bool(compaction_result));
     }

     expect(index.memory_usage() > 0);
@@ -155,14 +155,15 @@ void test_cosine(index_at& index, std::vector> const& vec
     }
 }

-template  //
+template  //
 void
test_cosine(std::size_t collection_size, std::size_t dimensions) {
+    using storage_t = storage_at;
     using scalar_t = scalar_at;
     using vector_key_t = key_at;
     using slot_t = slot_at;

-    using index_typed_t = index_gt;
+    using index_typed_t = index_gt;
     using member_cref_t = typename index_typed_t::member_cref_t;
     using member_citerator_t = typename index_typed_t::member_citerator_t;
@@ -197,7 +198,9 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) {
         std::printf("- templates with connectivity %zu \n", connectivity);
         metric_t metric{&matrix, dimensions};
         index_config_t config(connectivity);
-        index_typed_t index_typed(config);
+        storage_t storage{config};
+        index_typed_t index_typed_tmp(&storage, config);
+        index_typed_t index_typed = std::move(index_typed_tmp);

         test_cosine(index_typed, matrix, metric);
     }
@@ -205,7 +208,7 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) {
     for (bool multi : {false, true}) {
         for (std::size_t connectivity : {3, 13, 50}) {
             std::printf("- punned with connectivity %zu \n", connectivity);
-            using index_t = index_dense_gt;
+            using index_t = index_dense_gt;
             metric_punned_t metric(dimensions, metric_kind_t::cos_k, scalar_kind());
             index_dense_config_t config(connectivity);
             config.multi = multi;
@@ -310,9 +313,24 @@ int main(int, char**) {
     for (std::size_t collection_size : {10, 500})
         for (std::size_t dimensions : {97, 256}) {
             std::printf("Indexing %zu vectors with cos: \n", collection_size);
-            test_cosine(collection_size, dimensions);
-            std::printf("Indexing %zu vectors with cos: \n", collection_size);
-            test_cosine(collection_size, dimensions);
+            using key_t = std::int64_t;
+            {
+                using slot_t = std::uint32_t;
+                using storage_v2_t = storage_v2_at;
+                using std_storage_t = std_storage_at;
+
+                test_cosine(collection_size, dimensions);
+                test_cosine(collection_size, dimensions);
+            }
+            {
+                using slot_t = uint40_t;
+                using storage_v2_t = storage_v2_at;
+                using std_storage_t = std_storage_at;
+
+                std::printf("Indexing %zu vectors with cos: \n", collection_size);
+                test_cosine(collection_size, dimensions);
+                test_cosine(collection_size, dimensions);
+            }
         }

     for (std::size_t connectivity : {3, 13, 50})
diff --git a/docs/benchmarks.md b/docs/benchmarks.md
index b14f7724..2840c6a8 100644
--- a/docs/benchmarks.md
+++ b/docs/benchmarks.md
@@ -55,7 +55,7 @@ Also worth noting, 8-bit quantization results in almost no quantization loss and

 Within this repository you will find two commonly used utilities:

-- `cpp/bench.cpp` the produces the `bench` binary for broad USearch benchmarks.
+- `cpp/bench.cpp` that produces the `bench_cpp` binary for broad USearch benchmarks.
 - `python/bench.py` and `python/bench.ipynb` for interactive charts against FAISS.

 To achieve best highest results we suggest compiling locally for the target architecture.
@@ -64,17 +64,19 @@ To achieve best highest results we suggest compiling locally for the target arch
 ```sh
 cmake -B ./build_release \
     -DCMAKE_BUILD_TYPE=Release \
     -DUSEARCH_USE_OPENMP=1 \
+    -DUSEARCH_USE_SIMSIMD=1 \
+    -DUSEARCH_USE_FP16LIB=0 \
     -DUSEARCH_USE_JEMALLOC=1 && \
     make -C ./build_release -j

-./build_release/bench --help
+./build_release/bench_cpp --help
 ```

 Which would print the following instructions.
```txt SYNOPSIS - ./build_release/bench [--vectors ] [--queries ] [--neighbors ] [-b] [-j + ./build_release/bench_cpp [--vectors ] [--queries ] [--neighbors ] [-b] [-j ] [-c ] [--expansion-add ] [--expansion-search ] [--native|--f16quant|--i8quant] [--ip|--l2sq|--cos|--haversine] [-h] @@ -115,12 +117,12 @@ OPTIONS Here is an example of running the C++ benchmark: ```sh -./build_release/bench \ +./build_release/bench_cpp \ --vectors datasets/wiki_1M/base.1M.fbin \ --queries datasets/wiki_1M/query.public.100K.fbin \ --neighbors datasets/wiki_1M/groundtruth.public.100K.ibin -./build_release/bench \ +./build_release/bench_cpp \ --vectors datasets/t2i_1B/base.1B.fbin \ --queries datasets/t2i_1B/query.public.100K.fbin \ --neighbors datasets/t2i_1B/groundtruth.public.100K.ibin \ @@ -205,17 +207,17 @@ With `perf`: ```sh # Pass environment variables with `-E`, and `-d` for details -sudo -E perf stat -d ./build_release/bench ... -sudo -E perf mem -d ./build_release/bench ... +sudo -E perf stat -d ./build_release/bench_cpp ... +sudo -E perf mem -d ./build_release/bench_cpp ... # Sample on-CPU functions for the specified command, at 1 Kilo Hertz: -sudo -E perf record -F 1000 ./build_release/bench ... -perf record -d -e arm_spe// -- ./build_release/bench .. +sudo -E perf record -F 1000 ./build_release/bench_cpp ... +perf record -d -e arm_spe// -- ./build_release/bench_cpp .. ``` ### Caches ```sh -sudo perf stat -e 'faults,dTLB-loads,dTLB-load-misses,cache-misses,cache-references' ./build_release/bench ... +sudo perf stat -e 'faults,dTLB-loads,dTLB-load-misses,cache-misses,cache-references' ./build_release/bench_cpp ... ``` Typical output on a 1M vectors dataset is: @@ -242,4 +244,3 @@ sudo sysctl -w vm.nr_hugepages=2048 sudo reboot sudo cat /proc/sys/vm/nr_hugepages ``` - diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index e5616c15..7817fbe0 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -76,6 +76,7 @@ #include // `std::sort_heap` #include // `std::atomic` #include // `std::bitset` +#include #include // `CHAR_BIT` #include // `std::sqrt` #include // `std::memset` @@ -401,6 +402,8 @@ template > class bitset_gt { static constexpr std::size_t slots(std::size_t bits) { return divide_round_up(bits); } compressed_slot_t* slots_{}; + /// @brief size - number of bits in the bitset + std::size_t size_{}; /// @brief Number of slots. std::size_t count_{}; @@ -409,6 +412,7 @@ template > class bitset_gt { ~bitset_gt() noexcept { reset(); } explicit operator bool() const noexcept { return slots_; } + std::size_t size() const noexcept { return size_; } void clear() noexcept { if (slots_) std::memset(slots_, 0, count_ * sizeof(compressed_slot_t)); @@ -423,18 +427,20 @@ template > class bitset_gt { bitset_gt(std::size_t capacity) noexcept : slots_((compressed_slot_t*)allocator_t{}.allocate(slots(capacity) * sizeof(compressed_slot_t))), - count_(slots_ ? slots(capacity) : 0u) { + size_(slots_ ? capacity : 0u), count_(slots_ ? 
slots(capacity) : 0u) {
         clear();
     }

     bitset_gt(bitset_gt&& other) noexcept {
         slots_ = exchange(other.slots_, nullptr);
         count_ = exchange(other.count_, 0);
+        size_ = exchange(other.size_, 0);
     }

     bitset_gt& operator=(bitset_gt&& other) noexcept {
         std::swap(slots_, other.slots_);
         std::swap(count_, other.count_);
+        std::swap(size_, other.size_);
         return *this;
     }
@@ -1599,6 +1605,90 @@ template struct member_ref_gt {
 template inline std::size_t get_slot(member_ref_gt const& m) noexcept { return m.slot; }
 template inline key_at get_key(member_ref_gt const& m) noexcept { return m.key; }

+using level_t = std::int16_t;
+
+struct precomputed_constants_t {
+    double inverse_log_connectivity{};
+    std::size_t neighbors_bytes{};
+    std::size_t neighbors_base_bytes{};
+};
+
+// todo:: this is public, but we make assumptions about it that are not communicated via this interface;
+// clean these up later
+//
+/**
+ * @brief  A loosely-structured handle for every node. One such node is created for every member.
+ *         To minimize memory usage and maximize the number of entries per cache-line, it only
+ *         stores a single pointer. The internal tape starts with a `vector_key_t` @b key, then
+ *         a `level_t` for the number of graph @b levels in which this member appears,
+ *         then the { `neighbors_count_t`, `compressed_slot_t`, `compressed_slot_t` ... } sequences
+ *         for @b each-level.
+ */
+template  class node_at {
+    byte_t* tape_{};
+
+    inline std::size_t node_neighbors_bytes_(const precomputed_constants_t& pre, node_at node) const noexcept {
+        return node_neighbors_bytes_(pre, node.level());
+    }
+    static inline std::size_t node_neighbors_bytes_(const precomputed_constants_t& pre, level_t level) noexcept {
+        return pre.neighbors_base_bytes + pre.neighbors_bytes * level;
+    }
+
+  public:
+    using vector_key_t = key_at;
+    using slot_t = slot_at;
+    /**
+     * @brief  Integer for the number of node neighbors at a specific level of the
+     *         multi-level graph. It's selected to be `std::uint32_t` to improve the
+     *         alignment in most common cases.
+     */
+    using neighbors_count_t = std::uint32_t;
+    using span_bytes_t = span_gt;
+    explicit node_at(byte_t* tape) noexcept : tape_(tape) {}
+    byte_t* tape() const noexcept { return tape_; }
+    /**
+     * @brief  How many bytes of memory are needed to form the "head" of the node.
+     */
+    static constexpr std::size_t head_size_bytes() { return sizeof(vector_key_t) + sizeof(level_t); }
+    byte_t* neighbors_tape() const noexcept { return tape_ + head_size_bytes(); }
+    explicit operator bool() const noexcept { return tape_; }
+
+    inline span_bytes_t node_bytes(const precomputed_constants_t& pre) const noexcept {
+        return {tape(), node_size_bytes(pre, level())};
+    }
+    inline std::size_t node_size_bytes(const precomputed_constants_t& pre) noexcept {
+        return head_size_bytes() + node_neighbors_bytes_(pre, level());
+    }
+    static inline std::size_t node_size_bytes(const precomputed_constants_t& pre, level_t level) noexcept {
+        return head_size_bytes() + node_neighbors_bytes_(pre, level);
+    }
+
+    inline static precomputed_constants_t precompute_(index_config_t const& config) noexcept {
+        precomputed_constants_t pre;
+        // todo:: ask-Ashot:: inverse_log_connectivity does not really belong here, but the other two do.
+        // maybe we can separate these?
+ pre.inverse_log_connectivity = 1.0 / std::log(static_cast(config.connectivity)); + pre.neighbors_bytes = config.connectivity * sizeof(slot_t) + sizeof(neighbors_count_t); + pre.neighbors_base_bytes = config.connectivity_base * sizeof(slot_t) + sizeof(neighbors_count_t); + return pre; + } + + node_at() = default; + node_at(node_at const&) = default; + node_at& operator=(node_at const&) = default; + + misaligned_ref_gt ckey() const noexcept { return {tape_}; } + misaligned_ref_gt key() const noexcept { return {tape_}; } + misaligned_ref_gt level() const noexcept { return {tape_ + sizeof(vector_key_t)}; } + + void key(vector_key_t v) noexcept { return misaligned_store(tape_, v); } + void level(level_t v) noexcept { return misaligned_store(tape_ + sizeof(vector_key_t), v); } +}; + +static_assert(std::is_trivially_copy_constructible>::value, + "Nodes must be light!"); +static_assert(std::is_trivially_destructible>::value, "Nodes must be light!"); + /** * @brief Approximate Nearest Neighbors Search @b index-structure using the * Hierarchical Navigable Small World @b (HNSW) graphs algorithm. @@ -1609,6 +1699,14 @@ template inline key_at get_key(member_ref_gt const& m) * not just within equi-dimensional vectors. Examples range from texts to similar Chess * positions. * + * @tparam storage_at + * The storage provider for index_gt. The index uses the storage_at + * API to store and retrieve hnsw index nodes and vectors. + * see `dummy_storage_single_threaded` for a minimal storage implementation + * and interface reference for storage_at. + * NOTE: Storage object is taken by reference. It is the caller's responsibility + * to make sure the reference is valid whenever the index is being used + * * @tparam key_at * The type of primary objects stored in the index. * The values, to which those map, are not managed by the same index structure. @@ -1625,10 +1723,6 @@ template inline key_at get_key(member_ref_gt const& m) * priority queues, needed during construction and traversals of graphs. * The allocated buffers may be uninitialized. * - * @tparam tape_allocator_at - * Potentially different memory allocator for primary allocations of nodes and vectors. - * It would never `deallocate` separate entries, and would only free all the space at once. - * The allocated buffers may be uninitialized. * * @section Features * @@ -1679,24 +1773,28 @@ template inline key_at get_key(member_ref_gt const& m) * - `member_gt` contains an already prefetched copy of the key. 
* */ -template , // - typename tape_allocator_at = dynamic_allocator_at> // + typename dynamic_allocator_at = std::allocator> // class index_gt { public: + using storage_t = storage_at; + using node_lock_t = typename storage_t::lock_type; using distance_t = distance_at; using vector_key_t = key_at; using key_t = vector_key_t; using compressed_slot_t = compressed_slot_at; using dynamic_allocator_t = dynamic_allocator_at; - using tape_allocator_t = tape_allocator_at; static_assert(sizeof(vector_key_t) >= sizeof(compressed_slot_t), "Having tiny keys doesn't make sense."); using member_cref_t = member_cref_gt; using member_ref_t = member_ref_gt; + using node_t = node_at; + // using node_t = typename storage_t::node_t; + template class member_iterator_gt { using ref_t = ref_at; using index_t = index_at; @@ -1715,8 +1813,8 @@ class index_gt { using pointer = void; using reference = ref_t; - reference operator*() const noexcept { return {index_->node_at_(slot_).key(), slot_}; } - vector_key_t key() const noexcept { return index_->node_at_(slot_).key(); } + reference operator*() const noexcept { return {index_->storage_->get_node_at(slot_).key(), slot_}; } + vector_key_t key() const noexcept { return index_->storage_->get_node_at(slot_).key(); } friend inline std::size_t get_slot(member_iterator_gt const& it) noexcept { return it.slot_; } friend inline vector_key_t get_key(member_iterator_gt const& it) noexcept { return it.key(); } @@ -1759,10 +1857,7 @@ class index_gt { sizeof(byte_t) == 1, // "Primary allocator must allocate separate addressable bytes"); - using tape_allocator_traits_t = std::allocator_traits; - static_assert( // - sizeof(typename tape_allocator_traits_t::value_type) == 1, // - "Tape allocator must allocate separate addressable bytes"); + using span_bytes_t = span_gt; private: /** @@ -1771,22 +1866,9 @@ class index_gt { * alignment in most common cases. */ using neighbors_count_t = std::uint32_t; - using level_t = std::int16_t; - - /** - * @brief How many bytes of memory are needed to form the "head" of the node. - */ - static constexpr std::size_t node_head_bytes_() { return sizeof(vector_key_t) + sizeof(level_t); } - - using nodes_mutexes_t = bitset_gt; using visits_hash_set_t = growing_hash_set_gt, dynamic_allocator_t>; - struct precomputed_constants_t { - double inverse_log_connectivity{}; - std::size_t neighbors_bytes{}; - std::size_t neighbors_base_bytes{}; - }; /// @brief A space-efficient internal data-structure used in graph traversal queues. struct candidate_t { distance_t distance; @@ -1799,38 +1881,6 @@ class index_gt { using top_candidates_t = sorted_buffer_gt, candidates_allocator_t>; using next_candidates_t = max_heap_gt, candidates_allocator_t>; - /** - * @brief A loosely-structured handle for every node. One such node is created for every member. - * To minimize memory usage and maximize the number of entries per cache-line, it only - * stores to pointers. The internal tape starts with a `vector_key_t` @b key, then - * a `level_t` for the number of graph @b levels in which this member appears, - * then the { `neighbors_count_t`, `compressed_slot_t`, `compressed_slot_t` ... } sequences - * for @b each-level. 
- */ - class node_t { - byte_t* tape_{}; - - public: - explicit node_t(byte_t* tape) noexcept : tape_(tape) {} - byte_t* tape() const noexcept { return tape_; } - byte_t* neighbors_tape() const noexcept { return tape_ + node_head_bytes_(); } - explicit operator bool() const noexcept { return tape_; } - - node_t() = default; - node_t(node_t const&) = default; - node_t& operator=(node_t const&) = default; - - misaligned_ref_gt ckey() const noexcept { return {tape_}; } - misaligned_ref_gt key() const noexcept { return {tape_}; } - misaligned_ref_gt level() const noexcept { return {tape_ + sizeof(vector_key_t)}; } - - void key(vector_key_t v) noexcept { return misaligned_store(tape_, v); } - void level(level_t v) noexcept { return misaligned_store(tape_ + sizeof(vector_key_t), v); } - }; - - static_assert(std::is_trivially_copy_constructible::value, "Nodes must be light!"); - static_assert(std::is_trivially_destructible::value, "Nodes must be light!"); - /** * @brief A slice of the node's tape, containing a the list of neighbors * for a node in a single graph level. It's pre-allocated to fit @@ -1900,14 +1950,15 @@ class index_gt { } }; + // todo:: do I have to init this? + // A: Yes! matters a lot in move constructors!! + storage_t* storage_{}; index_config_t config_{}; index_limits_t limits_{}; mutable dynamic_allocator_t dynamic_allocator_{}; - tape_allocator_t tape_allocator_{}; precomputed_constants_t pre_{}; - memory_mapped_file_t viewed_file_{}; /// @brief Number of "slots" available for `node_t` objects. Equals to @b `limits_.members`. usearch_align_m mutable std::atomic nodes_capacity_{}; @@ -1925,14 +1976,6 @@ class index_gt { /// @brief The slot in which the only node of the top-level graph is stored. std::size_t entry_slot_{}; - using nodes_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - - /// @brief C-style array of `node_t` smart-pointers. - buffer_gt nodes_{}; - - /// @brief Mutex, that limits concurrent access to `nodes_`. - mutable nodes_mutexes_t nodes_mutexes_{}; - using contexts_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; /// @brief Array of thread-specific buffers for temporary data. @@ -1945,25 +1988,27 @@ class index_gt { std::size_t max_level() const noexcept { return nodes_count_ ? static_cast(max_level_) : 0; } index_config_t const& config() const noexcept { return config_; } index_limits_t const& limits() const noexcept { return limits_; } - bool is_immutable() const noexcept { return bool(viewed_file_); } + bool is_immutable() const noexcept { return storage_->is_immutable(); } /** * @section Exceptions * Doesn't throw, unless the ::metric's and ::allocators's throw on copy-construction. */ - explicit index_gt( // - index_config_t config = {}, dynamic_allocator_t dynamic_allocator = {}, - tape_allocator_t tape_allocator = {}) noexcept - : config_(config), limits_(0, 0), dynamic_allocator_(std::move(dynamic_allocator)), - tape_allocator_(std::move(tape_allocator)), pre_(precompute_(config)), nodes_count_(0u), max_level_(-1), - entry_slot_(0u), nodes_(), nodes_mutexes_(), contexts_() {} + explicit index_gt( // + storage_t* storage, // + index_config_t config = {}, dynamic_allocator_t dynamic_allocator = {}) noexcept + : storage_(storage), config_(config), limits_(0, 0), dynamic_allocator_(std::move(dynamic_allocator)), + pre_(node_t::precompute_(config)), nodes_count_(0u), max_level_(-1), entry_slot_(0u), contexts_() {} /** * @brief Clones the structure with the same hyper-parameters, but without contents. 
*/ - index_gt fork() noexcept { return index_gt{config_, dynamic_allocator_, tape_allocator_}; } + index_gt fork() noexcept { return index_gt{config_, dynamic_allocator_}; } - ~index_gt() noexcept { reset(); } + ~index_gt() noexcept { + reset(); + storage_ = nullptr; + } index_gt(index_gt&& other) noexcept { swap(other); } @@ -1986,18 +2031,20 @@ class index_gt { copy_result_t copy(index_copy_config_t config = {}) const noexcept { copy_result_t result; index_gt& other = result.index; - other = index_gt(config_, dynamic_allocator_, tape_allocator_); + other = index_gt(config_, dynamic_allocator_); if (!other.reserve(limits_)) return result.failed("Failed to reserve the contexts"); // Now all is left - is to allocate new `node_t` instances and populate // the `other.nodes_` array into it. - for (std::size_t i = 0; i != nodes_count_; ++i) - other.nodes_[i] = other.node_make_copy_(node_bytes_(nodes_[i])); - other.nodes_count_ = nodes_count_.load(); - other.max_level_ = max_level_; - other.entry_slot_ = entry_slot_; + assert(false); + // for (std::size_t i = 0; i != nodes_count_; ++i) + // other.nodes_[i] = other.node_make_copy_(node_bytes_(nodes_[i])); + + // other.nodes_count_ = nodes_count_.load(); + // other.max_level_ = max_level_; + // other.entry_slot_ = entry_slot_; // This controls nothing for now :) (void)config; @@ -2011,13 +2058,12 @@ class index_gt { member_iterator_t begin() noexcept { return {this, 0}; } member_iterator_t end() noexcept { return {this, size()}; } - member_ref_t at(std::size_t slot) noexcept { return {nodes_[slot].key(), slot}; } - member_cref_t at(std::size_t slot) const noexcept { return {nodes_[slot].ckey(), slot}; } + member_ref_t at(std::size_t slot) noexcept { return {storage_->get_node_at(slot).key(), slot}; } + member_cref_t at(std::size_t slot) const noexcept { return {storage_->get_node_at(slot).ckey(), slot}; } member_iterator_t iterator_at(std::size_t slot) noexcept { return {this, slot}; } member_citerator_t citerator_at(std::size_t slot) const noexcept { return {this, slot}; } dynamic_allocator_t const& dynamic_allocator() const noexcept { return dynamic_allocator_; } - tape_allocator_t const& tape_allocator() const noexcept { return tape_allocator_; } #pragma region Adjusting Configuration @@ -2028,12 +2074,9 @@ class index_gt { * Will keep the number of available threads/contexts the same as it was. */ void clear() noexcept { - if (!has_reset()) { - std::size_t n = nodes_count_; - for (std::size_t i = 0; i != n; ++i) - node_free_(i); - } else - tape_allocator_.deallocate(nullptr, 0); + if (storage_) + storage_->clear(); + nodes_count_ = 0; max_level_ = -1; entry_slot_ = 0u; @@ -2049,29 +2092,29 @@ class index_gt { void reset() noexcept { clear(); - nodes_ = {}; + if (storage_) + storage_->reset(); contexts_ = {}; - nodes_mutexes_ = {}; limits_ = index_limits_t{0, 0}; nodes_capacity_ = 0; - viewed_file_ = memory_mapped_file_t{}; - tape_allocator_ = {}; } + /** + * @brief replace internal storage pointer with the new one + */ + void reset_storage(storage_t* storage) { storage_ = storage; } + /** * @brief Swaps the underlying memory buffers and thread contexts. 
*/ void swap(index_gt& other) noexcept { + std::swap(storage_, other.storage_); std::swap(config_, other.config_); std::swap(limits_, other.limits_); std::swap(dynamic_allocator_, other.dynamic_allocator_); - std::swap(tape_allocator_, other.tape_allocator_); std::swap(pre_, other.pre_); - std::swap(viewed_file_, other.viewed_file_); std::swap(max_level_, other.max_level_); std::swap(entry_slot_, other.entry_slot_); - std::swap(nodes_, other.nodes_); - std::swap(nodes_mutexes_, other.nodes_mutexes_); std::swap(contexts_, other.contexts_); // Non-atomic parts. @@ -2094,21 +2137,14 @@ class index_gt { && limits.members <= limits_.members) return true; - nodes_mutexes_t new_mutexes(limits.members); - buffer_gt new_nodes(limits.members); + bool storage_reserved = storage_->reserve(limits.members); buffer_gt new_contexts(limits.threads()); - if (!new_nodes || !new_contexts || !new_mutexes) + if (!new_contexts || !storage_reserved) return false; - // Move the nodes info, and deallocate previous buffers. - if (nodes_) - std::memcpy(new_nodes.data(), nodes_.data(), sizeof(node_t) * size()); - limits_ = limits; nodes_capacity_ = limits.members; - nodes_ = std::move(new_nodes); contexts_ = std::move(new_contexts); - nodes_mutexes_ = std::move(new_mutexes); return true; } @@ -2162,12 +2198,12 @@ class index_gt { }; class search_result_t { - node_t const* nodes_{}; + storage_t const* storage_{}; top_candidates_t const* top_{}; friend class index_gt; inline search_result_t(index_gt const& index, top_candidates_t& top) noexcept - : nodes_(index.nodes_), top_(&top) {} + : storage_(index.storage_), top_(&top) {} public: /** @brief Number of search results found. */ @@ -2203,7 +2239,7 @@ class index_gt { inline match_t at(std::size_t i) const noexcept { candidate_t const* top_ordered = top_->data(); candidate_t candidate = top_ordered[i]; - node_t node = nodes_[candidate.slot]; + node_t node = storage_->get_node_at(candidate.slot); return {member_cref_t{node.ckey(), candidate.slot}, candidate.distance}; } inline std::size_t merge_into( // @@ -2321,7 +2357,7 @@ class index_gt { } // Allocate the neighbors - node_t node = node_make_(key, target_level); + node_t node = storage_->node_make(key, target_level); if (!node) { nodes_count_.fetch_sub(1); return result.failed("Out of memory!"); @@ -2329,11 +2365,11 @@ class index_gt { if (target_level <= max_level_copy) new_level_lock.unlock(); - nodes_[new_slot] = node; + storage_->node_store(new_slot, node); result.new_size = new_slot + 1; result.slot = new_slot; callback(at(new_slot)); - node_lock_t new_lock = node_lock_(new_slot); + node_lock_t new_lock = storage_->node_lock(new_slot); // Do nothing for the first element if (!new_slot) { @@ -2417,11 +2453,11 @@ class index_gt { if (!next.reserve(config.expansion)) return result.failed("Out of memory!"); - node_lock_t new_lock = node_lock_(old_slot); - node_t node = node_at_(old_slot); + node_lock_t new_lock = storage_->node_lock(old_slot); + node_t node = storage_->get_node_at(old_slot); level_t node_level = node.level(); - span_bytes_t node_bytes = node_bytes_(node); + span_bytes_t node_bytes = node.node_bytes(pre_); std::memset(node_bytes.data(), 0, node_bytes.size()); node.level(node_level); @@ -2570,14 +2606,14 @@ class index_gt { stats_t result{}; for (std::size_t i = 0; i != size(); ++i) { - node_t node = node_at_(i); + node_t node = storage_->get_node_at(i); std::size_t max_edges = node.level() * config_.connectivity + config_.connectivity_base; std::size_t edges = 0; for (level_t level = 0; level <= 
node.level(); ++level) edges += neighbors_(node, level).size(); ++result.nodes; - result.allocated_bytes += node_bytes_(node).size(); + result.allocated_bytes += storage_->node_size_bytes(i); result.edges += edges; result.max_edges += max_edges; } @@ -2589,13 +2625,13 @@ class index_gt { std::size_t neighbors_bytes = !level ? pre_.neighbors_base_bytes : pre_.neighbors_bytes; for (std::size_t i = 0; i != size(); ++i) { - node_t node = node_at_(i); + node_t node = storage_->get_node_at(i); if (static_cast(node.level()) < level) continue; ++result.nodes; result.edges += neighbors_(node, level).size(); - result.allocated_bytes += node_head_bytes_() + neighbors_bytes; + result.allocated_bytes += node_t::head_size_bytes() + neighbors_bytes; } std::size_t max_edges_per_node = level ? config_.connectivity_base : config_.connectivity; @@ -2605,9 +2641,9 @@ class index_gt { stats_t stats(stats_t* stats_per_level, std::size_t max_level) const noexcept { - std::size_t head_bytes = node_head_bytes_(); + std::size_t head_bytes = node_t::head_size_bytes(); for (std::size_t i = 0; i != size(); ++i) { - node_t node = node_at_(i); + node_t node = storage_->get_node_at(i); stats_per_level[0].nodes++; stats_per_level[0].edges += neighbors_(node, 0).size(); @@ -2620,7 +2656,6 @@ class index_gt { stats_per_level[l].allocated_bytes += pre_.neighbors_bytes; } } - // The `max_edges` parameter can be inferred from `nodes` stats_per_level[0].max_edges = stats_per_level[0].nodes * config_.connectivity_base; for (std::size_t l = 1; l <= max_level; ++l) @@ -2645,7 +2680,7 @@ class index_gt { */ std::size_t memory_usage(std::size_t allocator_entry_bytes = default_allocator_entry_bytes()) const noexcept { std::size_t total = 0; - if (!viewed_file_) { + if (!storage_->is_immutable()) { stats_t s = stats(); total += s.allocated_bytes; total += s.nodes * allocator_entry_bytes; @@ -2659,7 +2694,7 @@ class index_gt { return total; } - std::size_t memory_usage_per_node(level_t level) const noexcept { return node_bytes_(level); } + std::size_t memory_usage_per_node(level_t level) const noexcept { return node_t::node_size_bytes(pre_, level); } #pragma endregion @@ -2671,7 +2706,7 @@ class index_gt { std::size_t serialized_length() const noexcept { std::size_t neighbors_length = 0; for (std::size_t i = 0; i != size(); ++i) - neighbors_length += node_bytes_(node_at_(i).level()) + sizeof(level_t); + neighbors_length += node_t::node_size_bytes(pre_, storage_->get_node_at(i).level()) + sizeof(level_t); return sizeof(index_serialized_header_t) + neighbors_length; } @@ -2690,92 +2725,41 @@ class index_gt { header.connectivity_base = config_.connectivity_base; header.max_level = max_level_; header.entry_slot = entry_slot_; - if (!output(&header, sizeof(header))) - return result.failed("Failed to serialize the header into stream"); - - // Progress status - std::size_t processed = 0; - std::size_t const total = 2 * header.size; - - // Export the number of levels per node - // That is both enough to estimate the overall memory consumption, - // and to be able to estimate the offsets of every entry in the file. 
- for (std::size_t i = 0; i != header.size; ++i) { - node_t node = node_at_(i); - level_t level = node.level(); - if (!output(&level, sizeof(level))) - return result.failed("Failed to serialize into stream"); - if (!progress(++processed, total)) - return result.failed("Terminated by user"); - } - - // After that dump the nodes themselves - for (std::size_t i = 0; i != header.size; ++i) { - span_bytes_t node_bytes = node_bytes_(node_at_(i)); - if (!output(node_bytes.data(), node_bytes.size())) - return result.failed("Failed to serialize into stream"); - if (!progress(++processed, total)) - return result.failed("Terminated by user"); - } - return {}; + return storage_->save_nodes_to_stream(output, header, progress); } /** * @brief Symmetric to `save_from_stream`, pulls data from a stream. + * Note: assumes storage is properly reset and ready for loading the hnsw graph */ template serialization_result_t load_from_stream(input_callback_at&& input, progress_at&& progress = {}) noexcept { serialization_result_t result; - // Remove previously stored objects - reset(); - // Pull basic metadata index_serialized_header_t header; - if (!input(&header, sizeof(header))) - return result.failed("Failed to pull the header from the stream"); - - // We are loading an empty index, no more work to do - if (!header.size) { + result = storage_->load_nodes_from_stream(input, header, progress); + if (!result) { reset(); return result; } - // Allocate some dynamic memory to read all the levels - using levels_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt levels(header.size); - if (!levels) - return result.failed("Out of memory"); - if (!input(levels, header.size * sizeof(level_t))) - return result.failed("Failed to pull nodes levels from the stream"); - // Submit metadata config_.connectivity = header.connectivity; config_.connectivity_base = header.connectivity_base; - pre_ = precompute_(config_); + pre_ = node_t::precompute_(config_); + nodes_count_ = header.size; + max_level_ = static_cast(header.max_level); + entry_slot_ = static_cast(header.entry_slot); + // allocate dynamic contexts for queries (storage has already been allocated for the deserialization process) index_limits_t limits; limits.members = header.size; if (!reserve(limits)) { reset(); return result.failed("Out of memory"); } - nodes_count_ = header.size; - max_level_ = static_cast(header.max_level); - entry_slot_ = static_cast(header.entry_slot); - - // Load the nodes - for (std::size_t i = 0; i != header.size; ++i) { - span_bytes_t node_bytes = node_malloc_(levels[i]); - if (!input(node_bytes.data(), node_bytes.size())) { - reset(); - return result.failed("Failed to pull nodes from the stream"); - } - nodes_[i] = node_t{node_bytes.data()}; - if (!progress(i + 1, header.size)) - return result.failed("Terminated by user"); - } return {}; } @@ -2800,7 +2784,7 @@ class index_gt { return io_result; serialization_result_t stream_result = save_to_stream( - [&](void* buffer, std::size_t length) { + [&](const void* buffer, std::size_t length) { io_result = file.write(buffer, length); return !!io_result; }, @@ -2824,7 +2808,7 @@ class index_gt { return io_result; serialization_result_t stream_result = save_to_stream( - [&](void* buffer, std::size_t length) { + [&](const void* buffer, std::size_t length) { if (offset + length > file.size()) return false; std::memcpy(file.data() + offset, buffer, length); @@ -2848,6 +2832,9 @@ class index_gt { if (!io_result) return io_result; + // Remove previously stored objects + 
reset(); + serialization_result_t stream_result = load_from_stream( [&](void* buffer, std::size_t length) { io_result = file.read(buffer, length); @@ -2873,6 +2860,9 @@ class index_gt { if (!io_result) return io_result; + // Remove previously stored objects + reset(); + serialization_result_t stream_result = load_from_stream( [&](void* buffer, std::size_t length) { if (offset + length > file.size()) @@ -2896,42 +2886,22 @@ class index_gt { // Remove previously stored objects reset(); - - serialization_result_t result = file.open_if_not(); - if (!result) - return result; - - // Pull basic metadata + return view_internal(std::move(file), offset, progress); + } + template + serialization_result_t view_internal(memory_mapped_file_t file, std::size_t offset = 0, + progress_at&& progress = {}) noexcept { + // shall not call reset() + // storage_ may already have some relevant stuff... + serialization_result_t result; index_serialized_header_t header; - if (file.size() - offset < sizeof(header)) - return result.failed("File is corrupted and lacks a header"); - std::memcpy(&header, file.data() + offset, sizeof(header)); - - if (!header.size) { - reset(); + result = storage_->view_nodes_from_file(std::move(file), header, offset, progress); + if (!result) return result; - } - - // Precompute offsets of every node, but before that we need to update the configs - // This could have been done with `std::exclusive_scan`, but it's only available from C++17. - using offsets_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt offsets(header.size); - if (!offsets) - return result.failed("Out of memory"); config_.connectivity = header.connectivity; config_.connectivity_base = header.connectivity_base; - pre_ = precompute_(config_); - misaligned_ptr_gt levels{(byte_t*)file.data() + offset + sizeof(header)}; - offsets[0u] = offset + sizeof(header) + sizeof(level_t) * header.size; - for (std::size_t i = 1; i < header.size; ++i) - offsets[i] = offsets[i - 1] + node_bytes_(levels[i - 1]); - - std::size_t total_bytes = offsets[header.size - 1] + node_bytes_(levels[header.size - 1]); - if (file.size() < total_bytes) { - reset(); - return result.failed("File is corrupted and can't fit all the nodes"); - } + pre_ = node_t::precompute_(config_); // Submit metadata and reserve memory index_limits_t limits; @@ -2944,236 +2914,13 @@ class index_gt { max_level_ = static_cast(header.max_level); entry_slot_ = static_cast(header.entry_slot); - // Rapidly address all the nodes - for (std::size_t i = 0; i != header.size; ++i) { - nodes_[i] = node_t{(byte_t*)file.data() + offsets[i]}; - if (!progress(i + 1, header.size)) - return result.failed("Terminated by user"); - } - viewed_file_ = std::move(file); return {}; } #pragma endregion - /** - * @brief Performs compaction on the whole HNSW index, purging some entries - * and links to them, while also generating a more efficient mapping, - * putting the more frequently used entries closer together. - * - * - * Scans the whole collection, removing the links leading towards - * banned entries. This essentially isolates some nodes from the rest - * of the graph, while keeping their outgoing links, in case the node - * is structurally relevant and has a crucial role in the index. - * It won't reclaim the memory. - * - * @param[in] allow_member Predicate to mark nodes for isolation. - * @param[in] executor Thread-pool to execute the job in parallel. - * @param[in] progress Callback to report the execution progress. 
- */ - template - void compact( // - values_at&& values, // - metric_at&& metric, // - slot_transition_at&& slot_transition, // - - executor_at&& executor = executor_at{}, // - progress_at&& progress = progress_at{}, // - prefetch_at&& prefetch = prefetch_at{}) noexcept { - - // Export all the keys, slots, and levels. - // Partition them with the predicate. - // Sort the allowed entries in descending order of their level. - // Create a new array mapping old slots to the new ones (INT_MAX for deleted items). - struct slot_level_t { - compressed_slot_t old_slot; - compressed_slot_t cluster; - level_t level; - }; - using slot_level_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt slots_and_levels(size()); - - // Progress status - std::atomic do_tasks{true}; - std::atomic processed{0}; - std::size_t const total = 3 * slots_and_levels.size(); - - // For every bottom level node, determine its parent cluster - executor.dynamic(slots_and_levels.size(), [&](std::size_t thread_idx, std::size_t old_slot) { - context_t& context = contexts_[thread_idx]; - std::size_t cluster = search_for_one_( // - values[citerator_at(old_slot)], // - metric, prefetch, // - entry_slot_, max_level_, 0, context); - slots_and_levels[old_slot] = { // - static_cast(old_slot), // - static_cast(cluster), // - node_at_(old_slot).level()}; - ++processed; - if (thread_idx == 0) - do_tasks = progress(processed.load(), total); - return do_tasks.load(); - }); - if (!do_tasks.load()) - return; - - // Where the actual permutation happens: - std::sort(slots_and_levels.begin(), slots_and_levels.end(), [](slot_level_t const& a, slot_level_t const& b) { - return a.level == b.level ? a.cluster < b.cluster : a.level > b.level; - }); - - using size_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt old_slot_to_new(slots_and_levels.size()); - for (std::size_t new_slot = 0; new_slot != slots_and_levels.size(); ++new_slot) - old_slot_to_new[slots_and_levels[new_slot].old_slot] = new_slot; - - // Erase all the incoming links - buffer_gt reordered_nodes(slots_and_levels.size()); - tape_allocator_t reordered_tape; - - for (std::size_t new_slot = 0; new_slot != slots_and_levels.size(); ++new_slot) { - std::size_t old_slot = slots_and_levels[new_slot].old_slot; - node_t old_node = node_at_(old_slot); - - std::size_t node_bytes = node_bytes_(old_node.level()); - byte_t* new_data = (byte_t*)reordered_tape.allocate(node_bytes); - node_t new_node{new_data}; - std::memcpy(new_data, old_node.tape(), node_bytes); - - for (level_t level = 0; level <= old_node.level(); ++level) - for (misaligned_ref_gt neighbor : neighbors_(new_node, level)) - neighbor = static_cast(old_slot_to_new[compressed_slot_t(neighbor)]); - - reordered_nodes[new_slot] = new_node; - if (!progress(++processed, total)) - return; - } - - for (std::size_t new_slot = 0; new_slot != slots_and_levels.size(); ++new_slot) { - std::size_t old_slot = slots_and_levels[new_slot].old_slot; - slot_transition(node_at_(old_slot).ckey(), // - static_cast(old_slot), // - static_cast(new_slot)); - if (!progress(++processed, total)) - return; - } - - nodes_ = std::move(reordered_nodes); - tape_allocator_ = std::move(reordered_tape); - entry_slot_ = old_slot_to_new[entry_slot_]; - } - - /** - * @brief Scans the whole collection, removing the links leading towards - * banned entries. 
This essentially isolates some nodes from the rest - * of the graph, while keeping their outgoing links, in case the node - * is structurally relevant and has a crucial role in the index. - * It won't reclaim the memory. - * - * @param[in] allow_member Predicate to mark nodes for isolation. - * @param[in] executor Thread-pool to execute the job in parallel. - * @param[in] progress Callback to report the execution progress. - */ - template < // - typename allow_member_at = dummy_predicate_t, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // - > - void isolate( // - allow_member_at&& allow_member, // - executor_at&& executor = executor_at{}, // - progress_at&& progress = progress_at{}) noexcept { - - // Progress status - std::atomic do_tasks{true}; - std::atomic processed{0}; - - // Erase all the incoming links - std::size_t nodes_count = size(); - executor.dynamic(nodes_count, [&](std::size_t thread_idx, std::size_t node_idx) { - node_t node = node_at_(node_idx); - for (level_t level = 0; level <= node.level(); ++level) { - neighbors_ref_t neighbors = neighbors_(node, level); - std::size_t old_size = neighbors.size(); - neighbors.clear(); - for (std::size_t i = 0; i != old_size; ++i) { - compressed_slot_t neighbor_slot = neighbors[i]; - node_t neighbor = node_at_(neighbor_slot); - if (allow_member(member_cref_t{neighbor.ckey(), neighbor_slot})) - neighbors.push_back(neighbor_slot); - } - } - ++processed; - if (thread_idx == 0) - do_tasks = progress(processed.load(), nodes_count); - return do_tasks.load(); - }); - - // At the end report the latest numbers, because the reporter thread may be finished earlier - progress(processed.load(), nodes_count); - } - private: - inline static precomputed_constants_t precompute_(index_config_t const& config) noexcept { - precomputed_constants_t pre; - pre.inverse_log_connectivity = 1.0 / std::log(static_cast(config.connectivity)); - pre.neighbors_bytes = config.connectivity * sizeof(compressed_slot_t) + sizeof(neighbors_count_t); - pre.neighbors_base_bytes = config.connectivity_base * sizeof(compressed_slot_t) + sizeof(neighbors_count_t); - return pre; - } - - using span_bytes_t = span_gt; - - inline span_bytes_t node_bytes_(node_t node) const noexcept { return {node.tape(), node_bytes_(node.level())}; } - inline std::size_t node_bytes_(level_t level) const noexcept { - return node_head_bytes_() + node_neighbors_bytes_(level); - } - inline std::size_t node_neighbors_bytes_(node_t node) const noexcept { return node_neighbors_bytes_(node.level()); } - inline std::size_t node_neighbors_bytes_(level_t level) const noexcept { - return pre_.neighbors_base_bytes + pre_.neighbors_bytes * level; - } - - span_bytes_t node_malloc_(level_t level) noexcept { - std::size_t node_bytes = node_bytes_(level); - byte_t* data = (byte_t*)tape_allocator_.allocate(node_bytes); - return data ? 
span_bytes_t{data, node_bytes} : span_bytes_t{}; - } - - node_t node_make_(vector_key_t key, level_t level) noexcept { - span_bytes_t node_bytes = node_malloc_(level); - if (!node_bytes) - return {}; - - std::memset(node_bytes.data(), 0, node_bytes.size()); - node_t node{(byte_t*)node_bytes.data()}; - node.key(key); - node.level(level); - return node; - } - - node_t node_make_copy_(span_bytes_t old_bytes) noexcept { - byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); - if (!data) - return {}; - std::memcpy(data, old_bytes.data(), old_bytes.size()); - return node_t{data}; - } - - void node_free_(std::size_t idx) noexcept { - if (viewed_file_) - return; - - node_t& node = nodes_[idx]; - tape_allocator_.deallocate(node.tape(), node_bytes_(node).size()); - node = node_t{}; - } - - inline node_t node_at_(std::size_t idx) const noexcept { return nodes_[idx]; } + // todo:: these can also be moved to node_at, along with class neighbors_ref_t definition inline neighbors_ref_t neighbors_base_(node_t node) const noexcept { return {node.neighbors_tape()}; } inline neighbors_ref_t neighbors_non_base_(node_t node, level_t level) const noexcept { @@ -3184,18 +2931,6 @@ class index_gt { return level ? neighbors_non_base_(node, level) : neighbors_base_(node); } - struct node_lock_t { - nodes_mutexes_t& mutexes; - std::size_t slot; - inline ~node_lock_t() noexcept { mutexes.atomic_reset(slot); } - }; - - inline node_lock_t node_lock_(std::size_t slot) const noexcept { - while (nodes_mutexes_.atomic_set(slot)) - ; - return {nodes_mutexes_, slot}; - } - template void connect_node_across_levels_( // value_at&& value, metric_at&& metric, prefetch_at&& prefetch, // @@ -3220,7 +2955,7 @@ class index_gt { std::size_t connect_new_node_( // metric_at&& metric, std::size_t new_slot, level_t level, context_t& context) usearch_noexcept_m { - node_t new_node = node_at_(new_slot); + node_t new_node = storage_->get_node_at(new_slot); top_candidates_t& top = context.top_candidates; // Outgoing links from `new_slot`: @@ -3231,7 +2966,8 @@ class index_gt { for (std::size_t idx = 0; idx != top_view.size(); idx++) { usearch_assert_m(!new_neighbors[idx], "Possible memory corruption"); - usearch_assert_m(level <= node_at_(top_view[idx].slot).level(), "Linking to missing level"); + usearch_assert_m(level <= storage_->get_node_at(top_view[idx].slot).level(), + "Linking to missing level"); new_neighbors.push_back(top_view[idx].slot); } } @@ -3244,7 +2980,7 @@ class index_gt { metric_at&& metric, std::size_t new_slot, value_at&& value, level_t level, context_t& context) usearch_noexcept_m { - node_t new_node = node_at_(new_slot); + node_t new_node = storage_->get_node_at(new_slot); top_candidates_t& top = context.top_candidates; neighbors_ref_t new_neighbors = neighbors_(new_node, level); @@ -3253,8 +2989,8 @@ class index_gt { for (compressed_slot_t close_slot : new_neighbors) { if (close_slot == new_slot) continue; - node_lock_t close_lock = node_lock_(close_slot); - node_t close_node = node_at_(close_slot); + node_lock_t close_lock = storage_->node_lock(close_slot); + node_t close_node = storage_->get_node_at(close_slot); neighbors_ref_t close_header = neighbors_(close_node, level); usearch_assert_m(close_header.size() <= connectivity_max, "Possible corruption"); @@ -3336,7 +3072,7 @@ class index_gt { bool operator==(candidates_iterator_t const& other) noexcept { return current_ == other.current_; } bool operator!=(candidates_iterator_t const& other) noexcept { return current_ != other.current_; } - vector_key_t 
key() const noexcept { return index_->node_at_(slot()).key(); } + vector_key_t key() const noexcept { return index_->get_node_at(slot()).key(); } compressed_slot_t slot() const noexcept { return neighbors_[current_]; } friend inline std::size_t get_slot(candidates_iterator_t const& it) noexcept { return it.slot(); } friend inline vector_key_t get_key(candidates_iterator_t const& it) noexcept { return it.key(); } @@ -3370,8 +3106,8 @@ class index_gt { bool changed; do { changed = false; - node_lock_t closest_lock = node_lock_(closest_slot); - neighbors_ref_t closest_neighbors = neighbors_non_base_(node_at_(closest_slot), level); + node_lock_t closest_lock = storage_->node_lock(closest_slot); + neighbors_ref_t closest_neighbors = neighbors_non_base_(storage_->get_node_at(closest_slot), level); // Optional prefetching if (!is_dummy()) { @@ -3436,8 +3172,8 @@ class index_gt { compressed_slot_t candidate_slot = candidacy.slot; if (new_slot == candidate_slot) continue; - node_t candidate_ref = node_at_(candidate_slot); - node_lock_t candidate_lock = node_lock_(candidate_slot); + node_t candidate_ref = storage_->get_node_at(candidate_slot); + node_lock_t candidate_lock = storage_->node_lock(candidate_slot); neighbors_ref_t candidate_neighbors = neighbors_(candidate_ref, level); // Optional prefetching @@ -3507,7 +3243,7 @@ class index_gt { next.pop(); context.iteration_cycles++; - neighbors_ref_t candidate_neighbors = neighbors_base_(node_at_(candidate.slot)); + neighbors_ref_t candidate_neighbors = neighbors_base_(storage_->get_node_at(candidate.slot)); // Optional prefetching if (!is_dummy()) { @@ -3528,7 +3264,7 @@ class index_gt { // This can substantially grow our priority queue: next.insert({-successor_dist, successor_slot}); if (!is_dummy()) - if (!predicate(member_cref_t{node_at_(successor_slot).ckey(), successor_slot})) + if (!predicate(member_cref_t{storage_->get_node_at(successor_slot).ckey(), successor_slot})) continue; // This will automatically evict poor matches: diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index e151b929..2f8e266a 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -8,6 +8,7 @@ #include #include +#include #if defined(USEARCH_DEFINED_CPP17) #include // `std::shared_mutex` @@ -16,7 +17,7 @@ namespace unum { namespace usearch { -template class index_dense_gt; +template class index_dense_gt; /** * @brief The "magic" sequence helps infer the type of the file. @@ -123,11 +124,6 @@ struct index_dense_clustering_config_t { } mode = merge_smallest_k; }; -struct index_dense_serialization_config_t { - bool exclude_vectors = false; - bool use_64_bit_dimensions = false; -}; - struct index_dense_copy_config_t : public index_copy_config_t { bool force_vector_copy = true; @@ -295,13 +291,17 @@ inline index_dense_metadata_result_t index_dense_metadata_from_buffer(memory_map * The second (2.) starts with @b "usearch"-magic-string, used to infer the file type on open. * The third (3.) is implemented by the underlying `index_gt` class. 
 */
-template  //
+template  > //
 class index_dense_gt {
   public:
     using vector_key_t = key_at;
     using key_t = vector_key_t;
     using compressed_slot_t = compressed_slot_at;
     using distance_t = distance_punned_t;
+    // using node_t = typename storage_at::node_t;
+    using node_t = node_at;
     using metric_t = metric_punned_t;

     using member_ref_t = member_ref_gt;
@@ -312,17 +312,16 @@ class index_dense_gt {
     using head_result_t = index_dense_head_result_t;

     using serialization_config_t = index_dense_serialization_config_t;
-
-    using dynamic_allocator_t = aligned_allocator_gt;
-    using tape_allocator_t = memory_mapping_allocator_gt<64>;
+    using storage_t = storage_at;

   private:
     /// @brief Schema: input buffer, bytes in input buffer, output buffer.
     using cast_t = std::function;

     /// @brief Punned index.
     using index_t = index_gt< //
+        storage_t,            //
         distance_t, vector_key_t, compressed_slot_t, //
-        dynamic_allocator_t, tape_allocator_t>;
+        dynamic_allocator_t>;
     using index_allocator_t = aligned_allocator_gt;

     using member_iterator_t = typename index_t::member_iterator_t;
@@ -345,8 +344,10 @@ class index_dense_gt {

         inline distance_t operator()(byte_t const* a, byte_t const* b) const noexcept { return f(a, b); }

-        inline byte_t const* v(member_cref_t m) const noexcept { return index_->vectors_lookup_[get_slot(m)]; }
-        inline byte_t const* v(member_citerator_t m) const noexcept { return index_->vectors_lookup_[get_slot(m)]; }
+        inline byte_t const* v(member_cref_t m) const noexcept { return index_->storage_.get_vector_at(get_slot(m)); }
+        inline byte_t const* v(member_citerator_t m) const noexcept {
+            return index_->storage_.get_vector_at(get_slot(m));
+        }
         inline distance_t f(byte_t const* a, byte_t const* b) const noexcept { return index_->metric_(a, b); }
     };

@@ -371,14 +372,12 @@ class index_dense_gt {
     /// @brief An instance of a potentially stateful `metric_t` used to initialize copies and forks.
     metric_t metric_;

-    using vectors_tape_allocator_t = memory_mapping_allocator_gt<8>;
-    /// @brief Allocator for the copied vectors, aligned to widest double-precision scalars.
-    vectors_tape_allocator_t vectors_tape_allocator_;
-
-    /// @brief For every managed `compressed_slot_t` stores a pointer to the allocated vector copy.
-    mutable std::vector vectors_lookup_;
+    /// @brief The underlying storage provider for this index that determines the file storage layout,
+    ///        implements serialization/deserialization routines, and provides an API to add, update, and
+    ///        retrieve vectors and HNSW graph nodes.
+    storage_t storage_{config_};

-    /// @brief Originally forms and array of integers [0, threads], marking all
+    /// @brief Originally forms an array of integers [0, threads], marking all of them as available.
     mutable std::vector available_threads_;

     /// @brief Mutex, controlling concurrent access to `available_threads_`.
@@ -437,20 +436,21 @@ class index_dense_gt {

     index_dense_gt() = default;
     index_dense_gt(index_dense_gt&& other)
-        : config_(std::move(other.config_)),
-
+        : config_(std::move(other.config_)), //
           typed_(exchange(other.typed_, nullptr)),     //
           cast_buffer_(std::move(other.cast_buffer_)), //
           casts_(std::move(other.casts_)),             //
           metric_(std::move(other.metric_)),           //
-
-          vectors_tape_allocator_(std::move(other.vectors_tape_allocator_)), //
-          vectors_lookup_(std::move(other.vectors_lookup_)),                 //
+          storage_(std::move(other.storage_)),         //
           available_threads_(std::move(other.available_threads_)), //
           slot_lookup_(std::move(other.slot_lookup_)), //
           free_keys_(std::move(other.free_keys_)),     //
-          free_key_(std::move(other.free_key_)) {} //
+          free_key_(std::move(other.free_key_)) {
+        // Could do this in the _proxy pattern to avoid this
+        // The problem will also go away if/when we make typed_ not do any allocations
+        typed_->reset_storage(&storage_);
+    } //

     index_dense_gt& operator=(index_dense_gt&& other) {
         swap(other);
@@ -468,14 +468,17 @@ class index_dense_gt {
         std::swap(cast_buffer_, other.cast_buffer_);
         std::swap(casts_, other.casts_);
         std::swap(metric_, other.metric_);
-
-        std::swap(vectors_tape_allocator_, other.vectors_tape_allocator_);
-        std::swap(vectors_lookup_, other.vectors_lookup_);
+        std::swap(storage_, other.storage_);
         std::swap(available_threads_, other.available_threads_);
         std::swap(slot_lookup_, other.slot_lookup_);
         std::swap(free_keys_, other.free_keys_);
         std::swap(free_key_, other.free_key_);
+        // Could do this in the _proxy pattern to avoid this
+        // The problem will also go away if/when we make typed_ not do any allocations
+        typed_->reset_storage(&storage_);
+        if (other.typed_)
+            other.typed_->reset_storage(&other.storage_);
     }

     ~index_dense_gt() {
@@ -513,7 +516,8 @@ class index_dense_gt {
         // Available since C11, but only C++17, so we use the C version.
         index_t* raw = index_allocator_t{}.allocate(1);
-        new (raw) index_t(config);
+        result.storage_ = storage_t(config);
+        new (raw) index_t(&result.storage_, config);
         result.typed_ = raw;
         return result;
     }
@@ -580,11 +584,9 @@ class index_dense_gt {
     * @see `serialized_length` for the length of the binary serialized representation.
*/ std::size_t memory_usage() const { - return // - typed_->memory_usage(0) + // - typed_->tape_allocator().total_wasted() + // - typed_->tape_allocator().total_reserved() + // - vectors_tape_allocator_.total_allocated(); + size_t res = typed_->memory_usage(0); + // todo:: add some memory_usage() interface to storage_ + return res; } static constexpr std::size_t any_thread() { return std::numeric_limits::max(); } @@ -646,9 +648,9 @@ class index_dense_gt { return result; key_and_slot_t a_key_and_slot = *a_it; - byte_t const* a_vector = vectors_lookup_[a_key_and_slot.slot]; + byte_t const* a_vector = storage_.get_vector_at(a_key_and_slot.slot); key_and_slot_t b_key_and_slot = *b_it; - byte_t const* b_vector = vectors_lookup_[b_key_and_slot.slot]; + byte_t const* b_vector = storage_.get_vector_at(b_key_and_slot.slot); distance_t a_b_distance = metric_(a_vector, b_vector); result.mean = result.min = result.max = a_b_distance; @@ -670,10 +672,10 @@ class index_dense_gt { while (a_range.first != a_range.second) { key_and_slot_t a_key_and_slot = *a_range.first; - byte_t const* a_vector = vectors_lookup_[a_key_and_slot.slot]; + byte_t const* a_vector = storage_.get_vector_at(a_key_and_slot.slot); while (b_range.first != b_range.second) { key_and_slot_t b_key_and_slot = *b_range.first; - byte_t const* b_vector = vectors_lookup_[b_key_and_slot.slot]; + byte_t const* b_vector = storage_.get_vector_at(b_key_and_slot.slot); distance_t a_b_distance = metric_(a_vector, b_vector); result.mean += a_b_distance; @@ -713,7 +715,7 @@ class index_dense_gt { // Find the closest cluster for any vector under that key. while (key_range.first != key_range.second) { key_and_slot_t key_and_slot = *key_range.first; - byte_t const* vector_data = vectors_lookup_[key_and_slot.slot]; + byte_t const* vector_data = storage_.get_vector_at(key_and_slot.slot); cluster_result_t new_result = typed_->cluster(vector_data, level, metric, cluster_config, allow); if (!new_result) return new_result; @@ -730,10 +732,12 @@ class index_dense_gt { * @return `true` if the memory reservation was successful, `false` otherwise. */ bool reserve(index_limits_t limits) { + // todo:: ask-Ashot this seems to allow search() and add() on the dense index, concurrent to this reserve + // But that is not safe on typed_ as typed_->reserve() reallocates the lock buffer, discarding the old one + // without checking if anything is locked { unique_lock_t lock(slot_lookup_mutex_); slot_lookup_.reserve(limits.members); - vectors_lookup_.resize(limits.members); } return typed_->reserve(limits); } @@ -748,11 +752,10 @@ class index_dense_gt { unique_lock_t lookup_lock(slot_lookup_mutex_); std::unique_lock free_lock(free_keys_mutex_); + // storage_ cleared by typed_ todo:: is this confusing? typed_->clear(); slot_lookup_.clear(); - vectors_lookup_.clear(); free_keys_.clear(); - vectors_tape_allocator_.reset(); } /** @@ -767,11 +770,10 @@ class index_dense_gt { std::unique_lock free_lock(free_keys_mutex_); std::unique_lock available_threads_lock(available_threads_mutex_); + // storage is reset by typed_ typed_->reset(); slot_lookup_.clear(); - vectors_lookup_.clear(); free_keys_.clear(); - vectors_tape_allocator_.reset(); // Reset the thread IDs. 
available_threads_.resize(std::thread::hardware_concurrency()); @@ -787,41 +789,9 @@ class index_dense_gt { progress_at&& progress = {}) const { serialization_result_t result; - std::uint64_t matrix_rows = 0; - std::uint64_t matrix_cols = 0; - - // We may not want to put the vectors into the same file - if (!config.exclude_vectors) { - // Save the matrix size - if (!config.use_64_bit_dimensions) { - std::uint32_t dimensions[2]; - dimensions[0] = static_cast(typed_->size()); - dimensions[1] = static_cast(metric_.bytes_per_vector()); - if (!output(&dimensions, sizeof(dimensions))) - return result.failed("Failed to serialize into stream"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } else { - std::uint64_t dimensions[2]; - dimensions[0] = static_cast(typed_->size()); - dimensions[1] = static_cast(metric_.bytes_per_vector()); - if (!output(&dimensions, sizeof(dimensions))) - return result.failed("Failed to serialize into stream"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } - - // Dump the vectors one after another - for (std::uint64_t i = 0; i != matrix_rows; ++i) { - byte_t* vector = vectors_lookup_[i]; - if (!output(vector, matrix_cols)) - return result.failed("Failed to serialize into stream"); - } - } - - // Augment metadata + index_dense_head_buffer_t buffer; + // Prepare opaque header for Storage { - index_dense_head_buffer_t buffer; std::memset(buffer, 0, sizeof(buffer)); index_dense_head_t head{buffer}; std::memcpy(buffer, default_magic(), std::strlen(default_magic())); @@ -842,11 +812,11 @@ class index_dense_gt { head.count_deleted = typed_->size() - size(); head.dimensions = dimensions(); head.multi = multi(); - - if (!output(&buffer, sizeof(buffer))) - return result.failed("Failed to serialize into stream"); } + // save vectors and metadata to storage + storage_.save_vectors_to_stream(output, metric_.bytes_per_vector(), typed_->size(), buffer, config); + // Save the actual proximity graph return typed_->save_to_stream(std::forward(output), std::forward(progress)); } @@ -875,46 +845,16 @@ class index_dense_gt { serialization_config_t config = {}, // progress_at&& progress = {}) { - // Discard all previous memory allocations of `vectors_tape_allocator_` + // Discard all previous memory allocations of reset(); // Infer the new index size serialization_result_t result; - std::uint64_t matrix_rows = 0; - std::uint64_t matrix_cols = 0; - - // We may not want to load the vectors from the same file, or allow attaching them afterwards - if (!config.exclude_vectors) { - // Save the matrix size - if (!config.use_64_bit_dimensions) { - std::uint32_t dimensions[2]; - if (!input(&dimensions, sizeof(dimensions))) - return result.failed("Failed to read 32-bit dimensions of the matrix"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } else { - std::uint64_t dimensions[2]; - if (!input(&dimensions, sizeof(dimensions))) - return result.failed("Failed to read 64-bit dimensions of the matrix"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } - // Load the vectors one after another - vectors_lookup_.resize(matrix_rows); - for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) { - byte_t* vector = vectors_tape_allocator_.allocate(matrix_cols); - if (!input(vector, matrix_cols)) - return result.failed("Failed to read vectors"); - vectors_lookup_[slot] = vector; - } - } + index_dense_head_buffer_t buffer; + storage_.load_vectors_from_stream(input, buffer, config); // Load metadata and choose the right metric { - 
index_dense_head_buffer_t buffer; - if (!input(buffer, sizeof(buffer))) - return result.failed("Failed to read the index "); - index_dense_head_t head{buffer}; if (std::memcmp(buffer, default_magic(), std::strlen(default_magic())) != 0) return result.failed("Magic header mismatch - the file isn't an index"); @@ -937,8 +877,6 @@ class index_dense_gt { result = typed_->load_from_stream(std::forward(input), std::forward(progress)); if (!result) return result; - if (typed_->size() != static_cast(matrix_rows)) - return result.failed("Index size and the number of vectors doesn't match"); reindex_keys_(); return result; @@ -955,50 +893,16 @@ class index_dense_gt { std::size_t offset = 0, serialization_config_t config = {}, // progress_at&& progress = {}) { - // Discard all previous memory allocations of `vectors_tape_allocator_` + // Discard all previous memory allocations. reset(); - - serialization_result_t result = file.open_if_not(); + serialization_result_t result; + // Note that buffer and offset are passed by reference + index_dense_head_buffer_t buffer; + result = storage_.view_vectors_from_file(file, buffer, offset, config); if (!result) return result; - - // Infer the new index size - std::uint64_t matrix_rows = 0; - std::uint64_t matrix_cols = 0; - span_punned_t vectors_buffer; - - // We may not want to fetch the vectors from the same file, or allow attaching them afterwards - if (!config.exclude_vectors) { - // Save the matrix size - if (!config.use_64_bit_dimensions) { - std::uint32_t dimensions[2]; - if (file.size() - offset < sizeof(dimensions)) - return result.failed("File is corrupted and lacks matrix dimensions"); - std::memcpy(&dimensions, file.data() + offset, sizeof(dimensions)); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - offset += sizeof(dimensions); - } else { - std::uint64_t dimensions[2]; - if (file.size() - offset < sizeof(dimensions)) - return result.failed("File is corrupted and lacks matrix dimensions"); - std::memcpy(&dimensions, file.data() + offset, sizeof(dimensions)); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - offset += sizeof(dimensions); - } - vectors_buffer = {file.data() + offset, static_cast(matrix_rows * matrix_cols)}; - offset += vectors_buffer.size(); - } - // Load metadata and choose the right metric { - index_dense_head_buffer_t buffer; - if (file.size() - offset < sizeof(buffer)) - return result.failed("File is corrupted and lacks a header"); - - std::memcpy(buffer, file.data() + offset, sizeof(buffer)); - index_dense_head_t head{buffer}; if (std::memcmp(buffer, default_magic(), std::strlen(default_magic())) != 0) return result.failed("Magic header mismatch - the file isn't an index"); @@ -1015,21 +919,12 @@ class index_dense_gt { metric_ = metric_t(head.dimensions, head.kind_metric, head.kind_scalar); config_.multi = head.multi; - offset += sizeof(buffer); } // Pull the actual proximity graph - result = typed_->view(std::move(file), offset, std::forward(progress)); + result = typed_->view_internal(std::move(file), offset, std::forward(progress)); if (!result) return result; - if (typed_->size() != static_cast(matrix_rows)) - return result.failed("Index size and the number of vectors doesn't match"); - - // Address the vectors - vectors_lookup_.resize(matrix_rows); - if (!config.exclude_vectors) - for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) - vectors_lookup_[slot] = (byte_t*)vectors_buffer.data() + matrix_cols * slot; reindex_keys_(); return result; @@ -1353,17 +1248,19 @@ class index_dense_gt { 
copy.free_keys_.push(free_keys_[i]); // Allocate buffers and move the vectors themselves - if (!config.force_vector_copy && copy.config_.exclude_vectors) - copy.vectors_lookup_ = vectors_lookup_; - else { - copy.vectors_lookup_.resize(vectors_lookup_.size()); - for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) - copy.vectors_lookup_[slot] = copy.vectors_tape_allocator_.allocate(copy.metric_.bytes_per_vector()); - if (std::count(copy.vectors_lookup_.begin(), copy.vectors_lookup_.end(), nullptr)) - return result.failed("Out of memory!"); - for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) - std::memcpy(copy.vectors_lookup_[slot], vectors_lookup_[slot], metric_.bytes_per_vector()); - } + // if (!config.force_vector_copy && copy.config_.exclude_vectors) + // copy.vectors_lookup_ = vectors_lookup_; + // else { + // copy.vectors_lookup_.resize(vectors_lookup_.size()); + // for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) + // copy.vectors_lookup_[slot] = + // copy.vectors_tape_allocator_.allocate(copy.metric_.bytes_per_vector()); + // if (std::count(copy.vectors_lookup_.begin(), copy.vectors_lookup_.end(), nullptr)) + // return result.failed("Out of memory!"); + // for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) + // std::memcpy(copy.vectors_lookup_[slot], vectors_lookup_[slot], metric_.bytes_per_vector()); + // } + assert(false); copy.slot_lookup_ = slot_lookup_; *copy.typed_ = std::move(typed_result.index); @@ -1428,263 +1325,6 @@ class index_dense_gt { return result; } - class values_proxy_t { - index_dense_gt const* index_; - - public: - values_proxy_t(index_dense_gt const& index) noexcept : index_(&index) {} - byte_t const* operator[](compressed_slot_t slot) const noexcept { return index_->vectors_lookup_[slot]; } - byte_t const* operator[](member_citerator_t it) const noexcept { return index_->vectors_lookup_[get_slot(it)]; } - }; - - /** - * @brief Performs compaction on the index, pruning links to removed entries. - * @param executor The executor parallel processing. Default ::dummy_executor_t single-threaded. - * @param progress The progress tracker instance to use. Default ::dummy_progress_t reports nothing. - * @return The ::compaction_result_t indicating the result of the compaction operation. - * `result.pruned_edges` will contain the number of edges that were removed. - * `result.error` will contain an error message if an error occurred during the compaction operation. 
- */ - template - compaction_result_t compact(executor_at&& executor = executor_at{}, progress_at&& progress = progress_at{}) { - compaction_result_t result; - - std::vector new_vectors_lookup(vectors_lookup_.size()); - vectors_tape_allocator_t new_vectors_allocator; - - auto track_slot_change = [&](vector_key_t, compressed_slot_t old_slot, compressed_slot_t new_slot) { - byte_t* new_vector = new_vectors_allocator.allocate(metric_.bytes_per_vector()); - byte_t* old_vector = vectors_lookup_[old_slot]; - std::memcpy(new_vector, old_vector, metric_.bytes_per_vector()); - new_vectors_lookup[new_slot] = new_vector; - }; - typed_->compact(values_proxy_t{*this}, metric_proxy_t{*this}, track_slot_change, - std::forward(executor), std::forward(progress)); - vectors_lookup_ = std::move(new_vectors_lookup); - vectors_tape_allocator_ = std::move(new_vectors_allocator); - return result; - } - - template < // - typename man_to_woman_at = dummy_key_to_key_mapping_t, // - typename woman_to_man_at = dummy_key_to_key_mapping_t, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // - > - join_result_t join( // - index_dense_gt const& women, // - index_join_config_t config = {}, // - man_to_woman_at&& man_to_woman = man_to_woman_at{}, // - woman_to_man_at&& woman_to_man = woman_to_man_at{}, // - executor_at&& executor = executor_at{}, // - progress_at&& progress = progress_at{}) const { - - index_dense_gt const& men = *this; - return unum::usearch::join( // - *men.typed_, *women.typed_, // - values_proxy_t{men}, values_proxy_t{women}, // - metric_proxy_t{men}, metric_proxy_t{women}, // - config, // - std::forward(man_to_woman), // - std::forward(woman_to_man), // - std::forward(executor), // - std::forward(progress)); - } - - struct clustering_result_t { - error_t error{}; - std::size_t clusters{}; - std::size_t visited_members{}; - std::size_t computed_distances{}; - - explicit operator bool() const noexcept { return !error; } - clustering_result_t failed(error_t message) noexcept { - error = std::move(message); - return std::move(*this); - } - }; - - /** - * @brief Implements clustering, classifying the given objects (vectors of member keys) - * into a given number of clusters. - * - * @param[in] queries_begin Iterator pointing to the first query. - * @param[in] queries_end Iterator pointing to the last query. - * @param[in] executor Thread-pool to execute the job in parallel. - * @param[in] progress Callback to report the execution progress. - * @param[in] config Configuration parameters for clustering. - * - * @param[out] cluster_keys Pointer to the array where the cluster keys will be exported. - * @param[out] cluster_distances Pointer to the array where the distances to those centroids will be exported. - */ - template < // - typename queries_iterator_at, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // - > - clustering_result_t cluster( // - queries_iterator_at queries_begin, // - queries_iterator_at queries_end, // - index_dense_clustering_config_t config, // - vector_key_t* cluster_keys, // - distance_t* cluster_distances, // - executor_at&& executor = executor_at{}, // - progress_at&& progress = progress_at{}) { - - std::size_t const queries_count = queries_end - queries_begin; - - // Find the first level (top -> down) that has enough nodes to exceed `config.min_clusters`. 
- std::size_t level = max_level(); - if (config.min_clusters) { - for (; level > 1; --level) { - if (stats(level).nodes > config.min_clusters) - break; - } - } else - level = 1, config.max_clusters = stats(1).nodes, config.min_clusters = 2; - - clustering_result_t result; - if (max_level() < 2) - return result.failed("Index too small to cluster!"); - - // A structure used to track the popularity of a specific cluster - struct cluster_t { - vector_key_t centroid; - vector_key_t merged_into; - std::size_t popularity; - byte_t* vector; - }; - - auto centroid_id = [](cluster_t const& a, cluster_t const& b) { return a.centroid < b.centroid; }; - auto higher_popularity = [](cluster_t const& a, cluster_t const& b) { return a.popularity > b.popularity; }; - - std::atomic visited_members(0); - std::atomic computed_distances(0); - std::atomic atomic_error{nullptr}; - - using dynamic_allocator_traits_t = std::allocator_traits; - using clusters_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt clusters(queries_count); - if (!clusters) - return result.failed("Out of memory!"); - - map_to_clusters: - // Concurrently perform search until a certain depth - executor.dynamic(queries_count, [&](std::size_t thread_idx, std::size_t query_idx) { - auto result = cluster(queries_begin[query_idx], level, thread_idx); - if (!result) { - atomic_error = result.error.release(); - return false; - } - - cluster_keys[query_idx] = result.cluster.member.key; - cluster_distances[query_idx] = result.cluster.distance; - - // Export in case we need to refine afterwards - clusters[query_idx].centroid = result.cluster.member.key; - clusters[query_idx].vector = vectors_lookup_[result.cluster.member.slot]; - clusters[query_idx].merged_into = free_key(); - clusters[query_idx].popularity = 1; - - visited_members += result.visited_members; - computed_distances += result.computed_distances; - return true; - }); - - if (atomic_error) - return result.failed(atomic_error.load()); - - // Now once we have identified the closest clusters, - // we can try reducing their quantity, refining - std::sort(clusters.begin(), clusters.end(), centroid_id); - - // Transform into run-length encoding, computing the number of unique clusters - std::size_t unique_clusters = 0; - { - std::size_t last_idx = 0; - for (std::size_t current_idx = 1; current_idx != clusters.size(); ++current_idx) { - if (clusters[last_idx].centroid == clusters[current_idx].centroid) { - clusters[last_idx].popularity++; - } else { - last_idx++; - clusters[last_idx] = clusters[current_idx]; - } - } - unique_clusters = last_idx + 1; - } - - // In some cases the queries may be co-located, all mapping into the same cluster on that - // level. In that case we refine the granularity and dive deeper into clusters: - if (unique_clusters < config.min_clusters && level > 1) { - level--; - goto map_to_clusters; - } - - std::sort(clusters.data(), clusters.data() + unique_clusters, higher_popularity); - - // If clusters are too numerous, merge the ones that are too close to each other. 
- std::size_t merge_cycles = 0; - merge_nearby_clusters: - if (unique_clusters > config.max_clusters) { - - cluster_t& merge_source = clusters[unique_clusters - 1]; - std::size_t merge_target_idx = 0; - distance_t merge_distance = std::numeric_limits::max(); - - for (std::size_t candidate_idx = 0; candidate_idx + 1 < unique_clusters; ++candidate_idx) { - distance_t distance = metric_(merge_source.vector, clusters[candidate_idx].vector); - if (distance < merge_distance) { - merge_distance = distance; - merge_target_idx = candidate_idx; - } - } - - merge_source.merged_into = clusters[merge_target_idx].centroid; - clusters[merge_target_idx].popularity += exchange(merge_source.popularity, 0); - - // The target object may have to be swapped a few times to get to optimal position. - while (merge_target_idx && - clusters[merge_target_idx - 1].popularity < clusters[merge_target_idx].popularity) - std::swap(clusters[merge_target_idx - 1], clusters[merge_target_idx]), --merge_target_idx; - - unique_clusters--; - merge_cycles++; - goto merge_nearby_clusters; - } - - // Replace evicted clusters - if (merge_cycles) { - // Sort dropped clusters by name to accelerate future lookups - auto clusters_end = clusters.data() + config.max_clusters + merge_cycles; - std::sort(clusters.data(), clusters_end, centroid_id); - - executor.dynamic(queries_count, [&](std::size_t thread_idx, std::size_t query_idx) { - vector_key_t& cluster_key = cluster_keys[query_idx]; - distance_t& cluster_distance = cluster_distances[query_idx]; - - // Recursively trace replacements of that cluster - while (true) { - // To avoid implementing heterogeneous comparisons, lets wrap the `cluster_key` - cluster_t updated_cluster; - updated_cluster.centroid = cluster_key; - updated_cluster = *std::lower_bound(clusters.data(), clusters_end, updated_cluster, centroid_id); - if (updated_cluster.merged_into == free_key()) - break; - cluster_key = updated_cluster.merged_into; - } - - cluster_distance = distance_between(cluster_key, queries_begin[query_idx], thread_idx).mean; - return true; - }); - } - - result.computed_distances = computed_distances; - result.visited_members = visited_members; - - (void)progress; - return result; - } - private: struct thread_lock_t { index_dense_gt const& parent; @@ -1745,12 +1385,7 @@ class index_dense_gt { auto on_success = [&](member_ref_t member) { unique_lock_t slot_lock(slot_lookup_mutex_); slot_lookup_.try_emplace(key_and_slot_t{key, static_cast(member.slot)}); - if (copy_vector) { - if (!reuse_node) - vectors_lookup_[member.slot] = vectors_tape_allocator_.allocate(metric_.bytes_per_vector()); - std::memcpy(vectors_lookup_[member.slot], vector_data, metric_.bytes_per_vector()); - } else - vectors_lookup_[member.slot] = (byte_t*)vector_data; + storage_.set_vector_at(member.slot, vector_data, metric_.bytes_per_vector(), copy_vector, reuse_node); }; index_update_config_t update_config; @@ -1839,7 +1474,7 @@ class index_dense_gt { while (key_range.first != key_range.second) { key_and_slot_t key_and_slot = *key_range.first; - byte_t const* a_vector = vectors_lookup_[key_and_slot.slot]; + byte_t const* a_vector = storage_.get_vector_at(key_and_slot.slot); byte_t const* b_vector = vector_data; distance_t a_b_distance = metric_(a_vector, b_vector); @@ -1900,7 +1535,7 @@ class index_dense_gt { slot = (*it).slot; } // Export the entry - byte_t const* punned_vector = reinterpret_cast(vectors_lookup_[slot]); + byte_t const* punned_vector = reinterpret_cast(storage_.get_vector_at(slot)); bool casted = 
cast(punned_vector, dimensions(), (byte_t*)reconstructed); if (!casted) std::memcpy(reconstructed, punned_vector, metric_.bytes_per_vector()); @@ -1913,7 +1548,7 @@ class index_dense_gt { begin != equal_range_pair.second && count_exported != vectors_limit; ++begin, ++count_exported) { // compressed_slot_t slot = (*begin).slot; - byte_t const* punned_vector = reinterpret_cast(vectors_lookup_[slot]); + byte_t const* punned_vector = reinterpret_cast(storage_.get_vector_at(slot)); byte_t* reconstructed_vector = (byte_t*)reconstructed + metric_.bytes_per_vector() * count_exported; bool casted = cast(punned_vector, dimensions(), reconstructed_vector); if (!casted) diff --git a/include/usearch/index_plugins.hpp b/include/usearch/index_plugins.hpp index 660d9d52..aac30199 100644 --- a/include/usearch/index_plugins.hpp +++ b/include/usearch/index_plugins.hpp @@ -50,8 +50,10 @@ #define SIMSIMD_NATIVE_F16 !USEARCH_USE_FP16LIB #if !defined(SIMSIMD_TARGET_X86_AVX512) && defined(USEARCH_DEFINED_LINUX) +#if defined(__AVX512F__) && defined(__AVX512FP16__) && defined(__AVX512VNNI__) && defined(__AVX512VPOPCNTDQ__) #define SIMSIMD_TARGET_X86_AVX512 1 #endif +#endif #if !defined(SIMSIMD_TARGET_ARM_SVE) && defined(USEARCH_DEFINED_LINUX) #define SIMSIMD_TARGET_ARM_SVE 1 diff --git a/include/usearch/std_storage.hpp b/include/usearch/std_storage.hpp new file mode 100644 index 00000000..a1f7c044 --- /dev/null +++ b/include/usearch/std_storage.hpp @@ -0,0 +1,343 @@ + +#pragma once + +#include +#include +#include +#include +#include + +namespace unum { +namespace usearch { + +/** + * @brief A simple Storage implementation that uses standard cpp containers and complies with the usearch storage + *abstraction for HNSW graph and associated vector data + * + * @tparam key_at + * The type of primary objects stored in the index. + * The values, to which those map, are not managed by the same index structure. + * + * @tparam compressed_slot_at + * The smallest unsigned integer type to address indexed elements. + * It is used internally to maximize space-efficiency and is generally + * up-casted to @b `std::size_t` in public interfaces. + * Can be a built-in @b `uint32_t`, `uint64_t`, or our custom @b `uint40_t`. + * Which makes the most sense for 4B+ entry indexes. + * + * @tparam allocator_at + * Potentially different memory allocator for primary allocations of nodes and vectors. + * The allocated buffers may be uninitialized. 
+ * Note that we are using a memory aaligned allocator in place of std::allocator + * Because of scalar_t memory requirements in index_* + * + **/ +template > // +class std_storage_at { + public: + using key_t = key_at; + using node_t = node_at; + using span_bytes_t = span_gt; + + private: + using nodes_t = std::vector; + using vectors_t = std::vector; + + nodes_t nodes_{}; + vectors_t vectors_{}; + precomputed_constants_t pre_{}; + allocator_at allocator_{}; + static_assert(!has_reset(), "reset()-able memory allocators not supported for this storage provider"); + memory_mapped_file_t viewed_file_{}; + mutable std::deque locks_{}; + // the next three are used only in serialization/deserialization routines to know how to serialize vectors + // since this is only for serde/vars are marked mutable to still allow const-ness of saving method interface on + // storage instance + mutable size_t node_count_{}; + mutable size_t vector_size_{}; + // defaulted to true because that is what test.cpp assumes when using this storage directly + mutable bool exclude_vectors_ = true; + // used to maintain proper alignment in stored indexes to make sure view() does not result in misaligned accesses + mutable size_t file_offset_{}; + + // used in place of error handling throughout the class + static void expect(bool must_be_true) { + if (!must_be_true) + throw std::runtime_error("Failed!"); + } + // padding buffer, some prefix of which will be used every time we need padding in the serialization + // of the index. + // Rest of the array will be zeros but we will also never need paddings that large + // The pattern is to help in debugging + constexpr static byte_t padding_buffer[64] = {0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42}; + + template size_t align(T v) const { + return (sizeof(A) - (size_t)v % sizeof(A)) % sizeof(A); + } + + template size_t align4(T v) const { return align(v); } + + public: + std_storage_at(index_config_t config, allocator_at allocator = {}) + : pre_(node_t::precompute_(config)), allocator_(allocator) {} + + inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } + inline byte_t* get_vector_at(std::size_t idx) const noexcept { return vectors_[idx].data(); } + inline size_t node_size_bytes(std::size_t idx) const noexcept { return get_node_at(idx).node_size_bytes(pre_); } + bool is_immutable() const noexcept { return bool(viewed_file_); } + + /* To get a single-threaded implementation of storage with no locking, replace lock_type + * with the following and return dummy_lock{} from node_lock() + * struct dummy_lock { + * // destructor necessary to avoid "unused variable warning" + * // at callcites of node_lock() + * ~dummy_lock() = default; + * }; + * using lock_type = dummy_lock; + */ + using lock_type = std::unique_lock; + + bool reserve(std::size_t count) { + if (count < nodes_.size()) + return true; + nodes_.resize(count); + vectors_.resize(count); + locks_.resize(count); + return true; + } + void clear() noexcept { + if (!is_immutable()) { + std::size_t n = nodes_.size(); + for (std::size_t i = 0; i != n; ++i) { + // we do not know which slots have been filled and which ones - no + // so we iterate over full reserved space + if (nodes_[i]) + node_free(i, nodes_[i]); + } + n = vectors_.size(); + for (std::size_t i = 0; i != n; ++i) { + span_bytes_t v = vectors_[i]; + if (v.data()) { + allocator_.deallocate(v.data(), v.size()); + } + } + } + if (vectors_.data()) + std::fill(vectors_.begin(), vectors_.end(), span_bytes_t{}); + if (nodes_.data()) + 
std::fill(nodes_.begin(), nodes_.end(), node_t{}); + } + void reset() noexcept { clear(); } + + span_bytes_t node_malloc(level_t level) noexcept { + std::size_t node_size = node_t::node_size_bytes(pre_, level); + byte_t* data = (byte_t*)allocator_.allocate(node_size); + return data ? span_bytes_t{data, node_size} : span_bytes_t{}; + } + void node_free(size_t slot, node_t node) { + allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); + nodes_[slot] = node_t{}; + } + node_t node_make(key_at key, level_t level) noexcept { + span_bytes_t node_bytes = node_malloc(level); + if (!node_bytes) + return {}; + + std::memset(node_bytes.data(), 0, node_bytes.size()); + node_t node{(byte_t*)node_bytes.data()}; + node.key(key); + node.level(level); + return node; + } + void node_store(size_t slot, node_t node) noexcept { nodes_[slot] = node; } + void set_vector_at(size_t slot, const byte_t* vector_data, size_t vector_size, bool copy_vector, bool reuse_node) { + + usearch_assert_m(!(reuse_node && !copy_vector), + "Cannot reuse node when not copying as there is no allocation needed"); + if (copy_vector) { + if (!reuse_node) + vectors_[slot] = span_bytes_t{allocator_.allocate(vector_size), vector_size}; + std::memcpy(vectors_[slot].data(), vector_data, vector_size); + } else + vectors_[slot] = span_bytes_t{(byte_t*)vector_data, vector_size}; + } + + allocator_at const& node_allocator() const noexcept { return allocator_; } + + inline lock_type node_lock(std::size_t i) const noexcept { return std::unique_lock(locks_[i]); } + + // serialization + + template + serialization_result_t save_vectors_to_stream(output_callback_at& output, std::uint64_t vector_size_bytes, + std::uint64_t node_count, // + const vectors_metadata_at& metadata_buffer, + serialization_config_t config = {}) const { + expect(!config.use_64_bit_dimensions); + expect(output(metadata_buffer, sizeof(metadata_buffer))); + + file_offset_ = sizeof(metadata_buffer); + vector_size_ = vector_size_bytes; + node_count_ = node_count; + exclude_vectors_ = config.exclude_vectors; + return {}; + } + + template + serialization_result_t save_nodes_to_stream(output_callback_at& output, const index_serialized_header_t& header, + progress_at& = {}) const { + expect(output(&header, sizeof(header))); + expect(output(&vector_size_, sizeof(vector_size_))); + expect(output(&node_count_, sizeof(node_count_))); + file_offset_ += sizeof(header) + sizeof(vector_size_) + sizeof(node_count_); + // Save node levels, for offset calculation + for (std::size_t i = 0; i != header.size; ++i) { + node_t node = get_node_at(i); + level_t level = node.level(); + expect(output(&level, sizeof(level))); + } + + file_offset_ += header.size * sizeof(level_t); + + // After that dump the nodes themselves + for (std::size_t i = 0; i != header.size; ++i) { + span_bytes_t node_bytes = get_node_at(i).node_bytes(pre_); + expect(output(node_bytes.data(), node_bytes.size())); + file_offset_ += node_bytes.size(); + if (!exclude_vectors_) { + // add padding for proper alignment + int16_t padding_size = align4(file_offset_); + expect(output(&padding_buffer, padding_size)); + file_offset_ += padding_size; + byte_t* vector_bytes = get_vector_at(i); + expect(output(vector_bytes, vector_size_)); + file_offset_ += vector_size_; + } + } + return {}; + } + + template + serialization_result_t load_vectors_from_stream(input_callback_at& input, // + vectors_metadata_at& metadata_buffer, + serialization_config_t config = {}) { + expect(!config.use_64_bit_dimensions); + expect(input(metadata_buffer, 
sizeof(metadata_buffer))); + file_offset_ = sizeof(metadata_buffer); + exclude_vectors_ = config.exclude_vectors; + return {}; + } + + template + serialization_result_t load_nodes_from_stream(input_callback_at& input, index_serialized_header_t& header, + progress_at& = {}) noexcept { + byte_t in_padding_buffer[64] = {0}; + expect(input(&header, sizeof(header))); + expect(input(&vector_size_, sizeof(vector_size_))); + expect(input(&node_count_, sizeof(node_count_))); + file_offset_ += sizeof(header) + sizeof(vector_size_) + sizeof(node_count_); + if (!header.size) { + reset(); + return {}; + } + buffer_gt levels(header.size); + expect(levels); + expect(input(levels, header.size * sizeof(level_t))); + expect(reserve(header.size)); + + file_offset_ += header.size * sizeof(level_t); + // Load the nodes + for (std::size_t i = 0; i != header.size; ++i) { + span_bytes_t node_bytes = node_malloc(levels[i]); + expect(input(node_bytes.data(), node_bytes.size())); + file_offset_ += node_bytes.size(); + node_store(i, node_t{node_bytes.data()}); + if (!exclude_vectors_) { + int16_t padding_size = align4(file_offset_); + expect(input(&in_padding_buffer, padding_size)); + file_offset_ += padding_size; + expect(std::memcmp(in_padding_buffer, padding_buffer, padding_size) == 0); + byte_t* vector_bytes = allocator_.allocate(vector_size_); + expect(input(vector_bytes, vector_size_)); + file_offset_ += vector_size_; + set_vector_at(i, vector_bytes, vector_size_, false, false); + } + } + return {}; + } + + template + serialization_result_t view_vectors_from_file( + memory_mapped_file_t& file, // + //// todo!! document that offset is a reference, or better - do not do it this way + vectors_metadata_at& metadata_buffer, std::size_t& offset, serialization_config_t config = {}) { + reset(); + exclude_vectors_ = config.exclude_vectors; + expect(!config.use_64_bit_dimensions); + + expect(bool(file.open_if_not())); + std::memcpy(metadata_buffer, file.data() + offset, sizeof(metadata_buffer)); + file_offset_ = sizeof(metadata_buffer); + offset += sizeof(metadata_buffer); + return {}; + } + + template + serialization_result_t view_nodes_from_file(memory_mapped_file_t file, index_serialized_header_t& header, + std::size_t offset = 0, progress_at& = {}) noexcept { + serialization_result_t result = file.open_if_not(); + std::memcpy(&header, file.data() + offset, sizeof(header)); + offset += sizeof(header); + std::memcpy(&vector_size_, file.data() + offset, sizeof(vector_size_)); + offset += sizeof(vector_size_); + std::memcpy(&node_count_, file.data() + offset, sizeof(node_count_)); + offset += sizeof(node_count_); + if (!header.size) { + reset(); + return result; + } + index_config_t config; + config.connectivity = header.connectivity; + config.connectivity_base = header.connectivity_base; + pre_ = node_t::precompute_(config); + buffer_gt offsets(header.size); + expect(offsets); + misaligned_ptr_gt levels{(byte_t*)file.data() + offset}; + offset += sizeof(level_t) * header.size; + offsets[0u] = offset; + for (std::size_t i = 1; i < header.size; ++i) { + offsets[i] = offsets[i - 1] + node_t::node_size_bytes(pre_, levels[i - 1]); + if (!exclude_vectors_) { + // add room for vector alignment + offsets[i] += align4(offsets[i]); + offsets[i] += vector_size_; + } + } + expect(reserve(header.size)); + + // Rapidly address all the nodes and vectors + for (std::size_t i = 0; i != header.size; ++i) { + node_store(i, node_t{(byte_t*)file.data() + offsets[i]}); + expect(node_size_bytes(i) == node_t::node_size_bytes(pre_, 
levels[i])); + + if (!exclude_vectors_) { + size_t vector_offset = offsets[i] + node_size_bytes(i); + expect(std::memcmp((byte_t*)file.data() + vector_offset, padding_buffer, align4(vector_offset)) == 0); + vector_offset += align4(vector_offset); + + // expect proper alignment + expect(align4(vector_offset) == 0); + expect(align4((byte_t*)file.data() + vector_offset) == 0); + set_vector_at(i, (byte_t*)file.data() + vector_offset, vector_size_, false, false); + } + } + viewed_file_ = std::move(file); + return {}; + } +}; + +using default_std_storage_t = std_storage_at; +ASSERT_VALID_STORAGE(default_std_storage_t); + +} // namespace usearch +} // namespace unum diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp new file mode 100644 index 00000000..30f82f35 --- /dev/null +++ b/include/usearch/storage.hpp @@ -0,0 +1,826 @@ +#pragma once + +#include +#include + +namespace unum { +namespace usearch { + +/** + * @brief This macro, `HAS_FUNCTION_TEMPLATE`, is a utility to check at + * compile-time whether a given type (CHECK_AT) has a member function with a specific name (NAME_AK), signature + * (SIGNATURE_AT=return_at(args_at...)), constness (CONST_AK=const|[empty]), and exception specification + * (NOEXCEPT_AK=true|false). + * + * It is based on has_reset_gt template: + * 1. Replace declval with declval to enforce function const-ness + * method: https://stackoverflow.com/questions/30407754/how-to-test-if-a-method-is-const + * 2. Replace .reset with dynamic NAME_AK to support methods with other names + * 3. Add option to enforce noexcept + * method: https://stackoverflow.com/questions/56510130/unit-test-to-check-for-noexcept-property-for-a-c-method + * + * @param[in] CHECK_AT Placeholder type used within the template instantiation to denote the type to be checked. + * @param[in] NAME_AK Name of the member function to be checked for. This name is incorporated in the generated + * structure's name and used in the check. + * @param[in] SIGNATURE_AT Placeholder for the function signature, employed in specializing the template for function + * types. + * @param[in] CONST_AK Indicates if the member function should be a const function. This forms part of the function + * call signature within the check. + * @param[in] NOEXCEPT_AK Indicates if the member function should be noexcept. This affects the check, particularly + * important for ensuring exception safety in certain contexts. + * + * generates a structure structure named `has_##NAME_AK##_gt` with a static constexpr boolean member `value`. This + * member is true if the specified type has a member function that matches the name, signature, constness, and noexcept + * status provided in the macro's arguments. Otherwise, it is false. + * + * @example + * Suppose you have a class `Foo` with that has an interface requirement of a const noexcept member function `bar` that + * returns an `int` and takes a `const double`. 
To enforce the interface requirement, if this function exists, is + * const, and noexcept, you would instantiate the generated template like so: + * ```cpp + * struct Foo { + * // CHECK CATCHES: expected double, got double* + * // int bar(const double*) const noexcept { return 42; } + * // CHECK CATCHES: wrong const-ness + * // int bar(const double) noexcept { return 42; } + * // CHECK CATCHES: wrong excempt-ness + * // int bar(const double) const { return 42; } + * // CHECK CATCHES because required int can be cast to double + * // double bar(const double) const noexcept { return 42; } + * // CHECK CATHCES wrong returned value + * // int* bar(const double) const noexcept { return nullptr; } + * // CHECK CATHCES wrong signature + * // int bar(const double, int) const { return 42; } + * // + * // SUCCESS! the invariant we wanted + * + * int bar(const double) const noexcept { return 42; } + * + * // + * // Some PROBLEMS + * // CHECK **DOES NOT** CATCH. assertion succeeds + * // int bar(const double&) const noexcept { return 42; } + * // CHECK **DOES NOT** CATCH. assertion succeeds + * // int bar(const double&&) const noexcept { return 42; } + * }; + * + * HAS_FUNCTION_TEMPLATE(Foo, bar, int(const double), const, true); + * static_assert(has_bar_gt::value); + * ``` + * If `Foo` indeed has a const noexcept member function `bar` matching this signature, the static assertion succeeds + * Otherwise, it will cause a compile failure + */ +#define HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT, CONST_AK, NOEXCEPT_AK) \ + template struct has_##NAME_AK##_gt { \ + static_assert(std::integral_constant::value, \ + "Second template parameter needs to be of function type."); \ + }; \ + \ + template \ + struct has_##NAME_AK##_gt { \ + private: \ + template \ + static constexpr auto check(at*) -> \ + typename std::is_same().NAME_AK(std::declval()...)), \ + return_at>::type; \ + template static constexpr std::false_type check(...); \ + \ + template static constexpr bool f_is_noexcept(at*) { \ + return noexcept(std::declval().NAME_AK(std::declval()...)); \ + } \ + \ + typedef decltype(check(0)) type; \ + \ + public: /* if NOEXCEPT_AK then f_is_noexcept(0) */ \ + static constexpr bool value = type::value && (!NOEXCEPT_AK || f_is_noexcept(0)); \ + }; + +/** + * This is a wrapper around the macro above that allows getting less cryptic error messages + * in particular, it: + * 1. Wraps the defined template in a unique namespace to avoid collisions. If this ends up being used elsewhere, + * probably it would be worth it to add a __FILE__ prefix to the namespace name as well + * 2. Regarless of the requrement, it runs signature check without taking into account const-ness and exception + * requirement. + * 3. 
Only after the initial signature check succeeds, it takes into acount const and noexcept and runs relevant checks, + * printing descriptive error messages is the constraints are not satisfied + * + * The macro takes the same parameters as the one above + **/ +#define ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, CONST_AK, NOEXCEPT_AK) \ + /************ check function signature without const or noexcept*/ \ + namespace CHECK_AT##__##NAME_AK { \ + HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT, , false) \ + } \ + static_assert(CHECK_AT##__##NAME_AK::has_##NAME_AK##_gt::value, \ + " Function \"" #CHECK_AT "::" #NAME_AK \ + "\" does not exist or does not satisfy storage API signature"); \ + /************ check function signature with const requirement but without noexcept*/ \ + namespace CHECK_AT##__##NAME_AK##_const { \ + HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT, CONST_AK, false) \ + } \ + static_assert(CHECK_AT##__##NAME_AK##_const::has_##NAME_AK##_gt::value, \ + " Function \"" #CHECK_AT "::" #NAME_AK \ + "\" exists but does not satisfy const-requirement of storage API"); \ + /************ check function signature with const and noexcept requirements */ \ + namespace CHECK_AT##__##NAME_AK##_const_noexcept { \ + HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT, CONST_AK, NOEXCEPT_AK) \ + } \ + static_assert( \ + !NOEXCEPT_AK || CHECK_AT##__##NAME_AK##_const_noexcept::has_##NAME_AK##_gt::value, \ + " Function \"" #CHECK_AT "::" #NAME_AK "\" exists but does not satisfy noexcept requirement of storage API") + +/** Various commonly used shortcusts for the assertion macro above + * Note: NOCONST in comments indicates intentional lack of const qualifier + **/ +#define ASSERT_HAS_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, /*NOCONST*/, false) +#define ASSERT_HAS_CONST_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, const, false) +#define ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, /*NOCONST*/, true) +#define ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, const, true) + +#define HAS_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) has_##NAME_AK##_gt::value + +/** + * @brief An example of what a USearch-Storage-compatible output callback should look like. + * The callback is called to store arbitrarily serialized usearch index data in the underlying + * storage medium managed in the callback implementation + * + */ +struct dummy_output_callback_t { + inline bool operator()(const void* /*source memory*/, std::size_t /*size of the source*/) { return true; } +}; + +/** + * @brief An example of what a USearch-Storage-compatible input callback should look like. 
+ * The callback is called to read arbitrarily serialized usearch index data from the underlying + * storage medium managed in the callback implementation + * + */ +struct dummy_input_callback_t { + inline bool operator()(void* /*destination memory*/, std::size_t /*size of the destination*/) { return true; } +}; + +/** + * @brief A dummy metadata buffer used in serialization/deserialization API checks below + * An actual index implementation might need to keep some app-level constants in here to be serialized on the + * stored index binary, but we do not need its structure for type-checking + * + */ +struct dummy_vectors_metadata_buffer_t {}; + +struct index_dense_serialization_config_t { + // We may not want to fetch the vectors from the same file, or allow attaching them afterwards + bool exclude_vectors = false; + bool use_64_bit_dimensions = false; +}; + +using serialization_config_t = index_dense_serialization_config_t; + +/** + * @brief The macro takes in a usearch Storage-provider type, and makes sure the type provides the necessary interface + * assumed in usearch internals N.B: the validation does notenforce reference argument types properly Validation + *succeeds even when in the sertions below an interface is required to take a reference type but the actual + *implementation takes a copy + **/ +#define ASSERT_VALID_STORAGE(CHECK_AT) \ + ASSERT_HAS_CONST_FUNCTION(CHECK_AT, get_node_at, CHECK_AT::node_t(std::size_t idx)); \ + ASSERT_HAS_CONST_FUNCTION(CHECK_AT, get_vector_at, byte_t*(std::size_t idx)); \ + ASSERT_HAS_CONST_FUNCTION(CHECK_AT, node_size_bytes, std::size_t(std::size_t idx)); \ + ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, is_immutable, bool()); \ + \ + /*Container methods */ \ + ASSERT_HAS_FUNCTION(CHECK_AT, reserve, bool(std::size_t count)); \ + ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, clear, void()); \ + ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, reset, void()); \ + /*Setters*/ \ + ASSERT_HAS_FUNCTION(CHECK_AT, node_malloc, CHECK_AT::span_bytes_t(level_t level)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, node_free, void(std::size_t slot, CHECK_AT::node_t node)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, node_make, CHECK_AT::node_t(CHECK_AT::key_t key, level_t level)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, node_store, void(std::size_t slot, CHECK_AT::node_t node)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, set_vector_at, \ + void(std::size_t idx, const byte_t* vector_data, std::size_t vector_bytes, bool copy_vector, \ + bool reuse_node)); \ + /*Locking*/ \ + ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, node_lock, CHECK_AT::lock_type(std::size_t idx)); \ + /*Save/Restore API enforcement*/ \ + ASSERT_HAS_FUNCTION(CHECK_AT, save_vectors_to_stream, \ + serialization_result_t( \ + dummy_output_callback_t& cb, std::size_t vector_size_bytes, std::uint64_t node_count, \ + const dummy_vectors_metadata_buffer_t& metadata_buffer, serialization_config_t config)); \ + ASSERT_HAS_CONST_FUNCTION(CHECK_AT, save_nodes_to_stream, \ + serialization_result_t(dummy_output_callback_t& cb, \ + const index_serialized_header_t& header, \ + dummy_progress_t& progress)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, load_vectors_from_stream, \ + serialization_result_t(dummy_input_callback_t& cb, \ + const dummy_vectors_metadata_buffer_t& metadata_buffer, \ + serialization_config_t config)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, load_nodes_from_stream, \ + serialization_result_t(dummy_input_callback_t& cb, index_serialized_header_t& header, \ + dummy_progress_t& progress)); \ + \ + /* View from file API*/ \ + ASSERT_HAS_FUNCTION(CHECK_AT, 
view_vectors_from_file, \ + serialization_result_t(memory_mapped_file_t& file, \ + dummy_vectors_metadata_buffer_t& metadata_buffer, std::size_t& offset, \ + serialization_config_t config)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, view_nodes_from_file, \ + serialization_result_t(memory_mapped_file_t file, index_serialized_header_t& metadata_buffer, \ + std::size_t& offset, dummy_progress_t& progress)); \ + static_assert(true, "this is to require a semicolon at the end of macro call") + +/** I initially used this abstract class as a way to enforce storage API but ran into several limitations mentioned + * below I switched to macro+template based approach in the end, but left this around, in case there are ways to work + * around the issues below that I am not aware of. + **/ +template // +class storage_interface { + public: + using key_t = key_at; + using node_t = node_at; + // storage_interface(index_config_t conig, tape_allocator_at allocator = {}); + + struct lock_type; + + // q:: ask-Ashot can I enforce this interface function in inherited storages somehow? + // currently impossible because + // 1. can do virtual constexpr after c++2a + // 2. making this virtual would enforce this particular lock_type struct as the return type, + // and not an equivalently named one in the child class + // I currently enforice it via macros + constexpr inline lock_type node_lock(std::size_t slot) const noexcept; + + virtual inline node_t get_node_at(std::size_t idx) const noexcept = 0; + virtual inline std::size_t node_size_bytes(std::size_t idx) const noexcept = 0; + virtual inline byte_t* get_vector_at(std::size_t idx) const noexcept = 0; + + inline void set_at(std::size_t idx, node_t node, byte_t* vector_data, std::size_t vector_size, bool reuse_node); + + // the following functions take template arguments so cannot be type-enforced via virtual function inheritence + // as far as I can tell. + // virtual void load_vectors_from_stream() = 0; + // virtual void load_nodes_from_stream() = 0; + + // serialization_result_t save_vectors_to_stream() const; + // serialization_result_t save_nodes_to_stream() const; + + // serialization_result_t view_vectors_from_file() const; + // serialization_result_t view_nodes_from_file() const; + + virtual std::size_t size() const noexcept = 0; + virtual bool reserve(std::size_t count) = 0; + virtual void clear() noexcept = 0; + virtual void reset() noexcept = 0; + std::size_t memory_usage(); +}; + +/*Default allocators for storage_v2 */ +using dynamic_allocator_t = aligned_allocator_gt; +using tape_allocator_t = memory_mapping_allocator_gt<64>; +using vectors_tape_allocator_t = memory_mapping_allocator_gt<8>; +/** + * @brief Storage abstraction for HNSW graph and associated vector data + * + * @tparam key_at + * The type of primary objects stored in the index. + * The values, to which those map, are not managed by the same index structure. + * + * @tparam compressed_slot_at + * The smallest unsigned integer type to address indexed elements. + * It is used internally to maximize space-efficiency and is generally + * up-casted to @b `std::size_t` in public interfaces. + * Can be a built-in @b `uint32_t`, `uint64_t`, or our custom @b `uint40_t`. + * Which makes the most sense for 4B+ entry indexes. + * + * @tparam tape_allocator_at + * Potentially different memory allocator for primary allocations of nodes and vectors. + * It would never `deallocate` separate entries, and would only free all the space at once. + * The allocated buffers may be uninitialized. 
+ * + * NOTE: + * The class below used to inherit from storage_interface via: + * class storage_v2 : public storage_interface + * I disabled inheritence for now as interface compatibility is more + * thoroughly enforced via the macros at the beginning of this file + **/ +template // +class storage_v2_at { + public: + using key_t = key_at; + using node_t = node_at; + + private: + using nodes_mutexes_t = bitset_gt; + using dynamic_allocator_traits_t = std::allocator_traits; + using levels_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + using nodes_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + using offsets_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + using vectors_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + using nodes_t = buffer_gt; + // todo:: ask-Ashot: in the older version vectors_lookup_ was using the default vector allocator, + // and not the dynamic_allocator_at that was passed it. + // Can remove this if the previous approach was intentional + // Update (Jan 10): It seems giving vectors_allocator_t as vectors_t + // allocator below only works when CMAKE_HAVE_LIBC_PTHREAD is false + // otherwise, I get a compile error + using vectors_t = std::vector; + + /// @brief C-style array of `node_t` smart-pointers. + // buffer_gt nodes_{}; + + nodes_t nodes_{}; + + /// @brief For every managed `compressed_slot_t` stores a pointer to the allocated vector copy. + vectors_t vectors_lookup_{}; + /// @brief Mutex, that limits concurrent access to `nodes_`. + mutable nodes_mutexes_t nodes_mutexes_{}; + precomputed_constants_t pre_{}; + tape_allocator_at tape_allocator_{}; + /// @brief Allocator for the copied vectors, aligned to widest double-precision scalars. + vectors_allocator_at vectors_allocator_{}; + + std::uint64_t matrix_rows_ = 0; + std::uint64_t matrix_cols_ = 0; + bool vectors_loaded_{}; + memory_mapped_file_t viewed_file_{}; + using tape_allocator_traits_t = std::allocator_traits; + static_assert( // + sizeof(typename tape_allocator_traits_t::value_type) == 1, // + "Tape allocator must allocate separate addressable bytes"); + + struct node_lock_t { + nodes_mutexes_t& mutexes; + std::size_t slot; + inline ~node_lock_t() noexcept { mutexes.atomic_reset(slot); } + }; + + public: + storage_v2_at(index_config_t config, tape_allocator_at tape_allocator = {}) + : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} + + inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } + // todo:: most of the time this is called for const* vector, maybe add a separate interface for const? 
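    // (Editorial note.) The accessor pair defined next follows the same contract as
    // `std_storage_at::set_vector_at` earlier in this patch; a hypothetical call-site
    // summary of the flag combinations:
    //
    //     set_vector_at(slot, data, bytes, /*copy_vector=*/true,  /*reuse_node=*/false); // allocate a fresh copy
    //     set_vector_at(slot, data, bytes, /*copy_vector=*/true,  /*reuse_node=*/true);  // overwrite the existing copy
    //     set_vector_at(slot, data, bytes, /*copy_vector=*/false, /*reuse_node=*/false); // keep the caller's / mapped pointer
    //
    // `reuse_node == true` with `copy_vector == false` is rejected by the assertion in
    // the function body, since there is no owned allocation to reuse. The `std_storage_at`
    // deserialization paths shown above use the non-copying form (`false, false`).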
+ inline byte_t* get_vector_at(std::size_t idx) const noexcept { return vectors_lookup_[idx]; } + inline void set_vector_at(std::size_t idx, const byte_t* vector_data, std::size_t bytes_per_vector, + bool copy_vector, bool reuse_node) { + usearch_assert_m(!(reuse_node && !copy_vector), + "Cannot reuse node when not copying as there is no allocation needed"); + if (copy_vector) { + if (!reuse_node) + vectors_lookup_[idx] = vectors_allocator_.allocate(bytes_per_vector); + std::memcpy(vectors_lookup_[idx], vector_data, bytes_per_vector); + } else + vectors_lookup_[idx] = (byte_t*)vector_data; + } + + inline size_t node_size_bytes(std::size_t idx) const noexcept { return get_node_at(idx).node_size_bytes(pre_); } + bool is_immutable() const noexcept { return bool(viewed_file_); } + + using lock_type = node_lock_t; + + bool reserve(std::size_t count) { + if (count < nodes_.size() && count < nodes_mutexes_.size()) + return true; + nodes_mutexes_t new_mutexes(count); + nodes_t new_nodes(count); + if (!new_mutexes || !new_nodes) + return false; + if (nodes_) + std::memcpy(new_nodes.data(), nodes_.data(), sizeof(node_t) * nodes_.size()); + + nodes_mutexes_ = std::move(new_mutexes); + nodes_ = std::move(new_nodes); + // todo:: make sure to only reserve this if vectors are not stored externally + // will probably need to pass the fact as storage config parameter + vectors_lookup_.resize(count); + return true; + } + + void clear() noexcept { + if (!viewed_file_) { + if (!has_reset()) { + std::size_t n = nodes_.size(); + for (std::size_t i = 0; i != n; ++i) { + // we do not know which slots have been filled and which ones - no + // so we iterate over full reserved space + if (nodes_[i]) + node_free(i, nodes_[i]); + } + } else + tape_allocator_.deallocate(nullptr, 0); + + if (!has_reset()) { + std::size_t n = vectors_lookup_.size(); + for (std::size_t i = 0; i != n; ++i) { + if (vectors_lookup_[i]) + vectors_allocator_.deallocate(vectors_lookup_[i], matrix_cols_); + } + } else + tape_allocator_.deallocate(nullptr, 0); + } + std::fill(nodes_.begin(), nodes_.end(), node_t{}); + viewed_file_ = {}; + } + + void reset() noexcept { + nodes_mutexes_ = {}; + nodes_ = {}; + + vectors_lookup_.clear(); + vectors_lookup_.shrink_to_fit(); + viewed_file_ = {}; + } + + using span_bytes_t = span_gt; + + span_bytes_t node_malloc(level_t level) noexcept { + std::size_t node_size = node_t::node_size_bytes(pre_, level); + byte_t* data = (byte_t*)tape_allocator_.allocate(node_size); + return data ? 
span_bytes_t{data, node_size} : span_bytes_t{}; + } + void node_free(size_t slot, node_t node) { + tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); + nodes_[slot] = node_t{}; + } + node_t node_make(key_at key, level_t level) noexcept { + span_bytes_t node_bytes = node_malloc(level); + if (!node_bytes) + return {}; + + std::memset(node_bytes.data(), 0, node_bytes.size()); + node_t node{(byte_t*)node_bytes.data()}; + node.key(key); + node.level(level); + return node; + } + + // node_t node_make_copy_(span_bytes_t old_bytes) noexcept { + // byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); + // if (!data) + // return {}; + // std::memcpy(data, old_bytes.data(), old_bytes.size()); + // return node_t{data}; + // } + + void node_store(size_t slot, node_t node) noexcept { nodes_[slot] = node; } + tape_allocator_at const& node_allocator() const noexcept { return tape_allocator_; } + // dummy lock just to satisfy the interface + constexpr inline lock_type node_lock(std::size_t slot) const noexcept { + while (nodes_mutexes_.atomic_set(slot)) + ; + return {nodes_mutexes_, slot}; + } + +#pragma region Storage Serialization and Deserialization + + /** + * @brief Saves serialized binary index vectors to a stream. + * @param[in] output Output stream to which vectors will be saved to according to this storage format. + * @param[in] metadata_buffer A buffer opaque to Storage, that will be serialized into output stream + * @param[in] config Configuration parameters for imports. + * @return Outcome descriptor explicitly convertible to boolean. + */ + template + serialization_result_t save_vectors_to_stream(output_callback_at& output, std::uint64_t vector_size_bytes, + std::uint64_t node_count, // + const vectors_metadata_at& metadata_buffer, + serialization_config_t config = {}) const { + + serialization_result_t result; + std::uint64_t matrix_rows = 0; + std::uint64_t matrix_cols = 0; + + // We may not want to put the vectors into the same file + if (!config.exclude_vectors) { + // Save the matrix size + if (!config.use_64_bit_dimensions) { + std::uint32_t dimensions[2]; + dimensions[0] = static_cast(node_count); + dimensions[1] = static_cast(vector_size_bytes); + if (!output(&dimensions, sizeof(dimensions))) + return result.failed("Failed to serialize into stream"); + matrix_rows = dimensions[0]; + matrix_cols = dimensions[1]; + } else { + std::uint64_t dimensions[2]; + dimensions[0] = static_cast(node_count); + dimensions[1] = static_cast(vector_size_bytes); + if (!output(&dimensions, sizeof(dimensions))) + return result.failed("Failed to serialize into stream"); + matrix_rows = dimensions[0]; + matrix_cols = dimensions[1]; + } + + // Dump the vectors one after another + for (std::uint64_t i = 0; i != matrix_rows; ++i) { + const byte_t* vector = get_vector_at(i); + if (!output(vector, matrix_cols)) + return result.failed("Failed to serialize into stream"); + } + } + + if (!output(&metadata_buffer, sizeof(metadata_buffer))) + return result.failed("Failed to read the index vector metadata"); + + return result; + } + + /** + * @brief Symmetric to `save_from_stream`, pulls data from a stream. 
+    template <typename output_callback_at, typename progress_at = dummy_progress_t>
+    serialization_result_t save_nodes_to_stream(output_callback_at& output, const index_serialized_header_t& header,
+                                                progress_at& progress = {}) const {
+
+        serialization_result_t result;
+
+        if (!output(&header, sizeof(header)))
+            return result.failed("Failed to serialize the header into stream");
+
+        // Progress status
+        std::size_t processed = 0;
+        std::size_t const total = 2 * header.size;
+
+        // Export the number of levels per node
+        // That is both enough to estimate the overall memory consumption,
+        // and to be able to estimate the offsets of every entry in the file.
+        for (std::size_t i = 0; i != header.size; ++i) {
+            node_t node = get_node_at(i);
+            level_t level = node.level();
+            if (!output(&level, sizeof(level)))
+                return result.failed("Failed to serialize into stream");
+            if (!progress(++processed, total))
+                return result.failed("Terminated by user");
+        }
+
+        // After that dump the nodes themselves
+        for (std::size_t i = 0; i != header.size; ++i) {
+            span_bytes_t node_bytes = get_node_at(i).node_bytes(pre_);
+            if (!output(node_bytes.data(), node_bytes.size()))
+                return result.failed("Failed to serialize into stream");
+            if (!progress(++processed, total))
+                return result.failed("Terminated by user");
+        }
+        return result;
+    }
+
+    /**
+     *  @brief Parses the index vectors from a stream into RAM.
+     *  @param[in] input Input stream from which vectors will be loaded according to this storage format.
+     *  @param[out] metadata_buffer A buffer opaque to Storage, into which previously stored metadata will be
+     *                              loaded from the input stream
+     *  @param[in] config Configuration parameters for imports.
+     *  @return Outcome descriptor explicitly convertible to boolean.
+     */
+    template <typename input_callback_at, typename vectors_metadata_at>
+    serialization_result_t load_vectors_from_stream(input_callback_at& input, //
+                                                    vectors_metadata_at& metadata_buffer,
+                                                    serialization_config_t config = {}) {
+
+        reset();
+
+        // Infer the new index size
+        serialization_result_t result;
+        std::uint64_t matrix_rows = 0;
+        std::uint64_t matrix_cols = 0;
+
+        // We may not want to load the vectors from the same file, or allow attaching them afterwards
+        if (!config.exclude_vectors) {
+            // Read the matrix size
+            if (!config.use_64_bit_dimensions) {
+                std::uint32_t dimensions[2];
+                if (!input(&dimensions, sizeof(dimensions)))
+                    return result.failed("Failed to read 32-bit dimensions of the matrix");
+                matrix_rows = dimensions[0];
+                matrix_cols = dimensions[1];
+            } else {
+                std::uint64_t dimensions[2];
+                if (!input(&dimensions, sizeof(dimensions)))
+                    return result.failed("Failed to read 64-bit dimensions of the matrix");
+                matrix_rows = dimensions[0];
+                matrix_cols = dimensions[1];
+            }
+            // Load the vectors one after another
+            // todo:: most of this logic should move within the storage class
+            if (!reserve(matrix_rows))
+                return result.failed("Out of memory");
+            for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) {
+                byte_t* vector = vectors_allocator_.allocate(matrix_cols);
+                if (!input(vector, matrix_cols))
+                    return result.failed("Failed to read vectors");
+                vectors_lookup_[slot] = vector;
+            }
+            vectors_loaded_ = true;
+        }
+        matrix_rows_ = matrix_rows;
+        matrix_cols_ = matrix_cols;
+
+        if (!input(metadata_buffer, sizeof(metadata_buffer)))
+            return result.failed("Failed to read the index vector metadata");
+
+        return result;
+    }
+
+    /**
+     *  @brief Symmetric to `save_nodes_to_stream`, pulls the nodes from a stream.
+     */
+    template <typename input_callback_at, typename progress_at = dummy_progress_t>
+    serialization_result_t load_nodes_from_stream(input_callback_at& input, index_serialized_header_t& header,
+                                                  progress_at& progress = {}) noexcept {
+
+        serialization_result_t result;
+
+        // Pull basic metadata directly into the return parameter
+        if (!input(&header, sizeof(header)))
+            return result.failed("Failed to pull the header from the stream");
+
+        // We are loading an empty index, no more work to do
+        if (!header.size) {
+            reset();
+            return result;
+        }
+
+        // Allocate some dynamic memory to read all the levels
+        buffer_gt<level_t> levels(header.size);
+        if (!levels)
+            return result.failed("Out of memory");
+        if (!input(levels, header.size * sizeof(level_t)))
+            return result.failed("Failed to pull nodes levels from the stream");
+
+        if (!reserve(header.size)) {
+            reset();
+            return result.failed("Out of memory");
+        }
+
+        // Load the nodes
+        for (std::size_t i = 0; i != header.size; ++i) {
+            span_bytes_t node_bytes = node_malloc(levels[i]);
+            if (!input(node_bytes.data(), node_bytes.size())) {
+                reset();
+                return result.failed("Failed to pull nodes from the stream");
+            }
+            node_store(i, node_t{node_bytes.data()});
+
+            if (!progress(i + 1, header.size))
+                return result.failed("Terminated by user");
+        }
+
+        if (vectors_loaded_ && header.size != static_cast<std::size_t>(matrix_rows_))
+            return result.failed("Index size and the number of vectors don't match");
+        return {};
+    }
+
+    /**
+     *  @brief Views the index vectors from a memory-mapped file, without copying them into RAM.
+     *  @param[in] file Memory mapped file from which vectors will be viewed according to this storage format.
+     *  @param[in,out] offset Position in the file at which the vectors start; advanced past the parsed region.
+     *  @param[out] metadata_buffer A buffer opaque to Storage, into which previously stored metadata will be
+     *                              loaded from the file
+     *  @param[in] config Configuration parameters for imports.
+     *  @return Outcome descriptor explicitly convertible to boolean.
+     */
+    template <typename vectors_metadata_at>
+    serialization_result_t view_vectors_from_file(
+        memory_mapped_file_t& file, //
+        // todo!! document that offset is a reference, or better - do not do it this way
+        vectors_metadata_at& metadata_buffer, std::size_t& offset, serialization_config_t config = {}) {
+
+        reset();
+
+        serialization_result_t result = file.open_if_not();
+        if (!result)
+            return result;
+
+        // Infer the new index size
+        std::uint64_t matrix_rows = 0;
+        std::uint64_t matrix_cols = 0;
+        span_punned_t vectors_buffer;
+
+        // We may not want to fetch the vectors from the same file, or allow attaching them afterwards
+        if (!config.exclude_vectors) {
+            // Read the matrix size
+            if (!config.use_64_bit_dimensions) {
+                std::uint32_t dimensions[2];
+                if (file.size() - offset < sizeof(dimensions))
+                    return result.failed("File is corrupted and lacks matrix dimensions");
+                std::memcpy(&dimensions, file.data() + offset, sizeof(dimensions));
+                matrix_rows = dimensions[0];
+                matrix_cols = dimensions[1];
+                offset += sizeof(dimensions);
+            } else {
+                std::uint64_t dimensions[2];
+                if (file.size() - offset < sizeof(dimensions))
+                    return result.failed("File is corrupted and lacks matrix dimensions");
+                std::memcpy(&dimensions, file.data() + offset, sizeof(dimensions));
+                matrix_rows = dimensions[0];
+                matrix_cols = dimensions[1];
+                offset += sizeof(dimensions);
+            }
+            vectors_buffer = {file.data() + offset, static_cast<std::size_t>(matrix_rows * matrix_cols)};
+            offset += vectors_buffer.size();
+            vectors_loaded_ = true;
+        }
+        matrix_rows_ = matrix_rows;
+        matrix_cols_ = matrix_cols;
+        // q:: how does this work when vectors are excluded?
+        // Address the vectors
+        if (!reserve(matrix_rows))
+            return result.failed("Out of memory");
+        if (!config.exclude_vectors)
+            for (std::uint64_t slot = 0; slot != matrix_rows; ++slot)
+                set_vector_at(slot, vectors_buffer.data() + matrix_cols * slot, matrix_cols, //
+                              false, false);
+
+        if (file.size() - offset < sizeof(metadata_buffer))
+            return result.failed("File is corrupted and lacks a header");
+
+        std::memcpy(metadata_buffer, file.data() + offset, sizeof(metadata_buffer));
+        offset += sizeof(metadata_buffer);
+
+        return result;
+    }
+
+    /**
+     *  @brief Symmetric to `save_nodes_to_stream`, views the nodes directly from a memory-mapped file.
+     */
+    template <typename progress_at = dummy_progress_t>
+    serialization_result_t view_nodes_from_file(memory_mapped_file_t file, index_serialized_header_t& header,
+                                                std::size_t offset = 0, progress_at& progress = {}) noexcept {
+
+        serialization_result_t result = file.open_if_not();
+        if (!result)
+            return result;
+
+        // Pull basic metadata
+        if (file.size() - offset < sizeof(header))
+            return result.failed("File is corrupted and lacks a header");
+        std::memcpy(&header, file.data() + offset, sizeof(header));
+
+        if (!header.size) {
+            reset();
+            return result;
+        }
+
+        // Update pre_ from the stored connectivity settings for correct node_t size calculations below
+        index_config_t config;
+        config.connectivity = header.connectivity;
+        config.connectivity_base = header.connectivity_base;
+        pre_ = node_t::precompute_(config);
+
+        buffer_gt<std::size_t> offsets(header.size);
+
+        if (!offsets)
+            return result.failed("Out of memory");
+
+        // Before mapping levels[] from the file, make sure the file is large enough to contain them
+        if (file.size() - offset < sizeof(header) + header.size * sizeof(level_t))
+            return result.failed("File is corrupted. Unable to parse node levels from file");
+
+        misaligned_ptr_gt<level_t> levels{(byte_t*)file.data() + offset + sizeof(header)};
+        offsets[0u] = offset + sizeof(header) + sizeof(level_t) * header.size;
+
+        for (std::size_t i = 1; i < header.size; ++i)
+            offsets[i] = offsets[i - 1] + node_t::node_size_bytes(pre_, levels[i - 1]);
+
+        std::size_t total_bytes = offsets[header.size - 1] + node_t::node_size_bytes(pre_, levels[header.size - 1]);
+        if (file.size() < total_bytes) {
+            reset();
+            return result.failed("File is corrupted and can't fit all the nodes");
+        }
+
+        if (!reserve(header.size)) {
+            reset();
+            return result.failed("Out of memory");
+        }
+
+        // Rapidly address all the nodes
+        for (std::size_t i = 0; i != header.size; ++i) {
+            node_store(i, node_t{(byte_t*)file.data() + offsets[i]});
+            if (!progress(i + 1, header.size))
+                return result.failed("Terminated by user");
+        }
+        viewed_file_ = std::move(file);
+
+        if (vectors_loaded_ && header.size != static_cast<std::size_t>(matrix_rows_))
+            return result.failed("Index size and the number of vectors don't match");
+
+        return {};
+    }
+
+#pragma endregion
+};
+
+using default_storage_v2_t = storage_v2_at;
+
+ASSERT_VALID_STORAGE(default_storage_v2_t);
+
+} // namespace usearch
+} // namespace unum
diff --git a/simsimd b/simsimd
index f8ff727d..814ae410 160000
--- a/simsimd
+++ b/simsimd
@@ -1 +1 @@
-Subproject commit f8ff727dcddcd142e7e8dece33c43801af96c210
+Subproject commit 814ae4107e91f1794a7abf045301b2859d42706e
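A minimal usage sketch of how the two `save_*_to_stream` entry points above compose into a single-file dump. The `dump_to_file` helper, the 64-byte `metadata` blob, and the reconstructed template parameter lists are assumptions, not part of the patch; only `save_vectors_to_stream`, `save_nodes_to_stream`, `index_serialized_header_t`, `serialization_result_t`, and `dummy_progress_t` come from the code above or existing USearch headers. The callback shape mirrors what the functions expect: `output(pointer, byte_count)` returning a truthy value on success.

```cpp
// Sketch of dumping a populated storage into one file, assuming the relevant
// USearch headers (including the new storage header) are already included and
// `storage_at` is some instantiation of the `storage_v2_at` template above.
#include <cstddef> // `std::size_t`
#include <cstdint> // `std::uint64_t`
#include <cstdio>  // `std::fopen`, `std::fwrite`

template <typename storage_at>
unum::usearch::serialization_result_t dump_to_file( //
    storage_at const& storage, unum::usearch::index_serialized_header_t const& header,
    std::uint64_t bytes_per_vector, char const* path) {

    using namespace unum::usearch;
    serialization_result_t result;

    std::FILE* file = std::fopen(path, "wb");
    if (!file)
        return result.failed("Can't open the output file");

    // Callback contract expected by `save_vectors_to_stream` / `save_nodes_to_stream`:
    // write `length` bytes starting at `begin`, return a truthy value on success.
    auto output = [&](void const* begin, std::size_t length) noexcept {
        return length == 0 || std::fwrite(begin, length, 1, file) == 1;
    };

    // Opaque metadata blob, serialized verbatim right after the vectors.
    std::uint8_t metadata[64] = {};
    dummy_progress_t progress;

    // Vectors go first, then the header, the per-node levels, and the node tapes.
    serialization_result_t vectors_result =
        storage.save_vectors_to_stream(output, bytes_per_vector, header.size, metadata);
    if (!vectors_result) {
        std::fclose(file);
        return vectors_result;
    }
    serialization_result_t nodes_result = storage.save_nodes_to_stream(output, header, progress);
    std::fclose(file);
    return nodes_result;
}
```

The matching read path would pair `load_vectors_from_stream` with `load_nodes_from_stream` over an `input(pointer, byte_count)` callback, or use the `view_*_from_file` pair to address the same data in a memory-mapped file without copying.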