diff --git a/CMakeLists.txt b/CMakeLists.txt
index d058470a..0a3deb6d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -63,18 +63,11 @@ else ()
 endif ()

 # Core compilation settings affecting "index.hpp"
-target_compile_definitions(
-    ${USEARCH_TARGET_NAME} INTERFACE $<$>:USEARCH_USE_OPENMP=0>
-)
+target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_OPENMP=$<BOOL:${USEARCH_USE_OPENMP}>")

 # Supplementary compilation settings affecting "index_plugins.hpp"
-target_compile_definitions(
-    ${USEARCH_TARGET_NAME} INTERFACE $<$>:USEARCH_USE_FP16LIB=1>
-)
-
-target_compile_definitions(
-    ${USEARCH_TARGET_NAME} INTERFACE $<$>:USEARCH_USE_SIMSIMD=0>
-)
+target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_FP16LIB=$<BOOL:${USEARCH_USE_FP16LIB}>")
+target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_SIMSIMD=$<BOOL:${USEARCH_USE_SIMSIMD}>")

 target_include_directories(
     ${USEARCH_TARGET_NAME} ${USEARCH_SYSTEM_INCLUDE} INTERFACE $
@@ -296,14 +289,12 @@ function (setup_target TARGET_NAME)
     endif ()

     # Core compilation settings affecting "index.hpp"
-    target_compile_definitions(${TARGET_NAME} PRIVATE $<$>:USEARCH_USE_OPENMP=0>)
+    target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_OPENMP=$<BOOL:${USEARCH_USE_OPENMP}>")

     # Supplementary compilation settings affecting "index_plugins.hpp"
-    target_compile_definitions(
-        ${TARGET_NAME} PRIVATE $<$>:USEARCH_USE_FP16LIB=1>
-    )
+    target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_FP16LIB=$<BOOL:${USEARCH_USE_FP16LIB}>")
+    target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_SIMSIMD=$<BOOL:${USEARCH_USE_SIMSIMD}>")

-    target_compile_definitions(${TARGET_NAME} PRIVATE $<$>:USEARCH_USE_SIMSIMD=0>)
 endfunction ()
diff --git a/c/test.c b/c/test.c
index 96e08212..d704b162 100644
--- a/c/test.c
+++ b/c/test.c
@@ -32,6 +32,7 @@ usearch_init_options_t create_options(size_t const dimensions) {
     opts.dimensions = dimensions;
     opts.expansion_add = 40;    // 40 in faiss
     opts.expansion_search = 16; // 10 in faiss
+    opts.multi = false;
     opts.metric_kind = usearch_metric_ip_k;
     opts.metric = NULL;
     opts.quantization = usearch_scalar_f32_k;
diff --git a/cpp/bench.cpp b/cpp/bench.cpp
index 4b3efe60..fbf70f90 100644
--- a/cpp/bench.cpp
+++ b/cpp/bench.cpp
@@ -42,8 +42,6 @@
 #include  // `omp_set_num_threads()`
 #endif

-#include 
-
 #include 

 using namespace unum::usearch;
@@ -615,4 +613,4 @@ int main(int argc, char** argv) {
     run_punned>(dataset, args, config, limits);

     return 0;
-}
\ No newline at end of file
+}
diff --git a/cpp/test.cpp b/cpp/test.cpp
index aa9b0c56..7a93cbfc 100644
--- a/cpp/test.cpp
+++ b/cpp/test.cpp
@@ -5,12 +5,12 @@
 #include  // `assert`
 #include  // `std::default_random_engine`
 #include 
-#include 
 #include  // for std::vector

 #include 
 #include 
 #include 
+#include 

 using namespace unum::usearch;
 using namespace unum;
@@ -77,7 +77,7 @@ void test_cosine(index_at& index, std::vector> const& vec
     expect((index.stats(0).nodes == 3));

     // Check if clustering endpoint compiles
-    index.cluster(vector_first, 0, args...);
+    // index.cluster(vector_first, 0, args...);

     // Try removals and replacements
     if constexpr (punned_ak) {
@@ -141,8 +141,8 @@ void test_cosine(index_at& index, std::vector> const& vec
         index.get(key_second, vec_recovered_from_view.data());
         expect(std::equal(vector_second, vector_second + dimensions, vec_recovered_from_view.data()));

-        auto compaction_result = index.compact();
-        expect(bool(compaction_result));
+        // auto compaction_result = index.compact();
+        // expect(bool(compaction_result));
     }

     expect(index.memory_usage() > 0);
@@ -155,14 +155,15 @@ void test_cosine(index_at& index, std::vector> const& vec
     }
 }

-template  //
+template  //
 void
test_cosine(std::size_t collection_size, std::size_t dimensions) {
+    using storage_t = storage_at;
     using scalar_t = scalar_at;
     using vector_key_t = key_at;
     using slot_t = slot_at;

-    using index_typed_t = index_gt;
+    using index_typed_t = index_gt;
     using member_cref_t = typename index_typed_t::member_cref_t;
     using member_citerator_t = typename index_typed_t::member_citerator_t;
@@ -197,7 +198,9 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) {
         std::printf("- templates with connectivity %zu \n", connectivity);
         metric_t metric{&matrix, dimensions};
         index_config_t config(connectivity);
-        index_typed_t index_typed(config);
+        storage_t storage{config};
+        index_typed_t index_typed_tmp(&storage, config);
+        index_typed_t index_typed = std::move(index_typed_tmp);

         test_cosine(index_typed, matrix, metric);
     }
@@ -205,7 +208,7 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) {
     for (bool multi : {false, true}) {
         for (std::size_t connectivity : {3, 13, 50}) {
             std::printf("- punned with connectivity %zu \n", connectivity);
-            using index_t = index_dense_gt;
+            using index_t = index_dense_gt;
             metric_punned_t metric(dimensions, metric_kind_t::cos_k, scalar_kind());
             index_dense_config_t config(connectivity);
             config.multi = multi;
@@ -310,9 +313,24 @@ int main(int, char**) {
     for (std::size_t collection_size : {10, 500})
         for (std::size_t dimensions : {97, 256}) {
             std::printf("Indexing %zu vectors with cos: \n", collection_size);
-            test_cosine(collection_size, dimensions);
-            std::printf("Indexing %zu vectors with cos: \n", collection_size);
-            test_cosine(collection_size, dimensions);
+            using key_t = std::int64_t;
+            {
+                using slot_t = std::uint32_t;
+                using storage_v2_t = storage_v2_at;
+                using std_storage_t = std_storage_at;
+
+                test_cosine(collection_size, dimensions);
+                test_cosine(collection_size, dimensions);
+            }
+            {
+                using slot_t = uint40_t;
+                using storage_v2_t = storage_v2_at;
+                using std_storage_t = std_storage_at;
+
+                std::printf("Indexing %zu vectors with cos: \n", collection_size);
+                test_cosine(collection_size, dimensions);
+                test_cosine(collection_size, dimensions);
+            }
         }

     for (std::size_t connectivity : {3, 13, 50})
diff --git a/docs/benchmarks.md b/docs/benchmarks.md
index b14f7724..2840c6a8 100644
--- a/docs/benchmarks.md
+++ b/docs/benchmarks.md
@@ -55,7 +55,7 @@ Also worth noting, 8-bit quantization results in almost no quantization loss and

 Within this repository you will find two commonly used utilities:

-- `cpp/bench.cpp` the produces the `bench` binary for broad USearch benchmarks.
+- `cpp/bench.cpp` that produces the `bench_cpp` binary for broad USearch benchmarks.
 - `python/bench.py` and `python/bench.ipynb` for interactive charts against FAISS.

 To achieve best highest results we suggest compiling locally for the target architecture.
@@ -64,17 +64,19 @@ To achieve best highest results we suggest compiling locally for the target arch
 ```sh
 cmake -B ./build_release \
     -DCMAKE_BUILD_TYPE=Release \
     -DUSEARCH_USE_OPENMP=1 \
+    -DUSEARCH_USE_SIMSIMD=1 \
+    -DUSEARCH_USE_FP16LIB=0 \
     -DUSEARCH_USE_JEMALLOC=1 && \
     make -C ./build_release -j

-./build_release/bench --help
+./build_release/bench_cpp --help
 ```

 Which would print the following instructions.
```txt SYNOPSIS - ./build_release/bench [--vectors ] [--queries ] [--neighbors ] [-b] [-j + ./build_release/bench_cpp [--vectors ] [--queries ] [--neighbors ] [-b] [-j ] [-c ] [--expansion-add ] [--expansion-search ] [--native|--f16quant|--i8quant] [--ip|--l2sq|--cos|--haversine] [-h] @@ -115,12 +117,12 @@ OPTIONS Here is an example of running the C++ benchmark: ```sh -./build_release/bench \ +./build_release/bench_cpp \ --vectors datasets/wiki_1M/base.1M.fbin \ --queries datasets/wiki_1M/query.public.100K.fbin \ --neighbors datasets/wiki_1M/groundtruth.public.100K.ibin -./build_release/bench \ +./build_release/bench_cpp \ --vectors datasets/t2i_1B/base.1B.fbin \ --queries datasets/t2i_1B/query.public.100K.fbin \ --neighbors datasets/t2i_1B/groundtruth.public.100K.ibin \ @@ -205,17 +207,17 @@ With `perf`: ```sh # Pass environment variables with `-E`, and `-d` for details -sudo -E perf stat -d ./build_release/bench ... -sudo -E perf mem -d ./build_release/bench ... +sudo -E perf stat -d ./build_release/bench_cpp ... +sudo -E perf mem -d ./build_release/bench_cpp ... # Sample on-CPU functions for the specified command, at 1 Kilo Hertz: -sudo -E perf record -F 1000 ./build_release/bench ... -perf record -d -e arm_spe// -- ./build_release/bench .. +sudo -E perf record -F 1000 ./build_release/bench_cpp ... +perf record -d -e arm_spe// -- ./build_release/bench_cpp .. ``` ### Caches ```sh -sudo perf stat -e 'faults,dTLB-loads,dTLB-load-misses,cache-misses,cache-references' ./build_release/bench ... +sudo perf stat -e 'faults,dTLB-loads,dTLB-load-misses,cache-misses,cache-references' ./build_release/bench_cpp ... ``` Typical output on a 1M vectors dataset is: @@ -242,4 +244,3 @@ sudo sysctl -w vm.nr_hugepages=2048 sudo reboot sudo cat /proc/sys/vm/nr_hugepages ``` - diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index e5616c15..7817fbe0 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -76,6 +76,7 @@ #include // `std::sort_heap` #include // `std::atomic` #include // `std::bitset` +#include #include // `CHAR_BIT` #include // `std::sqrt` #include // `std::memset` @@ -401,6 +402,8 @@ template > class bitset_gt { static constexpr std::size_t slots(std::size_t bits) { return divide_round_up(bits); } compressed_slot_t* slots_{}; + /// @brief size - number of bits in the bitset + std::size_t size_{}; /// @brief Number of slots. std::size_t count_{}; @@ -409,6 +412,7 @@ template > class bitset_gt { ~bitset_gt() noexcept { reset(); } explicit operator bool() const noexcept { return slots_; } + std::size_t size() const noexcept { return size_; } void clear() noexcept { if (slots_) std::memset(slots_, 0, count_ * sizeof(compressed_slot_t)); @@ -423,18 +427,20 @@ template > class bitset_gt { bitset_gt(std::size_t capacity) noexcept : slots_((compressed_slot_t*)allocator_t{}.allocate(slots(capacity) * sizeof(compressed_slot_t))), - count_(slots_ ? slots(capacity) : 0u) { + size_(slots_ ? capacity : 0u), count_(slots_ ? 
slots(capacity) : 0u) {
         clear();
     }

     bitset_gt(bitset_gt&& other) noexcept {
         slots_ = exchange(other.slots_, nullptr);
         count_ = exchange(other.count_, 0);
+        size_ = exchange(other.size_, 0);
     }

     bitset_gt& operator=(bitset_gt&& other) noexcept {
         std::swap(slots_, other.slots_);
         std::swap(count_, other.count_);
+        std::swap(size_, other.size_);
         return *this;
     }
@@ -1599,6 +1605,90 @@ template struct member_ref_gt {
 template inline std::size_t get_slot(member_ref_gt const& m) noexcept { return m.slot; }
 template inline key_at get_key(member_ref_gt const& m) noexcept { return m.key; }

+using level_t = std::int16_t;
+
+struct precomputed_constants_t {
+    double inverse_log_connectivity{};
+    std::size_t neighbors_bytes{};
+    std::size_t neighbors_base_bytes{};
+};
+
+// todo:: this is public, but we make assumptions about it that are not communicated via this interface;
+// clean these up later
+//
+/**
+ * @brief  A loosely-structured handle for every node. One such node is created for every member.
+ *         To minimize memory usage and maximize the number of entries per cache-line, it only
+ *         stores a single pointer. The internal tape starts with a `vector_key_t` @b key, then
+ *         a `level_t` for the number of graph @b levels in which this member appears,
+ *         then the { `neighbors_count_t`, `compressed_slot_t`, `compressed_slot_t` ... } sequences
+ *         for @b each-level.
+ */
+template  class node_at {
+    byte_t* tape_{};
+
+    inline std::size_t node_neighbors_bytes_(const precomputed_constants_t& pre, node_at node) const noexcept {
+        return node_neighbors_bytes_(pre, node.level());
+    }
+    static inline std::size_t node_neighbors_bytes_(const precomputed_constants_t& pre, level_t level) noexcept {
+        return pre.neighbors_base_bytes + pre.neighbors_bytes * level;
+    }
+
+  public:
+    using vector_key_t = key_at;
+    using slot_t = slot_at;
+    /**
+     * @brief  Integer for the number of node neighbors at a specific level of the
+     *         multi-level graph. It's selected to be `std::uint32_t` to improve the
+     *         alignment in most common cases.
+     */
+    using neighbors_count_t = std::uint32_t;
+    using span_bytes_t = span_gt;
+    explicit node_at(byte_t* tape) noexcept : tape_(tape) {}
+    byte_t* tape() const noexcept { return tape_; }
+    /**
+     * @brief  How many bytes of memory are needed to form the "head" of the node.
+     */
+    static constexpr std::size_t head_size_bytes() { return sizeof(vector_key_t) + sizeof(level_t); }
+    byte_t* neighbors_tape() const noexcept { return tape_ + head_size_bytes(); }
+    explicit operator bool() const noexcept { return tape_; }
+
+    inline span_bytes_t node_bytes(const precomputed_constants_t& pre) const noexcept {
+        return {tape(), node_size_bytes(pre, level())};
+    }
+    inline std::size_t node_size_bytes(const precomputed_constants_t& pre) noexcept {
+        return head_size_bytes() + node_neighbors_bytes_(pre, level());
+    }
+    static inline std::size_t node_size_bytes(const precomputed_constants_t& pre, level_t level) noexcept {
+        return head_size_bytes() + node_neighbors_bytes_(pre, level);
+    }
+
+    inline static precomputed_constants_t precompute_(index_config_t const& config) noexcept {
+        precomputed_constants_t pre;
+        // todo:: ask-Ashot:: inverse_log_connectivity does not really belong here, but the other two do.
+        // maybe we can separate these?
+ pre.inverse_log_connectivity = 1.0 / std::log(static_cast(config.connectivity)); + pre.neighbors_bytes = config.connectivity * sizeof(slot_t) + sizeof(neighbors_count_t); + pre.neighbors_base_bytes = config.connectivity_base * sizeof(slot_t) + sizeof(neighbors_count_t); + return pre; + } + + node_at() = default; + node_at(node_at const&) = default; + node_at& operator=(node_at const&) = default; + + misaligned_ref_gt ckey() const noexcept { return {tape_}; } + misaligned_ref_gt key() const noexcept { return {tape_}; } + misaligned_ref_gt level() const noexcept { return {tape_ + sizeof(vector_key_t)}; } + + void key(vector_key_t v) noexcept { return misaligned_store(tape_, v); } + void level(level_t v) noexcept { return misaligned_store(tape_ + sizeof(vector_key_t), v); } +}; + +static_assert(std::is_trivially_copy_constructible>::value, + "Nodes must be light!"); +static_assert(std::is_trivially_destructible>::value, "Nodes must be light!"); + /** * @brief Approximate Nearest Neighbors Search @b index-structure using the * Hierarchical Navigable Small World @b (HNSW) graphs algorithm. @@ -1609,6 +1699,14 @@ template inline key_at get_key(member_ref_gt const& m) * not just within equi-dimensional vectors. Examples range from texts to similar Chess * positions. * + * @tparam storage_at + * The storage provider for index_gt. The index uses the storage_at + * API to store and retrieve hnsw index nodes and vectors. + * see `dummy_storage_single_threaded` for a minimal storage implementation + * and interface reference for storage_at. + * NOTE: Storage object is taken by reference. It is the caller's responsibility + * to make sure the reference is valid whenever the index is being used + * * @tparam key_at * The type of primary objects stored in the index. * The values, to which those map, are not managed by the same index structure. @@ -1625,10 +1723,6 @@ template inline key_at get_key(member_ref_gt const& m) * priority queues, needed during construction and traversals of graphs. * The allocated buffers may be uninitialized. * - * @tparam tape_allocator_at - * Potentially different memory allocator for primary allocations of nodes and vectors. - * It would never `deallocate` separate entries, and would only free all the space at once. - * The allocated buffers may be uninitialized. * * @section Features * @@ -1679,24 +1773,28 @@ template inline key_at get_key(member_ref_gt const& m) * - `member_gt` contains an already prefetched copy of the key. 
* */ -template , // - typename tape_allocator_at = dynamic_allocator_at> // + typename dynamic_allocator_at = std::allocator> // class index_gt { public: + using storage_t = storage_at; + using node_lock_t = typename storage_t::lock_type; using distance_t = distance_at; using vector_key_t = key_at; using key_t = vector_key_t; using compressed_slot_t = compressed_slot_at; using dynamic_allocator_t = dynamic_allocator_at; - using tape_allocator_t = tape_allocator_at; static_assert(sizeof(vector_key_t) >= sizeof(compressed_slot_t), "Having tiny keys doesn't make sense."); using member_cref_t = member_cref_gt; using member_ref_t = member_ref_gt; + using node_t = node_at; + // using node_t = typename storage_t::node_t; + template class member_iterator_gt { using ref_t = ref_at; using index_t = index_at; @@ -1715,8 +1813,8 @@ class index_gt { using pointer = void; using reference = ref_t; - reference operator*() const noexcept { return {index_->node_at_(slot_).key(), slot_}; } - vector_key_t key() const noexcept { return index_->node_at_(slot_).key(); } + reference operator*() const noexcept { return {index_->storage_->get_node_at(slot_).key(), slot_}; } + vector_key_t key() const noexcept { return index_->storage_->get_node_at(slot_).key(); } friend inline std::size_t get_slot(member_iterator_gt const& it) noexcept { return it.slot_; } friend inline vector_key_t get_key(member_iterator_gt const& it) noexcept { return it.key(); } @@ -1759,10 +1857,7 @@ class index_gt { sizeof(byte_t) == 1, // "Primary allocator must allocate separate addressable bytes"); - using tape_allocator_traits_t = std::allocator_traits; - static_assert( // - sizeof(typename tape_allocator_traits_t::value_type) == 1, // - "Tape allocator must allocate separate addressable bytes"); + using span_bytes_t = span_gt; private: /** @@ -1771,22 +1866,9 @@ class index_gt { * alignment in most common cases. */ using neighbors_count_t = std::uint32_t; - using level_t = std::int16_t; - - /** - * @brief How many bytes of memory are needed to form the "head" of the node. - */ - static constexpr std::size_t node_head_bytes_() { return sizeof(vector_key_t) + sizeof(level_t); } - - using nodes_mutexes_t = bitset_gt; using visits_hash_set_t = growing_hash_set_gt, dynamic_allocator_t>; - struct precomputed_constants_t { - double inverse_log_connectivity{}; - std::size_t neighbors_bytes{}; - std::size_t neighbors_base_bytes{}; - }; /// @brief A space-efficient internal data-structure used in graph traversal queues. struct candidate_t { distance_t distance; @@ -1799,38 +1881,6 @@ class index_gt { using top_candidates_t = sorted_buffer_gt, candidates_allocator_t>; using next_candidates_t = max_heap_gt, candidates_allocator_t>; - /** - * @brief A loosely-structured handle for every node. One such node is created for every member. - * To minimize memory usage and maximize the number of entries per cache-line, it only - * stores to pointers. The internal tape starts with a `vector_key_t` @b key, then - * a `level_t` for the number of graph @b levels in which this member appears, - * then the { `neighbors_count_t`, `compressed_slot_t`, `compressed_slot_t` ... } sequences - * for @b each-level. 
- */ - class node_t { - byte_t* tape_{}; - - public: - explicit node_t(byte_t* tape) noexcept : tape_(tape) {} - byte_t* tape() const noexcept { return tape_; } - byte_t* neighbors_tape() const noexcept { return tape_ + node_head_bytes_(); } - explicit operator bool() const noexcept { return tape_; } - - node_t() = default; - node_t(node_t const&) = default; - node_t& operator=(node_t const&) = default; - - misaligned_ref_gt ckey() const noexcept { return {tape_}; } - misaligned_ref_gt key() const noexcept { return {tape_}; } - misaligned_ref_gt level() const noexcept { return {tape_ + sizeof(vector_key_t)}; } - - void key(vector_key_t v) noexcept { return misaligned_store(tape_, v); } - void level(level_t v) noexcept { return misaligned_store(tape_ + sizeof(vector_key_t), v); } - }; - - static_assert(std::is_trivially_copy_constructible::value, "Nodes must be light!"); - static_assert(std::is_trivially_destructible::value, "Nodes must be light!"); - /** * @brief A slice of the node's tape, containing a the list of neighbors * for a node in a single graph level. It's pre-allocated to fit @@ -1900,14 +1950,15 @@ class index_gt { } }; + // todo:: do I have to init this? + // A: Yes! matters a lot in move constructors!! + storage_t* storage_{}; index_config_t config_{}; index_limits_t limits_{}; mutable dynamic_allocator_t dynamic_allocator_{}; - tape_allocator_t tape_allocator_{}; precomputed_constants_t pre_{}; - memory_mapped_file_t viewed_file_{}; /// @brief Number of "slots" available for `node_t` objects. Equals to @b `limits_.members`. usearch_align_m mutable std::atomic nodes_capacity_{}; @@ -1925,14 +1976,6 @@ class index_gt { /// @brief The slot in which the only node of the top-level graph is stored. std::size_t entry_slot_{}; - using nodes_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - - /// @brief C-style array of `node_t` smart-pointers. - buffer_gt nodes_{}; - - /// @brief Mutex, that limits concurrent access to `nodes_`. - mutable nodes_mutexes_t nodes_mutexes_{}; - using contexts_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; /// @brief Array of thread-specific buffers for temporary data. @@ -1945,25 +1988,27 @@ class index_gt { std::size_t max_level() const noexcept { return nodes_count_ ? static_cast(max_level_) : 0; } index_config_t const& config() const noexcept { return config_; } index_limits_t const& limits() const noexcept { return limits_; } - bool is_immutable() const noexcept { return bool(viewed_file_); } + bool is_immutable() const noexcept { return storage_->is_immutable(); } /** * @section Exceptions * Doesn't throw, unless the ::metric's and ::allocators's throw on copy-construction. */ - explicit index_gt( // - index_config_t config = {}, dynamic_allocator_t dynamic_allocator = {}, - tape_allocator_t tape_allocator = {}) noexcept - : config_(config), limits_(0, 0), dynamic_allocator_(std::move(dynamic_allocator)), - tape_allocator_(std::move(tape_allocator)), pre_(precompute_(config)), nodes_count_(0u), max_level_(-1), - entry_slot_(0u), nodes_(), nodes_mutexes_(), contexts_() {} + explicit index_gt( // + storage_t* storage, // + index_config_t config = {}, dynamic_allocator_t dynamic_allocator = {}) noexcept + : storage_(storage), config_(config), limits_(0, 0), dynamic_allocator_(std::move(dynamic_allocator)), + pre_(node_t::precompute_(config)), nodes_count_(0u), max_level_(-1), entry_slot_(0u), contexts_() {} /** * @brief Clones the structure with the same hyper-parameters, but without contents. 
*/ - index_gt fork() noexcept { return index_gt{config_, dynamic_allocator_, tape_allocator_}; } + index_gt fork() noexcept { return index_gt{config_, dynamic_allocator_}; } - ~index_gt() noexcept { reset(); } + ~index_gt() noexcept { + reset(); + storage_ = nullptr; + } index_gt(index_gt&& other) noexcept { swap(other); } @@ -1986,18 +2031,20 @@ class index_gt { copy_result_t copy(index_copy_config_t config = {}) const noexcept { copy_result_t result; index_gt& other = result.index; - other = index_gt(config_, dynamic_allocator_, tape_allocator_); + other = index_gt(config_, dynamic_allocator_); if (!other.reserve(limits_)) return result.failed("Failed to reserve the contexts"); // Now all is left - is to allocate new `node_t` instances and populate // the `other.nodes_` array into it. - for (std::size_t i = 0; i != nodes_count_; ++i) - other.nodes_[i] = other.node_make_copy_(node_bytes_(nodes_[i])); - other.nodes_count_ = nodes_count_.load(); - other.max_level_ = max_level_; - other.entry_slot_ = entry_slot_; + assert(false); + // for (std::size_t i = 0; i != nodes_count_; ++i) + // other.nodes_[i] = other.node_make_copy_(node_bytes_(nodes_[i])); + + // other.nodes_count_ = nodes_count_.load(); + // other.max_level_ = max_level_; + // other.entry_slot_ = entry_slot_; // This controls nothing for now :) (void)config; @@ -2011,13 +2058,12 @@ class index_gt { member_iterator_t begin() noexcept { return {this, 0}; } member_iterator_t end() noexcept { return {this, size()}; } - member_ref_t at(std::size_t slot) noexcept { return {nodes_[slot].key(), slot}; } - member_cref_t at(std::size_t slot) const noexcept { return {nodes_[slot].ckey(), slot}; } + member_ref_t at(std::size_t slot) noexcept { return {storage_->get_node_at(slot).key(), slot}; } + member_cref_t at(std::size_t slot) const noexcept { return {storage_->get_node_at(slot).ckey(), slot}; } member_iterator_t iterator_at(std::size_t slot) noexcept { return {this, slot}; } member_citerator_t citerator_at(std::size_t slot) const noexcept { return {this, slot}; } dynamic_allocator_t const& dynamic_allocator() const noexcept { return dynamic_allocator_; } - tape_allocator_t const& tape_allocator() const noexcept { return tape_allocator_; } #pragma region Adjusting Configuration @@ -2028,12 +2074,9 @@ class index_gt { * Will keep the number of available threads/contexts the same as it was. */ void clear() noexcept { - if (!has_reset()) { - std::size_t n = nodes_count_; - for (std::size_t i = 0; i != n; ++i) - node_free_(i); - } else - tape_allocator_.deallocate(nullptr, 0); + if (storage_) + storage_->clear(); + nodes_count_ = 0; max_level_ = -1; entry_slot_ = 0u; @@ -2049,29 +2092,29 @@ class index_gt { void reset() noexcept { clear(); - nodes_ = {}; + if (storage_) + storage_->reset(); contexts_ = {}; - nodes_mutexes_ = {}; limits_ = index_limits_t{0, 0}; nodes_capacity_ = 0; - viewed_file_ = memory_mapped_file_t{}; - tape_allocator_ = {}; } + /** + * @brief replace internal storage pointer with the new one + */ + void reset_storage(storage_t* storage) { storage_ = storage; } + /** * @brief Swaps the underlying memory buffers and thread contexts. 
*/ void swap(index_gt& other) noexcept { + std::swap(storage_, other.storage_); std::swap(config_, other.config_); std::swap(limits_, other.limits_); std::swap(dynamic_allocator_, other.dynamic_allocator_); - std::swap(tape_allocator_, other.tape_allocator_); std::swap(pre_, other.pre_); - std::swap(viewed_file_, other.viewed_file_); std::swap(max_level_, other.max_level_); std::swap(entry_slot_, other.entry_slot_); - std::swap(nodes_, other.nodes_); - std::swap(nodes_mutexes_, other.nodes_mutexes_); std::swap(contexts_, other.contexts_); // Non-atomic parts. @@ -2094,21 +2137,14 @@ class index_gt { && limits.members <= limits_.members) return true; - nodes_mutexes_t new_mutexes(limits.members); - buffer_gt new_nodes(limits.members); + bool storage_reserved = storage_->reserve(limits.members); buffer_gt new_contexts(limits.threads()); - if (!new_nodes || !new_contexts || !new_mutexes) + if (!new_contexts || !storage_reserved) return false; - // Move the nodes info, and deallocate previous buffers. - if (nodes_) - std::memcpy(new_nodes.data(), nodes_.data(), sizeof(node_t) * size()); - limits_ = limits; nodes_capacity_ = limits.members; - nodes_ = std::move(new_nodes); contexts_ = std::move(new_contexts); - nodes_mutexes_ = std::move(new_mutexes); return true; } @@ -2162,12 +2198,12 @@ class index_gt { }; class search_result_t { - node_t const* nodes_{}; + storage_t const* storage_{}; top_candidates_t const* top_{}; friend class index_gt; inline search_result_t(index_gt const& index, top_candidates_t& top) noexcept - : nodes_(index.nodes_), top_(&top) {} + : storage_(index.storage_), top_(&top) {} public: /** @brief Number of search results found. */ @@ -2203,7 +2239,7 @@ class index_gt { inline match_t at(std::size_t i) const noexcept { candidate_t const* top_ordered = top_->data(); candidate_t candidate = top_ordered[i]; - node_t node = nodes_[candidate.slot]; + node_t node = storage_->get_node_at(candidate.slot); return {member_cref_t{node.ckey(), candidate.slot}, candidate.distance}; } inline std::size_t merge_into( // @@ -2321,7 +2357,7 @@ class index_gt { } // Allocate the neighbors - node_t node = node_make_(key, target_level); + node_t node = storage_->node_make(key, target_level); if (!node) { nodes_count_.fetch_sub(1); return result.failed("Out of memory!"); @@ -2329,11 +2365,11 @@ class index_gt { if (target_level <= max_level_copy) new_level_lock.unlock(); - nodes_[new_slot] = node; + storage_->node_store(new_slot, node); result.new_size = new_slot + 1; result.slot = new_slot; callback(at(new_slot)); - node_lock_t new_lock = node_lock_(new_slot); + node_lock_t new_lock = storage_->node_lock(new_slot); // Do nothing for the first element if (!new_slot) { @@ -2417,11 +2453,11 @@ class index_gt { if (!next.reserve(config.expansion)) return result.failed("Out of memory!"); - node_lock_t new_lock = node_lock_(old_slot); - node_t node = node_at_(old_slot); + node_lock_t new_lock = storage_->node_lock(old_slot); + node_t node = storage_->get_node_at(old_slot); level_t node_level = node.level(); - span_bytes_t node_bytes = node_bytes_(node); + span_bytes_t node_bytes = node.node_bytes(pre_); std::memset(node_bytes.data(), 0, node_bytes.size()); node.level(node_level); @@ -2570,14 +2606,14 @@ class index_gt { stats_t result{}; for (std::size_t i = 0; i != size(); ++i) { - node_t node = node_at_(i); + node_t node = storage_->get_node_at(i); std::size_t max_edges = node.level() * config_.connectivity + config_.connectivity_base; std::size_t edges = 0; for (level_t level = 0; level <= 
node.level(); ++level) edges += neighbors_(node, level).size(); ++result.nodes; - result.allocated_bytes += node_bytes_(node).size(); + result.allocated_bytes += storage_->node_size_bytes(i); result.edges += edges; result.max_edges += max_edges; } @@ -2589,13 +2625,13 @@ class index_gt { std::size_t neighbors_bytes = !level ? pre_.neighbors_base_bytes : pre_.neighbors_bytes; for (std::size_t i = 0; i != size(); ++i) { - node_t node = node_at_(i); + node_t node = storage_->get_node_at(i); if (static_cast(node.level()) < level) continue; ++result.nodes; result.edges += neighbors_(node, level).size(); - result.allocated_bytes += node_head_bytes_() + neighbors_bytes; + result.allocated_bytes += node_t::head_size_bytes() + neighbors_bytes; } std::size_t max_edges_per_node = level ? config_.connectivity_base : config_.connectivity; @@ -2605,9 +2641,9 @@ class index_gt { stats_t stats(stats_t* stats_per_level, std::size_t max_level) const noexcept { - std::size_t head_bytes = node_head_bytes_(); + std::size_t head_bytes = node_t::head_size_bytes(); for (std::size_t i = 0; i != size(); ++i) { - node_t node = node_at_(i); + node_t node = storage_->get_node_at(i); stats_per_level[0].nodes++; stats_per_level[0].edges += neighbors_(node, 0).size(); @@ -2620,7 +2656,6 @@ class index_gt { stats_per_level[l].allocated_bytes += pre_.neighbors_bytes; } } - // The `max_edges` parameter can be inferred from `nodes` stats_per_level[0].max_edges = stats_per_level[0].nodes * config_.connectivity_base; for (std::size_t l = 1; l <= max_level; ++l) @@ -2645,7 +2680,7 @@ class index_gt { */ std::size_t memory_usage(std::size_t allocator_entry_bytes = default_allocator_entry_bytes()) const noexcept { std::size_t total = 0; - if (!viewed_file_) { + if (!storage_->is_immutable()) { stats_t s = stats(); total += s.allocated_bytes; total += s.nodes * allocator_entry_bytes; @@ -2659,7 +2694,7 @@ class index_gt { return total; } - std::size_t memory_usage_per_node(level_t level) const noexcept { return node_bytes_(level); } + std::size_t memory_usage_per_node(level_t level) const noexcept { return node_t::node_size_bytes(pre_, level); } #pragma endregion @@ -2671,7 +2706,7 @@ class index_gt { std::size_t serialized_length() const noexcept { std::size_t neighbors_length = 0; for (std::size_t i = 0; i != size(); ++i) - neighbors_length += node_bytes_(node_at_(i).level()) + sizeof(level_t); + neighbors_length += node_t::node_size_bytes(pre_, storage_->get_node_at(i).level()) + sizeof(level_t); return sizeof(index_serialized_header_t) + neighbors_length; } @@ -2690,92 +2725,41 @@ class index_gt { header.connectivity_base = config_.connectivity_base; header.max_level = max_level_; header.entry_slot = entry_slot_; - if (!output(&header, sizeof(header))) - return result.failed("Failed to serialize the header into stream"); - - // Progress status - std::size_t processed = 0; - std::size_t const total = 2 * header.size; - - // Export the number of levels per node - // That is both enough to estimate the overall memory consumption, - // and to be able to estimate the offsets of every entry in the file. 
- for (std::size_t i = 0; i != header.size; ++i) { - node_t node = node_at_(i); - level_t level = node.level(); - if (!output(&level, sizeof(level))) - return result.failed("Failed to serialize into stream"); - if (!progress(++processed, total)) - return result.failed("Terminated by user"); - } - - // After that dump the nodes themselves - for (std::size_t i = 0; i != header.size; ++i) { - span_bytes_t node_bytes = node_bytes_(node_at_(i)); - if (!output(node_bytes.data(), node_bytes.size())) - return result.failed("Failed to serialize into stream"); - if (!progress(++processed, total)) - return result.failed("Terminated by user"); - } - return {}; + return storage_->save_nodes_to_stream(output, header, progress); } /** * @brief Symmetric to `save_from_stream`, pulls data from a stream. + * Note: assumes storage is properly reset and ready for loading the hnsw graph */ template serialization_result_t load_from_stream(input_callback_at&& input, progress_at&& progress = {}) noexcept { serialization_result_t result; - // Remove previously stored objects - reset(); - // Pull basic metadata index_serialized_header_t header; - if (!input(&header, sizeof(header))) - return result.failed("Failed to pull the header from the stream"); - - // We are loading an empty index, no more work to do - if (!header.size) { + result = storage_->load_nodes_from_stream(input, header, progress); + if (!result) { reset(); return result; } - // Allocate some dynamic memory to read all the levels - using levels_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt levels(header.size); - if (!levels) - return result.failed("Out of memory"); - if (!input(levels, header.size * sizeof(level_t))) - return result.failed("Failed to pull nodes levels from the stream"); - // Submit metadata config_.connectivity = header.connectivity; config_.connectivity_base = header.connectivity_base; - pre_ = precompute_(config_); + pre_ = node_t::precompute_(config_); + nodes_count_ = header.size; + max_level_ = static_cast(header.max_level); + entry_slot_ = static_cast(header.entry_slot); + // allocate dynamic contexts for queries (storage has already been allocated for the deserialization process) index_limits_t limits; limits.members = header.size; if (!reserve(limits)) { reset(); return result.failed("Out of memory"); } - nodes_count_ = header.size; - max_level_ = static_cast(header.max_level); - entry_slot_ = static_cast(header.entry_slot); - - // Load the nodes - for (std::size_t i = 0; i != header.size; ++i) { - span_bytes_t node_bytes = node_malloc_(levels[i]); - if (!input(node_bytes.data(), node_bytes.size())) { - reset(); - return result.failed("Failed to pull nodes from the stream"); - } - nodes_[i] = node_t{node_bytes.data()}; - if (!progress(i + 1, header.size)) - return result.failed("Terminated by user"); - } return {}; } @@ -2800,7 +2784,7 @@ class index_gt { return io_result; serialization_result_t stream_result = save_to_stream( - [&](void* buffer, std::size_t length) { + [&](const void* buffer, std::size_t length) { io_result = file.write(buffer, length); return !!io_result; }, @@ -2824,7 +2808,7 @@ class index_gt { return io_result; serialization_result_t stream_result = save_to_stream( - [&](void* buffer, std::size_t length) { + [&](const void* buffer, std::size_t length) { if (offset + length > file.size()) return false; std::memcpy(file.data() + offset, buffer, length); @@ -2848,6 +2832,9 @@ class index_gt { if (!io_result) return io_result; + // Remove previously stored objects + 
reset(); + serialization_result_t stream_result = load_from_stream( [&](void* buffer, std::size_t length) { io_result = file.read(buffer, length); @@ -2873,6 +2860,9 @@ class index_gt { if (!io_result) return io_result; + // Remove previously stored objects + reset(); + serialization_result_t stream_result = load_from_stream( [&](void* buffer, std::size_t length) { if (offset + length > file.size()) @@ -2896,42 +2886,22 @@ class index_gt { // Remove previously stored objects reset(); - - serialization_result_t result = file.open_if_not(); - if (!result) - return result; - - // Pull basic metadata + return view_internal(std::move(file), offset, progress); + } + template + serialization_result_t view_internal(memory_mapped_file_t file, std::size_t offset = 0, + progress_at&& progress = {}) noexcept { + // shall not call reset() + // storage_ may already have some relevant stuff... + serialization_result_t result; index_serialized_header_t header; - if (file.size() - offset < sizeof(header)) - return result.failed("File is corrupted and lacks a header"); - std::memcpy(&header, file.data() + offset, sizeof(header)); - - if (!header.size) { - reset(); + result = storage_->view_nodes_from_file(std::move(file), header, offset, progress); + if (!result) return result; - } - - // Precompute offsets of every node, but before that we need to update the configs - // This could have been done with `std::exclusive_scan`, but it's only available from C++17. - using offsets_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt offsets(header.size); - if (!offsets) - return result.failed("Out of memory"); config_.connectivity = header.connectivity; config_.connectivity_base = header.connectivity_base; - pre_ = precompute_(config_); - misaligned_ptr_gt levels{(byte_t*)file.data() + offset + sizeof(header)}; - offsets[0u] = offset + sizeof(header) + sizeof(level_t) * header.size; - for (std::size_t i = 1; i < header.size; ++i) - offsets[i] = offsets[i - 1] + node_bytes_(levels[i - 1]); - - std::size_t total_bytes = offsets[header.size - 1] + node_bytes_(levels[header.size - 1]); - if (file.size() < total_bytes) { - reset(); - return result.failed("File is corrupted and can't fit all the nodes"); - } + pre_ = node_t::precompute_(config_); // Submit metadata and reserve memory index_limits_t limits; @@ -2944,236 +2914,13 @@ class index_gt { max_level_ = static_cast(header.max_level); entry_slot_ = static_cast(header.entry_slot); - // Rapidly address all the nodes - for (std::size_t i = 0; i != header.size; ++i) { - nodes_[i] = node_t{(byte_t*)file.data() + offsets[i]}; - if (!progress(i + 1, header.size)) - return result.failed("Terminated by user"); - } - viewed_file_ = std::move(file); return {}; } #pragma endregion - /** - * @brief Performs compaction on the whole HNSW index, purging some entries - * and links to them, while also generating a more efficient mapping, - * putting the more frequently used entries closer together. - * - * - * Scans the whole collection, removing the links leading towards - * banned entries. This essentially isolates some nodes from the rest - * of the graph, while keeping their outgoing links, in case the node - * is structurally relevant and has a crucial role in the index. - * It won't reclaim the memory. - * - * @param[in] allow_member Predicate to mark nodes for isolation. - * @param[in] executor Thread-pool to execute the job in parallel. - * @param[in] progress Callback to report the execution progress. 
- */ - template - void compact( // - values_at&& values, // - metric_at&& metric, // - slot_transition_at&& slot_transition, // - - executor_at&& executor = executor_at{}, // - progress_at&& progress = progress_at{}, // - prefetch_at&& prefetch = prefetch_at{}) noexcept { - - // Export all the keys, slots, and levels. - // Partition them with the predicate. - // Sort the allowed entries in descending order of their level. - // Create a new array mapping old slots to the new ones (INT_MAX for deleted items). - struct slot_level_t { - compressed_slot_t old_slot; - compressed_slot_t cluster; - level_t level; - }; - using slot_level_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt slots_and_levels(size()); - - // Progress status - std::atomic do_tasks{true}; - std::atomic processed{0}; - std::size_t const total = 3 * slots_and_levels.size(); - - // For every bottom level node, determine its parent cluster - executor.dynamic(slots_and_levels.size(), [&](std::size_t thread_idx, std::size_t old_slot) { - context_t& context = contexts_[thread_idx]; - std::size_t cluster = search_for_one_( // - values[citerator_at(old_slot)], // - metric, prefetch, // - entry_slot_, max_level_, 0, context); - slots_and_levels[old_slot] = { // - static_cast(old_slot), // - static_cast(cluster), // - node_at_(old_slot).level()}; - ++processed; - if (thread_idx == 0) - do_tasks = progress(processed.load(), total); - return do_tasks.load(); - }); - if (!do_tasks.load()) - return; - - // Where the actual permutation happens: - std::sort(slots_and_levels.begin(), slots_and_levels.end(), [](slot_level_t const& a, slot_level_t const& b) { - return a.level == b.level ? a.cluster < b.cluster : a.level > b.level; - }); - - using size_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt old_slot_to_new(slots_and_levels.size()); - for (std::size_t new_slot = 0; new_slot != slots_and_levels.size(); ++new_slot) - old_slot_to_new[slots_and_levels[new_slot].old_slot] = new_slot; - - // Erase all the incoming links - buffer_gt reordered_nodes(slots_and_levels.size()); - tape_allocator_t reordered_tape; - - for (std::size_t new_slot = 0; new_slot != slots_and_levels.size(); ++new_slot) { - std::size_t old_slot = slots_and_levels[new_slot].old_slot; - node_t old_node = node_at_(old_slot); - - std::size_t node_bytes = node_bytes_(old_node.level()); - byte_t* new_data = (byte_t*)reordered_tape.allocate(node_bytes); - node_t new_node{new_data}; - std::memcpy(new_data, old_node.tape(), node_bytes); - - for (level_t level = 0; level <= old_node.level(); ++level) - for (misaligned_ref_gt neighbor : neighbors_(new_node, level)) - neighbor = static_cast(old_slot_to_new[compressed_slot_t(neighbor)]); - - reordered_nodes[new_slot] = new_node; - if (!progress(++processed, total)) - return; - } - - for (std::size_t new_slot = 0; new_slot != slots_and_levels.size(); ++new_slot) { - std::size_t old_slot = slots_and_levels[new_slot].old_slot; - slot_transition(node_at_(old_slot).ckey(), // - static_cast(old_slot), // - static_cast(new_slot)); - if (!progress(++processed, total)) - return; - } - - nodes_ = std::move(reordered_nodes); - tape_allocator_ = std::move(reordered_tape); - entry_slot_ = old_slot_to_new[entry_slot_]; - } - - /** - * @brief Scans the whole collection, removing the links leading towards - * banned entries. 
This essentially isolates some nodes from the rest - * of the graph, while keeping their outgoing links, in case the node - * is structurally relevant and has a crucial role in the index. - * It won't reclaim the memory. - * - * @param[in] allow_member Predicate to mark nodes for isolation. - * @param[in] executor Thread-pool to execute the job in parallel. - * @param[in] progress Callback to report the execution progress. - */ - template < // - typename allow_member_at = dummy_predicate_t, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // - > - void isolate( // - allow_member_at&& allow_member, // - executor_at&& executor = executor_at{}, // - progress_at&& progress = progress_at{}) noexcept { - - // Progress status - std::atomic do_tasks{true}; - std::atomic processed{0}; - - // Erase all the incoming links - std::size_t nodes_count = size(); - executor.dynamic(nodes_count, [&](std::size_t thread_idx, std::size_t node_idx) { - node_t node = node_at_(node_idx); - for (level_t level = 0; level <= node.level(); ++level) { - neighbors_ref_t neighbors = neighbors_(node, level); - std::size_t old_size = neighbors.size(); - neighbors.clear(); - for (std::size_t i = 0; i != old_size; ++i) { - compressed_slot_t neighbor_slot = neighbors[i]; - node_t neighbor = node_at_(neighbor_slot); - if (allow_member(member_cref_t{neighbor.ckey(), neighbor_slot})) - neighbors.push_back(neighbor_slot); - } - } - ++processed; - if (thread_idx == 0) - do_tasks = progress(processed.load(), nodes_count); - return do_tasks.load(); - }); - - // At the end report the latest numbers, because the reporter thread may be finished earlier - progress(processed.load(), nodes_count); - } - private: - inline static precomputed_constants_t precompute_(index_config_t const& config) noexcept { - precomputed_constants_t pre; - pre.inverse_log_connectivity = 1.0 / std::log(static_cast(config.connectivity)); - pre.neighbors_bytes = config.connectivity * sizeof(compressed_slot_t) + sizeof(neighbors_count_t); - pre.neighbors_base_bytes = config.connectivity_base * sizeof(compressed_slot_t) + sizeof(neighbors_count_t); - return pre; - } - - using span_bytes_t = span_gt; - - inline span_bytes_t node_bytes_(node_t node) const noexcept { return {node.tape(), node_bytes_(node.level())}; } - inline std::size_t node_bytes_(level_t level) const noexcept { - return node_head_bytes_() + node_neighbors_bytes_(level); - } - inline std::size_t node_neighbors_bytes_(node_t node) const noexcept { return node_neighbors_bytes_(node.level()); } - inline std::size_t node_neighbors_bytes_(level_t level) const noexcept { - return pre_.neighbors_base_bytes + pre_.neighbors_bytes * level; - } - - span_bytes_t node_malloc_(level_t level) noexcept { - std::size_t node_bytes = node_bytes_(level); - byte_t* data = (byte_t*)tape_allocator_.allocate(node_bytes); - return data ? 
span_bytes_t{data, node_bytes} : span_bytes_t{}; - } - - node_t node_make_(vector_key_t key, level_t level) noexcept { - span_bytes_t node_bytes = node_malloc_(level); - if (!node_bytes) - return {}; - - std::memset(node_bytes.data(), 0, node_bytes.size()); - node_t node{(byte_t*)node_bytes.data()}; - node.key(key); - node.level(level); - return node; - } - - node_t node_make_copy_(span_bytes_t old_bytes) noexcept { - byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); - if (!data) - return {}; - std::memcpy(data, old_bytes.data(), old_bytes.size()); - return node_t{data}; - } - - void node_free_(std::size_t idx) noexcept { - if (viewed_file_) - return; - - node_t& node = nodes_[idx]; - tape_allocator_.deallocate(node.tape(), node_bytes_(node).size()); - node = node_t{}; - } - - inline node_t node_at_(std::size_t idx) const noexcept { return nodes_[idx]; } + // todo:: these can also be moved to node_at, along with class neighbors_ref_t definition inline neighbors_ref_t neighbors_base_(node_t node) const noexcept { return {node.neighbors_tape()}; } inline neighbors_ref_t neighbors_non_base_(node_t node, level_t level) const noexcept { @@ -3184,18 +2931,6 @@ class index_gt { return level ? neighbors_non_base_(node, level) : neighbors_base_(node); } - struct node_lock_t { - nodes_mutexes_t& mutexes; - std::size_t slot; - inline ~node_lock_t() noexcept { mutexes.atomic_reset(slot); } - }; - - inline node_lock_t node_lock_(std::size_t slot) const noexcept { - while (nodes_mutexes_.atomic_set(slot)) - ; - return {nodes_mutexes_, slot}; - } - template void connect_node_across_levels_( // value_at&& value, metric_at&& metric, prefetch_at&& prefetch, // @@ -3220,7 +2955,7 @@ class index_gt { std::size_t connect_new_node_( // metric_at&& metric, std::size_t new_slot, level_t level, context_t& context) usearch_noexcept_m { - node_t new_node = node_at_(new_slot); + node_t new_node = storage_->get_node_at(new_slot); top_candidates_t& top = context.top_candidates; // Outgoing links from `new_slot`: @@ -3231,7 +2966,8 @@ class index_gt { for (std::size_t idx = 0; idx != top_view.size(); idx++) { usearch_assert_m(!new_neighbors[idx], "Possible memory corruption"); - usearch_assert_m(level <= node_at_(top_view[idx].slot).level(), "Linking to missing level"); + usearch_assert_m(level <= storage_->get_node_at(top_view[idx].slot).level(), + "Linking to missing level"); new_neighbors.push_back(top_view[idx].slot); } } @@ -3244,7 +2980,7 @@ class index_gt { metric_at&& metric, std::size_t new_slot, value_at&& value, level_t level, context_t& context) usearch_noexcept_m { - node_t new_node = node_at_(new_slot); + node_t new_node = storage_->get_node_at(new_slot); top_candidates_t& top = context.top_candidates; neighbors_ref_t new_neighbors = neighbors_(new_node, level); @@ -3253,8 +2989,8 @@ class index_gt { for (compressed_slot_t close_slot : new_neighbors) { if (close_slot == new_slot) continue; - node_lock_t close_lock = node_lock_(close_slot); - node_t close_node = node_at_(close_slot); + node_lock_t close_lock = storage_->node_lock(close_slot); + node_t close_node = storage_->get_node_at(close_slot); neighbors_ref_t close_header = neighbors_(close_node, level); usearch_assert_m(close_header.size() <= connectivity_max, "Possible corruption"); @@ -3336,7 +3072,7 @@ class index_gt { bool operator==(candidates_iterator_t const& other) noexcept { return current_ == other.current_; } bool operator!=(candidates_iterator_t const& other) noexcept { return current_ != other.current_; } - vector_key_t 
key() const noexcept { return index_->node_at_(slot()).key(); } + vector_key_t key() const noexcept { return index_->get_node_at(slot()).key(); } compressed_slot_t slot() const noexcept { return neighbors_[current_]; } friend inline std::size_t get_slot(candidates_iterator_t const& it) noexcept { return it.slot(); } friend inline vector_key_t get_key(candidates_iterator_t const& it) noexcept { return it.key(); } @@ -3370,8 +3106,8 @@ class index_gt { bool changed; do { changed = false; - node_lock_t closest_lock = node_lock_(closest_slot); - neighbors_ref_t closest_neighbors = neighbors_non_base_(node_at_(closest_slot), level); + node_lock_t closest_lock = storage_->node_lock(closest_slot); + neighbors_ref_t closest_neighbors = neighbors_non_base_(storage_->get_node_at(closest_slot), level); // Optional prefetching if (!is_dummy()) { @@ -3436,8 +3172,8 @@ class index_gt { compressed_slot_t candidate_slot = candidacy.slot; if (new_slot == candidate_slot) continue; - node_t candidate_ref = node_at_(candidate_slot); - node_lock_t candidate_lock = node_lock_(candidate_slot); + node_t candidate_ref = storage_->get_node_at(candidate_slot); + node_lock_t candidate_lock = storage_->node_lock(candidate_slot); neighbors_ref_t candidate_neighbors = neighbors_(candidate_ref, level); // Optional prefetching @@ -3507,7 +3243,7 @@ class index_gt { next.pop(); context.iteration_cycles++; - neighbors_ref_t candidate_neighbors = neighbors_base_(node_at_(candidate.slot)); + neighbors_ref_t candidate_neighbors = neighbors_base_(storage_->get_node_at(candidate.slot)); // Optional prefetching if (!is_dummy()) { @@ -3528,7 +3264,7 @@ class index_gt { // This can substantially grow our priority queue: next.insert({-successor_dist, successor_slot}); if (!is_dummy()) - if (!predicate(member_cref_t{node_at_(successor_slot).ckey(), successor_slot})) + if (!predicate(member_cref_t{storage_->get_node_at(successor_slot).ckey(), successor_slot})) continue; // This will automatically evict poor matches: diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index e151b929..2f8e266a 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -8,6 +8,7 @@ #include #include +#include #if defined(USEARCH_DEFINED_CPP17) #include // `std::shared_mutex` @@ -16,7 +17,7 @@ namespace unum { namespace usearch { -template class index_dense_gt; +template class index_dense_gt; /** * @brief The "magic" sequence helps infer the type of the file. @@ -123,11 +124,6 @@ struct index_dense_clustering_config_t { } mode = merge_smallest_k; }; -struct index_dense_serialization_config_t { - bool exclude_vectors = false; - bool use_64_bit_dimensions = false; -}; - struct index_dense_copy_config_t : public index_copy_config_t { bool force_vector_copy = true; @@ -295,13 +291,17 @@ inline index_dense_metadata_result_t index_dense_metadata_from_buffer(memory_map * The second (2.) starts with @b "usearch"-magic-string, used to infer the file type on open. * The third (3.) is implemented by the underlying `index_gt` class. 
 */
-template  //
+template  > //
 class index_dense_gt {
   public:
     using vector_key_t = key_at;
     using key_t = vector_key_t;
     using compressed_slot_t = compressed_slot_at;
     using distance_t = distance_punned_t;
+    // using node_t = typename storage_at::node_t;
+    using node_t = node_at;
     using metric_t = metric_punned_t;

     using member_ref_t = member_ref_gt;
@@ -312,17 +312,16 @@ class index_dense_gt {
     using head_result_t = index_dense_head_result_t;

     using serialization_config_t = index_dense_serialization_config_t;
-
-    using dynamic_allocator_t = aligned_allocator_gt;
-    using tape_allocator_t = memory_mapping_allocator_gt<64>;
+    using storage_t = storage_at;

   private:
     /// @brief Schema: input buffer, bytes in input buffer, output buffer.
     using cast_t = std::function;

     /// @brief Punned index.
     using index_t = index_gt< //
+        storage_t,            //
         distance_t, vector_key_t, compressed_slot_t, //
-        dynamic_allocator_t, tape_allocator_t>;
+        dynamic_allocator_t>;
     using index_allocator_t = aligned_allocator_gt;

     using member_iterator_t = typename index_t::member_iterator_t;
@@ -345,8 +344,10 @@ class index_dense_gt {

         inline distance_t operator()(byte_t const* a, byte_t const* b) const noexcept { return f(a, b); }

-        inline byte_t const* v(member_cref_t m) const noexcept { return index_->vectors_lookup_[get_slot(m)]; }
-        inline byte_t const* v(member_citerator_t m) const noexcept { return index_->vectors_lookup_[get_slot(m)]; }
+        inline byte_t const* v(member_cref_t m) const noexcept { return index_->storage_.get_vector_at(get_slot(m)); }
+        inline byte_t const* v(member_citerator_t m) const noexcept {
+            return index_->storage_.get_vector_at(get_slot(m));
+        }
         inline distance_t f(byte_t const* a, byte_t const* b) const noexcept { return index_->metric_(a, b); }
     };

@@ -371,14 +372,12 @@ class index_dense_gt {
     /// @brief An instance of a potentially stateful `metric_t` used to initialize copies and forks.
     metric_t metric_;

-    using vectors_tape_allocator_t = memory_mapping_allocator_gt<8>;
-    /// @brief Allocator for the copied vectors, aligned to widest double-precision scalars.
-    vectors_tape_allocator_t vectors_tape_allocator_;
-
-    /// @brief For every managed `compressed_slot_t` stores a pointer to the allocated vector copy.
-    mutable std::vector vectors_lookup_;
+    /// @brief The underlying storage provider for this index that determines the file storage layout,
+    ///        implements serialization/deserialization routines, and provides an API to add, update, and
+    ///        retrieve vectors and HNSW graph nodes.
+    storage_t storage_{config_};

-    /// @brief Originally forms and array of integers [0, threads], marking all
+    /// @brief Originally forms an array of integers [0, threads], marking all of them as available.
     mutable std::vector available_threads_;

     /// @brief Mutex, controlling concurrent access to `available_threads_`.
@@ -437,20 +436,21 @@ class index_dense_gt {

     index_dense_gt() = default;
     index_dense_gt(index_dense_gt&& other)
-        : config_(std::move(other.config_)),
-
+        : config_(std::move(other.config_)), //
           typed_(exchange(other.typed_, nullptr)),     //
           cast_buffer_(std::move(other.cast_buffer_)), //
           casts_(std::move(other.casts_)),             //
           metric_(std::move(other.metric_)),           //
-
-          vectors_tape_allocator_(std::move(other.vectors_tape_allocator_)), //
-          vectors_lookup_(std::move(other.vectors_lookup_)),                 //
+          storage_(std::move(other.storage_)),         //
           available_threads_(std::move(other.available_threads_)), //
           slot_lookup_(std::move(other.slot_lookup_)), //
           free_keys_(std::move(other.free_keys_)),     //
-          free_key_(std::move(other.free_key_)) {} //
+          free_key_(std::move(other.free_key_)) {
+        // Could do this in the _proxy pattern to avoid this
+        // The problem will also go away if/when we make typed_ not do any allocations
+        typed_->reset_storage(&storage_);
+    } //

     index_dense_gt& operator=(index_dense_gt&& other) {
         swap(other);
@@ -468,14 +468,17 @@ class index_dense_gt {
         std::swap(cast_buffer_, other.cast_buffer_);
         std::swap(casts_, other.casts_);
         std::swap(metric_, other.metric_);
-
-        std::swap(vectors_tape_allocator_, other.vectors_tape_allocator_);
-        std::swap(vectors_lookup_, other.vectors_lookup_);
+        std::swap(storage_, other.storage_);
         std::swap(available_threads_, other.available_threads_);
         std::swap(slot_lookup_, other.slot_lookup_);
         std::swap(free_keys_, other.free_keys_);
         std::swap(free_key_, other.free_key_);
+        // Could do this in the _proxy pattern to avoid this
+        // The problem will also go away if/when we make typed_ not do any allocations
+        typed_->reset_storage(&storage_);
+        if (other.typed_)
+            other.typed_->reset_storage(&other.storage_);
     }

     ~index_dense_gt() {
@@ -513,7 +516,8 @@ class index_dense_gt {
         // Available since C11, but only C++17, so we use the C version.
         index_t* raw = index_allocator_t{}.allocate(1);
-        new (raw) index_t(config);
+        result.storage_ = storage_t(config);
+        new (raw) index_t(&result.storage_, config);
         result.typed_ = raw;
         return result;
     }
@@ -580,11 +584,9 @@ class index_dense_gt {
     * @see `serialized_length` for the length of the binary serialized representation.
*/ std::size_t memory_usage() const { - return // - typed_->memory_usage(0) + // - typed_->tape_allocator().total_wasted() + // - typed_->tape_allocator().total_reserved() + // - vectors_tape_allocator_.total_allocated(); + size_t res = typed_->memory_usage(0); + // todo:: add some memory_usage() interface to storage_ + return res; } static constexpr std::size_t any_thread() { return std::numeric_limits::max(); } @@ -646,9 +648,9 @@ class index_dense_gt { return result; key_and_slot_t a_key_and_slot = *a_it; - byte_t const* a_vector = vectors_lookup_[a_key_and_slot.slot]; + byte_t const* a_vector = storage_.get_vector_at(a_key_and_slot.slot); key_and_slot_t b_key_and_slot = *b_it; - byte_t const* b_vector = vectors_lookup_[b_key_and_slot.slot]; + byte_t const* b_vector = storage_.get_vector_at(b_key_and_slot.slot); distance_t a_b_distance = metric_(a_vector, b_vector); result.mean = result.min = result.max = a_b_distance; @@ -670,10 +672,10 @@ class index_dense_gt { while (a_range.first != a_range.second) { key_and_slot_t a_key_and_slot = *a_range.first; - byte_t const* a_vector = vectors_lookup_[a_key_and_slot.slot]; + byte_t const* a_vector = storage_.get_vector_at(a_key_and_slot.slot); while (b_range.first != b_range.second) { key_and_slot_t b_key_and_slot = *b_range.first; - byte_t const* b_vector = vectors_lookup_[b_key_and_slot.slot]; + byte_t const* b_vector = storage_.get_vector_at(b_key_and_slot.slot); distance_t a_b_distance = metric_(a_vector, b_vector); result.mean += a_b_distance; @@ -713,7 +715,7 @@ class index_dense_gt { // Find the closest cluster for any vector under that key. while (key_range.first != key_range.second) { key_and_slot_t key_and_slot = *key_range.first; - byte_t const* vector_data = vectors_lookup_[key_and_slot.slot]; + byte_t const* vector_data = storage_.get_vector_at(key_and_slot.slot); cluster_result_t new_result = typed_->cluster(vector_data, level, metric, cluster_config, allow); if (!new_result) return new_result; @@ -730,10 +732,12 @@ class index_dense_gt { * @return `true` if the memory reservation was successful, `false` otherwise. */ bool reserve(index_limits_t limits) { + // todo:: ask-Ashot this seems to allow search() and add() on the dense index, concurrent to this reserve + // But that is not safe on typed_ as typed_->reserve() reallocates the lock buffer, discarding the old one + // without checking if anything is locked { unique_lock_t lock(slot_lookup_mutex_); slot_lookup_.reserve(limits.members); - vectors_lookup_.resize(limits.members); } return typed_->reserve(limits); } @@ -748,11 +752,10 @@ class index_dense_gt { unique_lock_t lookup_lock(slot_lookup_mutex_); std::unique_lock free_lock(free_keys_mutex_); + // storage_ cleared by typed_ todo:: is this confusing? typed_->clear(); slot_lookup_.clear(); - vectors_lookup_.clear(); free_keys_.clear(); - vectors_tape_allocator_.reset(); } /** @@ -767,11 +770,10 @@ class index_dense_gt { std::unique_lock free_lock(free_keys_mutex_); std::unique_lock available_threads_lock(available_threads_mutex_); + // storage is reset by typed_ typed_->reset(); slot_lookup_.clear(); - vectors_lookup_.clear(); free_keys_.clear(); - vectors_tape_allocator_.reset(); // Reset the thread IDs. 
available_threads_.resize(std::thread::hardware_concurrency()); @@ -787,41 +789,9 @@ class index_dense_gt { progress_at&& progress = {}) const { serialization_result_t result; - std::uint64_t matrix_rows = 0; - std::uint64_t matrix_cols = 0; - - // We may not want to put the vectors into the same file - if (!config.exclude_vectors) { - // Save the matrix size - if (!config.use_64_bit_dimensions) { - std::uint32_t dimensions[2]; - dimensions[0] = static_cast(typed_->size()); - dimensions[1] = static_cast(metric_.bytes_per_vector()); - if (!output(&dimensions, sizeof(dimensions))) - return result.failed("Failed to serialize into stream"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } else { - std::uint64_t dimensions[2]; - dimensions[0] = static_cast(typed_->size()); - dimensions[1] = static_cast(metric_.bytes_per_vector()); - if (!output(&dimensions, sizeof(dimensions))) - return result.failed("Failed to serialize into stream"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } - - // Dump the vectors one after another - for (std::uint64_t i = 0; i != matrix_rows; ++i) { - byte_t* vector = vectors_lookup_[i]; - if (!output(vector, matrix_cols)) - return result.failed("Failed to serialize into stream"); - } - } - - // Augment metadata + index_dense_head_buffer_t buffer; + // Prepare opaque header for Storage { - index_dense_head_buffer_t buffer; std::memset(buffer, 0, sizeof(buffer)); index_dense_head_t head{buffer}; std::memcpy(buffer, default_magic(), std::strlen(default_magic())); @@ -842,11 +812,11 @@ class index_dense_gt { head.count_deleted = typed_->size() - size(); head.dimensions = dimensions(); head.multi = multi(); - - if (!output(&buffer, sizeof(buffer))) - return result.failed("Failed to serialize into stream"); } + // save vectors and metadata to storage + storage_.save_vectors_to_stream(output, metric_.bytes_per_vector(), typed_->size(), buffer, config); + // Save the actual proximity graph return typed_->save_to_stream(std::forward(output), std::forward(progress)); } @@ -875,46 +845,16 @@ class index_dense_gt { serialization_config_t config = {}, // progress_at&& progress = {}) { - // Discard all previous memory allocations of `vectors_tape_allocator_` + // Discard all previous memory allocations of reset(); // Infer the new index size serialization_result_t result; - std::uint64_t matrix_rows = 0; - std::uint64_t matrix_cols = 0; - - // We may not want to load the vectors from the same file, or allow attaching them afterwards - if (!config.exclude_vectors) { - // Save the matrix size - if (!config.use_64_bit_dimensions) { - std::uint32_t dimensions[2]; - if (!input(&dimensions, sizeof(dimensions))) - return result.failed("Failed to read 32-bit dimensions of the matrix"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } else { - std::uint64_t dimensions[2]; - if (!input(&dimensions, sizeof(dimensions))) - return result.failed("Failed to read 64-bit dimensions of the matrix"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } - // Load the vectors one after another - vectors_lookup_.resize(matrix_rows); - for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) { - byte_t* vector = vectors_tape_allocator_.allocate(matrix_cols); - if (!input(vector, matrix_cols)) - return result.failed("Failed to read vectors"); - vectors_lookup_[slot] = vector; - } - } + index_dense_head_buffer_t buffer; + storage_.load_vectors_from_stream(input, buffer, config); // Load metadata and choose the right metric { - 
index_dense_head_buffer_t buffer; - if (!input(buffer, sizeof(buffer))) - return result.failed("Failed to read the index "); - index_dense_head_t head{buffer}; if (std::memcmp(buffer, default_magic(), std::strlen(default_magic())) != 0) return result.failed("Magic header mismatch - the file isn't an index"); @@ -937,8 +877,6 @@ class index_dense_gt { result = typed_->load_from_stream(std::forward(input), std::forward(progress)); if (!result) return result; - if (typed_->size() != static_cast(matrix_rows)) - return result.failed("Index size and the number of vectors doesn't match"); reindex_keys_(); return result; @@ -955,50 +893,16 @@ class index_dense_gt { std::size_t offset = 0, serialization_config_t config = {}, // progress_at&& progress = {}) { - // Discard all previous memory allocations of `vectors_tape_allocator_` + // Discard all previous memory allocations. reset(); - - serialization_result_t result = file.open_if_not(); + serialization_result_t result; + // Note that buffer and offset are passed by reference + index_dense_head_buffer_t buffer; + result = storage_.view_vectors_from_file(file, buffer, offset, config); if (!result) return result; - - // Infer the new index size - std::uint64_t matrix_rows = 0; - std::uint64_t matrix_cols = 0; - span_punned_t vectors_buffer; - - // We may not want to fetch the vectors from the same file, or allow attaching them afterwards - if (!config.exclude_vectors) { - // Save the matrix size - if (!config.use_64_bit_dimensions) { - std::uint32_t dimensions[2]; - if (file.size() - offset < sizeof(dimensions)) - return result.failed("File is corrupted and lacks matrix dimensions"); - std::memcpy(&dimensions, file.data() + offset, sizeof(dimensions)); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - offset += sizeof(dimensions); - } else { - std::uint64_t dimensions[2]; - if (file.size() - offset < sizeof(dimensions)) - return result.failed("File is corrupted and lacks matrix dimensions"); - std::memcpy(&dimensions, file.data() + offset, sizeof(dimensions)); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - offset += sizeof(dimensions); - } - vectors_buffer = {file.data() + offset, static_cast(matrix_rows * matrix_cols)}; - offset += vectors_buffer.size(); - } - // Load metadata and choose the right metric { - index_dense_head_buffer_t buffer; - if (file.size() - offset < sizeof(buffer)) - return result.failed("File is corrupted and lacks a header"); - - std::memcpy(buffer, file.data() + offset, sizeof(buffer)); - index_dense_head_t head{buffer}; if (std::memcmp(buffer, default_magic(), std::strlen(default_magic())) != 0) return result.failed("Magic header mismatch - the file isn't an index"); @@ -1015,21 +919,12 @@ class index_dense_gt { metric_ = metric_t(head.dimensions, head.kind_metric, head.kind_scalar); config_.multi = head.multi; - offset += sizeof(buffer); } // Pull the actual proximity graph - result = typed_->view(std::move(file), offset, std::forward(progress)); + result = typed_->view_internal(std::move(file), offset, std::forward(progress)); if (!result) return result; - if (typed_->size() != static_cast(matrix_rows)) - return result.failed("Index size and the number of vectors doesn't match"); - - // Address the vectors - vectors_lookup_.resize(matrix_rows); - if (!config.exclude_vectors) - for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) - vectors_lookup_[slot] = (byte_t*)vectors_buffer.data() + matrix_cols * slot; reindex_keys_(); return result; @@ -1353,17 +1248,19 @@ class index_dense_gt { 
copy.free_keys_.push(free_keys_[i]); // Allocate buffers and move the vectors themselves - if (!config.force_vector_copy && copy.config_.exclude_vectors) - copy.vectors_lookup_ = vectors_lookup_; - else { - copy.vectors_lookup_.resize(vectors_lookup_.size()); - for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) - copy.vectors_lookup_[slot] = copy.vectors_tape_allocator_.allocate(copy.metric_.bytes_per_vector()); - if (std::count(copy.vectors_lookup_.begin(), copy.vectors_lookup_.end(), nullptr)) - return result.failed("Out of memory!"); - for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) - std::memcpy(copy.vectors_lookup_[slot], vectors_lookup_[slot], metric_.bytes_per_vector()); - } + // if (!config.force_vector_copy && copy.config_.exclude_vectors) + // copy.vectors_lookup_ = vectors_lookup_; + // else { + // copy.vectors_lookup_.resize(vectors_lookup_.size()); + // for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) + // copy.vectors_lookup_[slot] = + // copy.vectors_tape_allocator_.allocate(copy.metric_.bytes_per_vector()); + // if (std::count(copy.vectors_lookup_.begin(), copy.vectors_lookup_.end(), nullptr)) + // return result.failed("Out of memory!"); + // for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) + // std::memcpy(copy.vectors_lookup_[slot], vectors_lookup_[slot], metric_.bytes_per_vector()); + // } + assert(false); copy.slot_lookup_ = slot_lookup_; *copy.typed_ = std::move(typed_result.index); @@ -1428,263 +1325,6 @@ class index_dense_gt { return result; } - class values_proxy_t { - index_dense_gt const* index_; - - public: - values_proxy_t(index_dense_gt const& index) noexcept : index_(&index) {} - byte_t const* operator[](compressed_slot_t slot) const noexcept { return index_->vectors_lookup_[slot]; } - byte_t const* operator[](member_citerator_t it) const noexcept { return index_->vectors_lookup_[get_slot(it)]; } - }; - - /** - * @brief Performs compaction on the index, pruning links to removed entries. - * @param executor The executor parallel processing. Default ::dummy_executor_t single-threaded. - * @param progress The progress tracker instance to use. Default ::dummy_progress_t reports nothing. - * @return The ::compaction_result_t indicating the result of the compaction operation. - * `result.pruned_edges` will contain the number of edges that were removed. - * `result.error` will contain an error message if an error occurred during the compaction operation. 
- */ - template - compaction_result_t compact(executor_at&& executor = executor_at{}, progress_at&& progress = progress_at{}) { - compaction_result_t result; - - std::vector new_vectors_lookup(vectors_lookup_.size()); - vectors_tape_allocator_t new_vectors_allocator; - - auto track_slot_change = [&](vector_key_t, compressed_slot_t old_slot, compressed_slot_t new_slot) { - byte_t* new_vector = new_vectors_allocator.allocate(metric_.bytes_per_vector()); - byte_t* old_vector = vectors_lookup_[old_slot]; - std::memcpy(new_vector, old_vector, metric_.bytes_per_vector()); - new_vectors_lookup[new_slot] = new_vector; - }; - typed_->compact(values_proxy_t{*this}, metric_proxy_t{*this}, track_slot_change, - std::forward(executor), std::forward(progress)); - vectors_lookup_ = std::move(new_vectors_lookup); - vectors_tape_allocator_ = std::move(new_vectors_allocator); - return result; - } - - template < // - typename man_to_woman_at = dummy_key_to_key_mapping_t, // - typename woman_to_man_at = dummy_key_to_key_mapping_t, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // - > - join_result_t join( // - index_dense_gt const& women, // - index_join_config_t config = {}, // - man_to_woman_at&& man_to_woman = man_to_woman_at{}, // - woman_to_man_at&& woman_to_man = woman_to_man_at{}, // - executor_at&& executor = executor_at{}, // - progress_at&& progress = progress_at{}) const { - - index_dense_gt const& men = *this; - return unum::usearch::join( // - *men.typed_, *women.typed_, // - values_proxy_t{men}, values_proxy_t{women}, // - metric_proxy_t{men}, metric_proxy_t{women}, // - config, // - std::forward(man_to_woman), // - std::forward(woman_to_man), // - std::forward(executor), // - std::forward(progress)); - } - - struct clustering_result_t { - error_t error{}; - std::size_t clusters{}; - std::size_t visited_members{}; - std::size_t computed_distances{}; - - explicit operator bool() const noexcept { return !error; } - clustering_result_t failed(error_t message) noexcept { - error = std::move(message); - return std::move(*this); - } - }; - - /** - * @brief Implements clustering, classifying the given objects (vectors of member keys) - * into a given number of clusters. - * - * @param[in] queries_begin Iterator pointing to the first query. - * @param[in] queries_end Iterator pointing to the last query. - * @param[in] executor Thread-pool to execute the job in parallel. - * @param[in] progress Callback to report the execution progress. - * @param[in] config Configuration parameters for clustering. - * - * @param[out] cluster_keys Pointer to the array where the cluster keys will be exported. - * @param[out] cluster_distances Pointer to the array where the distances to those centroids will be exported. - */ - template < // - typename queries_iterator_at, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // - > - clustering_result_t cluster( // - queries_iterator_at queries_begin, // - queries_iterator_at queries_end, // - index_dense_clustering_config_t config, // - vector_key_t* cluster_keys, // - distance_t* cluster_distances, // - executor_at&& executor = executor_at{}, // - progress_at&& progress = progress_at{}) { - - std::size_t const queries_count = queries_end - queries_begin; - - // Find the first level (top -> down) that has enough nodes to exceed `config.min_clusters`. 
- std::size_t level = max_level(); - if (config.min_clusters) { - for (; level > 1; --level) { - if (stats(level).nodes > config.min_clusters) - break; - } - } else - level = 1, config.max_clusters = stats(1).nodes, config.min_clusters = 2; - - clustering_result_t result; - if (max_level() < 2) - return result.failed("Index too small to cluster!"); - - // A structure used to track the popularity of a specific cluster - struct cluster_t { - vector_key_t centroid; - vector_key_t merged_into; - std::size_t popularity; - byte_t* vector; - }; - - auto centroid_id = [](cluster_t const& a, cluster_t const& b) { return a.centroid < b.centroid; }; - auto higher_popularity = [](cluster_t const& a, cluster_t const& b) { return a.popularity > b.popularity; }; - - std::atomic visited_members(0); - std::atomic computed_distances(0); - std::atomic atomic_error{nullptr}; - - using dynamic_allocator_traits_t = std::allocator_traits; - using clusters_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt clusters(queries_count); - if (!clusters) - return result.failed("Out of memory!"); - - map_to_clusters: - // Concurrently perform search until a certain depth - executor.dynamic(queries_count, [&](std::size_t thread_idx, std::size_t query_idx) { - auto result = cluster(queries_begin[query_idx], level, thread_idx); - if (!result) { - atomic_error = result.error.release(); - return false; - } - - cluster_keys[query_idx] = result.cluster.member.key; - cluster_distances[query_idx] = result.cluster.distance; - - // Export in case we need to refine afterwards - clusters[query_idx].centroid = result.cluster.member.key; - clusters[query_idx].vector = vectors_lookup_[result.cluster.member.slot]; - clusters[query_idx].merged_into = free_key(); - clusters[query_idx].popularity = 1; - - visited_members += result.visited_members; - computed_distances += result.computed_distances; - return true; - }); - - if (atomic_error) - return result.failed(atomic_error.load()); - - // Now once we have identified the closest clusters, - // we can try reducing their quantity, refining - std::sort(clusters.begin(), clusters.end(), centroid_id); - - // Transform into run-length encoding, computing the number of unique clusters - std::size_t unique_clusters = 0; - { - std::size_t last_idx = 0; - for (std::size_t current_idx = 1; current_idx != clusters.size(); ++current_idx) { - if (clusters[last_idx].centroid == clusters[current_idx].centroid) { - clusters[last_idx].popularity++; - } else { - last_idx++; - clusters[last_idx] = clusters[current_idx]; - } - } - unique_clusters = last_idx + 1; - } - - // In some cases the queries may be co-located, all mapping into the same cluster on that - // level. In that case we refine the granularity and dive deeper into clusters: - if (unique_clusters < config.min_clusters && level > 1) { - level--; - goto map_to_clusters; - } - - std::sort(clusters.data(), clusters.data() + unique_clusters, higher_popularity); - - // If clusters are too numerous, merge the ones that are too close to each other. 
- std::size_t merge_cycles = 0; - merge_nearby_clusters: - if (unique_clusters > config.max_clusters) { - - cluster_t& merge_source = clusters[unique_clusters - 1]; - std::size_t merge_target_idx = 0; - distance_t merge_distance = std::numeric_limits::max(); - - for (std::size_t candidate_idx = 0; candidate_idx + 1 < unique_clusters; ++candidate_idx) { - distance_t distance = metric_(merge_source.vector, clusters[candidate_idx].vector); - if (distance < merge_distance) { - merge_distance = distance; - merge_target_idx = candidate_idx; - } - } - - merge_source.merged_into = clusters[merge_target_idx].centroid; - clusters[merge_target_idx].popularity += exchange(merge_source.popularity, 0); - - // The target object may have to be swapped a few times to get to optimal position. - while (merge_target_idx && - clusters[merge_target_idx - 1].popularity < clusters[merge_target_idx].popularity) - std::swap(clusters[merge_target_idx - 1], clusters[merge_target_idx]), --merge_target_idx; - - unique_clusters--; - merge_cycles++; - goto merge_nearby_clusters; - } - - // Replace evicted clusters - if (merge_cycles) { - // Sort dropped clusters by name to accelerate future lookups - auto clusters_end = clusters.data() + config.max_clusters + merge_cycles; - std::sort(clusters.data(), clusters_end, centroid_id); - - executor.dynamic(queries_count, [&](std::size_t thread_idx, std::size_t query_idx) { - vector_key_t& cluster_key = cluster_keys[query_idx]; - distance_t& cluster_distance = cluster_distances[query_idx]; - - // Recursively trace replacements of that cluster - while (true) { - // To avoid implementing heterogeneous comparisons, lets wrap the `cluster_key` - cluster_t updated_cluster; - updated_cluster.centroid = cluster_key; - updated_cluster = *std::lower_bound(clusters.data(), clusters_end, updated_cluster, centroid_id); - if (updated_cluster.merged_into == free_key()) - break; - cluster_key = updated_cluster.merged_into; - } - - cluster_distance = distance_between(cluster_key, queries_begin[query_idx], thread_idx).mean; - return true; - }); - } - - result.computed_distances = computed_distances; - result.visited_members = visited_members; - - (void)progress; - return result; - } - private: struct thread_lock_t { index_dense_gt const& parent; @@ -1745,12 +1385,7 @@ class index_dense_gt { auto on_success = [&](member_ref_t member) { unique_lock_t slot_lock(slot_lookup_mutex_); slot_lookup_.try_emplace(key_and_slot_t{key, static_cast(member.slot)}); - if (copy_vector) { - if (!reuse_node) - vectors_lookup_[member.slot] = vectors_tape_allocator_.allocate(metric_.bytes_per_vector()); - std::memcpy(vectors_lookup_[member.slot], vector_data, metric_.bytes_per_vector()); - } else - vectors_lookup_[member.slot] = (byte_t*)vector_data; + storage_.set_vector_at(member.slot, vector_data, metric_.bytes_per_vector(), copy_vector, reuse_node); }; index_update_config_t update_config; @@ -1839,7 +1474,7 @@ class index_dense_gt { while (key_range.first != key_range.second) { key_and_slot_t key_and_slot = *key_range.first; - byte_t const* a_vector = vectors_lookup_[key_and_slot.slot]; + byte_t const* a_vector = storage_.get_vector_at(key_and_slot.slot); byte_t const* b_vector = vector_data; distance_t a_b_distance = metric_(a_vector, b_vector); @@ -1900,7 +1535,7 @@ class index_dense_gt { slot = (*it).slot; } // Export the entry - byte_t const* punned_vector = reinterpret_cast(vectors_lookup_[slot]); + byte_t const* punned_vector = reinterpret_cast(storage_.get_vector_at(slot)); bool casted = 
cast(punned_vector, dimensions(), (byte_t*)reconstructed); if (!casted) std::memcpy(reconstructed, punned_vector, metric_.bytes_per_vector()); @@ -1913,7 +1548,7 @@ class index_dense_gt { begin != equal_range_pair.second && count_exported != vectors_limit; ++begin, ++count_exported) { // compressed_slot_t slot = (*begin).slot; - byte_t const* punned_vector = reinterpret_cast(vectors_lookup_[slot]); + byte_t const* punned_vector = reinterpret_cast(storage_.get_vector_at(slot)); byte_t* reconstructed_vector = (byte_t*)reconstructed + metric_.bytes_per_vector() * count_exported; bool casted = cast(punned_vector, dimensions(), reconstructed_vector); if (!casted) diff --git a/include/usearch/index_plugins.hpp b/include/usearch/index_plugins.hpp index 660d9d52..aac30199 100644 --- a/include/usearch/index_plugins.hpp +++ b/include/usearch/index_plugins.hpp @@ -50,8 +50,10 @@ #define SIMSIMD_NATIVE_F16 !USEARCH_USE_FP16LIB #if !defined(SIMSIMD_TARGET_X86_AVX512) && defined(USEARCH_DEFINED_LINUX) +#if defined(__AVX512F__) && defined(__AVX512FP16__) && defined(__AVX512VNNI__) && defined(__AVX512VPOPCNTDQ__) #define SIMSIMD_TARGET_X86_AVX512 1 #endif +#endif #if !defined(SIMSIMD_TARGET_ARM_SVE) && defined(USEARCH_DEFINED_LINUX) #define SIMSIMD_TARGET_ARM_SVE 1 diff --git a/include/usearch/std_storage.hpp b/include/usearch/std_storage.hpp new file mode 100644 index 00000000..a1f7c044 --- /dev/null +++ b/include/usearch/std_storage.hpp @@ -0,0 +1,343 @@ + +#pragma once + +#include +#include +#include +#include +#include + +namespace unum { +namespace usearch { + +/** + * @brief A simple Storage implementation that uses standard cpp containers and complies with the usearch storage + *abstraction for HNSW graph and associated vector data + * + * @tparam key_at + * The type of primary objects stored in the index. + * The values, to which those map, are not managed by the same index structure. + * + * @tparam compressed_slot_at + * The smallest unsigned integer type to address indexed elements. + * It is used internally to maximize space-efficiency and is generally + * up-casted to @b `std::size_t` in public interfaces. + * Can be a built-in @b `uint32_t`, `uint64_t`, or our custom @b `uint40_t`. + * Which makes the most sense for 4B+ entry indexes. + * + * @tparam allocator_at + * Potentially different memory allocator for primary allocations of nodes and vectors. + * The allocated buffers may be uninitialized. 
+ * Note that we are using a memory aaligned allocator in place of std::allocator + * Because of scalar_t memory requirements in index_* + * + **/ +template > // +class std_storage_at { + public: + using key_t = key_at; + using node_t = node_at; + using span_bytes_t = span_gt; + + private: + using nodes_t = std::vector; + using vectors_t = std::vector; + + nodes_t nodes_{}; + vectors_t vectors_{}; + precomputed_constants_t pre_{}; + allocator_at allocator_{}; + static_assert(!has_reset(), "reset()-able memory allocators not supported for this storage provider"); + memory_mapped_file_t viewed_file_{}; + mutable std::deque locks_{}; + // the next three are used only in serialization/deserialization routines to know how to serialize vectors + // since this is only for serde/vars are marked mutable to still allow const-ness of saving method interface on + // storage instance + mutable size_t node_count_{}; + mutable size_t vector_size_{}; + // defaulted to true because that is what test.cpp assumes when using this storage directly + mutable bool exclude_vectors_ = true; + // used to maintain proper alignment in stored indexes to make sure view() does not result in misaligned accesses + mutable size_t file_offset_{}; + + // used in place of error handling throughout the class + static void expect(bool must_be_true) { + if (!must_be_true) + throw std::runtime_error("Failed!"); + } + // padding buffer, some prefix of which will be used every time we need padding in the serialization + // of the index. + // Rest of the array will be zeros but we will also never need paddings that large + // The pattern is to help in debugging + constexpr static byte_t padding_buffer[64] = {0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42}; + + template size_t align(T v) const { + return (sizeof(A) - (size_t)v % sizeof(A)) % sizeof(A); + } + + template size_t align4(T v) const { return align(v); } + + public: + std_storage_at(index_config_t config, allocator_at allocator = {}) + : pre_(node_t::precompute_(config)), allocator_(allocator) {} + + inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } + inline byte_t* get_vector_at(std::size_t idx) const noexcept { return vectors_[idx].data(); } + inline size_t node_size_bytes(std::size_t idx) const noexcept { return get_node_at(idx).node_size_bytes(pre_); } + bool is_immutable() const noexcept { return bool(viewed_file_); } + + /* To get a single-threaded implementation of storage with no locking, replace lock_type + * with the following and return dummy_lock{} from node_lock() + * struct dummy_lock { + * // destructor necessary to avoid "unused variable warning" + * // at callcites of node_lock() + * ~dummy_lock() = default; + * }; + * using lock_type = dummy_lock; + */ + using lock_type = std::unique_lock; + + bool reserve(std::size_t count) { + if (count < nodes_.size()) + return true; + nodes_.resize(count); + vectors_.resize(count); + locks_.resize(count); + return true; + } + void clear() noexcept { + if (!is_immutable()) { + std::size_t n = nodes_.size(); + for (std::size_t i = 0; i != n; ++i) { + // we do not know which slots have been filled and which ones - no + // so we iterate over full reserved space + if (nodes_[i]) + node_free(i, nodes_[i]); + } + n = vectors_.size(); + for (std::size_t i = 0; i != n; ++i) { + span_bytes_t v = vectors_[i]; + if (v.data()) { + allocator_.deallocate(v.data(), v.size()); + } + } + } + if (vectors_.data()) + std::fill(vectors_.begin(), vectors_.end(), span_bytes_t{}); + if (nodes_.data()) + 
std::fill(nodes_.begin(), nodes_.end(), node_t{}); + } + void reset() noexcept { clear(); } + + span_bytes_t node_malloc(level_t level) noexcept { + std::size_t node_size = node_t::node_size_bytes(pre_, level); + byte_t* data = (byte_t*)allocator_.allocate(node_size); + return data ? span_bytes_t{data, node_size} : span_bytes_t{}; + } + void node_free(size_t slot, node_t node) { + allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); + nodes_[slot] = node_t{}; + } + node_t node_make(key_at key, level_t level) noexcept { + span_bytes_t node_bytes = node_malloc(level); + if (!node_bytes) + return {}; + + std::memset(node_bytes.data(), 0, node_bytes.size()); + node_t node{(byte_t*)node_bytes.data()}; + node.key(key); + node.level(level); + return node; + } + void node_store(size_t slot, node_t node) noexcept { nodes_[slot] = node; } + void set_vector_at(size_t slot, const byte_t* vector_data, size_t vector_size, bool copy_vector, bool reuse_node) { + + usearch_assert_m(!(reuse_node && !copy_vector), + "Cannot reuse node when not copying as there is no allocation needed"); + if (copy_vector) { + if (!reuse_node) + vectors_[slot] = span_bytes_t{allocator_.allocate(vector_size), vector_size}; + std::memcpy(vectors_[slot].data(), vector_data, vector_size); + } else + vectors_[slot] = span_bytes_t{(byte_t*)vector_data, vector_size}; + } + + allocator_at const& node_allocator() const noexcept { return allocator_; } + + inline lock_type node_lock(std::size_t i) const noexcept { return std::unique_lock(locks_[i]); } + + // serialization + + template + serialization_result_t save_vectors_to_stream(output_callback_at& output, std::uint64_t vector_size_bytes, + std::uint64_t node_count, // + const vectors_metadata_at& metadata_buffer, + serialization_config_t config = {}) const { + expect(!config.use_64_bit_dimensions); + expect(output(metadata_buffer, sizeof(metadata_buffer))); + + file_offset_ = sizeof(metadata_buffer); + vector_size_ = vector_size_bytes; + node_count_ = node_count; + exclude_vectors_ = config.exclude_vectors; + return {}; + } + + template + serialization_result_t save_nodes_to_stream(output_callback_at& output, const index_serialized_header_t& header, + progress_at& = {}) const { + expect(output(&header, sizeof(header))); + expect(output(&vector_size_, sizeof(vector_size_))); + expect(output(&node_count_, sizeof(node_count_))); + file_offset_ += sizeof(header) + sizeof(vector_size_) + sizeof(node_count_); + // Save node levels, for offset calculation + for (std::size_t i = 0; i != header.size; ++i) { + node_t node = get_node_at(i); + level_t level = node.level(); + expect(output(&level, sizeof(level))); + } + + file_offset_ += header.size * sizeof(level_t); + + // After that dump the nodes themselves + for (std::size_t i = 0; i != header.size; ++i) { + span_bytes_t node_bytes = get_node_at(i).node_bytes(pre_); + expect(output(node_bytes.data(), node_bytes.size())); + file_offset_ += node_bytes.size(); + if (!exclude_vectors_) { + // add padding for proper alignment + int16_t padding_size = align4(file_offset_); + expect(output(&padding_buffer, padding_size)); + file_offset_ += padding_size; + byte_t* vector_bytes = get_vector_at(i); + expect(output(vector_bytes, vector_size_)); + file_offset_ += vector_size_; + } + } + return {}; + } + + template + serialization_result_t load_vectors_from_stream(input_callback_at& input, // + vectors_metadata_at& metadata_buffer, + serialization_config_t config = {}) { + expect(!config.use_64_bit_dimensions); + expect(input(metadata_buffer, 
sizeof(metadata_buffer))); + file_offset_ = sizeof(metadata_buffer); + exclude_vectors_ = config.exclude_vectors; + return {}; + } + + template + serialization_result_t load_nodes_from_stream(input_callback_at& input, index_serialized_header_t& header, + progress_at& = {}) noexcept { + byte_t in_padding_buffer[64] = {0}; + expect(input(&header, sizeof(header))); + expect(input(&vector_size_, sizeof(vector_size_))); + expect(input(&node_count_, sizeof(node_count_))); + file_offset_ += sizeof(header) + sizeof(vector_size_) + sizeof(node_count_); + if (!header.size) { + reset(); + return {}; + } + buffer_gt levels(header.size); + expect(levels); + expect(input(levels, header.size * sizeof(level_t))); + expect(reserve(header.size)); + + file_offset_ += header.size * sizeof(level_t); + // Load the nodes + for (std::size_t i = 0; i != header.size; ++i) { + span_bytes_t node_bytes = node_malloc(levels[i]); + expect(input(node_bytes.data(), node_bytes.size())); + file_offset_ += node_bytes.size(); + node_store(i, node_t{node_bytes.data()}); + if (!exclude_vectors_) { + int16_t padding_size = align4(file_offset_); + expect(input(&in_padding_buffer, padding_size)); + file_offset_ += padding_size; + expect(std::memcmp(in_padding_buffer, padding_buffer, padding_size) == 0); + byte_t* vector_bytes = allocator_.allocate(vector_size_); + expect(input(vector_bytes, vector_size_)); + file_offset_ += vector_size_; + set_vector_at(i, vector_bytes, vector_size_, false, false); + } + } + return {}; + } + + template + serialization_result_t view_vectors_from_file( + memory_mapped_file_t& file, // + //// todo!! document that offset is a reference, or better - do not do it this way + vectors_metadata_at& metadata_buffer, std::size_t& offset, serialization_config_t config = {}) { + reset(); + exclude_vectors_ = config.exclude_vectors; + expect(!config.use_64_bit_dimensions); + + expect(bool(file.open_if_not())); + std::memcpy(metadata_buffer, file.data() + offset, sizeof(metadata_buffer)); + file_offset_ = sizeof(metadata_buffer); + offset += sizeof(metadata_buffer); + return {}; + } + + template + serialization_result_t view_nodes_from_file(memory_mapped_file_t file, index_serialized_header_t& header, + std::size_t offset = 0, progress_at& = {}) noexcept { + serialization_result_t result = file.open_if_not(); + std::memcpy(&header, file.data() + offset, sizeof(header)); + offset += sizeof(header); + std::memcpy(&vector_size_, file.data() + offset, sizeof(vector_size_)); + offset += sizeof(vector_size_); + std::memcpy(&node_count_, file.data() + offset, sizeof(node_count_)); + offset += sizeof(node_count_); + if (!header.size) { + reset(); + return result; + } + index_config_t config; + config.connectivity = header.connectivity; + config.connectivity_base = header.connectivity_base; + pre_ = node_t::precompute_(config); + buffer_gt offsets(header.size); + expect(offsets); + misaligned_ptr_gt levels{(byte_t*)file.data() + offset}; + offset += sizeof(level_t) * header.size; + offsets[0u] = offset; + for (std::size_t i = 1; i < header.size; ++i) { + offsets[i] = offsets[i - 1] + node_t::node_size_bytes(pre_, levels[i - 1]); + if (!exclude_vectors_) { + // add room for vector alignment + offsets[i] += align4(offsets[i]); + offsets[i] += vector_size_; + } + } + expect(reserve(header.size)); + + // Rapidly address all the nodes and vectors + for (std::size_t i = 0; i != header.size; ++i) { + node_store(i, node_t{(byte_t*)file.data() + offsets[i]}); + expect(node_size_bytes(i) == node_t::node_size_bytes(pre_, 
levels[i])); + + if (!exclude_vectors_) { + size_t vector_offset = offsets[i] + node_size_bytes(i); + expect(std::memcmp((byte_t*)file.data() + vector_offset, padding_buffer, align4(vector_offset)) == 0); + vector_offset += align4(vector_offset); + + // expect proper alignment + expect(align4(vector_offset) == 0); + expect(align4((byte_t*)file.data() + vector_offset) == 0); + set_vector_at(i, (byte_t*)file.data() + vector_offset, vector_size_, false, false); + } + } + viewed_file_ = std::move(file); + return {}; + } +}; + +using default_std_storage_t = std_storage_at; +ASSERT_VALID_STORAGE(default_std_storage_t); + +} // namespace usearch +} // namespace unum diff --git a/include/usearch/storage.hpp b/include/usearch/storage.hpp new file mode 100644 index 00000000..30f82f35 --- /dev/null +++ b/include/usearch/storage.hpp @@ -0,0 +1,826 @@ +#pragma once + +#include +#include + +namespace unum { +namespace usearch { + +/** + * @brief This macro, `HAS_FUNCTION_TEMPLATE`, is a utility to check at + * compile-time whether a given type (CHECK_AT) has a member function with a specific name (NAME_AK), signature + * (SIGNATURE_AT=return_at(args_at...)), constness (CONST_AK=const|[empty]), and exception specification + * (NOEXCEPT_AK=true|false). + * + * It is based on has_reset_gt template: + * 1. Replace declval with declval to enforce function const-ness + * method: https://stackoverflow.com/questions/30407754/how-to-test-if-a-method-is-const + * 2. Replace .reset with dynamic NAME_AK to support methods with other names + * 3. Add option to enforce noexcept + * method: https://stackoverflow.com/questions/56510130/unit-test-to-check-for-noexcept-property-for-a-c-method + * + * @param[in] CHECK_AT Placeholder type used within the template instantiation to denote the type to be checked. + * @param[in] NAME_AK Name of the member function to be checked for. This name is incorporated in the generated + * structure's name and used in the check. + * @param[in] SIGNATURE_AT Placeholder for the function signature, employed in specializing the template for function + * types. + * @param[in] CONST_AK Indicates if the member function should be a const function. This forms part of the function + * call signature within the check. + * @param[in] NOEXCEPT_AK Indicates if the member function should be noexcept. This affects the check, particularly + * important for ensuring exception safety in certain contexts. + * + * generates a structure structure named `has_##NAME_AK##_gt` with a static constexpr boolean member `value`. This + * member is true if the specified type has a member function that matches the name, signature, constness, and noexcept + * status provided in the macro's arguments. Otherwise, it is false. + * + * @example + * Suppose you have a class `Foo` with that has an interface requirement of a const noexcept member function `bar` that + * returns an `int` and takes a `const double`. 
To enforce the interface requirement, if this function exists, is + * const, and noexcept, you would instantiate the generated template like so: + * ```cpp + * struct Foo { + * // CHECK CATCHES: expected double, got double* + * // int bar(const double*) const noexcept { return 42; } + * // CHECK CATCHES: wrong const-ness + * // int bar(const double) noexcept { return 42; } + * // CHECK CATCHES: wrong excempt-ness + * // int bar(const double) const { return 42; } + * // CHECK CATCHES because required int can be cast to double + * // double bar(const double) const noexcept { return 42; } + * // CHECK CATHCES wrong returned value + * // int* bar(const double) const noexcept { return nullptr; } + * // CHECK CATHCES wrong signature + * // int bar(const double, int) const { return 42; } + * // + * // SUCCESS! the invariant we wanted + * + * int bar(const double) const noexcept { return 42; } + * + * // + * // Some PROBLEMS + * // CHECK **DOES NOT** CATCH. assertion succeeds + * // int bar(const double&) const noexcept { return 42; } + * // CHECK **DOES NOT** CATCH. assertion succeeds + * // int bar(const double&&) const noexcept { return 42; } + * }; + * + * HAS_FUNCTION_TEMPLATE(Foo, bar, int(const double), const, true); + * static_assert(has_bar_gt::value); + * ``` + * If `Foo` indeed has a const noexcept member function `bar` matching this signature, the static assertion succeeds + * Otherwise, it will cause a compile failure + */ +#define HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT, CONST_AK, NOEXCEPT_AK) \ + template struct has_##NAME_AK##_gt { \ + static_assert(std::integral_constant::value, \ + "Second template parameter needs to be of function type."); \ + }; \ + \ + template \ + struct has_##NAME_AK##_gt { \ + private: \ + template \ + static constexpr auto check(at*) -> \ + typename std::is_same().NAME_AK(std::declval()...)), \ + return_at>::type; \ + template static constexpr std::false_type check(...); \ + \ + template static constexpr bool f_is_noexcept(at*) { \ + return noexcept(std::declval().NAME_AK(std::declval()...)); \ + } \ + \ + typedef decltype(check(0)) type; \ + \ + public: /* if NOEXCEPT_AK then f_is_noexcept(0) */ \ + static constexpr bool value = type::value && (!NOEXCEPT_AK || f_is_noexcept(0)); \ + }; + +/** + * This is a wrapper around the macro above that allows getting less cryptic error messages + * in particular, it: + * 1. Wraps the defined template in a unique namespace to avoid collisions. If this ends up being used elsewhere, + * probably it would be worth it to add a __FILE__ prefix to the namespace name as well + * 2. Regarless of the requrement, it runs signature check without taking into account const-ness and exception + * requirement. + * 3. 
Only after the initial signature check succeeds, it takes into acount const and noexcept and runs relevant checks, + * printing descriptive error messages is the constraints are not satisfied + * + * The macro takes the same parameters as the one above + **/ +#define ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, CONST_AK, NOEXCEPT_AK) \ + /************ check function signature without const or noexcept*/ \ + namespace CHECK_AT##__##NAME_AK { \ + HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT, , false) \ + } \ + static_assert(CHECK_AT##__##NAME_AK::has_##NAME_AK##_gt::value, \ + " Function \"" #CHECK_AT "::" #NAME_AK \ + "\" does not exist or does not satisfy storage API signature"); \ + /************ check function signature with const requirement but without noexcept*/ \ + namespace CHECK_AT##__##NAME_AK##_const { \ + HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT, CONST_AK, false) \ + } \ + static_assert(CHECK_AT##__##NAME_AK##_const::has_##NAME_AK##_gt::value, \ + " Function \"" #CHECK_AT "::" #NAME_AK \ + "\" exists but does not satisfy const-requirement of storage API"); \ + /************ check function signature with const and noexcept requirements */ \ + namespace CHECK_AT##__##NAME_AK##_const_noexcept { \ + HAS_FUNCTION_TEMPLATE(CHECK_AT, NAME_AK, SIGNATURE_AT, CONST_AK, NOEXCEPT_AK) \ + } \ + static_assert( \ + !NOEXCEPT_AK || CHECK_AT##__##NAME_AK##_const_noexcept::has_##NAME_AK##_gt::value, \ + " Function \"" #CHECK_AT "::" #NAME_AK "\" exists but does not satisfy noexcept requirement of storage API") + +/** Various commonly used shortcusts for the assertion macro above + * Note: NOCONST in comments indicates intentional lack of const qualifier + **/ +#define ASSERT_HAS_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, /*NOCONST*/, false) +#define ASSERT_HAS_CONST_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, const, false) +#define ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, /*NOCONST*/, true) +#define ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) \ + ASSERT_HAS_FUNCTION_GM(CHECK_AT, NAME_AK, SIGNATURE_AT, const, true) + +#define HAS_FUNCTION(CHECK_AT, NAME_AK, SIGNATURE_AT) has_##NAME_AK##_gt::value + +/** + * @brief An example of what a USearch-Storage-compatible output callback should look like. + * The callback is called to store arbitrarily serialized usearch index data in the underlying + * storage medium managed in the callback implementation + * + */ +struct dummy_output_callback_t { + inline bool operator()(const void* /*source memory*/, std::size_t /*size of the source*/) { return true; } +}; + +/** + * @brief An example of what a USearch-Storage-compatible input callback should look like. 
+ * The callback is called to read arbitrarily serialized usearch index data from the underlying + * storage medium managed in the callback implementation + * + */ +struct dummy_input_callback_t { + inline bool operator()(void* /*destination memory*/, std::size_t /*size of the destination*/) { return true; } +}; + +/** + * @brief A dummy metadata buffer used in serialization/deserialization API checks below + * An actual index implementation might need to keep some app-level constants in here to be serialized on the + * stored index binary, but we do not need its structure for type-checking + * + */ +struct dummy_vectors_metadata_buffer_t {}; + +struct index_dense_serialization_config_t { + // We may not want to fetch the vectors from the same file, or allow attaching them afterwards + bool exclude_vectors = false; + bool use_64_bit_dimensions = false; +}; + +using serialization_config_t = index_dense_serialization_config_t; + +/** + * @brief The macro takes in a usearch Storage-provider type, and makes sure the type provides the necessary interface + * assumed in usearch internals N.B: the validation does notenforce reference argument types properly Validation + *succeeds even when in the sertions below an interface is required to take a reference type but the actual + *implementation takes a copy + **/ +#define ASSERT_VALID_STORAGE(CHECK_AT) \ + ASSERT_HAS_CONST_FUNCTION(CHECK_AT, get_node_at, CHECK_AT::node_t(std::size_t idx)); \ + ASSERT_HAS_CONST_FUNCTION(CHECK_AT, get_vector_at, byte_t*(std::size_t idx)); \ + ASSERT_HAS_CONST_FUNCTION(CHECK_AT, node_size_bytes, std::size_t(std::size_t idx)); \ + ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, is_immutable, bool()); \ + \ + /*Container methods */ \ + ASSERT_HAS_FUNCTION(CHECK_AT, reserve, bool(std::size_t count)); \ + ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, clear, void()); \ + ASSERT_HAS_NOEXCEPT_FUNCTION(CHECK_AT, reset, void()); \ + /*Setters*/ \ + ASSERT_HAS_FUNCTION(CHECK_AT, node_malloc, CHECK_AT::span_bytes_t(level_t level)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, node_free, void(std::size_t slot, CHECK_AT::node_t node)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, node_make, CHECK_AT::node_t(CHECK_AT::key_t key, level_t level)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, node_store, void(std::size_t slot, CHECK_AT::node_t node)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, set_vector_at, \ + void(std::size_t idx, const byte_t* vector_data, std::size_t vector_bytes, bool copy_vector, \ + bool reuse_node)); \ + /*Locking*/ \ + ASSERT_HAS_CONST_NOEXCEPT_FUNCTION(CHECK_AT, node_lock, CHECK_AT::lock_type(std::size_t idx)); \ + /*Save/Restore API enforcement*/ \ + ASSERT_HAS_FUNCTION(CHECK_AT, save_vectors_to_stream, \ + serialization_result_t( \ + dummy_output_callback_t& cb, std::size_t vector_size_bytes, std::uint64_t node_count, \ + const dummy_vectors_metadata_buffer_t& metadata_buffer, serialization_config_t config)); \ + ASSERT_HAS_CONST_FUNCTION(CHECK_AT, save_nodes_to_stream, \ + serialization_result_t(dummy_output_callback_t& cb, \ + const index_serialized_header_t& header, \ + dummy_progress_t& progress)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, load_vectors_from_stream, \ + serialization_result_t(dummy_input_callback_t& cb, \ + const dummy_vectors_metadata_buffer_t& metadata_buffer, \ + serialization_config_t config)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, load_nodes_from_stream, \ + serialization_result_t(dummy_input_callback_t& cb, index_serialized_header_t& header, \ + dummy_progress_t& progress)); \ + \ + /* View from file API*/ \ + ASSERT_HAS_FUNCTION(CHECK_AT, 
view_vectors_from_file, \ + serialization_result_t(memory_mapped_file_t& file, \ + dummy_vectors_metadata_buffer_t& metadata_buffer, std::size_t& offset, \ + serialization_config_t config)); \ + ASSERT_HAS_FUNCTION(CHECK_AT, view_nodes_from_file, \ + serialization_result_t(memory_mapped_file_t file, index_serialized_header_t& metadata_buffer, \ + std::size_t& offset, dummy_progress_t& progress)); \ + static_assert(true, "this is to require a semicolon at the end of macro call") + +/** I initially used this abstract class as a way to enforce storage API but ran into several limitations mentioned + * below I switched to macro+template based approach in the end, but left this around, in case there are ways to work + * around the issues below that I am not aware of. + **/ +template // +class storage_interface { + public: + using key_t = key_at; + using node_t = node_at; + // storage_interface(index_config_t conig, tape_allocator_at allocator = {}); + + struct lock_type; + + // q:: ask-Ashot can I enforce this interface function in inherited storages somehow? + // currently impossible because + // 1. can do virtual constexpr after c++2a + // 2. making this virtual would enforce this particular lock_type struct as the return type, + // and not an equivalently named one in the child class + // I currently enforice it via macros + constexpr inline lock_type node_lock(std::size_t slot) const noexcept; + + virtual inline node_t get_node_at(std::size_t idx) const noexcept = 0; + virtual inline std::size_t node_size_bytes(std::size_t idx) const noexcept = 0; + virtual inline byte_t* get_vector_at(std::size_t idx) const noexcept = 0; + + inline void set_at(std::size_t idx, node_t node, byte_t* vector_data, std::size_t vector_size, bool reuse_node); + + // the following functions take template arguments so cannot be type-enforced via virtual function inheritence + // as far as I can tell. + // virtual void load_vectors_from_stream() = 0; + // virtual void load_nodes_from_stream() = 0; + + // serialization_result_t save_vectors_to_stream() const; + // serialization_result_t save_nodes_to_stream() const; + + // serialization_result_t view_vectors_from_file() const; + // serialization_result_t view_nodes_from_file() const; + + virtual std::size_t size() const noexcept = 0; + virtual bool reserve(std::size_t count) = 0; + virtual void clear() noexcept = 0; + virtual void reset() noexcept = 0; + std::size_t memory_usage(); +}; + +/*Default allocators for storage_v2 */ +using dynamic_allocator_t = aligned_allocator_gt; +using tape_allocator_t = memory_mapping_allocator_gt<64>; +using vectors_tape_allocator_t = memory_mapping_allocator_gt<8>; +/** + * @brief Storage abstraction for HNSW graph and associated vector data + * + * @tparam key_at + * The type of primary objects stored in the index. + * The values, to which those map, are not managed by the same index structure. + * + * @tparam compressed_slot_at + * The smallest unsigned integer type to address indexed elements. + * It is used internally to maximize space-efficiency and is generally + * up-casted to @b `std::size_t` in public interfaces. + * Can be a built-in @b `uint32_t`, `uint64_t`, or our custom @b `uint40_t`. + * Which makes the most sense for 4B+ entry indexes. + * + * @tparam tape_allocator_at + * Potentially different memory allocator for primary allocations of nodes and vectors. + * It would never `deallocate` separate entries, and would only free all the space at once. + * The allocated buffers may be uninitialized. 
+ * + * NOTE: + * The class below used to inherit from storage_interface via: + * class storage_v2 : public storage_interface + * I disabled inheritence for now as interface compatibility is more + * thoroughly enforced via the macros at the beginning of this file + **/ +template // +class storage_v2_at { + public: + using key_t = key_at; + using node_t = node_at; + + private: + using nodes_mutexes_t = bitset_gt; + using dynamic_allocator_traits_t = std::allocator_traits; + using levels_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + using nodes_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + using offsets_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + using vectors_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + using nodes_t = buffer_gt; + // todo:: ask-Ashot: in the older version vectors_lookup_ was using the default vector allocator, + // and not the dynamic_allocator_at that was passed it. + // Can remove this if the previous approach was intentional + // Update (Jan 10): It seems giving vectors_allocator_t as vectors_t + // allocator below only works when CMAKE_HAVE_LIBC_PTHREAD is false + // otherwise, I get a compile error + using vectors_t = std::vector; + + /// @brief C-style array of `node_t` smart-pointers. + // buffer_gt nodes_{}; + + nodes_t nodes_{}; + + /// @brief For every managed `compressed_slot_t` stores a pointer to the allocated vector copy. + vectors_t vectors_lookup_{}; + /// @brief Mutex, that limits concurrent access to `nodes_`. + mutable nodes_mutexes_t nodes_mutexes_{}; + precomputed_constants_t pre_{}; + tape_allocator_at tape_allocator_{}; + /// @brief Allocator for the copied vectors, aligned to widest double-precision scalars. + vectors_allocator_at vectors_allocator_{}; + + std::uint64_t matrix_rows_ = 0; + std::uint64_t matrix_cols_ = 0; + bool vectors_loaded_{}; + memory_mapped_file_t viewed_file_{}; + using tape_allocator_traits_t = std::allocator_traits; + static_assert( // + sizeof(typename tape_allocator_traits_t::value_type) == 1, // + "Tape allocator must allocate separate addressable bytes"); + + struct node_lock_t { + nodes_mutexes_t& mutexes; + std::size_t slot; + inline ~node_lock_t() noexcept { mutexes.atomic_reset(slot); } + }; + + public: + storage_v2_at(index_config_t config, tape_allocator_at tape_allocator = {}) + : pre_(node_t::precompute_(config)), tape_allocator_(tape_allocator) {} + + inline node_t get_node_at(std::size_t idx) const noexcept { return nodes_[idx]; } + // todo:: most of the time this is called for const* vector, maybe add a separate interface for const? 
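    // (Editorial note.) The accessor pair defined next follows the same contract as
    // `std_storage_at::set_vector_at` earlier in this patch; a hypothetical call-site
    // summary of the flag combinations:
    //
    //     set_vector_at(slot, data, bytes, /*copy_vector=*/true,  /*reuse_node=*/false); // allocate a fresh copy
    //     set_vector_at(slot, data, bytes, /*copy_vector=*/true,  /*reuse_node=*/true);  // overwrite the existing copy
    //     set_vector_at(slot, data, bytes, /*copy_vector=*/false, /*reuse_node=*/false); // keep the caller's / mapped pointer
    //
    // `reuse_node == true` with `copy_vector == false` is rejected by the assertion in
    // the function body, since there is no owned allocation to reuse. The `std_storage_at`
    // deserialization paths shown above use the non-copying form (`false, false`).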
+ inline byte_t* get_vector_at(std::size_t idx) const noexcept { return vectors_lookup_[idx]; } + inline void set_vector_at(std::size_t idx, const byte_t* vector_data, std::size_t bytes_per_vector, + bool copy_vector, bool reuse_node) { + usearch_assert_m(!(reuse_node && !copy_vector), + "Cannot reuse node when not copying as there is no allocation needed"); + if (copy_vector) { + if (!reuse_node) + vectors_lookup_[idx] = vectors_allocator_.allocate(bytes_per_vector); + std::memcpy(vectors_lookup_[idx], vector_data, bytes_per_vector); + } else + vectors_lookup_[idx] = (byte_t*)vector_data; + } + + inline size_t node_size_bytes(std::size_t idx) const noexcept { return get_node_at(idx).node_size_bytes(pre_); } + bool is_immutable() const noexcept { return bool(viewed_file_); } + + using lock_type = node_lock_t; + + bool reserve(std::size_t count) { + if (count < nodes_.size() && count < nodes_mutexes_.size()) + return true; + nodes_mutexes_t new_mutexes(count); + nodes_t new_nodes(count); + if (!new_mutexes || !new_nodes) + return false; + if (nodes_) + std::memcpy(new_nodes.data(), nodes_.data(), sizeof(node_t) * nodes_.size()); + + nodes_mutexes_ = std::move(new_mutexes); + nodes_ = std::move(new_nodes); + // todo:: make sure to only reserve this if vectors are not stored externally + // will probably need to pass the fact as storage config parameter + vectors_lookup_.resize(count); + return true; + } + + void clear() noexcept { + if (!viewed_file_) { + if (!has_reset()) { + std::size_t n = nodes_.size(); + for (std::size_t i = 0; i != n; ++i) { + // we do not know which slots have been filled and which ones - no + // so we iterate over full reserved space + if (nodes_[i]) + node_free(i, nodes_[i]); + } + } else + tape_allocator_.deallocate(nullptr, 0); + + if (!has_reset()) { + std::size_t n = vectors_lookup_.size(); + for (std::size_t i = 0; i != n; ++i) { + if (vectors_lookup_[i]) + vectors_allocator_.deallocate(vectors_lookup_[i], matrix_cols_); + } + } else + tape_allocator_.deallocate(nullptr, 0); + } + std::fill(nodes_.begin(), nodes_.end(), node_t{}); + viewed_file_ = {}; + } + + void reset() noexcept { + nodes_mutexes_ = {}; + nodes_ = {}; + + vectors_lookup_.clear(); + vectors_lookup_.shrink_to_fit(); + viewed_file_ = {}; + } + + using span_bytes_t = span_gt; + + span_bytes_t node_malloc(level_t level) noexcept { + std::size_t node_size = node_t::node_size_bytes(pre_, level); + byte_t* data = (byte_t*)tape_allocator_.allocate(node_size); + return data ? 
span_bytes_t{data, node_size} : span_bytes_t{}; + } + void node_free(size_t slot, node_t node) { + tape_allocator_.deallocate(node.tape(), node.node_size_bytes(pre_)); + nodes_[slot] = node_t{}; + } + node_t node_make(key_at key, level_t level) noexcept { + span_bytes_t node_bytes = node_malloc(level); + if (!node_bytes) + return {}; + + std::memset(node_bytes.data(), 0, node_bytes.size()); + node_t node{(byte_t*)node_bytes.data()}; + node.key(key); + node.level(level); + return node; + } + + // node_t node_make_copy_(span_bytes_t old_bytes) noexcept { + // byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); + // if (!data) + // return {}; + // std::memcpy(data, old_bytes.data(), old_bytes.size()); + // return node_t{data}; + // } + + void node_store(size_t slot, node_t node) noexcept { nodes_[slot] = node; } + tape_allocator_at const& node_allocator() const noexcept { return tape_allocator_; } + // dummy lock just to satisfy the interface + constexpr inline lock_type node_lock(std::size_t slot) const noexcept { + while (nodes_mutexes_.atomic_set(slot)) + ; + return {nodes_mutexes_, slot}; + } + +#pragma region Storage Serialization and Deserialization + + /** + * @brief Saves serialized binary index vectors to a stream. + * @param[in] output Output stream to which vectors will be saved to according to this storage format. + * @param[in] metadata_buffer A buffer opaque to Storage, that will be serialized into output stream + * @param[in] config Configuration parameters for imports. + * @return Outcome descriptor explicitly convertible to boolean. + */ + template + serialization_result_t save_vectors_to_stream(output_callback_at& output, std::uint64_t vector_size_bytes, + std::uint64_t node_count, // + const vectors_metadata_at& metadata_buffer, + serialization_config_t config = {}) const { + + serialization_result_t result; + std::uint64_t matrix_rows = 0; + std::uint64_t matrix_cols = 0; + + // We may not want to put the vectors into the same file + if (!config.exclude_vectors) { + // Save the matrix size + if (!config.use_64_bit_dimensions) { + std::uint32_t dimensions[2]; + dimensions[0] = static_cast(node_count); + dimensions[1] = static_cast(vector_size_bytes); + if (!output(&dimensions, sizeof(dimensions))) + return result.failed("Failed to serialize into stream"); + matrix_rows = dimensions[0]; + matrix_cols = dimensions[1]; + } else { + std::uint64_t dimensions[2]; + dimensions[0] = static_cast(node_count); + dimensions[1] = static_cast(vector_size_bytes); + if (!output(&dimensions, sizeof(dimensions))) + return result.failed("Failed to serialize into stream"); + matrix_rows = dimensions[0]; + matrix_cols = dimensions[1]; + } + + // Dump the vectors one after another + for (std::uint64_t i = 0; i != matrix_rows; ++i) { + const byte_t* vector = get_vector_at(i); + if (!output(vector, matrix_cols)) + return result.failed("Failed to serialize into stream"); + } + } + + if (!output(&metadata_buffer, sizeof(metadata_buffer))) + return result.failed("Failed to read the index vector metadata"); + + return result; + } + + /** + * @brief Symmetric to `save_from_stream`, pulls data from a stream. 
+    template <typename output_callback_at, typename progress_at = dummy_progress_t>
+    serialization_result_t save_nodes_to_stream(output_callback_at& output, const index_serialized_header_t& header,
+                                                progress_at& progress = {}) const {
+
+        serialization_result_t result;
+
+        if (!output(&header, sizeof(header)))
+            return result.failed("Failed to serialize the header into stream");
+
+        // Progress status
+        std::size_t processed = 0;
+        std::size_t const total = 2 * header.size;
+
+        // Export the number of levels per node
+        // That is both enough to estimate the overall memory consumption,
+        // and to be able to estimate the offsets of every entry in the file.
+        for (std::size_t i = 0; i != header.size; ++i) {
+            node_t node = get_node_at(i);
+            level_t level = node.level();
+            if (!output(&level, sizeof(level)))
+                return result.failed("Failed to serialize into stream");
+            if (!progress(++processed, total))
+                return result.failed("Terminated by user");
+        }
+
+        // After that dump the nodes themselves
+        for (std::size_t i = 0; i != header.size; ++i) {
+            span_bytes_t node_bytes = get_node_at(i).node_bytes(pre_);
+            if (!output(node_bytes.data(), node_bytes.size()))
+                return result.failed("Failed to serialize into stream");
+            if (!progress(++processed, total))
+                return result.failed("Terminated by user");
+        }
+        return result;
+    }
+
+    /**
+     *  @brief Parses the index vectors from a stream into RAM.
+     *  @param[in] input Input stream from which vectors will be loaded according to this storage format.
+     *  @param[out] metadata_buffer A buffer opaque to Storage, into which previously stored metadata will be
+     *                              loaded from the input stream
+     *  @param[in] config Configuration parameters for imports.
+     *  @return Outcome descriptor explicitly convertible to boolean.
+     */
+    template <typename input_callback_at, typename vectors_metadata_at>
+    serialization_result_t load_vectors_from_stream(input_callback_at& input, //
+                                                    vectors_metadata_at& metadata_buffer,
+                                                    serialization_config_t config = {}) {
+
+        reset();
+
+        // Infer the new index size
+        serialization_result_t result;
+        std::uint64_t matrix_rows = 0;
+        std::uint64_t matrix_cols = 0;
+
+        // We may not want to load the vectors from the same file, or allow attaching them afterwards
+        if (!config.exclude_vectors) {
+            // Read the matrix size
+            if (!config.use_64_bit_dimensions) {
+                std::uint32_t dimensions[2];
+                if (!input(&dimensions, sizeof(dimensions)))
+                    return result.failed("Failed to read 32-bit dimensions of the matrix");
+                matrix_rows = dimensions[0];
+                matrix_cols = dimensions[1];
+            } else {
+                std::uint64_t dimensions[2];
+                if (!input(&dimensions, sizeof(dimensions)))
+                    return result.failed("Failed to read 64-bit dimensions of the matrix");
+                matrix_rows = dimensions[0];
+                matrix_cols = dimensions[1];
+            }
+            // Load the vectors one after another
+            // todo:: most of this logic should move within the storage class
+            if (!reserve(matrix_rows))
+                return result.failed("Out of memory");
+            for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) {
+                byte_t* vector = vectors_allocator_.allocate(matrix_cols);
+                if (!input(vector, matrix_cols))
+                    return result.failed("Failed to read vectors");
+                vectors_lookup_[slot] = vector;
+            }
+            vectors_loaded_ = true;
+        }
+        matrix_rows_ = matrix_rows;
+        matrix_cols_ = matrix_cols;
+
+        if (!input(metadata_buffer, sizeof(metadata_buffer)))
+            return result.failed("Failed to read the index vector metadata");
+
+        return result;
+    }
+
+    /**
+     *  @brief Symmetric to `save_nodes_to_stream`, pulls the nodes from a stream.
+     */
+    template <typename input_callback_at, typename progress_at = dummy_progress_t>
+    serialization_result_t load_nodes_from_stream(input_callback_at& input, index_serialized_header_t& header,
+                                                  progress_at& progress = {}) noexcept {
+
+        serialization_result_t result;
+
+        // Pull basic metadata directly into the return parameter
+        if (!input(&header, sizeof(header)))
+            return result.failed("Failed to pull the header from the stream");
+
+        // We are loading an empty index, no more work to do
+        if (!header.size) {
+            reset();
+            return result;
+        }
+
+        // Allocate some dynamic memory to read all the levels
+        buffer_gt<level_t> levels(header.size);
+        if (!levels)
+            return result.failed("Out of memory");
+        if (!input(levels, header.size * sizeof(level_t)))
+            return result.failed("Failed to pull nodes levels from the stream");
+
+        if (!reserve(header.size)) {
+            reset();
+            return result.failed("Out of memory");
+        }
+
+        // Load the nodes
+        for (std::size_t i = 0; i != header.size; ++i) {
+            span_bytes_t node_bytes = node_malloc(levels[i]);
+            if (!input(node_bytes.data(), node_bytes.size())) {
+                reset();
+                return result.failed("Failed to pull nodes from the stream");
+            }
+            node_store(i, node_t{node_bytes.data()});
+
+            if (!progress(i + 1, header.size))
+                return result.failed("Terminated by user");
+        }
+
+        if (vectors_loaded_ && header.size != static_cast<std::size_t>(matrix_rows_))
+            return result.failed("Index size and the number of vectors don't match");
+        return {};
+    }
+
+    /**
+     *  @brief Views the index vectors from a memory-mapped file, without copying them into RAM.
+     *  @param[in] file Memory mapped file from which vectors will be viewed according to this storage format.
+     *  @param[in,out] offset Position in the file at which the vectors start; advanced past the parsed region.
+     *  @param[out] metadata_buffer A buffer opaque to Storage, into which previously stored metadata will be
+     *                              loaded from the file
+     *  @param[in] config Configuration parameters for imports.
+     *  @return Outcome descriptor explicitly convertible to boolean.
+     */
+    template <typename vectors_metadata_at>
+    serialization_result_t view_vectors_from_file(
+        memory_mapped_file_t& file, //
+        // todo!! document that offset is a reference, or better - do not do it this way
+        vectors_metadata_at& metadata_buffer, std::size_t& offset, serialization_config_t config = {}) {
+
+        reset();
+
+        serialization_result_t result = file.open_if_not();
+        if (!result)
+            return result;
+
+        // Infer the new index size
+        std::uint64_t matrix_rows = 0;
+        std::uint64_t matrix_cols = 0;
+        span_punned_t vectors_buffer;
+
+        // We may not want to fetch the vectors from the same file, or allow attaching them afterwards
+        if (!config.exclude_vectors) {
+            // Read the matrix size
+            if (!config.use_64_bit_dimensions) {
+                std::uint32_t dimensions[2];
+                if (file.size() - offset < sizeof(dimensions))
+                    return result.failed("File is corrupted and lacks matrix dimensions");
+                std::memcpy(&dimensions, file.data() + offset, sizeof(dimensions));
+                matrix_rows = dimensions[0];
+                matrix_cols = dimensions[1];
+                offset += sizeof(dimensions);
+            } else {
+                std::uint64_t dimensions[2];
+                if (file.size() - offset < sizeof(dimensions))
+                    return result.failed("File is corrupted and lacks matrix dimensions");
+                std::memcpy(&dimensions, file.data() + offset, sizeof(dimensions));
+                matrix_rows = dimensions[0];
+                matrix_cols = dimensions[1];
+                offset += sizeof(dimensions);
+            }
+            vectors_buffer = {file.data() + offset, static_cast<std::size_t>(matrix_rows * matrix_cols)};
+            offset += vectors_buffer.size();
+            vectors_loaded_ = true;
+        }
+        matrix_rows_ = matrix_rows;
+        matrix_cols_ = matrix_cols;
+        // q:: how does this work when vectors are excluded?
+        // Address the vectors
+        if (!reserve(matrix_rows))
+            return result.failed("Out of memory");
+        if (!config.exclude_vectors)
+            for (std::uint64_t slot = 0; slot != matrix_rows; ++slot)
+                set_vector_at(slot, vectors_buffer.data() + matrix_cols * slot, matrix_cols, //
+                              false, false);
+
+        if (file.size() - offset < sizeof(metadata_buffer))
+            return result.failed("File is corrupted and lacks a header");
+
+        std::memcpy(metadata_buffer, file.data() + offset, sizeof(metadata_buffer));
+        offset += sizeof(metadata_buffer);
+
+        return result;
+    }
+
+    /**
+     *  @brief Symmetric to `save_nodes_to_stream`, views the nodes directly from a memory-mapped file.
+     */
+    template <typename progress_at = dummy_progress_t>
+    serialization_result_t view_nodes_from_file(memory_mapped_file_t file, index_serialized_header_t& header,
+                                                std::size_t offset = 0, progress_at& progress = {}) noexcept {
+
+        serialization_result_t result = file.open_if_not();
+        if (!result)
+            return result;
+
+        // Pull basic metadata
+        if (file.size() - offset < sizeof(header))
+            return result.failed("File is corrupted and lacks a header");
+        std::memcpy(&header, file.data() + offset, sizeof(header));
+
+        if (!header.size) {
+            reset();
+            return result;
+        }
+
+        // Update pre_ from the stored connectivity settings for correct node_t size calculations below
+        index_config_t config;
+        config.connectivity = header.connectivity;
+        config.connectivity_base = header.connectivity_base;
+        pre_ = node_t::precompute_(config);
+
+        buffer_gt<std::size_t> offsets(header.size);
+
+        if (!offsets)
+            return result.failed("Out of memory");
+
+        // Before mapping levels[] from the file, make sure the file is large enough to contain them
+        if (file.size() - offset < sizeof(header) + header.size * sizeof(level_t))
+            return result.failed("File is corrupted. Unable to parse node levels from file");
+
+        misaligned_ptr_gt<level_t> levels{(byte_t*)file.data() + offset + sizeof(header)};
+        offsets[0u] = offset + sizeof(header) + sizeof(level_t) * header.size;
+
+        for (std::size_t i = 1; i < header.size; ++i)
+            offsets[i] = offsets[i - 1] + node_t::node_size_bytes(pre_, levels[i - 1]);
+
+        std::size_t total_bytes = offsets[header.size - 1] + node_t::node_size_bytes(pre_, levels[header.size - 1]);
+        if (file.size() < total_bytes) {
+            reset();
+            return result.failed("File is corrupted and can't fit all the nodes");
+        }
+
+        if (!reserve(header.size)) {
+            reset();
+            return result.failed("Out of memory");
+        }
+
+        // Rapidly address all the nodes
+        for (std::size_t i = 0; i != header.size; ++i) {
+            node_store(i, node_t{(byte_t*)file.data() + offsets[i]});
+            if (!progress(i + 1, header.size))
+                return result.failed("Terminated by user");
+        }
+        viewed_file_ = std::move(file);
+
+        if (vectors_loaded_ && header.size != static_cast<std::size_t>(matrix_rows_))
+            return result.failed("Index size and the number of vectors don't match");
+
+        return {};
+    }
+
+#pragma endregion
+};
+
+using default_storage_v2_t = storage_v2_at;
+
+ASSERT_VALID_STORAGE(default_storage_v2_t);
+
+} // namespace usearch
+} // namespace unum
diff --git a/simsimd b/simsimd
index f8ff727d..814ae410 160000
--- a/simsimd
+++ b/simsimd
@@ -1 +1 @@
-Subproject commit f8ff727dcddcd142e7e8dece33c43801af96c210
+Subproject commit 814ae4107e91f1794a7abf045301b2859d42706e
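A minimal usage sketch of how the two `save_*_to_stream` entry points above compose into a single-file dump. The `dump_to_file` helper, the 64-byte `metadata` blob, and the reconstructed template parameter lists are assumptions, not part of the patch; only `save_vectors_to_stream`, `save_nodes_to_stream`, `index_serialized_header_t`, `serialization_result_t`, and `dummy_progress_t` come from the code above or existing USearch headers. The callback shape mirrors what the functions expect: `output(pointer, byte_count)` returning a truthy value on success.

```cpp
// Sketch of dumping a populated storage into one file, assuming the relevant
// USearch headers (including the new storage header) are already included and
// `storage_at` is some instantiation of the `storage_v2_at` template above.
#include <cstddef> // `std::size_t`
#include <cstdint> // `std::uint64_t`
#include <cstdio>  // `std::fopen`, `std::fwrite`

template <typename storage_at>
unum::usearch::serialization_result_t dump_to_file( //
    storage_at const& storage, unum::usearch::index_serialized_header_t const& header,
    std::uint64_t bytes_per_vector, char const* path) {

    using namespace unum::usearch;
    serialization_result_t result;

    std::FILE* file = std::fopen(path, "wb");
    if (!file)
        return result.failed("Can't open the output file");

    // Callback contract expected by `save_vectors_to_stream` / `save_nodes_to_stream`:
    // write `length` bytes starting at `begin`, return a truthy value on success.
    auto output = [&](void const* begin, std::size_t length) noexcept {
        return length == 0 || std::fwrite(begin, length, 1, file) == 1;
    };

    // Opaque metadata blob, serialized verbatim right after the vectors.
    std::uint8_t metadata[64] = {};
    dummy_progress_t progress;

    // Vectors go first, then the header, the per-node levels, and the node tapes.
    serialization_result_t vectors_result =
        storage.save_vectors_to_stream(output, bytes_per_vector, header.size, metadata);
    if (!vectors_result) {
        std::fclose(file);
        return vectors_result;
    }
    serialization_result_t nodes_result = storage.save_nodes_to_stream(output, header, progress);
    std::fclose(file);
    return nodes_result;
}
```

The matching read path would pair `load_vectors_from_stream` with `load_nodes_from_stream` over an `input(pointer, byte_count)` callback, or use the `view_*_from_file` pair to address the same data in a memory-mapped file without copying.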