Skip to content
3,482 changes: 2,396 additions & 1,086 deletions Cargo.lock

Large diffs are not rendered by default.

49 changes: 49 additions & 0 deletions docs/src/atomicserver/storage_backend.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
1. Added CLI Arguments (server/src/config.rs:95-116)

- --storage-backends: Comma-separated list of backends to enable (sled, dashmap, rocksdb,
redb, fs)
- --prefer-memory: Prioritize in-memory storage for better performance
- --rocksdb-path: Custom path for RocksDB storage
- --redb-path: Custom path for ReDB storage
- --fs-path: Custom path for filesystem storage

2. Extended Config Struct (server/src/config.rs:215-225)

Added fields to store the storage configuration in the Config struct.

3. Created StorageConfig Struct (lib/src/db.rs:86-111)

New configuration struct with:
- List of enabled backends
- Memory preference flag
- Custom paths for each backend type
- Default configuration (sled + dashmap)

4. Refactored Db::init (lib/src/db.rs:158-165)

- Added init_with_config method that accepts StorageConfig
- Original init method now calls init_with_config with defaults
- Dynamic backend initialization based on configuration
- Respects prefer_memory flag for fastest operator selection

5. Wired Configuration (server/src/appstate.rs:47-56)

Connected server config to database initialization with the new storage configuration.

Usage Examples:

# Use default backends (sled + dashmap)
atomic-server

# Enable specific backends
atomic-server --storage-backends sled,dashmap,rocksdb

# Prefer in-memory storage
atomic-server --prefer-memory

# Custom paths
atomic-server --rocksdb-path /custom/rocksdb --redb-path /custom/redb

# Environment variables
ATOMIC_STORAGE_BACKENDS=sled,dashmap,fs atomic-server
ATOMIC_PREFER_MEMORY=true atomic-server
10 changes: 10 additions & 0 deletions lessons-learned.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Lessons learned from OpenDAL integration and terraphim_persistence pattern

- Consistency first: Switching only reads to OpenDAL while writes remain on Sled breaks read-after-write; dual-write or single-path is required for correctness.
- One abstraction boundary: Use OpenDAL as the single storage interface; let Sled/DashMap/RocksDB be OpenDAL services instead of directly coupling to them.
- Fastest-read via benchmarking: Measuring operator latency at startup and selecting the fastest improves read performance; still need write-all for durability.
- Tokio runtime scope: Avoid constructing runtimes deep inside libraries. Expose async APIs or use appropriate blocking adaptors.
- Migration strategy: Plan backfill and deletion symmetry. When introducing a new backend, provide tools/tests to migrate and keep stores in sync.
- Feature gating services: Keep backend choices behind features to reduce dependency surface and compile times.
- Key normalization: Stable, normalized keys (e.g., `document_<id>.json`) avoid cross-backend issues.
- Testing breadth: Memory-only configs are invaluable for CI; integration tests for each optional backend help catch configuration drift.
6 changes: 6 additions & 0 deletions lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ directories = {version = ">= 2, < 5", optional = true}
html2md = {version = "0.2.14", optional = true}
kuchiki = {version = "0.8.1", optional = true}
lol_html = {version = "0.3.1", optional = true}
opendal = { version = "0.39", features = ["services-sled", "services-dashmap", "services-rocksdb", "services-redb", "services-fs"] }
rand = {version = "0.8"}
regex = "1"
ring = "0.16.19"
Expand All @@ -30,6 +31,8 @@ rio_turtle = {version = "0.8", optional = true}
serde = {version = "1", features = ["derive"]}
serde_json = "1"
sled = {version = "0.34", optional = true, features = ["no_logs"]}
tokio = { version = "1.29.1", features = ["rt-multi-thread"] }
uuid = { version = "1", features = ["v4"] }
toml = {version = "0.7", optional = true}
tracing = "0.1"
ureq = "2"
Expand All @@ -47,3 +50,6 @@ config = ["directories", "toml"]
db = ["sled", "bincode"]
html = ["kuchiki", "lol_html", "html2md"]
rdf = ["rio_api", "rio_turtle"]
persist-rocksdb = ["opendal/services-rocksdb"]
persist-redb = ["opendal/services-redb"]
persist-fs = ["opendal/services-fs"]
62 changes: 46 additions & 16 deletions lib/benches/benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

use atomic_lib::utils::random_string;
use atomic_lib::*;
use criterion::{criterion_group, criterion_main, Criterion};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use std::time::Duration;

fn random_atom() -> Atom {
Atom::new(
Expand All @@ -21,17 +22,28 @@ fn random_resource(atom: &Atom) -> Resource {
}

fn criterion_benchmark(c: &mut Criterion) {
let store = Db::init_temp("bench").unwrap();
let mut g = c.benchmark_group("persistable");
g.sample_size(20);
g.measurement_time(Duration::from_secs(10));

c.bench_function("add_atom_to_index", |b| {
// Ensure db feature is available for benches
#[cfg(not(feature = "db"))]
{
panic!("benchmarks require 'db' feature enabled");
}
#[cfg(feature = "db")]
// Use a unique temp dir for benches and avoid optional backends unless configured
let store = Db::init_temp("bench_persistable").unwrap();

g.bench_function("add_atom_to_index", |b| {
b.iter(|| {
let atom = random_atom();
let resource = random_resource(&random_atom());
store.add_atom_to_index(&atom, &resource).unwrap();
})
});

c.bench_function("add_resource", |b| {
g.bench_function("add_resource", |b| {
b.iter(|| {
let resource = random_resource(&random_atom());
store
Expand All @@ -40,46 +52,64 @@ fn criterion_benchmark(c: &mut Criterion) {
})
});

c.bench_function("resource.save()", |b| {
g.bench_function("resource.save()", |b| {
b.iter(|| {
let mut resource = random_resource(&random_atom());
resource.save(&store).unwrap();
})
});

#[cfg(feature = "db")]
let big_resource = store
.get_resource_extended("https://localhost/collections", false, None)
.get_resource_extended(
"https://localhost/collections",
false,
&atomic_lib::agents::ForAgent::Sudo,
)
.unwrap();

c.bench_function("resource.to_json_ad()", |b| {
g.bench_function("resource.to_json_ad()", |b| {
b.iter(|| {
big_resource.to_json_ad().unwrap();
})
});

c.bench_function("resource.to_json_ld()", |b| {
g.bench_function("resource.to_json_ld()", |b| {
b.iter(|| {
big_resource.to_json_ld(&store).unwrap();
})
});

c.bench_function("resource.to_json()", |b| {
g.bench_function("resource.to_json()", |b| {
b.iter(|| {
big_resource.to_json(&store).unwrap();
})
});

c.bench_function("resource.to_n_triples()", |b| {
b.iter(|| {
big_resource.to_n_triples(&store).unwrap();
})
});
// Skip to_n_triples in this bench configuration

c.bench_function("all_resources()", |b| {
g.bench_function("all_resources()", |b| {
b.iter(|| {
let _all = store.all_resources(false).collect::<Vec<Resource>>();
let _all = black_box(store.all_resources(false).collect::<Vec<Resource>>());
})
});

// Persistable operator benchmarks: write/read single blob via each configured operator
for name in store.persistence_profiles().into_iter() {
let key = format!("bench_{}", name);
let data = vec![0u8; 16 * 1024];
g.bench_function(&format!("op_write_{}", name), |b| {
b.iter(|| {
let _ = store.bench_write(&name, &key, &data);
})
});
g.bench_function(&format!("op_read_{}", name), |b| {
b.iter(|| {
let _ = store.bench_read(&name, &key);
})
});
}
g.finish();
}

criterion_group!(benches, criterion_benchmark);
Expand Down
Loading
Loading