Skip to content
3,482 changes: 2,396 additions & 1,086 deletions Cargo.lock

Large diffs are not rendered by default.

49 changes: 49 additions & 0 deletions docs/src/atomicserver/storage_backend.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
1. Added CLI Arguments (server/src/config.rs:95-116)

- --storage-backends: Comma-separated list of backends to enable (sled, dashmap, rocksdb,
redb, fs)
- --prefer-memory: Prioritize in-memory storage for better performance
- --rocksdb-path: Custom path for RocksDB storage
- --redb-path: Custom path for ReDB storage
- --fs-path: Custom path for filesystem storage

2. Extended Config Struct (server/src/config.rs:215-225)

Added fields to store the storage configuration in the Config struct.

3. Created StorageConfig Struct (lib/src/db.rs:86-111)

New configuration struct with:
- List of enabled backends
- Memory preference flag
- Custom paths for each backend type
- Default configuration (sled + dashmap)

4. Refactored Db::init (lib/src/db.rs:158-165)

- Added init_with_config method that accepts StorageConfig
- Original init method now calls init_with_config with defaults
- Dynamic backend initialization based on configuration
- Respects prefer_memory flag for fastest operator selection

5. Wired Configuration (server/src/appstate.rs:47-56)

Connected server config to database initialization with the new storage configuration.

Usage Examples:

# Use default backends (sled + dashmap)
atomic-server

# Enable specific backends
atomic-server --storage-backends sled,dashmap,rocksdb

# Prefer in-memory storage
atomic-server --prefer-memory

# Custom paths
atomic-server --rocksdb-path /custom/rocksdb --redb-path /custom/redb

# Environment variables
ATOMIC_STORAGE_BACKENDS=sled,dashmap,fs atomic-server
ATOMIC_PREFER_MEMORY=true atomic-server
10 changes: 10 additions & 0 deletions lessons-learned.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Lessons learned from OpenDAL integration and terraphim_persistence pattern

- Consistency first: Switching only reads to OpenDAL while writes remain on Sled breaks read-after-write; dual-write or single-path is required for correctness.
- One abstraction boundary: Use OpenDAL as the single storage interface; let Sled/DashMap/RocksDB be OpenDAL services instead of directly coupling to them.
- Fastest-read via benchmarking: Measuring operator latency at startup and selecting the fastest improves read performance; still need write-all for durability.
- Tokio runtime scope: Avoid constructing runtimes deep inside libraries. Expose async APIs or use appropriate blocking adaptors.
- Migration strategy: Plan backfill and deletion symmetry. When introducing a new backend, provide tools/tests to migrate and keep stores in sync.
- Feature gating services: Keep backend choices behind features to reduce dependency surface and compile times.
- Key normalization: Stable, normalized keys (e.g., `document_<id>.json`) avoid cross-backend issues.
- Testing breadth: Memory-only configs are invaluable for CI; integration tests for each optional backend help catch configuration drift.
6 changes: 6 additions & 0 deletions lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ directories = {version = ">= 2, < 5", optional = true}
html2md = {version = "0.2.14", optional = true}
kuchiki = {version = "0.8.1", optional = true}
lol_html = {version = "0.3.1", optional = true}
opendal = { version = "0.39", features = ["services-sled", "services-dashmap", "services-rocksdb", "services-redb", "services-fs"] }
rand = {version = "0.8"}
regex = "1"
ring = "0.16.19"
Expand All @@ -30,6 +31,8 @@ rio_turtle = {version = "0.8", optional = true}
serde = {version = "1", features = ["derive"]}
serde_json = "1"
sled = {version = "0.34", optional = true, features = ["no_logs"]}
tokio = { version = "1.29.1", features = ["rt-multi-thread"] }
uuid = { version = "1", features = ["v4"] }
toml = {version = "0.7", optional = true}
tracing = "0.1"
ureq = "2"
Expand All @@ -47,3 +50,6 @@ config = ["directories", "toml"]
db = ["sled", "bincode"]
html = ["kuchiki", "lol_html", "html2md"]
rdf = ["rio_api", "rio_turtle"]
persist-rocksdb = ["opendal/services-rocksdb"]
persist-redb = ["opendal/services-redb"]
persist-fs = ["opendal/services-fs"]
62 changes: 46 additions & 16 deletions lib/benches/benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

use atomic_lib::utils::random_string;
use atomic_lib::*;
use criterion::{criterion_group, criterion_main, Criterion};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use std::time::Duration;

fn random_atom() -> Atom {
Atom::new(
Expand All @@ -21,17 +22,28 @@ fn random_resource(atom: &Atom) -> Resource {
}

fn criterion_benchmark(c: &mut Criterion) {
let store = Db::init_temp("bench").unwrap();
let mut g = c.benchmark_group("persistable");
g.sample_size(20);
g.measurement_time(Duration::from_secs(10));

c.bench_function("add_atom_to_index", |b| {
// Ensure db feature is available for benches
#[cfg(not(feature = "db"))]
{
panic!("benchmarks require 'db' feature enabled");
}
#[cfg(feature = "db")]
// Use a unique temp dir for benches and avoid optional backends unless configured
let store = Db::init_temp("bench_persistable").unwrap();

g.bench_function("add_atom_to_index", |b| {
b.iter(|| {
let atom = random_atom();
let resource = random_resource(&random_atom());
store.add_atom_to_index(&atom, &resource).unwrap();
})
});

c.bench_function("add_resource", |b| {
g.bench_function("add_resource", |b| {
b.iter(|| {
let resource = random_resource(&random_atom());
store
Expand All @@ -40,46 +52,64 @@ fn criterion_benchmark(c: &mut Criterion) {
})
});

c.bench_function("resource.save()", |b| {
g.bench_function("resource.save()", |b| {
b.iter(|| {
let mut resource = random_resource(&random_atom());
resource.save(&store).unwrap();
})
});

#[cfg(feature = "db")]
let big_resource = store
.get_resource_extended("https://localhost/collections", false, None)
.get_resource_extended(
"https://localhost/collections",
false,
&atomic_lib::agents::ForAgent::Sudo,
)
.unwrap();

c.bench_function("resource.to_json_ad()", |b| {
g.bench_function("resource.to_json_ad()", |b| {
b.iter(|| {
big_resource.to_json_ad().unwrap();
})
});

c.bench_function("resource.to_json_ld()", |b| {
g.bench_function("resource.to_json_ld()", |b| {
b.iter(|| {
big_resource.to_json_ld(&store).unwrap();
})
});

c.bench_function("resource.to_json()", |b| {
g.bench_function("resource.to_json()", |b| {
b.iter(|| {
big_resource.to_json(&store).unwrap();
})
});

c.bench_function("resource.to_n_triples()", |b| {
b.iter(|| {
big_resource.to_n_triples(&store).unwrap();
})
});
// Skip to_n_triples in this bench configuration

c.bench_function("all_resources()", |b| {
g.bench_function("all_resources()", |b| {
b.iter(|| {
let _all = store.all_resources(false).collect::<Vec<Resource>>();
let _all = black_box(store.all_resources(false).collect::<Vec<Resource>>());
})
});

// Persistable operator benchmarks: write/read single blob via each configured operator
for name in store.persistence_profiles().into_iter() {
let key = format!("bench_{}", name);
let data = vec![0u8; 16 * 1024];
g.bench_function(&format!("op_write_{}", name), |b| {
b.iter(|| {
let _ = store.bench_write(&name, &key, &data);
})
});
g.bench_function(&format!("op_read_{}", name), |b| {
b.iter(|| {
let _ = store.bench_read(&name, &key);
})
});
}
g.finish();
}

criterion_group!(benches, criterion_benchmark);
Expand Down
Loading
Loading