#!/usr/bin/env bash
set -euo pipefail

# Export OpenMemory data from a running Docker container without relying on API endpoints.
# Produces: memories.json + memories.jsonl.gz zipped as memories_export_<USER_ID>.zip
#
# Requirements:
# - docker available locally
# - The target container has Python + SQLAlchemy and access to the same DATABASE_URL it uses in prod
#
# Usage:
#   ./export_openmemory.sh --user-id <USER_ID> [--container <NAME_OR_ID>] [--app-id <UUID>] [--from-date <epoch_secs>] [--to-date <epoch_secs>]
#
# Notes:
# - USER_ID is the external user identifier (e.g., "vikramiyer"), not the internal UUID.
# - If --container is omitted, the script defaults to the container name "openmemory-openmemory-mcp-1".
# - The script writes intermediate files to /tmp inside the container, then docker cp's them out and zips locally.
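
# Example invocation (hypothetical user and date range; the epoch values below
# are 2024-01-01T00:00:00Z and 2024-02-01T00:00:00Z):
#   ./export_openmemory.sh --user-id vikramiyer --from-date 1704067200 --to-date 1706745600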

usage() {
  echo "Usage: $0 --user-id <USER_ID> [--container <NAME_OR_ID>] [--app-id <UUID>] [--from-date <epoch_secs>] [--to-date <epoch_secs>]"
  exit 1
}

USER_ID=""
CONTAINER=""
APP_ID=""
FROM_DATE=""
TO_DATE=""

while [[ $# -gt 0 ]]; do
  case "$1" in
    --user-id) USER_ID="${2:-}"; shift 2 ;;
    --container) CONTAINER="${2:-}"; shift 2 ;;
    --app-id) APP_ID="${2:-}"; shift 2 ;;
    --from-date) FROM_DATE="${2:-}"; shift 2 ;;
    --to-date) TO_DATE="${2:-}"; shift 2 ;;
    -h|--help) usage ;;
    *) echo "Unknown arg: $1"; usage ;;
  esac
done

if [[ -z "${USER_ID}" ]]; then
  echo "ERROR: --user-id is required"
  usage
fi

if [[ -z "${CONTAINER}" ]]; then
  CONTAINER="openmemory-openmemory-mcp-1"
fi

# Verify the container exists and is running
if ! docker ps --format '{{.Names}}' | grep -qx "${CONTAINER}"; then
  echo "ERROR: Container '${CONTAINER}' not found/running. Pass --container <NAME_OR_ID> if different."
  exit 1
fi

# Verify python is available inside the container
if ! docker exec "${CONTAINER}" sh -lc 'command -v python3 >/dev/null 2>&1 || command -v python >/dev/null 2>&1'; then
  echo "ERROR: Python is not available in container ${CONTAINER}"
  exit 1
fi

# Prefer python3; fall back to python
PY_BIN="python3"
if ! docker exec "${CONTAINER}" sh -lc 'command -v python3 >/dev/null 2>&1'; then
  PY_BIN="python"
fi

echo "Using container: ${CONTAINER}"
echo "Exporting data for user_id: ${USER_ID}"

# Run Python inside the container to generate memories.json and memories.jsonl.gz in /tmp.
# The heredoc delimiter is quoted, so the Python below is passed verbatim;
# the filter values reach it via the -e environment variables.
set +e
cat <<'PYCODE' | docker exec -i \
  -e EXPORT_USER_ID="${USER_ID}" \
  -e EXPORT_APP_ID="${APP_ID}" \
  -e EXPORT_FROM_DATE="${FROM_DATE}" \
  -e EXPORT_TO_DATE="${TO_DATE}" \
  "${CONTAINER}" "${PY_BIN}" -
import os
import sys
import json
import gzip
import uuid
import datetime
from typing import Any, Dict, List

try:
    from sqlalchemy import create_engine, text
except Exception as e:
    print(f"ERROR: SQLAlchemy not available inside the container: {e}", file=sys.stderr)
    sys.exit(3)

def _iso(dt):
    """Normalize a datetime (or ISO string) to a UTC ISO-8601 string; None-safe."""
    if dt is None:
        return None
    try:
        if isinstance(dt, str):
            try:
                dt_obj = datetime.datetime.fromisoformat(dt.replace("Z", "+00:00"))
            except Exception:
                return dt
        else:
            dt_obj = dt
        if dt_obj.tzinfo is None:
            dt_obj = dt_obj.replace(tzinfo=datetime.timezone.utc)
        else:
            dt_obj = dt_obj.astimezone(datetime.timezone.utc)
        return dt_obj.isoformat()
    except Exception:
        return None
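
# For reference, a quick sanity check of _iso's behavior:
#   _iso("2024-01-01T12:00:00Z") -> "2024-01-01T12:00:00+00:00"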

def _json_load_maybe(val):
    """Best-effort decode of a JSON column value that may arrive as dict/list, str, or bytes."""
    if isinstance(val, (dict, list)) or val is None:
        return val
    if isinstance(val, (bytes, bytearray)):
        try:
            return json.loads(val.decode("utf-8"))
        except Exception:
            try:
                return val.decode("utf-8", "ignore")
            except Exception:
                return None
    if isinstance(val, str):
        try:
            return json.loads(val)
        except Exception:
            return val
    return val

def _named_in_clause(prefix: str, items: List[Any]):
    """Expand a list into a named-parameter IN clause plus its bind dict."""
    names = [f":{prefix}{i}" for i in range(len(items))]
    params = {f"{prefix}{i}": items[i] for i in range(len(items))}
    return ", ".join(names), params
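
# For example (hypothetical values):
#   _named_in_clause("mid", ["a", "b"])
#   -> (":mid0, :mid1", {"mid0": "a", "mid1": "b"})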

DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./openmemory.db")
user_id_str = os.getenv("EXPORT_USER_ID")
app_id_filter = os.getenv("EXPORT_APP_ID") or None
from_date = os.getenv("EXPORT_FROM_DATE")
to_date = os.getenv("EXPORT_TO_DATE")

if not user_id_str:
    print("Missing EXPORT_USER_ID", file=sys.stderr)
    sys.exit(2)

from_ts = None
to_ts = None
try:
    if from_date:
        from_ts = int(from_date)
    if to_date:
        to_ts = int(to_date)
except Exception:
    pass
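
# For reference: the date filters are unix epoch seconds, e.g.
#   1704067200 == 2024-01-01T00:00:00+00:00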

engine = create_engine(DATABASE_URL)

with engine.connect() as conn:
    user_row = conn.execute(
        text("SELECT id, user_id, name, email, metadata, created_at, updated_at FROM users WHERE user_id = :uid"),
        {"uid": user_id_str}
    ).mappings().first()
    if not user_row:
        print(f'User not found for user_id "{user_id_str}"', file=sys.stderr)
        sys.exit(1)

    user_uuid = user_row["id"]

    # Build memories filter
    params = {"user_id": user_uuid}
    conditions = ["user_id = :user_id"]
    if from_ts is not None:
        params["from_dt"] = datetime.datetime.fromtimestamp(from_ts, tz=datetime.timezone.utc)
        conditions.append("created_at >= :from_dt")
    if to_ts is not None:
        params["to_dt"] = datetime.datetime.fromtimestamp(to_ts, tz=datetime.timezone.utc)
        conditions.append("created_at <= :to_dt")
    if app_id_filter:
        try:
            # Accept a UUID or the raw DB value
            app_uuid = uuid.UUID(app_id_filter)
            params["app_id"] = str(app_uuid)
        except Exception:
            params["app_id"] = app_id_filter
        conditions.append("app_id = :app_id")
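
    # With both dates and an app filter supplied, the WHERE clause ends up as:
    #   user_id = :user_id AND created_at >= :from_dt AND created_at <= :to_dt AND app_id = :app_id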

    mem_sql = f"""
        SELECT id, user_id, app_id, content, metadata, state, created_at, updated_at, archived_at, deleted_at
        FROM memories
        WHERE {' AND '.join(conditions)}
    """
    mem_rows = list(conn.execute(text(mem_sql), params).mappings())
    memory_ids = [r["id"] for r in mem_rows]
    app_ids = sorted({r["app_id"] for r in mem_rows if r["app_id"] is not None})

    # memory_categories
    mc_rows = []
    if memory_ids:
        names, in_params = _named_in_clause("mid", memory_ids)
        mc_rows = list(conn.execute(
            text(f"SELECT memory_id, category_id FROM memory_categories WHERE memory_id IN ({names})"),
            in_params
        ).mappings())

    # categories for referenced category_ids
    cats = []
    cat_ids = sorted({r["category_id"] for r in mc_rows})
    if cat_ids:
        names, in_params = _named_in_clause("cid", cat_ids)
        cats = list(conn.execute(
            text(f"SELECT id, name, description, created_at, updated_at FROM categories WHERE id IN ({names})"),
            in_params
        ).mappings())

    # apps for referenced app_ids
    apps = []
    if app_ids:
        names, in_params = _named_in_clause("aid", app_ids)
        apps = list(conn.execute(
            text(f"SELECT id, owner_id, name, description, metadata, is_active, created_at, updated_at FROM apps WHERE id IN ({names})"),
            in_params
        ).mappings())

    # status history for selected memories
    history = []
    if memory_ids:
        names, in_params = _named_in_clause("hid", memory_ids)
        history = list(conn.execute(
            text(f"SELECT id, memory_id, changed_by, old_state, new_state, changed_at FROM memory_status_history WHERE memory_id IN ({names})"),
            in_params
        ).mappings())

    # access_controls for the apps
    acls = []
    if app_ids:
        names, in_params = _named_in_clause("sid", app_ids)
        acls = list(conn.execute(
            text(f"""SELECT id, subject_type, subject_id, object_type, object_id, effect, created_at
                     FROM access_controls
                     WHERE subject_type = 'app' AND subject_id IN ({names})"""),
            in_params
        ).mappings())

# Build helper maps (rows are fully buffered, so the connection is no longer needed)
app_name_by_id = {r["id"]: r["name"] for r in apps}
app_rec_by_id = {r["id"]: r for r in apps}
cat_name_by_id = {r["id"]: r["name"] for r in cats}
mem_cat_ids_map: Dict[Any, List[Any]] = {}
mem_cat_names_map: Dict[Any, List[str]] = {}
for r in mc_rows:
    mem_cat_ids_map.setdefault(r["memory_id"], []).append(r["category_id"])
    mem_cat_names_map.setdefault(r["memory_id"], []).append(cat_name_by_id.get(r["category_id"], ""))

# Build sqlite-like payload
sqlite_payload = {
    "user": {
        "id": str(user_row["id"]),
        "user_id": user_row["user_id"],
        "name": user_row.get("name"),
        "email": user_row.get("email"),
        "metadata": _json_load_maybe(user_row.get("metadata")),
        "created_at": _iso(user_row.get("created_at")),
        "updated_at": _iso(user_row.get("updated_at")),
    },
    "apps": [
        {
            "id": str(a["id"]),
            "owner_id": str(a["owner_id"]) if a.get("owner_id") else None,
            "name": a["name"],
            "description": a.get("description"),
            "metadata": _json_load_maybe(a.get("metadata")),
            "is_active": bool(a.get("is_active")),
            "created_at": _iso(a.get("created_at")),
            "updated_at": _iso(a.get("updated_at")),
        }
        for a in apps
    ],
    "categories": [
        {
            "id": str(c["id"]),
            "name": c["name"],
            "description": c.get("description"),
            "created_at": _iso(c.get("created_at")),
            "updated_at": _iso(c.get("updated_at")),
        }
        for c in cats
    ],
    "memories": [
        {
            "id": str(m["id"]),
            "user_id": str(m["user_id"]),
            "app_id": str(m["app_id"]) if m.get("app_id") else None,
            "content": m.get("content") or "",
            "metadata": _json_load_maybe(m.get("metadata")) or {},
            "state": m.get("state"),
            "created_at": _iso(m.get("created_at")),
            "updated_at": _iso(m.get("updated_at")),
            "archived_at": _iso(m.get("archived_at")),
            "deleted_at": _iso(m.get("deleted_at")),
            "category_ids": [str(cid) for cid in mem_cat_ids_map.get(m["id"], [])],
        }
        for m in mem_rows
    ],
    "memory_categories": [
        {"memory_id": str(r["memory_id"]), "category_id": str(r["category_id"])}
        for r in mc_rows
    ],
    "status_history": [
        {
            "id": str(h["id"]),
            "memory_id": str(h["memory_id"]),
            "changed_by": str(h["changed_by"]),
            "old_state": h.get("old_state"),
            "new_state": h.get("new_state"),
            "changed_at": _iso(h.get("changed_at")),
        }
        for h in history
    ],
    "access_controls": [
        {
            "id": str(ac["id"]),
            "subject_type": ac.get("subject_type"),
            "subject_id": str(ac["subject_id"]) if ac.get("subject_id") else None,
            "object_type": ac.get("object_type"),
            "object_id": str(ac["object_id"]) if ac.get("object_id") else None,
            "effect": ac.get("effect"),
            "created_at": _iso(ac.get("created_at")),
        }
        for ac in acls
    ],
    "export_meta": {
        "app_id_filter": str(app_id_filter) if app_id_filter else None,
        "from_date": from_ts,
        "to_date": to_ts,
        "version": "1",
        "generated_at": datetime.datetime.now(datetime.timezone.utc).isoformat(),
    },
}

# Write memories.json
out_json = "/tmp/memories.json"
with open(out_json, "w", encoding="utf-8") as f:
    json.dump(sqlite_payload, f, indent=2, ensure_ascii=False)

# Write logical jsonl.gz
out_jsonl_gz = "/tmp/memories.jsonl.gz"
with gzip.open(out_jsonl_gz, "wb") as gz:
    for m in mem_rows:
        record = {
            "id": str(m["id"]),
            "content": m.get("content") or "",
            "metadata": _json_load_maybe(m.get("metadata")) or {},
            "created_at": _iso(m.get("created_at")),
            "updated_at": _iso(m.get("updated_at")),
            "state": m.get("state"),
            "app": app_name_by_id.get(m.get("app_id")) if m.get("app_id") else None,
            "categories": [c for c in mem_cat_names_map.get(m["id"], []) if c],
        }
        gz.write((json.dumps(record, ensure_ascii=False) + "\n").encode("utf-8"))
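
# Each emitted line is one flat record, roughly of the shape (illustrative only):
#   {"id": "...", "content": "...", "metadata": {}, "created_at": "...",
#    "updated_at": "...", "state": "...", "app": "...", "categories": []}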

print(out_json)
print(out_jsonl_gz)
PYCODE
PY_EXIT=$?
set -e
if [[ ${PY_EXIT} -ne 0 ]]; then
  echo "ERROR: Export failed inside container (exit code ${PY_EXIT})"
  exit "${PY_EXIT}"
fi

# Copy files out of the container
TMPDIR="$(mktemp -d)"
docker cp "${CONTAINER}:/tmp/memories.json" "${TMPDIR}/memories.json"
docker cp "${CONTAINER}:/tmp/memories.jsonl.gz" "${TMPDIR}/memories.jsonl.gz"

# Create zip on host
ZIP_NAME="memories_export_${USER_ID}.zip"
if command -v zip >/dev/null 2>&1; then
  (cd "${TMPDIR}" && zip -q -r "../${ZIP_NAME}" "memories.json" "memories.jsonl.gz")
  mv "${TMPDIR}/../${ZIP_NAME}" "./${ZIP_NAME}"
else
  # Fallback: use Python's zipfile module
  python3 - <<PYFALLBACK
import zipfile
zf = zipfile.ZipFile("${ZIP_NAME}", "w", compression=zipfile.ZIP_DEFLATED)
zf.write("${TMPDIR}/memories.json", arcname="memories.json")
zf.write("${TMPDIR}/memories.jsonl.gz", arcname="memories.jsonl.gz")
zf.close()
print("${ZIP_NAME}")
PYFALLBACK
fi

# Clean up the intermediate copies on the host
rm -rf "${TMPDIR}"

echo "Wrote ./${ZIP_NAME}"
echo "Done."
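
# A quick way to sanity-check the archive afterwards (optional):
#   unzip -l "memories_export_<USER_ID>.zip"
#   unzip -p "memories_export_<USER_ID>.zip" memories.jsonl.gz | zcat | head -n 3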