Skip to content

Commit ffe190e

Browse files
VikramIyer125Vikram Iyer
authored andcommitted
Add export_openmemory.sh migration script (mem0ai#3352)
Co-authored-by: Vikram Iyer <[email protected]>
1 parent 9826215 commit ffe190e

File tree

1 file changed

+393
-0
lines changed

1 file changed

+393
-0
lines changed
Lines changed: 393 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,393 @@
#!/usr/bin/env bash
set -euo pipefail

# Export OpenMemory data straight out of a running Docker container (no API calls).
# Output: memories.json + memories.jsonl.gz zipped as memories_export_<USER_ID>.zip
#
# Requirements:
#   - docker available locally
#   - The target container has Python + SQLAlchemy and access to the same DATABASE_URL it uses in prod
#
# Usage:
#   ./export_openmemory.sh --user-id <USER_ID> [--container <NAME_OR_ID>] [--app-id <UUID>] [--from-date <epoch_secs>] [--to-date <epoch_secs>]
#
# Notes:
#   - USER_ID is the external user identifier (e.g., "vikramiyer"), not the internal UUID.
#   - Without --container, the default container name "openmemory-openmemory-mcp-1" is used.
#   - Intermediate files are written to /tmp inside the container, then docker cp'd out and zipped locally.
19+
# Print usage to stderr and exit.
# Optional $1: exit status (defaults to 1 so bad invocations fail; pass 0 for -h/--help).
usage() {
  echo "Usage: $0 --user-id <USER_ID> [--container <NAME_OR_ID>] [--app-id <UUID>] [--from-date <epoch_secs>] [--to-date <epoch_secs>]" >&2
  exit "${1:-1}"
}
23+
24+
# Option defaults; empty string means "not provided".
USER_ID=""
CONTAINER=""
APP_ID=""
FROM_DATE=""
TO_DATE=""

# Consume "--flag value" pairs until the argument list is exhausted.
while (( $# > 0 )); do
  case "$1" in
    --user-id)   USER_ID="${2:-}";   shift 2 ;;
    --container) CONTAINER="${2:-}"; shift 2 ;;
    --app-id)    APP_ID="${2:-}";    shift 2 ;;
    --from-date) FROM_DATE="${2:-}"; shift 2 ;;
    --to-date)   TO_DATE="${2:-}";   shift 2 ;;
    -h|--help)   usage ;;
    *)           echo "Unknown arg: $1"; usage ;;
  esac
done

if [[ -z "${USER_ID}" ]]; then
  echo "ERROR: --user-id is required"
  usage
fi

# Default to the container name docker compose gives the MCP service.
CONTAINER="${CONTAINER:-openmemory-openmemory-mcp-1}"
50+
51+
# Fail fast when the target container is not running.
docker ps --format '{{.Names}}' | grep -qx "${CONTAINER}" || {
  echo "ERROR: Container '${CONTAINER}' not found/running. Pass --container <NAME_OR_ID> if different."
  exit 1
}

# Fail fast when the container has no Python interpreter at all.
docker exec "${CONTAINER}" sh -lc 'command -v python3 >/dev/null 2>&1 || command -v python >/dev/null 2>&1' || {
  echo "ERROR: Python is not available in container ${CONTAINER}"
  exit 1
}

# Prefer python3; fall back to plain "python" when python3 is absent.
PY_BIN="python3"
docker exec "${CONTAINER}" sh -lc 'command -v python3 >/dev/null 2>&1' || PY_BIN="python"

echo "Using container: ${CONTAINER}"
echo "Exporting data for user_id: ${USER_ID}"
70+
71+
# Run Python inside the container to generate memories.json and memories.jsonl.gz in /tmp.
# The single-quoted heredoc delivers the Python source verbatim; shell values travel
# via "docker exec -e" environment variables instead of string interpolation.
set +e
cat <<'PYCODE' | docker exec -i \
  -e EXPORT_USER_ID="${USER_ID}" \
  -e EXPORT_APP_ID="${APP_ID}" \
  -e EXPORT_FROM_DATE="${FROM_DATE}" \
  -e EXPORT_TO_DATE="${TO_DATE}" \
  "${CONTAINER}" "${PY_BIN}" -
import os
import sys
import json
import gzip
import uuid
import datetime
from typing import Any, Dict, List

try:
    from sqlalchemy import create_engine, text
except Exception as e:
    print(f"ERROR: SQLAlchemy not available inside the container: {e}", file=sys.stderr)
    sys.exit(3)


def _iso(dt):
    """Render a datetime (or ISO-8601 string) as a UTC ISO-8601 string.

    Returns None for None/unconvertible input; returns the raw string when a
    string value is not parseable as ISO-8601 (better than losing the data).
    """
    if dt is None:
        return None
    try:
        if isinstance(dt, str):
            try:
                dt_obj = datetime.datetime.fromisoformat(dt.replace("Z", "+00:00"))
            except Exception:
                return dt
        else:
            dt_obj = dt
        if dt_obj.tzinfo is None:
            # Naive timestamps are treated as UTC — TODO confirm against the writer side.
            dt_obj = dt_obj.replace(tzinfo=datetime.timezone.utc)
        else:
            dt_obj = dt_obj.astimezone(datetime.timezone.utc)
        return dt_obj.isoformat()
    except Exception:
        return None


def _json_load_maybe(val):
    """Decode a JSON column value that may arrive as dict/list, bytes, str, or None."""
    if isinstance(val, (dict, list)) or val is None:
        return val
    if isinstance(val, (bytes, bytearray)):
        try:
            return json.loads(val.decode("utf-8"))
        except Exception:
            try:
                return val.decode("utf-8", "ignore")
            except Exception:
                return None
    if isinstance(val, str):
        try:
            return json.loads(val)
        except Exception:
            return val
    return val


def _named_in_clause(prefix: str, items: List[Any]):
    """Build a named-parameter IN clause (":p0, :p1, ...") plus its bind dict."""
    names = [f":{prefix}{i}" for i in range(len(items))]
    params = {f"{prefix}{i}": items[i] for i in range(len(items))}
    return ", ".join(names), params


def _epoch_or_exit(label, raw):
    """Parse an epoch-seconds filter value, or None when absent.

    A malformed value aborts the export with a clear error instead of being
    silently dropped (which would otherwise export the full, unfiltered set).
    """
    if not raw:
        return None
    try:
        return int(raw)
    except ValueError:
        print(f"ERROR: {label} must be an integer epoch timestamp, got: {raw!r}", file=sys.stderr)
        sys.exit(2)


DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./openmemory.db")
user_id_str = os.getenv("EXPORT_USER_ID")
app_id_filter = os.getenv("EXPORT_APP_ID") or None
from_date = os.getenv("EXPORT_FROM_DATE")
to_date = os.getenv("EXPORT_TO_DATE")

if not user_id_str:
    print("Missing EXPORT_USER_ID", file=sys.stderr)
    sys.exit(2)

from_ts = _epoch_or_exit("--from-date", from_date)
to_ts = _epoch_or_exit("--to-date", to_date)

engine = create_engine(DATABASE_URL)

with engine.connect() as conn:
    # Resolve the external user identifier to the internal UUID primary key.
    user_row = conn.execute(
        text("SELECT id, user_id, name, email, metadata, created_at, updated_at FROM users WHERE user_id = :uid"),
        {"uid": user_id_str}
    ).mappings().first()
    if not user_row:
        print(f'User not found for user_id "{user_id_str}"', file=sys.stderr)
        sys.exit(1)

    user_uuid = user_row["id"]

    # Build the memories filter from the optional date/app constraints.
    params = {"user_id": user_uuid}
    conditions = ["user_id = :user_id"]
    if from_ts is not None:
        params["from_dt"] = datetime.datetime.fromtimestamp(from_ts, tz=datetime.timezone.utc)
        conditions.append("created_at >= :from_dt")
    if to_ts is not None:
        params["to_dt"] = datetime.datetime.fromtimestamp(to_ts, tz=datetime.timezone.utc)
        conditions.append("created_at <= :to_dt")
    if app_id_filter:
        try:
            # Accept UUID or raw DB value
            app_uuid = uuid.UUID(app_id_filter)
            params["app_id"] = str(app_uuid)
        except Exception:
            params["app_id"] = app_id_filter
        conditions.append("app_id = :app_id")

    # The WHERE clause is assembled only from the fixed strings above; all values
    # are bound parameters, so the f-string is not an injection vector.
    # ORDER BY keeps the export deterministic across runs.
    mem_sql = f"""
        SELECT id, user_id, app_id, content, metadata, state, created_at, updated_at, archived_at, deleted_at
        FROM memories
        WHERE {' AND '.join(conditions)}
        ORDER BY created_at
    """
    mem_rows = list(conn.execute(text(mem_sql), params).mappings())
    memory_ids = [r["id"] for r in mem_rows]
    app_ids = sorted({r["app_id"] for r in mem_rows if r["app_id"] is not None})

    # memory_categories join rows for the selected memories
    mc_rows = []
    if memory_ids:
        names, in_params = _named_in_clause("mid", memory_ids)
        mc_rows = list(conn.execute(
            text(f"SELECT memory_id, category_id FROM memory_categories WHERE memory_id IN ({names})"),
            in_params
        ).mappings())

    # categories for referenced category_ids
    cats = []
    cat_ids = sorted({r["category_id"] for r in mc_rows})
    if cat_ids:
        names, in_params = _named_in_clause("cid", cat_ids)
        cats = list(conn.execute(
            text(f"SELECT id, name, description, created_at, updated_at FROM categories WHERE id IN ({names})"),
            in_params
        ).mappings())

    # apps for referenced app_ids
    apps = []
    if app_ids:
        names, in_params = _named_in_clause("aid", app_ids)
        apps = list(conn.execute(
            text(f"SELECT id, owner_id, name, description, metadata, is_active, created_at, updated_at FROM apps WHERE id IN ({names})"),
            in_params
        ).mappings())

    # status history for selected memories
    history = []
    if memory_ids:
        names, in_params = _named_in_clause("hid", memory_ids)
        history = list(conn.execute(
            text(f"SELECT id, memory_id, changed_by, old_state, new_state, changed_at FROM memory_status_history WHERE memory_id IN ({names})"),
            in_params
        ).mappings())

    # access_controls granted to the referenced apps
    acls = []
    if app_ids:
        names, in_params = _named_in_clause("sid", app_ids)
        acls = list(conn.execute(
            text(f"""SELECT id, subject_type, subject_id, object_type, object_id, effect, created_at
                     FROM access_controls
                     WHERE subject_type = 'app' AND subject_id IN ({names})"""),
            in_params
        ).mappings())

# Helper lookup maps for denormalizing names into the export.
app_name_by_id = {r["id"]: r["name"] for r in apps}
cat_name_by_id = {r["id"]: r["name"] for r in cats}
mem_cat_ids_map: Dict[Any, List[Any]] = {}
mem_cat_names_map: Dict[Any, List[str]] = {}
for r in mc_rows:
    mem_cat_ids_map.setdefault(r["memory_id"], []).append(r["category_id"])
    mem_cat_names_map.setdefault(r["memory_id"], []).append(cat_name_by_id.get(r["category_id"], ""))

# Build sqlite-like payload: a full relational snapshot for this user.
sqlite_payload = {
    "user": {
        "id": str(user_row["id"]),
        "user_id": user_row["user_id"],
        "name": user_row.get("name"),
        "email": user_row.get("email"),
        "metadata": _json_load_maybe(user_row.get("metadata")),
        "created_at": _iso(user_row.get("created_at")),
        "updated_at": _iso(user_row.get("updated_at")),
    },
    "apps": [
        {
            "id": str(a["id"]),
            "owner_id": str(a["owner_id"]) if a.get("owner_id") else None,
            "name": a["name"],
            "description": a.get("description"),
            "metadata": _json_load_maybe(a.get("metadata")),
            "is_active": bool(a.get("is_active")),
            "created_at": _iso(a.get("created_at")),
            "updated_at": _iso(a.get("updated_at")),
        }
        for a in apps
    ],
    "categories": [
        {
            "id": str(c["id"]),
            "name": c["name"],
            "description": c.get("description"),
            "created_at": _iso(c.get("created_at")),
            "updated_at": _iso(c.get("updated_at")),
        }
        for c in cats
    ],
    "memories": [
        {
            "id": str(m["id"]),
            "user_id": str(m["user_id"]),
            "app_id": str(m["app_id"]) if m.get("app_id") else None,
            "content": m.get("content") or "",
            "metadata": _json_load_maybe(m.get("metadata")) or {},
            "state": m.get("state"),
            "created_at": _iso(m.get("created_at")),
            "updated_at": _iso(m.get("updated_at")),
            "archived_at": _iso(m.get("archived_at")),
            "deleted_at": _iso(m.get("deleted_at")),
            "category_ids": [str(cid) for cid in mem_cat_ids_map.get(m["id"], [])],
        }
        for m in mem_rows
    ],
    "memory_categories": [
        {"memory_id": str(r["memory_id"]), "category_id": str(r["category_id"])}
        for r in mc_rows
    ],
    "status_history": [
        {
            "id": str(h["id"]),
            "memory_id": str(h["memory_id"]),
            "changed_by": str(h["changed_by"]),
            "old_state": h.get("old_state"),
            "new_state": h.get("new_state"),
            "changed_at": _iso(h.get("changed_at")),
        }
        for h in history
    ],
    "access_controls": [
        {
            "id": str(ac["id"]),
            "subject_type": ac.get("subject_type"),
            "subject_id": str(ac["subject_id"]) if ac.get("subject_id") else None,
            "object_type": ac.get("object_type"),
            "object_id": str(ac["object_id"]) if ac.get("object_id") else None,
            "effect": ac.get("effect"),
            "created_at": _iso(ac.get("created_at")),
        }
        for ac in acls
    ],
    "export_meta": {
        "app_id_filter": str(app_id_filter) if app_id_filter else None,
        "from_date": from_ts,
        "to_date": to_ts,
        "version": "1",
        "generated_at": datetime.datetime.now(datetime.timezone.utc).isoformat(),
    },
}

# Write memories.json (the full relational snapshot).
out_json = "/tmp/memories.json"
with open(out_json, "w", encoding="utf-8") as f:
    json.dump(sqlite_payload, f, indent=2, ensure_ascii=False)

# Write logical jsonl.gz: one flattened, denormalized record per memory.
out_jsonl_gz = "/tmp/memories.jsonl.gz"
with gzip.open(out_jsonl_gz, "wb") as gz:
    for m in mem_rows:
        record = {
            "id": str(m["id"]),
            "content": m.get("content") or "",
            "metadata": _json_load_maybe(m.get("metadata")) or {},
            "created_at": _iso(m.get("created_at")),
            "updated_at": _iso(m.get("updated_at")),
            "state": m.get("state"),
            "app": app_name_by_id.get(m.get("app_id")) if m.get("app_id") else None,
            "categories": [c for c in mem_cat_names_map.get(m["id"], []) if c],
        }
        gz.write((json.dumps(record, ensure_ascii=False) + "\n").encode("utf-8"))

print(out_json)
print(out_jsonl_gz)
PYCODE
PY_EXIT=$?
set -e
if [[ $PY_EXIT -ne 0 ]]; then
  echo "ERROR: Export failed inside container (exit code $PY_EXIT)"
  exit $PY_EXIT
fi
369+
370+
# Copy the generated files out of the container into a scratch directory that is
# removed on every exit path. (Not named TMPDIR: that would shadow the standard
# environment variable consulted by mktemp and other tools.)
WORK_DIR="$(mktemp -d)"
cleanup() { rm -rf -- "${WORK_DIR}"; }
trap cleanup EXIT

docker cp "${CONTAINER}:/tmp/memories.json" "${WORK_DIR}/memories.json"
docker cp "${CONTAINER}:/tmp/memories.jsonl.gz" "${WORK_DIR}/memories.jsonl.gz"

# Create the zip directly at its final (absolute) destination — no write into the
# scratch dir's parent and no cross-directory mv.
ZIP_NAME="memories_export_${USER_ID}.zip"
DEST_ZIP="${PWD}/${ZIP_NAME}"
rm -f -- "${DEST_ZIP}"   # zip appends to existing archives; drop any stale one first
if command -v zip >/dev/null 2>&1; then
  (cd "${WORK_DIR}" && zip -q -r "${DEST_ZIP}" "memories.json" "memories.jsonl.gz")
else
  # Fallback: Python's zipfile. Paths are handed over via the environment so that
  # quotes/special characters in USER_ID or the cwd cannot break the Python code.
  EXPORT_ZIP_PATH="${DEST_ZIP}" EXPORT_SRC_DIR="${WORK_DIR}" python3 - <<'PYFALLBACK'
import os
import zipfile

dest = os.environ["EXPORT_ZIP_PATH"]
src = os.environ["EXPORT_SRC_DIR"]
with zipfile.ZipFile(dest, "w", compression=zipfile.ZIP_DEFLATED) as zf:
    zf.write(os.path.join(src, "memories.json"), arcname="memories.json")
    zf.write(os.path.join(src, "memories.jsonl.gz"), arcname="memories.jsonl.gz")
print(dest)
PYFALLBACK
fi

echo "Wrote ./${ZIP_NAME}"
echo "Done."

0 commit comments

Comments
 (0)