Files
deskmeter/dmapp/dmdb/sync.py
2026-02-04 05:05:56 -03:00

135 lines
4.1 KiB
Python

"""
dmsync - MongoDB Change Streams sync daemon
Watches local deskmeter database and pushes changes to remote MongoDB
Requires local MongoDB to be configured as a replica set.
Saves change-stream resume tokens, though each run currently clears the
saved token and starts a fresh stream after a bulk catch-up.
"""
import json
import logging
import os
from pathlib import Path
from pymongo import MongoClient, ReplaceOne
from pymongo.errors import PyMongoError
# Daemon-wide logging: timestamped INFO-level lines on the root logger.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
)
log = logging.getLogger("dmsync")
# Where the change-stream resume token is persisted between runs.
RESUME_TOKEN_FILE = Path.home() / ".dmsync-resume-token"
# Remote MongoDB endpoint; overridable via environment variables.
REMOTE_HOST = os.environ.get("DMSYNC_REMOTE_HOST", "mcrn.ar")
REMOTE_PORT = int(os.environ.get("DMSYNC_REMOTE_PORT", 27017))
# Collections of the local "deskmeter" database that are mirrored remotely.
COLLECTIONS = ("switch", "task", "task_history", "state")
def load_resume_token():
    """Load the persisted change-stream resume token.

    Returns:
        The JSON-decoded token from RESUME_TOKEN_FILE, or None when the
        file is absent, unreadable, or contains invalid JSON.
    """
    # EAFP: read directly instead of exists()-then-read, which is racy if
    # the file is removed between the check and the read.
    try:
        return json.loads(RESUME_TOKEN_FILE.read_text())
    except FileNotFoundError:
        # No token saved yet — normal on first run; not worth a warning.
        return None
    except (json.JSONDecodeError, OSError) as e:
        # OSError is the modern name; IOError has been an alias since 3.3.
        log.warning(f"Failed to load resume token: {e}")
        return None
def save_resume_token(token):
    """Persist the change-stream resume token to RESUME_TOKEN_FILE.

    Best-effort: a write failure is logged and swallowed so the sync loop
    keeps running (the next change event will retry the save).
    """
    try:
        RESUME_TOKEN_FILE.write_text(json.dumps(token))
    except OSError as e:
        # OSError is the modern name; IOError has been an alias since 3.3.
        log.error(f"Failed to save resume token: {e}")
def bulk_sync(local_db, remote_db):
    """Copy every document that exists locally but not remotely.

    For each collection in COLLECTIONS, compares _id sets between the two
    databases and upserts the missing documents into the remote database
    with a single bulk_write per collection.

    Args:
        local_db: source pymongo Database.
        remote_db: destination pymongo Database.

    Returns:
        Total number of documents written across all collections.

    Note: documents deleted locally are NOT removed remotely, and documents
    that exist on both sides are not compared or refreshed.
    """
    total_synced = 0
    for coll_name in COLLECTIONS:
        local_coll = local_db[coll_name]
        remote_coll = remote_db[coll_name]
        # Compare _id projections only; the original fetched every full
        # local document up front, holding the whole collection in memory.
        local_ids = {doc["_id"] for doc in local_coll.find({}, {"_id": 1})}
        remote_ids = {doc["_id"] for doc in remote_coll.find({}, {"_id": 1})}
        missing_ids = local_ids - remote_ids
        if missing_ids:
            # Fetch only the missing documents and upsert them in one batch.
            ops = [
                ReplaceOne({"_id": doc["_id"]}, doc, upsert=True)
                for doc in local_coll.find({"_id": {"$in": list(missing_ids)}})
            ]
            result = remote_coll.bulk_write(ops)
            count = result.upserted_count + result.modified_count
            log.info(f"{coll_name}: bulk synced {count} documents")
            total_synced += count
    return total_synced
def sync():
    """Run the sync daemon: bulk catch-up, then stream live changes.

    Connects to the local (default localhost) and remote MongoDB instances,
    performs a one-off bulk sync so the remote holds every local document,
    then tails the local database's change stream and mirrors inserts,
    updates and replaces to the remote. Deletes are not propagated.

    Raises:
        PyMongoError: re-raised after logging so a supervisor can restart us.
    """
    log.info("Connecting to local MongoDB...")
    local = MongoClient()  # local instance must be a replica set for watch()
    log.info("Connecting to remote MongoDB at %s:%s...", REMOTE_HOST, REMOTE_PORT)
    remote = MongoClient(REMOTE_HOST, REMOTE_PORT)
    local_db = local.deskmeter
    remote_db = remote.deskmeter
    # Bulk sync first so the change stream only has to track the delta.
    log.info("Performing bulk sync to catch up...")
    synced = bulk_sync(local_db, remote_db)
    log.info("Bulk sync complete: %s documents", synced)
    # We are now caught up, so any previously saved token is stale: drop it
    # and start a fresh stream instead of replaying old changes.
    # NOTE(review): this makes load_resume_token() dead code, and updates
    # landing between bulk_sync() and watch() below are missed — consider
    # resuming via start_after=load_resume_token() to close that gap.
    if RESUME_TOKEN_FILE.exists():
        RESUME_TOKEN_FILE.unlink()
        log.info("Cleared old resume token")
    # Ask for the full post-image on updates so we can upsert it verbatim.
    watch_kwargs = {"full_document": "updateLookup"}
    # Only mirror operations that leave a full document; deletes are ignored.
    pipeline = [
        {"$match": {"operationType": {"$in": ["insert", "update", "replace"]}}}
    ]
    log.info("Watching collections: %s", ", ".join(COLLECTIONS))
    try:
        with local_db.watch(pipeline, **watch_kwargs) as stream:
            for change in stream:
                collection = change["ns"]["coll"]
                if collection not in COLLECTIONS:
                    continue
                doc = change.get("fullDocument")
                if not doc:
                    # fullDocument may be absent (e.g. the doc vanished
                    # before the updateLookup ran); nothing to mirror.
                    continue
                # Upsert keeps this idempotent if the same event replays.
                result = remote_db[collection].replace_one(
                    {"_id": doc["_id"]}, doc, upsert=True
                )
                if result.upserted_id:
                    log.info("%s: inserted %s", collection, doc["_id"])
                # Persist our position after each applied change.
                save_resume_token(stream.resume_token)
    except PyMongoError as e:
        log.error("MongoDB error: %s", e)
        raise
# Script entry point: run the daemon in the foreground until interrupted
# or a PyMongoError escapes sync().
if __name__ == "__main__":
    log.info("Starting dmsync daemon")
    sync()