"""dmsync - MongoDB Change Streams sync daemon.

Watches the local ``deskmeter`` database and pushes changes to a remote
MongoDB instance.

Requires the local MongoDB to be configured as a replica set.

Startup sequence:

1. Open a change stream on the local database.
2. Bulk-copy every document that exists locally but is missing remotely.
3. Discard any resume token left over from a previous run.
4. Follow the stream, upserting each changed document into the remote.

Known limitations (by design of the current code):

* Only ``insert``, ``update`` and ``replace`` events are replicated; deletes
  are not.
* The bulk catch-up compares ``_id`` values only, so documents that were
  modified locally while the daemon was not running are not re-sent.
* A resume token is written after every replicated change, but ``sync()``
  discards it on startup instead of resuming from it.

Configuration (environment variables):

* ``DMSYNC_REMOTE_HOST`` - remote MongoDB host (default ``mcrn.ar``).
* ``DMSYNC_REMOTE_PORT`` - remote MongoDB port (default ``27017``).
"""

import json
import logging
import os
from pathlib import Path
from typing import Any, Dict, Optional

from pymongo import MongoClient, ReplaceOne
from pymongo.errors import PyMongoError

logging.basicConfig(
    level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
)
log = logging.getLogger("dmsync")

RESUME_TOKEN_FILE = Path.home() / ".dmsync-resume-token"
REMOTE_HOST = os.environ.get("DMSYNC_REMOTE_HOST", "mcrn.ar")
REMOTE_PORT = int(os.environ.get("DMSYNC_REMOTE_PORT", 27017))
COLLECTIONS = ("switch", "task", "task_history", "state")


def load_resume_token() -> Optional[Dict[str, Any]]:
    """Return the resume token stored in ``RESUME_TOKEN_FILE``, if any.

    Returns:
        The decoded JSON token, or ``None`` when the file does not exist or
        cannot be read/parsed (a warning is logged in the latter case).

    Note:
        ``sync()`` in this module does not call this function.
    """
    try:
        return json.loads(RESUME_TOKEN_FILE.read_text())
    except FileNotFoundError:
        # No token has been saved yet; this is not an error.
        return None
    except (ValueError, OSError) as e:
        # ValueError covers json.JSONDecodeError and undecodable bytes.
        log.warning("Failed to load resume token: %s", e)
        return None


def save_resume_token(token) -> None:
    """Persist ``token`` to ``RESUME_TOKEN_FILE`` as JSON (best effort).

    The token is written to a sibling temporary file and then renamed over
    the real file, so a crash in the middle of a write cannot leave a
    truncated token behind. Failures are logged and never raised.

    Args:
        token: The change-stream resume token to store.
    """
    tmp_path = RESUME_TOKEN_FILE.with_name(RESUME_TOKEN_FILE.name + ".tmp")
    try:
        tmp_path.write_text(json.dumps(token))
        tmp_path.replace(RESUME_TOKEN_FILE)
    except (OSError, TypeError, ValueError) as e:
        # TypeError/ValueError: the token was not JSON-serialisable. Saving
        # is best effort, so this must not take the daemon down.
        log.error("Failed to save resume token: %s", e)


def bulk_sync(local_db, remote_db) -> int:
    """Copy every document missing on the remote from local to remote.

    For each collection in ``COLLECTIONS`` the set of remote ``_id`` values
    is fetched, and every local document whose ``_id`` is not in that set is
    upserted into the remote collection. Documents that already exist
    remotely are left untouched, even if their content differs.

    Args:
        local_db: Source database handle.
        remote_db: Destination database handle.

    Returns:
        Total number of documents upserted or modified across collections.
    """
    total_synced = 0

    for coll_name in COLLECTIONS:
        local_coll = local_db[coll_name]
        remote_coll = remote_db[coll_name]

        # Only the remote ids are held in memory; local documents are
        # streamed from the cursor and kept only when they are missing.
        remote_ids = {doc["_id"] for doc in remote_coll.find({}, {"_id": 1})}
        ops = [
            ReplaceOne({"_id": doc["_id"]}, doc, upsert=True)
            for doc in local_coll.find()
            if doc["_id"] not in remote_ids
        ]
        if not ops:
            continue

        result = remote_coll.bulk_write(ops)
        count = result.upserted_count + result.modified_count
        log.info("%s: bulk synced %s documents", coll_name, count)
        total_synced += count

    return total_synced


def _discard_resume_token() -> None:
    """Delete the saved resume token file if one exists."""
    try:
        RESUME_TOKEN_FILE.unlink()
    except FileNotFoundError:
        return
    log.info("Cleared old resume token")


def _apply_change(change, remote_db) -> bool:
    """Upsert the document carried by one change event into the remote.

    Args:
        change: A change-stream event document.
        remote_db: Destination database handle.

    Returns:
        ``True`` if the event was written to the remote, ``False`` if it was
        skipped (collection not in ``COLLECTIONS`` or no ``fullDocument``).
    """
    collection = change["ns"]["coll"]
    if collection not in COLLECTIONS:
        return False

    doc = change.get("fullDocument")
    if not doc:
        # Nothing to write for this event.
        return False

    result = remote_db[collection].replace_one(
        {"_id": doc["_id"]}, doc, upsert=True
    )
    # Compare against None so that falsy ids (0, "") are still reported.
    if result.upserted_id is not None:
        log.info("%s: inserted %s", collection, doc["_id"])
    return True


def _replicate(local_db, remote_db) -> None:
    """Catch the remote up, then follow the local change stream forever."""
    # Watch for inserts, updates, and replaces on the database.
    pipeline = [
        {"$match": {"operationType": {"$in": ["insert", "update", "replace"]}}}
    ]

    # The stream is opened BEFORE the bulk catch-up so that writes landing
    # while the catch-up runs are buffered by the stream instead of being
    # lost. Replaying them afterwards is safe: every write is an upsert.
    with local_db.watch(pipeline, full_document="updateLookup") as stream:
        log.info("Performing bulk sync to catch up...")
        synced = bulk_sync(local_db, remote_db)
        log.info("Bulk sync complete: %s documents", synced)

        # We are caught up, so old changes do not need to be replayed.
        _discard_resume_token()

        log.info("Watching collections: %s", ", ".join(COLLECTIONS))
        for change in stream:
            if _apply_change(change, remote_db):
                save_resume_token(stream.resume_token)


def sync() -> None:
    """Main sync loop using Change Streams.

    Connects to the local MongoDB (default host/port) and to the remote at
    ``REMOTE_HOST:REMOTE_PORT``, then replicates the ``deskmeter`` database
    until the stream ends or an error occurs. Both connections are closed on
    the way out.

    Raises:
        PyMongoError: Any MongoDB error is logged and re-raised.
    """
    log.info("Connecting to local MongoDB...")
    with MongoClient() as local:
        log.info(
            "Connecting to remote MongoDB at %s:%s...", REMOTE_HOST, REMOTE_PORT
        )
        with MongoClient(REMOTE_HOST, REMOTE_PORT) as remote:
            try:
                _replicate(local.deskmeter, remote.deskmeter)
            except PyMongoError as e:
                log.error("MongoDB error: %s", e)
                raise


if __name__ == "__main__":
    log.info("Starting dmsync daemon")
    sync()