135 lines
4.1 KiB
Python
135 lines
4.1 KiB
Python
"""
|
|
dmsync - MongoDB Change Streams sync daemon

Watches the local deskmeter database and pushes changes to a remote MongoDB.

Requires the local MongoDB to be configured as a replica set.
Saves a change-stream resume token after each replicated change. (On startup
the daemon currently bulk-syncs missing documents and clears the saved token
instead of resuming from it.)
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
from pathlib import Path
|
|
|
|
from pymongo import MongoClient, ReplaceOne
|
|
from pymongo.errors import PyMongoError
|
|
|
|
# Daemon-wide logging: timestamp, level and message, INFO and above.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
log = logging.getLogger("dmsync")

# File in the user's home directory where the change-stream resume token is kept.
RESUME_TOKEN_FILE = Path.home().joinpath(".dmsync-resume-token")

# Remote MongoDB endpoint; both parts can be overridden through the environment.
REMOTE_HOST = os.getenv("DMSYNC_REMOTE_HOST", "mcrn.ar")
REMOTE_PORT = int(os.getenv("DMSYNC_REMOTE_PORT", 27017))

# Collection names that are copied from the local to the remote database.
COLLECTIONS = ("switch", "task", "task_history", "state")
|
|
|
|
|
|
def load_resume_token():
    """Return the resume token stored in RESUME_TOKEN_FILE, or None.

    None is returned when the file does not exist, and also when it exists
    but cannot be read or does not contain valid JSON; the latter case is
    logged as a warning.
    """
    if not RESUME_TOKEN_FILE.exists():
        return None

    try:
        raw = RESUME_TOKEN_FILE.read_text()
        return json.loads(raw)
    except (json.JSONDecodeError, IOError) as err:
        log.warning(f"Failed to load resume token: {err}")
        return None
|
|
|
|
|
|
def save_resume_token(token):
    """Persist the resume token to RESUME_TOKEN_FILE as JSON (best-effort).

    The JSON is first written to a sibling ``.tmp`` file and then moved over
    the real file with ``os.replace``, so an interrupted write does not leave
    a truncated token file behind. Any failure is logged and swallowed rather
    than raised, matching the best-effort handling of I/O errors.

    Args:
        token: The resume token to store; it must be JSON-serializable.
    """
    tmp_path = RESUME_TOKEN_FILE.with_name(RESUME_TOKEN_FILE.name + ".tmp")
    try:
        # Serialize before touching the filesystem so a bad token never
        # creates or modifies any file.
        payload = json.dumps(token)
        tmp_path.write_text(payload)
        os.replace(tmp_path, RESUME_TOKEN_FILE)
    except (OSError, TypeError, ValueError) as e:
        # TypeError/ValueError come from json.dumps when the token holds
        # values JSON cannot represent; treat that like any other save failure
        # instead of letting it escape to the caller.
        log.error(f"Failed to save resume token: {e}")
|
|
|
|
|
|
def bulk_sync(local_db, remote_db):
    """Copy documents that exist locally but are absent remotely.

    For each name in COLLECTIONS, every local document whose ``_id`` is not
    present in the remote collection is upserted into the remote collection
    with a single bulk write. Documents whose ``_id`` already exists remotely
    are not compared or rewritten.

    Args:
        local_db: Source database; indexed by collection name.
        remote_db: Destination database; indexed by collection name.

    Returns:
        The sum, over all collections, of the upserted and modified counts
        reported by the bulk writes.
    """
    grand_total = 0

    for name in COLLECTIONS:
        source = local_db[name]
        target = remote_db[name]

        # Whole local collection keyed by _id, then only the _ids from remote.
        docs_by_id = {}
        for document in source.find():
            docs_by_id[document["_id"]] = document
        known_remote = {entry["_id"] for entry in target.find({}, {"_id": 1})}

        to_copy = set(docs_by_id) - known_remote
        if not to_copy:
            continue

        # One upserting replace per missing document, sent as a single batch.
        requests = [
            ReplaceOne({"_id": key}, docs_by_id[key], upsert=True)
            for key in to_copy
        ]
        outcome = target.bulk_write(requests)
        copied = outcome.upserted_count + outcome.modified_count
        log.info(f"{name}: bulk synced {copied} documents")
        grand_total += copied

    return grand_total
|
|
|
|
|
|
def _replicate_changes(local_db, remote_db):
    """Forward change-stream events from local_db to remote_db.

    Opens a change stream on ``local_db`` restricted to insert, update and
    replace events, and for every event on a collection listed in COLLECTIONS
    upserts the event's full document into the same-named remote collection.
    The stream's resume token is saved after each replicated document.

    Raises:
        PyMongoError: Logged and re-raised when the stream or a remote write
            fails.
    """
    # full_document="updateLookup" is requested so that update events are
    # expected to carry a "fullDocument" as well (see PyMongo watch docs).
    watch_kwargs = {"full_document": "updateLookup"}
    pipeline = [{"$match": {"operationType": {"$in": ["insert", "update", "replace"]}}}]

    log.info(f"Watching collections: {', '.join(COLLECTIONS)}")

    try:
        with local_db.watch(pipeline, **watch_kwargs) as stream:
            for change in stream:
                collection = change["ns"]["coll"]

                # The stream covers the whole database; drop other collections.
                if collection not in COLLECTIONS:
                    continue

                doc = change.get("fullDocument")
                if not doc:
                    # Nothing to write without a document body.
                    continue

                # Upsert to remote
                result = remote_db[collection].replace_one(
                    {"_id": doc["_id"]}, doc, upsert=True
                )

                # Compare against None so a falsy _id (e.g. 0) is still logged.
                if result.upserted_id is not None:
                    log.info(f"{collection}: inserted {doc['_id']}")

                save_resume_token(stream.resume_token)

    except PyMongoError as e:
        log.error(f"MongoDB error: {e}")
        raise


def sync():
    """Main sync loop using Change Streams.

    Connects to the local MongoDB (default client settings) and to the remote
    one at REMOTE_HOST:REMOTE_PORT, bulk-copies documents missing on the
    remote, removes any previously saved resume token, and then replicates
    live changes until the change stream ends or fails. Both client
    connections are closed when this function returns or raises.

    Raises:
        PyMongoError: Re-raised from the change-stream loop after logging.
    """
    log.info("Connecting to local MongoDB...")
    with MongoClient() as local:
        log.info(f"Connecting to remote MongoDB at {REMOTE_HOST}:{REMOTE_PORT}...")
        with MongoClient(REMOTE_HOST, REMOTE_PORT) as remote:
            local_db = local.deskmeter
            remote_db = remote.deskmeter

            # Bulk sync first to catch up
            log.info("Performing bulk sync to catch up...")
            synced = bulk_sync(local_db, remote_db)
            log.info(f"Bulk sync complete: {synced} documents")

            # Clear resume token to start fresh with Change Streams
            # (we're now caught up, don't need to replay old changes)
            # NOTE(review): bulk_sync only copies documents whose _id is
            # missing remotely, so updates made to already-synced documents
            # while the daemon was down do not appear to be replayed once the
            # token is discarded here - confirm this is intended.
            if RESUME_TOKEN_FILE.exists():
                RESUME_TOKEN_FILE.unlink()
                log.info("Cleared old resume token")

            # Now watch for new changes only (no resume token)
            _replicate_changes(local_db, remote_db)
|
|
|
|
|
|
if __name__ == "__main__":
    # Run as a script: announce startup, then hand control to the sync loop.
    log.info("Starting dmsync daemon")
    sync()
|