This commit is contained in:
2026-02-04 05:05:56 -03:00
parent de2ea3b7cb
commit c97ef63756
2 changed files with 76 additions and 14 deletions

View File

@@ -11,7 +11,7 @@ import logging
import os import os
from pathlib import Path from pathlib import Path
from pymongo import MongoClient from pymongo import MongoClient, ReplaceOne
from pymongo.errors import PyMongoError from pymongo.errors import PyMongoError
logging.basicConfig( logging.basicConfig(
@@ -43,6 +43,35 @@ def save_resume_token(token):
log.error(f"Failed to save resume token: {e}") log.error(f"Failed to save resume token: {e}")
def bulk_sync(local_db, remote_db, batch_size=1000):
    """Copy documents that exist locally but not remotely, for every collection.

    For each name in ``COLLECTIONS`` the ``_id`` values of both sides are
    compared; documents whose ``_id`` is absent from the remote collection
    are upserted there. Documents already present remotely are not touched,
    even if their contents differ.

    Only ``_id`` values are held in memory for the comparison; the missing
    documents themselves are read and written in batches so that memory use
    is bounded by ``batch_size`` rather than by the collection size.

    Args:
        local_db: Source database (indexable by collection name).
        remote_db: Destination database (indexable by collection name).
        batch_size: Maximum number of documents fetched and written per
            round trip. Must be at least 1.

    Returns:
        Total number of documents written, computed as the sum of
        ``upserted_count + modified_count`` over all bulk writes.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    total_synced = 0

    for coll_name in COLLECTIONS:
        local_coll = local_db[coll_name]
        remote_coll = remote_db[coll_name]

        # Project to _id on both sides: the comparison needs identifiers only,
        # so there is no reason to pull full local documents up front.
        local_ids = {doc["_id"] for doc in local_coll.find({}, {"_id": 1})}
        remote_ids = {doc["_id"] for doc in remote_coll.find({}, {"_id": 1})}

        missing_ids = list(local_ids - remote_ids)
        if not missing_ids:
            continue

        count = 0
        for offset in range(0, len(missing_ids), batch_size):
            chunk = missing_ids[offset:offset + batch_size]
            ops = [
                ReplaceOne({"_id": doc["_id"]}, doc, upsert=True)
                for doc in local_coll.find({"_id": {"$in": chunk}})
            ]
            # A document may have been deleted locally since the id scan;
            # bulk_write rejects an empty operation list, so skip the batch.
            if not ops:
                continue
            # Each upsert targets a distinct _id, so ordering is irrelevant;
            # unordered execution lets the remaining writes proceed if one fails.
            result = remote_coll.bulk_write(ops, ordered=False)
            count += result.upserted_count + result.modified_count

        log.info(f"{coll_name}: bulk synced {count} documents")
        total_synced += count

    return total_synced
def sync(): def sync():
"""Main sync loop using Change Streams.""" """Main sync loop using Change Streams."""
log.info(f"Connecting to local MongoDB...") log.info(f"Connecting to local MongoDB...")
@@ -54,12 +83,19 @@ def sync():
local_db = local.deskmeter local_db = local.deskmeter
remote_db = remote.deskmeter remote_db = remote.deskmeter
resume_token = load_resume_token() # Bulk sync first to catch up
if resume_token: log.info("Performing bulk sync to catch up...")
log.info("Resuming from saved token") synced = bulk_sync(local_db, remote_db)
log.info(f"Bulk sync complete: {synced} documents")
watch_kwargs = {"resume_after": resume_token} if resume_token else {} # Clear resume token to start fresh with Change Streams
watch_kwargs["full_document"] = "updateLookup" # Get full doc on updates # (we're now caught up, don't need to replay old changes)
if RESUME_TOKEN_FILE.exists():
RESUME_TOKEN_FILE.unlink()
log.info("Cleared old resume token")
# Now watch for new changes only (no resume token)
watch_kwargs = {"full_document": "updateLookup"}
# Watch for inserts, updates, and replaces on the database # Watch for inserts, updates, and replaces on the database
pipeline = [{"$match": {"operationType": {"$in": ["insert", "update", "replace"]}}}] pipeline = [{"$match": {"operationType": {"$in": ["insert", "update", "replace"]}}}]
@@ -83,8 +119,8 @@ def sync():
{"_id": doc["_id"]}, doc, upsert=True {"_id": doc["_id"]}, doc, upsert=True
) )
action = "inserted" if result.upserted_id else "updated" if result.upserted_id:
log.info(f"{collection}: {action} {doc['_id']}") log.info(f"{collection}: inserted {doc['_id']}")
save_resume_token(stream.resume_token) save_resume_token(stream.resume_token)

View File

@@ -6,9 +6,31 @@ from zoneinfo import ZoneInfo
from pymongo import MongoClient from pymongo import MongoClient
timezone = ZoneInfo("America/Argentina/Buenos_Aires") default_timezone = ZoneInfo("America/Argentina/Buenos_Aires")
timezone = default_timezone # Keep for backwards compatibility
utctz = ZoneInfo("UTC") utctz = ZoneInfo("UTC")
# Allow-list of timezones accepted by get_timezone(). Each entry is a pair
# whose first element is the zone key handed to ZoneInfo; the second element
# looks like a human-readable label for the same zone (presumably for display
# in a UI -- confirm against callers).
SUPPORTED_TIMEZONES = [
("America/Argentina/Buenos_Aires", "Buenos Aires"),
("America/New_York", "New York"),
("America/Los_Angeles", "Los Angeles"),
("Europe/London", "London"),
("Europe/Paris", "Paris"),
("UTC", "UTC"),
]
def get_timezone(tz_name=None):
    """Resolve a timezone name to a ZoneInfo, restricted to the supported list.

    Returns ``ZoneInfo(tz_name)`` when ``tz_name`` equals the first element
    of an entry in ``SUPPORTED_TIMEZONES``. A falsy or unlisted name yields
    ``default_timezone`` instead of raising.
    """
    # Only names on the allow-list are ever passed to ZoneInfo.
    is_supported = bool(tz_name) and any(
        entry[0] == tz_name for entry in SUPPORTED_TIMEZONES
    )
    if is_supported:
        return ZoneInfo(tz_name)
    return default_timezone
client = MongoClient(os.environ.get("MONGODB_HOST", "localhost")) client = MongoClient(os.environ.get("MONGODB_HOST", "localhost"))
db = client.deskmeter db = client.deskmeter
switches = db.switch switches = db.switch
@@ -209,7 +231,7 @@ def get_work_period_totals(start, end):
def get_task_blocks_calendar( def get_task_blocks_calendar(
start, end, task=None, min_block_seconds=300, grid_hours=1 start, end, task=None, min_block_seconds=300, grid_hours=1, tz=None
): ):
""" """
Get task blocks for calendar-style visualization, aggregated by time grid. Get task blocks for calendar-style visualization, aggregated by time grid.
@@ -231,6 +253,8 @@ def get_task_blocks_calendar(
'active_ratio': float (always 1.0) 'active_ratio': float (always 1.0)
}, ...] }, ...]
""" """
local_tz = tz if tz else default_timezone
task_query = {"$in": task.split(",")} if task else {} task_query = {"$in": task.split(",")} if task else {}
match_query = { match_query = {
@@ -252,7 +276,7 @@ def get_task_blocks_calendar(
for switch in raw_switches: for switch in raw_switches:
task_id = switch.get("task") task_id = switch.get("task")
switch_start = switch["date"].replace(tzinfo=utctz).astimezone(timezone) switch_start = switch["date"].replace(tzinfo=utctz).astimezone(local_tz)
switch_duration = switch["delta"] switch_duration = switch["delta"]
switch_end = switch_start + timedelta(seconds=switch_duration) switch_end = switch_start + timedelta(seconds=switch_duration)
@@ -290,7 +314,7 @@ def get_task_blocks_calendar(
for (date, grid_hour, task_id), data in grid_task_time.items(): for (date, grid_hour, task_id), data in grid_task_time.items():
if data["duration"] >= min_block_seconds: if data["duration"] >= min_block_seconds:
grid_start = datetime( grid_start = datetime(
date.year, date.month, date.day, grid_hour, 0, 0, tzinfo=timezone date.year, date.month, date.day, grid_hour, 0, 0, tzinfo=local_tz
) )
blocks.append( blocks.append(
@@ -310,7 +334,7 @@ def get_task_blocks_calendar(
return sorted(blocks, key=lambda x: (x["start"], x["task_path"])) return sorted(blocks, key=lambda x: (x["start"], x["task_path"]))
def get_raw_switches(start, end, task=None): def get_raw_switches(start, end, task=None, tz=None):
""" """
Get all raw switch documents in the period. Get all raw switch documents in the period.
@@ -323,6 +347,8 @@ def get_raw_switches(start, end, task=None):
'delta': int (seconds) 'delta': int (seconds)
}, ...] }, ...]
""" """
local_tz = tz if tz else default_timezone
task_query = {"$in": task.split(",")} if task else {} task_query = {"$in": task.split(",")} if task else {}
match_query = {"date": {"$gte": start, "$lte": end}} match_query = {"date": {"$gte": start, "$lte": end}}
@@ -342,7 +368,7 @@ def get_raw_switches(start, end, task=None):
"workspace": switch["workspace"], "workspace": switch["workspace"],
"task_id": task_id, "task_id": task_id,
"task_path": task_path, "task_path": task_path,
"date": switch["date"].replace(tzinfo=utctz).astimezone(timezone), "date": switch["date"].replace(tzinfo=utctz).astimezone(local_tz),
"delta": switch["delta"], "delta": switch["delta"],
} }
) )