This commit is contained in:
2026-02-04 05:05:56 -03:00
parent de2ea3b7cb
commit c97ef63756
2 changed files with 76 additions and 14 deletions

View File

@@ -11,7 +11,7 @@ import logging
import os
from pathlib import Path
from pymongo import MongoClient
from pymongo import MongoClient, ReplaceOne
from pymongo.errors import PyMongoError
logging.basicConfig(
@@ -43,6 +43,35 @@ def save_resume_token(token):
log.error(f"Failed to save resume token: {e}")
def bulk_sync(local_db, remote_db):
"""Bulk sync all missing documents from local to remote."""
total_synced = 0
for coll_name in COLLECTIONS:
local_coll = local_db[coll_name]
remote_coll = remote_db[coll_name]
# Get all local docs and remote IDs
local_docs = {doc["_id"]: doc for doc in local_coll.find()}
remote_ids = set(doc["_id"] for doc in remote_coll.find({}, {"_id": 1}))
# Find missing docs
missing_ids = set(local_docs.keys()) - remote_ids
if missing_ids:
# Bulk insert missing docs
ops = [
ReplaceOne({"_id": _id}, local_docs[_id], upsert=True)
for _id in missing_ids
]
result = remote_coll.bulk_write(ops)
count = result.upserted_count + result.modified_count
log.info(f"{coll_name}: bulk synced {count} documents")
total_synced += count
return total_synced
def sync():
"""Main sync loop using Change Streams."""
log.info(f"Connecting to local MongoDB...")
@@ -54,12 +83,19 @@ def sync():
local_db = local.deskmeter
remote_db = remote.deskmeter
resume_token = load_resume_token()
if resume_token:
log.info("Resuming from saved token")
# Bulk sync first to catch up
log.info("Performing bulk sync to catch up...")
synced = bulk_sync(local_db, remote_db)
log.info(f"Bulk sync complete: {synced} documents")
watch_kwargs = {"resume_after": resume_token} if resume_token else {}
watch_kwargs["full_document"] = "updateLookup" # Get full doc on updates
# Clear resume token to start fresh with Change Streams
# (we're now caught up, don't need to replay old changes)
if RESUME_TOKEN_FILE.exists():
RESUME_TOKEN_FILE.unlink()
log.info("Cleared old resume token")
# Now watch for new changes only (no resume token)
watch_kwargs = {"full_document": "updateLookup"}
# Watch for inserts, updates, and replaces on the database
pipeline = [{"$match": {"operationType": {"$in": ["insert", "update", "replace"]}}}]
@@ -83,8 +119,8 @@ def sync():
{"_id": doc["_id"]}, doc, upsert=True
)
action = "inserted" if result.upserted_id else "updated"
log.info(f"{collection}: {action} {doc['_id']}")
if result.upserted_id:
log.info(f"{collection}: inserted {doc['_id']}")
save_resume_token(stream.resume_token)

View File

@@ -6,9 +6,31 @@ from zoneinfo import ZoneInfo
from pymongo import MongoClient
timezone = ZoneInfo("America/Argentina/Buenos_Aires")
default_timezone = ZoneInfo("America/Argentina/Buenos_Aires")
timezone = default_timezone # Keep for backwards compatibility
utctz = ZoneInfo("UTC")
SUPPORTED_TIMEZONES = [
("America/Argentina/Buenos_Aires", "Buenos Aires"),
("America/New_York", "New York"),
("America/Los_Angeles", "Los Angeles"),
("Europe/London", "London"),
("Europe/Paris", "Paris"),
("UTC", "UTC"),
]
def get_timezone(tz_name=None):
"""Get ZoneInfo for timezone name, with validation."""
if not tz_name:
return default_timezone
# Validate against supported list
valid_names = [tz[0] for tz in SUPPORTED_TIMEZONES]
if tz_name in valid_names:
return ZoneInfo(tz_name)
return default_timezone
client = MongoClient(os.environ.get("MONGODB_HOST", "localhost"))
db = client.deskmeter
switches = db.switch
@@ -209,7 +231,7 @@ def get_work_period_totals(start, end):
def get_task_blocks_calendar(
start, end, task=None, min_block_seconds=300, grid_hours=1
start, end, task=None, min_block_seconds=300, grid_hours=1, tz=None
):
"""
Get task blocks for calendar-style visualization, aggregated by time grid.
@@ -231,6 +253,8 @@ def get_task_blocks_calendar(
'active_ratio': float (always 1.0)
}, ...]
"""
local_tz = tz if tz else default_timezone
task_query = {"$in": task.split(",")} if task else {}
match_query = {
@@ -252,7 +276,7 @@ def get_task_blocks_calendar(
for switch in raw_switches:
task_id = switch.get("task")
switch_start = switch["date"].replace(tzinfo=utctz).astimezone(timezone)
switch_start = switch["date"].replace(tzinfo=utctz).astimezone(local_tz)
switch_duration = switch["delta"]
switch_end = switch_start + timedelta(seconds=switch_duration)
@@ -290,7 +314,7 @@ def get_task_blocks_calendar(
for (date, grid_hour, task_id), data in grid_task_time.items():
if data["duration"] >= min_block_seconds:
grid_start = datetime(
date.year, date.month, date.day, grid_hour, 0, 0, tzinfo=timezone
date.year, date.month, date.day, grid_hour, 0, 0, tzinfo=local_tz
)
blocks.append(
@@ -310,7 +334,7 @@ def get_task_blocks_calendar(
return sorted(blocks, key=lambda x: (x["start"], x["task_path"]))
def get_raw_switches(start, end, task=None):
def get_raw_switches(start, end, task=None, tz=None):
"""
Get all raw switch documents in the period.
@@ -323,6 +347,8 @@ def get_raw_switches(start, end, task=None):
'delta': int (seconds)
}, ...]
"""
local_tz = tz if tz else default_timezone
task_query = {"$in": task.split(",")} if task else {}
match_query = {"date": {"$gte": start, "$lte": end}}
@@ -342,7 +368,7 @@ def get_raw_switches(start, end, task=None):
"workspace": switch["workspace"],
"task_id": task_id,
"task_path": task_path,
"date": switch["date"].replace(tzinfo=utctz).astimezone(timezone),
"date": switch["date"].replace(tzinfo=utctz).astimezone(local_tz),
"delta": switch["delta"],
}
)