"""Task time-tracking queries against the ``deskmeter`` MongoDB database.

Resolves task IDs to human-readable hierarchical paths (with a file-backed
fallback cache in ``task_history``) and aggregates time spent per task and
per workspace over arbitrary periods.
"""

import os
from collections import Counter, defaultdict
from datetime import datetime, timedelta
from pathlib import Path
from zoneinfo import ZoneInfo

from pymongo import MongoClient

# Reporting is done in local (Buenos Aires) time; Mongo stores naive UTC
# datetimes, so every stored date gets .replace(tzinfo=utctz) before use.
timezone = ZoneInfo("America/Argentina/Buenos_Aires")
utctz = ZoneInfo("UTC")

client = MongoClient(os.environ.get("MONGODB_HOST", "localhost"))
db = client.deskmeter
switches = db.switch
tasks = db.task
task_history = db.task_history

# Plain-text task files; searched recursively as a fallback when a task ID
# is not present in the `tasks` or `task_history` collections.
task_file = "/home/mariano/LETRAS/adm/task/main"
task_dir = Path(task_file).parent


def parse_task_line(line):
    """Parse a task line to extract task name and ID.

    Lines look like ``name | id ...``; returns ``(name, id)``, where ``id``
    is ``None`` when absent, and ``(None, None)`` for blank lines.
    """
    line = line.strip()
    if not line:
        return None, None
    parts = line.split("|")
    if len(parts) > 1:
        task_name = parts[0].strip()
        id_parts = parts[1].split()
        if id_parts:
            task_id = id_parts[0].strip()
            return task_name, task_id
        return task_name, None
    return parts[0].strip(), None


def load_task_from_files(task_id):
    """Search task directory files (recursively) for a task ID and load it
    into task_history.

    On a hit, upserts the full ``a/b/c`` path into ``task_history`` and
    returns it; returns ``None`` when the ID appears in no file.
    """
    for task_filepath in task_dir.glob("**/*"):
        if not task_filepath.is_file():
            continue
        current_path = []
        try:
            with open(task_filepath, "r") as f:
                for line in f:
                    if not line.strip():
                        continue
                    # 4-space indentation encodes the hierarchy depth.
                    indent = len(line) - len(line.lstrip())
                    level = indent // 4
                    task_name, found_id = parse_task_line(line)
                    if task_name is None:
                        continue
                    current_path = current_path[:level]
                    current_path.append(task_name)
                    full_path = "/".join(current_path)
                    if found_id == task_id:
                        # Found it! Cache in task_history for next time.
                        task_history.update_one(
                            {"task_id": task_id},
                            {
                                "$set": {
                                    "path": full_path,
                                    "task_id": task_id,
                                    "source_file": task_filepath.name,
                                }
                            },
                            upsert=True,
                        )
                        return full_path
        except (OSError, UnicodeDecodeError):
            # Skip files that can't be read or aren't text.  (Was a bare
            # `except:`, which also swallowed KeyboardInterrupt and bugs.)
            continue
    return None


def get_task_path(task_id):
    """
    Get task path from tasks collection, falling back to task_history.
    If not found, searches task directory files and populates task_history
    on-demand.  As a last resort returns the raw ID itself.
    """
    if not task_id:
        return None

    # Try current tasks first
    task_doc = tasks.find_one({"task_id": task_id})
    if task_doc and "path" in task_doc:
        return task_doc["path"]

    # Try task history cache
    task_doc = task_history.find_one({"task_id": task_id})
    if task_doc and "path" in task_doc:
        return task_doc["path"]

    # Not in cache, search files and populate history
    task_path = load_task_from_files(task_id)
    if task_path:
        return task_path

    # Still not found, return ID as fallback
    return task_id


def get_current_task_info():
    """Get current task ID and path from state and tasks collection."""
    states = db.state
    current_doc = states.find_one({"_id": "current"})
    if not current_doc or "task" not in current_doc:
        return None, None
    task_id = current_doc["task"]
    task_doc = tasks.find_one({"task_id": task_id})
    if task_doc and "path" in task_doc:
        return task_id, task_doc["path"]
    return task_id, None


def get_task_time_seconds(start, end, task_id, workspaces=None):
    """Get total seconds for a task within a time period using MongoDB
    aggregation.  ``workspaces`` defaults to the active ones."""
    if workspaces is None:
        workspaces = ["Plan", "Think", "Work"]
    pipeline = [
        {
            "$match": {
                "date": {"$gte": start, "$lte": end},
                "task": task_id,
                "workspace": {"$in": workspaces},
            }
        },
        {"$group": {"_id": None, "total_seconds": {"$sum": "$delta"}}},
    ]
    result = list(switches.aggregate(pipeline))
    if result and len(result) > 0:
        return result[0]["total_seconds"]
    return 0


def task_or_none(task=None):
    """Normalize the sentinel value "all" to None (meaning: no task filter)."""
    if task == "all":
        task = None
    return task


def now():
    """Current local (Buenos Aires) time, timezone-aware."""
    return datetime.now(timezone)


def convert_seconds(seconds, use_days=False):
    """Format a duration in seconds as ``HH:MM:SS`` (or ``D days, HH:MM:SS``).

    Coerces to int first: Mongo ``$sum`` and ``timedelta.total_seconds()``
    can yield floats, which would otherwise crash the ``{:02d}`` format.
    """
    seconds = int(seconds)
    days = seconds // 86400
    hours = (seconds % 86400) // 3600
    minutes = (seconds % 3600) // 60
    remaining_seconds = seconds % 60
    if use_days:
        return "{} days, {:02d}:{:02d}:{:02d}".format(
            days, hours, minutes, remaining_seconds
        )
    return "{:02d}:{:02d}:{:02d}".format(hours + days * 24, minutes, remaining_seconds)


def get_work_period_totals(start, end):
    """Get period totals grouped by task with full path."""
    # Get all tasks with time in the period
    pipeline = [
        {
            "$match": {
                "date": {"$gte": start, "$lte": end},
                "workspace": {"$in": ["Plan", "Think", "Work"]},
                "task": {"$exists": True, "$ne": None},
            }
        },
        {"$group": {"_id": "$task", "total_seconds": {"$sum": "$delta"}}},
    ]
    results = list(switches.aggregate(pipeline))
    combined_rows = []
    for result in results:
        task_id = result["_id"]
        total_seconds = result["total_seconds"]
        if total_seconds > 0:
            # Get task path with history fallback
            task_path = get_task_path(task_id)
            combined_rows.append(
                {"ws": task_path, "total": convert_seconds(total_seconds)}
            )
    # Sort by path for consistency
    combined_rows.sort(key=lambda x: x["ws"])
    return combined_rows


def get_task_blocks_calendar(start, end, task=None, min_block_seconds=300, grid_hours=1):
    """
    Get task blocks for calendar-style visualization, aggregated by time grid.

    Shows all tasks worked on during each grid period, with overlapping
    blocks.  Each task block's height is proportional to time spent in that
    grid period.

    Args:
        grid_hours: Grid size in hours (1, 3, or 6)

    Returns list of blocks:
    [{
        'task_id': str,
        'task_path': str,
        'start': datetime (start of grid period),
        'end': datetime (start + duration in this grid block),
        'duration': int (seconds in this grid block),
        'hour': int (hour of grid start, 0-23),
        'active_seconds': int,
        'active_ratio': float (always 1.0)
    }, ...]
    """
    task_query = {"$in": task.split(",")} if task else {}
    match_query = {
        "date": {"$gte": start, "$lte": end},
        "workspace": {"$in": ["Plan", "Think", "Work"]},  # Only active workspaces
    }
    if task_query:
        match_query["task"] = task_query

    # Get all active switches in period
    raw_switches = list(switches.find(match_query).sort("date", 1))
    if not raw_switches:
        return []

    # Aggregate by grid period and task.
    # Structure: {(date, grid_start_hour, task_id): {duration, task_path}}
    grid_task_time = defaultdict(lambda: {"duration": 0, "task_path": None})

    for switch in raw_switches:
        task_id = switch.get("task")
        switch_start = switch["date"].replace(tzinfo=utctz).astimezone(timezone)
        switch_duration = switch["delta"]
        switch_end = switch_start + timedelta(seconds=switch_duration)

        # Walk the switch forward, splitting its duration across every grid
        # period it spans.
        current_time = switch_start
        remaining_duration = switch_duration
        while remaining_duration > 0 and current_time < switch_end:
            # Grid period start: hour rounded down to a grid_hours multiple.
            grid_hour = (current_time.hour // grid_hours) * grid_hours
            grid_start = current_time.replace(
                hour=grid_hour, minute=0, second=0, microsecond=0
            )
            grid_end = grid_start + timedelta(hours=grid_hours)

            # Time belonging to this grid period
            time_in_grid = min(
                (grid_end - current_time).total_seconds(), remaining_duration
            )
            key = (current_time.date(), grid_hour, task_id)

            # Resolve the task path once per key (it can hit files/DB).
            if grid_task_time[key]["task_path"] is None:
                task_path = get_task_path(task_id) or "No Task"
                grid_task_time[key]["task_path"] = task_path

            grid_task_time[key]["duration"] += time_in_grid
            remaining_duration -= time_in_grid
            current_time = grid_end

    # Convert accumulated grid cells into visualization blocks.
    blocks = []
    for (date, grid_hour, task_id), data in grid_task_time.items():
        if data["duration"] >= min_block_seconds:
            grid_start = datetime(
                date.year, date.month, date.day, grid_hour, 0, 0, tzinfo=timezone
            )
            blocks.append(
                {
                    "task_id": task_id,
                    "task_path": data["task_path"],
                    "start": grid_start,
                    "end": grid_start + timedelta(seconds=data["duration"]),
                    "hour": grid_hour,
                    "duration": int(data["duration"]),
                    "active_seconds": int(data["duration"]),
                    "idle_seconds": 0,
                    "active_ratio": 1.0,
                }
            )
    return sorted(blocks, key=lambda x: (x["start"], x["task_path"]))


def get_raw_switches(start, end, task=None):
    """
    Get all raw switch documents in the period.

    Returns list of switches:
    [{
        'workspace': str,
        'task_id': str,
        'task_path': str,
        'date': datetime,
        'delta': int (seconds)
    }, ...]
    """
    task_query = {"$in": task.split(",")} if task else {}
    match_query = {"date": {"$gte": start, "$lte": end}}
    if task_query:
        match_query["task"] = task_query

    raw_switches = list(switches.find(match_query).sort("date", 1))
    result = []
    for switch in raw_switches:
        task_id = switch.get("task")
        # Get task path with history fallback
        task_path = get_task_path(task_id) or "No Task"
        result.append(
            {
                "workspace": switch["workspace"],
                "task_id": task_id,
                "task_path": task_path,
                "date": switch["date"].replace(tzinfo=utctz).astimezone(timezone),
                "delta": switch["delta"],
            }
        )
    return result


def _before_first_lookup():
    """$lookup stage: the single switch immediately preceding the period's
    first document, for the same task (used to prorate a switch that started
    before the period but ran into it)."""
    return {
        "$lookup": {
            "from": "switch",
            "let": {"first_date": "$first_doc.date", "task": "$first_doc.task"},
            "pipeline": [
                {
                    "$match": {
                        "$expr": {
                            "$and": [
                                # Only before the first date
                                {"$lt": ["$date", "$$first_date"]},
                                # Must have the same task
                                {"$eq": ["$task", "$$task"]},
                            ]
                        }
                    }
                },
                # Get the most recent (closest) document
                {"$sort": {"date": -1}},
                # Only the immediate previous document
                {"$limit": 1},
            ],
            "as": "before_first",
        }
    }


def get_period_totals(start, end, task=None):
    """Totals per workspace over a period, plus Active/Idle summary rows.

    Prorates the switch that straddles the period start, and truncates the
    last switch when the period ends in the past.  Returns
    ``[{"ws": name, "total": "HH:MM:SS"}, ...]``.
    """
    task_query = {"$in": task.split(",")} if task else {}
    match_query = {"date": {"$gte": start, "$lte": end}}
    if task_query:
        match_query["task"] = task_query

    pipeline = [
        {"$match": match_query},
        {"$sort": {"date": 1}},
        {
            "$group": {
                "_id": None,
                "documents_in_range": {"$push": "$$ROOT"},
                "first_doc": {"$first": "$$ROOT"},
                "last_doc": {"$last": "$$ROOT"},
            }
        },
        _before_first_lookup(),
        {
            "$project": {
                "documents": {
                    "$concatArrays": [
                        {"$ifNull": ["$before_first", []]},  # Add only if found
                        "$documents_in_range",
                    ]
                }
            }
        },
        {"$unwind": "$documents"},
        {"$replaceRoot": {"newRoot": "$documents"}},
        {
            "$group": {
                "_id": "$workspace",
                "total": {"$sum": "$delta"},
            }
        },
    ]
    results = list(switches.aggregate(pipeline))
    if not results:
        return [{"ws": "No Data", "total": ""}]

    pipeline_before_after = [
        # Match documents within the date range
        {"$match": match_query},
        {"$sort": {"date": 1}},
        {
            "$group": {
                "_id": None,
                "first_doc": {"$first": "$$ROOT"},
                "last_doc": {"$last": "$$ROOT"},
            }
        },
        # Lookup to get one document before the first document in the range
        _before_first_lookup(),
        {
            "$project": {
                "before_first": {
                    "$ifNull": [{"$arrayElemAt": ["$before_first", 0]}, ""]
                },
                "last_doc": "$last_doc",  # Include the last_doc from the matched period
            }
        },
    ]
    aux_results = list(switches.aggregate(pipeline_before_after))
    # Safety check: if aux_results is empty, return early with no data
    if not aux_results:
        return [{"ws": "No Data", "total": ""}]

    bfirst = aux_results[0]["before_first"]
    start_delta = 0
    if bfirst:
        bfdate = bfirst["date"].replace(tzinfo=utctz)
        time_since_bfirst = round((start - bfdate.astimezone(timezone)).total_seconds())
        # Only apply start_delta if the before_first switch actually crosses
        # into the period.  If time_since_bfirst > bfirst["delta"], the
        # switch ended before the period started.
        if time_since_bfirst <= bfirst["delta"]:
            start_delta = time_since_bfirst

    ldoc = aux_results[0]["last_doc"]
    lastdate = ldoc["date"].replace(tzinfo=utctz)
    end_delta = round((end - lastdate.astimezone(timezone)).total_seconds())

    rows = []
    active_vs_idle = {"Active": 0, "Idle": 0}
    for result in results:
        if bfirst:
            if result["_id"] == bfirst["workspace"]:
                # Safety: ensure start_delta doesn't exceed total
                adjustment = min(start_delta, result["total"])
                result["total"] -= adjustment
        if end < now():
            if result["_id"] == ldoc["workspace"]:
                # Safety: ensure we don't subtract more than the total
                adjustment = ldoc["delta"] - end_delta
                safe_adjustment = min(adjustment, result["total"])
                result["total"] -= safe_adjustment

    for result in results:
        if result["total"] > 0:
            rows.append(
                {"ws": result["_id"], "total": convert_seconds(result["total"])}
            )
            if result["_id"] in ["Think", "Plan", "Work"]:
                active_vs_idle["Active"] += result["total"]
            if result["_id"] in ["Away", "Other"]:
                active_vs_idle["Idle"] += result["total"]

    order = ["Plan", "Think", "Work", "Other", "Away", "Active", "Idle"]
    # Unknown workspace names sort last instead of raising ValueError.
    rows = sorted(
        rows,
        key=lambda x: order.index(x["ws"]) if x["ws"] in order else len(order),
    )
    for k, v in active_vs_idle.items():
        rows.append({"ws": k, "total": convert_seconds(v)})
    return rows


# print(
#     get_period_totals(
#         datetime.today().replace(hour=0, minute=0, second=0, tzinfo=timezone)
#         - timedelta(days=1),
#         datetime.today().replace(hour=23, minute=59, second=59, tzinfo=timezone)
#         - timedelta(days=1),
#         # "ffbe198e",
#     )
# )
# print(
#     get_period_totals(
#         datetime.today().replace(hour=0, minute=0, second=0, tzinfo=timezone),
#         datetime.today().replace(hour=23, minute=59, second=59, tzinfo=timezone),
#         "5fc751ec",
#     )
# )