551 lines
16 KiB
Python
551 lines
16 KiB
Python
from collections import Counter, defaultdict
|
|
from datetime import datetime, timedelta
|
|
from pathlib import Path
|
|
|
|
from pymongo import MongoClient
|
|
from zoneinfo import ZoneInfo
|
|
|
|
# Local timezone for all user-facing datetimes; Mongo stores naive datetimes
# that this module treats as UTC (see the .replace(tzinfo=utctz) calls below).
timezone = ZoneInfo("America/Argentina/Buenos_Aires")
utctz = ZoneInfo("UTC")

# MongoDB connection (default localhost) and the collections this module uses.
client = MongoClient()
db = client.deskmeter
switches = db.switch  # workspace-switch events: date, delta (seconds), workspace, task
tasks = db.task  # currently known tasks: task_id -> path
task_history = db.task_history  # cache of task paths recovered from the task files

# Plain-text task outline; sibling files in the same directory are scanned
# on-demand for historical task IDs (see load_task_from_files).
task_file = "/home/mariano/LETRAS/adm/task/main"
task_dir = Path(task_file).parent
|
|
|
|
|
|
def parse_task_line(line):
    """Split a task outline line into ``(task_name, task_id)``.

    Lines look like ``name | <id> ...``.  Blank lines yield
    ``(None, None)``; lines without an ID after the pipe yield
    ``(name, None)``.
    """
    stripped = line.strip()
    if not stripped:
        return None, None

    pieces = stripped.split("|")
    name = pieces[0].strip()
    if len(pieces) == 1:
        # No pipe at all: the whole line is the task name.
        return name, None

    tokens = pieces[1].split()
    if not tokens:
        return name, None
    return name, tokens[0]
|
|
|
|
|
|
def load_task_from_files(task_id):
    """Search the task directory files for *task_id* and cache its path.

    Walks every regular file in ``task_dir``, reconstructing the task
    hierarchy from 4-space indentation.  When a line carrying *task_id*
    is found, the full ``a/b/c`` path is upserted into ``task_history``
    (keyed by task_id, recording the source file name) and returned.

    Returns:
        The slash-joined task path, or None when the ID appears in no file.
    """
    for task_filepath in task_dir.glob("*"):
        if not task_filepath.is_file():
            continue

        current_path = []
        try:
            with open(task_filepath, "r") as f:
                for line in f:
                    if not line.strip():
                        continue

                    # Hierarchy depth is encoded as 4 spaces per level.
                    indent = len(line) - len(line.lstrip())
                    level = indent // 4

                    task_name, found_id = parse_task_line(line)
                    if task_name is None:
                        continue

                    # Truncate the breadcrumb back to this line's depth,
                    # then descend into this task.
                    current_path = current_path[:level]
                    current_path.append(task_name)
                    full_path = "/".join(current_path)

                    if found_id == task_id:
                        # Found it! Cache in task_history for future lookups.
                        task_history.update_one(
                            {"task_id": task_id},
                            {"$set": {
                                "path": full_path,
                                "task_id": task_id,
                                "source_file": task_filepath.name
                            }},
                            upsert=True
                        )
                        return full_path
        except (OSError, UnicodeDecodeError):
            # Skip files that can't be read or decoded.  The previous bare
            # `except:` also swallowed KeyboardInterrupt/SystemExit.
            continue

    return None
|
|
|
|
|
|
def get_task_path(task_id):
    """
    Resolve a task ID to its human-readable path.

    Lookup order: the live ``tasks`` collection, then the ``task_history``
    cache, then an on-demand scan of the task files (which also populates
    the cache).  Falls back to returning the raw ID when nothing matches,
    and None for a falsy ID.
    """
    if not task_id:
        return None

    # Check the live collection first, then the history cache.
    for collection in (tasks, task_history):
        doc = collection.find_one({"task_id": task_id})
        if doc and "path" in doc:
            return doc["path"]

    # Cache miss: scan the task files; fall back to the bare ID.
    return load_task_from_files(task_id) or task_id
|
|
|
|
|
|
def get_current_task_info():
    """Return ``(task_id, task_path)`` for the currently active task.

    Reads the singleton ``{"_id": "current"}`` state document.  Returns
    ``(None, None)`` when no current task is recorded, and
    ``(task_id, None)`` when the task document carries no path.
    """
    current_doc = db.state.find_one({"_id": "current"})
    if not current_doc or "task" not in current_doc:
        return None, None

    task_id = current_doc["task"]
    task_doc = tasks.find_one({"task_id": task_id})
    path = task_doc["path"] if task_doc and "path" in task_doc else None
    return task_id, path
|
|
|
|
|
|
def get_task_time_seconds(start, end, task_id, workspaces=None):
    """Sum the seconds logged for *task_id* between *start* and *end*.

    Only switches whose workspace is in *workspaces* are counted
    (default: the active workspaces Plan/Think/Work).  Returns 0 when
    nothing matches.
    """
    active = workspaces if workspaces is not None else ["Plan", "Think", "Work"]

    pipeline = [
        {"$match": {
            "date": {"$gte": start, "$lte": end},
            "task": task_id,
            "workspace": {"$in": active},
        }},
        {"$group": {"_id": None, "total_seconds": {"$sum": "$delta"}}},
    ]

    # The $group collapses everything into at most one result document.
    for row in switches.aggregate(pipeline):
        return row["total_seconds"]
    return 0
|
|
|
|
|
|
def task_or_none(task=None):
    """Resolve the task filter argument.

    When *task* is falsy, read the active task ID from the task file.
    The literal value "all" means "no filter" and maps to None.
    """
    resolved = task or read_and_extract(task_file)
    return None if resolved == "all" else resolved
|
|
|
|
|
|
def now():
    """Current time as an aware datetime in the local (Buenos Aires) zone."""
    return datetime.now(tz=timezone)
|
|
|
|
|
|
def convert_seconds(seconds, use_days=False):
    """Format a duration in seconds as ``HH:MM:SS``.

    With ``use_days=True`` the format is ``<d> days, HH:MM:SS``; otherwise
    whole days are folded into the hour field (e.g. ``25:01:01``).
    """
    days, day_rest = divmod(seconds, 86400)
    hours, hour_rest = divmod(day_rest, 3600)
    minutes, secs = divmod(hour_rest, 60)

    if use_days:
        return f"{days} days, {hours:02d}:{minutes:02d}:{secs:02d}"
    return f"{hours + days * 24:02d}:{minutes:02d}:{secs:02d}"
|
|
|
|
|
|
def extract(line):
    """Pull the 8-character ID that follows the first "|" on a line.

    Only lines marked with a trailing "*" qualify.  Returns None when the
    line is unmarked, has no pipe, or is too short to hold a full ID.
    """
    if not line.rstrip().endswith("*"):
        return None
    pipe_index = line.find("|")
    if pipe_index == -1 or len(line) <= pipe_index + 8:
        return None
    return line[pipe_index + 1 : pipe_index + 9]
|
|
|
|
|
|
def read_and_extract(file_path):
    """Return the first task ID extracted from *file_path*, or None."""
    with open(file_path, "r") as handle:
        # First truthy extraction wins; None when no line matches.
        return next((value for value in map(extract, handle) if value), None)
|
|
|
|
|
|
def get_work_period_totals(start, end):
    """Return per-task time totals for the period, keyed by full task path.

    Only active workspaces (Plan/Think/Work) count, and only switches with
    a non-null task.  Each row is ``{"ws": task_path, "total": "HH:MM:SS"}``,
    sorted by path for stable output.
    """
    pipeline = [
        {"$match": {
            "date": {"$gte": start, "$lte": end},
            "workspace": {"$in": ["Plan", "Think", "Work"]},
            "task": {"$exists": True, "$ne": None},
        }},
        {"$group": {"_id": "$task", "total_seconds": {"$sum": "$delta"}}},
    ]

    rows = [
        {
            # Resolve the ID to a path, with history/file fallback.
            "ws": get_task_path(entry["_id"]),
            "total": convert_seconds(entry["total_seconds"]),
        }
        for entry in switches.aggregate(pipeline)
        if entry["total_seconds"] > 0
    ]

    # Sort by path for consistency across runs.
    return sorted(rows, key=lambda row: row["ws"])
|
|
|
|
|
|
def get_task_blocks_calendar(start, end, task=None, min_block_seconds=300):
    """
    Get task blocks for calendar-style visualization.
    Groups consecutive switches to the same task into blocks, tracking active/idle time.

    Args:
        start, end: period boundaries (compared against the stored switch dates).
        task: optional comma-separated task IDs to filter on; None means all.
        min_block_seconds: blocks shorter than this are dropped (default 300).

    Returns list of blocks:
    [{
        'task_id': str,
        'task_path': str,
        'start': datetime,
        'end': datetime,
        'duration': int (total seconds),
        'active_seconds': int (Plan/Think/Work time),
        'idle_seconds': int (Other/Away time),
        'active_ratio': float (0.0 to 1.0)
    }, ...]
    """
    task_query = {"$in": task.split(",")} if task else {}

    match_query = {"date": {"$gte": start, "$lte": end}}
    if task_query:
        match_query["task"] = task_query

    # Get all switches in period, sorted by date
    raw_switches = list(switches.find(match_query).sort("date", 1))

    if not raw_switches:
        return []

    blocks = []
    current_block = None  # block being accumulated; flushed when the task changes

    for switch in raw_switches:
        ws = switch["workspace"]
        task_id = switch.get("task")
        # Stored dates are treated as naive UTC; convert to the local zone.
        switch_start = switch["date"].replace(tzinfo=utctz).astimezone(timezone)
        switch_duration = switch["delta"]
        switch_end = switch_start + timedelta(seconds=switch_duration)

        is_active = ws in ["Plan", "Think", "Work"]

        # Start new block if task changed
        if current_block is None or current_block["task_id"] != task_id:
            if current_block is not None:
                blocks.append(current_block)

            # Get task path with history fallback
            task_path = get_task_path(task_id) or "No Task"

            current_block = {
                "task_id": task_id,
                "task_path": task_path,
                "start": switch_start,
                "end": switch_end,
                "duration": switch_duration,
                "active_seconds": switch_duration if is_active else 0,
                "idle_seconds": 0 if is_active else switch_duration
            }
        else:
            # Extend current block
            current_block["end"] = switch_end
            current_block["duration"] += switch_duration
            if is_active:
                current_block["active_seconds"] += switch_duration
            else:
                current_block["idle_seconds"] += switch_duration

    # Add final block
    if current_block is not None:
        blocks.append(current_block)

    # Filter out very short blocks and calculate active ratio
    filtered_blocks = []
    for block in blocks:
        if block["duration"] >= min_block_seconds:
            block["active_ratio"] = block["active_seconds"] / block["duration"] if block["duration"] > 0 else 0
            filtered_blocks.append(block)

    return filtered_blocks
|
|
|
|
|
|
def get_raw_switches(start, end, task=None):
    """
    Get all raw switch documents in the period, oldest first.

    Args:
        start, end: period boundaries (compared against the stored dates).
        task: optional comma-separated task IDs to filter on; None means all.

    Returns list of switches:
    [{
        'workspace': str,
        'task_id': str,
        'task_path': str,
        'date': datetime,
        'delta': int (seconds)
    }, ...]
    """
    query = {"date": {"$gte": start, "$lte": end}}
    if task:
        query["task"] = {"$in": task.split(",")}

    rows = []
    for doc in switches.find(query).sort("date", 1):
        tid = doc.get("task")
        rows.append({
            "workspace": doc["workspace"],
            "task_id": tid,
            # Resolve the ID to a path with history fallback.
            "task_path": get_task_path(tid) or "No Task",
            # Stored dates are treated as naive UTC; convert to local zone.
            "date": doc["date"].replace(tzinfo=utctz).astimezone(timezone),
            "delta": doc["delta"],
        })
    return rows
|
|
|
|
|
|
def get_period_totals(start, end, task=None):
    """Return per-workspace time totals for the period, plus Active/Idle sums.

    Runs two aggregations over the switch collection:
      1. Totals per workspace, including the switch immediately *before*
         the period (for the same task) so time crossing the start boundary
         is represented.
      2. A helper query fetching that boundary switch and the last switch
         in the range, used below to clip partial overlap at both ends.

    Returns rows of ``{"ws": workspace, "total": "HH:MM:SS"}`` ordered
    Plan/Think/Work/Other/Away, followed by aggregate "Active" and "Idle"
    rows.  Returns ``[{"ws": "No Data", "total": ""}]`` when the period
    is empty.
    """
    task_query = {"$in": task.split(",")} if task else {}

    match_query = {"date": {"$gte": start, "$lte": end}}
    if task_query:
        match_query["task"] = task_query

    # Pipeline 1: workspace totals over the in-range documents plus the
    # single closest prior document with the same task (boundary switch).
    pipeline = [
        {"$match": match_query},
        {"$sort": {"date": 1}},
        {
            "$group": {
                "_id": None,
                "documents_in_range": {"$push": "$$ROOT"},
                "first_doc": {"$first": "$$ROOT"},
                "last_doc": {"$last": "$$ROOT"},
            }
        },
        {
            "$lookup": {
                "from": "switch",
                "let": {"first_date": "$first_doc.date", "task": "$first_doc.task"},
                "pipeline": [
                    {
                        "$match": {
                            "$expr": {
                                "$and": [
                                    {
                                        "$lt": ["$date", "$$first_date"]
                                    },  # Only before the first date
                                    {
                                        "$eq": ["$task", "$$task"]
                                    },  # Must have the same task
                                ]
                            }
                        }
                    },
                    {"$sort": {"date": -1}},  # Get the most recent (closest) document
                    {"$limit": 1},  # Only the immediate previous document
                ],
                "as": "before_first",
            }
        },
        {
            "$project": {
                "documents": {
                    "$concatArrays": [
                        {"$ifNull": ["$before_first", []]},  # Add only if found
                        "$documents_in_range",
                    ]
                }
            }
        },
        {"$unwind": "$documents"},
        {"$replaceRoot": {"newRoot": "$documents"}},
        {
            "$group": {
                "_id": "$workspace",
                "total": {"$sum": "$delta"},
            }
        },
    ]

    results = list(switches.aggregate(pipeline))

    if not results:
        return [{"ws": "No Data", "total": ""}]

    # Pipeline 2: fetch the boundary (before-first) document and the last
    # in-range document so their deltas can be clipped to the period below.
    pipeline_before_after = [
        # Match documents within the date range
        {"$match": match_query},
        {"$sort": {"date": 1}},
        {
            "$group": {
                "_id": None,
                "first_doc": {"$first": "$$ROOT"},
                "last_doc": {"$last": "$$ROOT"},
            }
        },
        # Lookup to get one document before the first document in the range
        {
            "$lookup": {
                "from": "switch",
                "let": {"first_date": "$first_doc.date", "task": "$first_doc.task"},
                "pipeline": [
                    {
                        "$match": {
                            "$expr": {
                                "$and": [
                                    {
                                        "$lt": ["$date", "$$first_date"]
                                    },  # Only before the first date
                                    {
                                        "$eq": ["$task", "$$task"]
                                    },  # Must have the same task
                                ]
                            }
                        }
                    },
                    {"$sort": {"date": -1}},  # Get the most recent (closest) document
                    {"$limit": 1},  # Only the immediate previous document
                ],
                "as": "before_first",
            }
        },
        {
            "$project": {
                "before_first": {
                    "$ifNull": [{"$arrayElemAt": ["$before_first", 0]}, ""]
                },
                "last_doc": "$last_doc",  # Include the last_doc from the matched period
            }
        },
    ]

    aux_results = list(switches.aggregate(pipeline_before_after))

    # Safety check: if aux_results is empty, return early with no data
    if not aux_results:
        return [{"ws": "No Data", "total": ""}]

    bfirst = aux_results[0]["before_first"]
    start_delta = 0  # seconds of the boundary switch that fall BEFORE the period

    if bfirst:
        # Stored dates are treated as naive UTC; convert before comparing.
        bfdate = bfirst["date"].replace(tzinfo=utctz)
        time_since_bfirst = round((start - bfdate.astimezone(timezone)).total_seconds())

        # Only apply start_delta if the before_first switch actually crosses into the period
        # If time_since_bfirst > bfirst["delta"], the switch ended before the period started
        if time_since_bfirst <= bfirst["delta"]:
            start_delta = time_since_bfirst

    ldoc = aux_results[0]["last_doc"]
    lastdate = ldoc["date"].replace(tzinfo=utctz)
    # Seconds between the last switch's start and the period end.
    end_delta = round((end - lastdate.astimezone(timezone)).total_seconds())

    rows = []
    active_vs_idle = {"Active": 0, "Idle": 0}

    # Clip boundary overlap: trim pre-period time off the boundary switch's
    # workspace, and post-period time off the last switch's workspace (only
    # when the period has already ended).
    for result in results:
        if bfirst:
            if result["_id"] == bfirst["workspace"]:
                # Safety: ensure start_delta doesn't exceed total
                adjustment = min(start_delta, result["total"])
                result["total"] -= adjustment

        if end < now():
            if result["_id"] == ldoc["workspace"]:
                # Safety: ensure we don't subtract more than the total
                adjustment = ldoc["delta"] - end_delta
                safe_adjustment = min(adjustment, result["total"])
                result["total"] -= safe_adjustment

    for result in results:
        if result["total"] > 0:
            rows.append(
                {"ws": result["_id"], "total": convert_seconds(result["total"])}
            )
            if result["_id"] in ["Think", "Plan", "Work"]:
                active_vs_idle["Active"] += result["total"]
            if result["_id"] in ["Away", "Other"]:
                active_vs_idle["Idle"] += result["total"]

    # NOTE(review): order.index raises ValueError for any workspace name not
    # in this list — assumes only these five workspaces exist; confirm.
    order = ["Plan", "Think", "Work", "Other", "Away", "Active", "Idle"]

    rows = sorted(rows, key=lambda x: order.index(x["ws"]))

    for k, v in active_vs_idle.items():
        rows.append({"ws": k, "total": convert_seconds(v)})

    return rows
|
|
|
|
|
|
# print(
|
|
# get_period_totals(
|
|
# datetime.today().replace(hour=0, minute=0, second=0, tzinfo=timezone)
|
|
# - timedelta(days=1),
|
|
# datetime.today().replace(hour=23, minute=59, second=59, tzinfo=timezone)
|
|
# - timedelta(days=1),
|
|
# # "ffbe198e",
|
|
# )
|
|
# )
|
|
|
|
# print(
|
|
# get_period_totals(
|
|
# datetime.today().replace(hour=0, minute=0, second=0, tzinfo=timezone),
|
|
# datetime.today().replace(hour=23, minute=59, second=59, tzinfo=timezone),
|
|
# "5fc751ec",
|
|
# )
|
|
# )
|