Files
deskmeter/dmapp/dmweb/get_period_times.py
2025-12-19 21:08:39 -03:00

551 lines
16 KiB
Python

from collections import Counter, defaultdict
from datetime import datetime, timedelta
from pathlib import Path
from pymongo import MongoClient
from zoneinfo import ZoneInfo
# All user-facing datetimes use this zone; Mongo stores naive UTC dates.
timezone = ZoneInfo("America/Argentina/Buenos_Aires")
utctz = ZoneInfo("UTC")
# Default MongoDB connection (localhost:27017), database "deskmeter".
client = MongoClient()
db = client.deskmeter
switches = db.switch  # workspace-switch events: {date, workspace, task, delta}
tasks = db.task  # current task definitions: {task_id, path}
task_history = db.task_history  # on-demand cache of historical task paths
# Plain-text task outline; the line with a trailing "*" appears to mark the
# current task (see extract()) — confirm with the file's writer.
task_file = "/home/mariano/LETRAS/adm/task/main"
task_dir = Path(task_file).parent  # sibling task files scanned as a fallback
def parse_task_line(line):
    """Split a task-file line into (task_name, task_id).

    Lines look like "Name | <id> ...".  Returns (None, None) for blank
    lines, and (name, None) when there is no pipe or nothing follows it.
    """
    stripped = line.strip()
    if not stripped:
        return None, None
    pieces = stripped.split("|")
    name = pieces[0].strip()
    if len(pieces) == 1:
        return name, None
    # Only the segment between the first and second pipe can hold the id.
    tokens = pieces[1].split()
    if not tokens:
        return name, None
    return name, tokens[0]
def load_task_from_files(task_id):
    """Search task directory files for a task ID and load it into task_history.

    Walks every regular file in ``task_dir``, reconstructing the task
    hierarchy from indentation (4 spaces per level) to build a full
    "a/b/c" path.  On a match, upserts {task_id, path, source_file} into
    the ``task_history`` cache and returns the path; returns None when
    the id is not found in any file.
    """
    for task_filepath in task_dir.glob("*"):
        if not task_filepath.is_file():
            continue
        current_path = []
        try:
            with open(task_filepath, "r") as f:
                for line in f:
                    if not line.strip():
                        continue
                    # Indentation encodes hierarchy: 4 spaces per level.
                    indent = len(line) - len(line.lstrip())
                    level = indent // 4
                    task_name, found_id = parse_task_line(line)
                    if task_name is None:
                        continue
                    # Drop entries deeper than this line's level, then descend.
                    current_path = current_path[:level]
                    current_path.append(task_name)
                    full_path = "/".join(current_path)
                    if found_id == task_id:
                        # Found it! Cache in task_history for future lookups.
                        task_history.update_one(
                            {"task_id": task_id},
                            {"$set": {
                                "path": full_path,
                                "task_id": task_id,
                                "source_file": task_filepath.name,
                            }},
                            upsert=True,
                        )
                        return full_path
        except (OSError, UnicodeDecodeError):
            # Skip files that can't be read or decoded.  (Was a bare
            # ``except:``, which also swallowed KeyboardInterrupt/SystemExit.)
            continue
    return None
def get_task_path(task_id):
    """
    Resolve a task id to its path.

    Lookup order: live ``tasks`` collection, then the ``task_history``
    cache, then an on-demand scan of the task files (which also populates
    the cache).  Falls back to the raw id; returns None for a falsy id.
    """
    if not task_id:
        return None
    # Current tasks first, then the history cache.
    for collection in (tasks, task_history):
        doc = collection.find_one({"task_id": task_id})
        if doc and "path" in doc:
            return doc["path"]
    # Not cached anywhere: scan the on-disk files (populates task_history),
    # and fall back to the id itself when nothing turns up.
    return load_task_from_files(task_id) or task_id
def get_current_task_info():
    """Return (task_id, task_path) for the current task, or (None, None).

    Reads the "current" document from the state collection; the path is
    None when the id has no matching document in the tasks collection.
    """
    state_doc = db.state.find_one({"_id": "current"})
    if not state_doc or "task" not in state_doc:
        return None, None
    task_id = state_doc["task"]
    task_doc = tasks.find_one({"task_id": task_id})
    path = task_doc["path"] if task_doc and "path" in task_doc else None
    return task_id, path
def get_task_time_seconds(start, end, task_id, workspaces=None):
    """Sum switch deltas (seconds) for one task within [start, end].

    ``workspaces`` defaults to the active set: Plan, Think, Work.
    """
    active = workspaces if workspaces is not None else ["Plan", "Think", "Work"]
    grouped = switches.aggregate([
        {"$match": {
            "date": {"$gte": start, "$lte": end},
            "task": task_id,
            "workspace": {"$in": active},
        }},
        {"$group": {"_id": None, "total_seconds": {"$sum": "$delta"}}},
    ])
    # The group stage yields at most one row; no rows means no time logged.
    for row in grouped:
        return row["total_seconds"]
    return 0
def task_or_none(task=None):
    """Normalize a task selector: fall back to the current task from the
    task file when empty, and map the special value "all" to None."""
    selected = task or read_and_extract(task_file)
    return None if selected == "all" else selected
def now():
    """Current time as a timezone-aware datetime in the local zone."""
    return datetime.now(tz=timezone)
def convert_seconds(seconds, use_days=False):
    """Format a duration in seconds as "HH:MM:SS" (hours may exceed 24),
    or as "D days, HH:MM:SS" when ``use_days`` is true."""
    minutes, secs = divmod(seconds, 60)
    total_hours, minutes = divmod(minutes, 60)
    days, day_hours = divmod(total_hours, 24)
    if use_days:
        return f"{days} days, {day_hours:02d}:{minutes:02d}:{secs:02d}"
    return f"{total_hours:02d}:{minutes:02d}:{secs:02d}"
def extract(line):
    """From a line marked with a trailing "*", return the 8 characters
    immediately after the first "|"; otherwise None."""
    if not line.rstrip().endswith("*"):
        return None
    pipe_at = line.find("|")
    # Need a pipe plus at least 8 characters after it.
    if pipe_at == -1 or len(line) <= pipe_at + 8:
        return None
    return line[pipe_at + 1 : pipe_at + 9]
def read_and_extract(file_path):
    """Return the first id extracted from the file's lines, or None."""
    with open(file_path, "r") as handle:
        for line in handle:
            task_id = extract(line)
            if task_id:
                return task_id
    return None
def get_work_period_totals(start, end):
    """Aggregate active time per task in [start, end].

    Only "active" workspaces (Plan/Think/Work) count.  Returns
    [{"ws": task_path, "total": "HH:MM:SS"}, ...] sorted by path.
    """
    per_task = switches.aggregate([
        {"$match": {
            "date": {"$gte": start, "$lte": end},
            "workspace": {"$in": ["Plan", "Think", "Work"]},
            "task": {"$exists": True, "$ne": None},
        }},
        {"$group": {"_id": "$task", "total_seconds": {"$sum": "$delta"}}},
    ])
    rows = [
        {
            # Resolve the id to a readable path (history fallback inside).
            "ws": get_task_path(doc["_id"]),
            "total": convert_seconds(doc["total_seconds"]),
        }
        for doc in per_task
        if doc["total_seconds"] > 0
    ]
    # Sort by path for consistent presentation.
    return sorted(rows, key=lambda row: row["ws"])
def get_task_blocks_calendar(start, end, task=None, min_block_seconds=300):
    """
    Get task blocks for calendar-style visualization.

    Groups consecutive switches to the same task into blocks, tracking
    active/idle time.  ``task`` may be a comma-separated list of task ids;
    blocks shorter than ``min_block_seconds`` are dropped.

    NOTE(review): a new block starts only on a task change, never on a time
    gap — two same-task switches hours apart merge into one long block.
    Confirm that is the intended calendar semantics.

    Returns list of blocks:
    [{
        'task_id': str,
        'task_path': str,
        'start': datetime,
        'end': datetime,
        'duration': int (total seconds),
        'active_seconds': int (Plan/Think/Work time),
        'idle_seconds': int (Other/Away time),
        'active_ratio': float (0.0 to 1.0)
    }, ...]
    """
    task_query = {"$in": task.split(",")} if task else {}
    match_query = {"date": {"$gte": start, "$lte": end}}
    if task_query:
        match_query["task"] = task_query
    # Get all switches in period, sorted by date
    raw_switches = list(switches.find(match_query).sort("date", 1))
    if not raw_switches:
        return []
    blocks = []
    current_block = None
    for switch in raw_switches:
        ws = switch["workspace"]
        task_id = switch.get("task")
        # Stored dates are naive UTC; localize for display.
        switch_start = switch["date"].replace(tzinfo=utctz).astimezone(timezone)
        switch_duration = switch["delta"]
        switch_end = switch_start + timedelta(seconds=switch_duration)
        is_active = ws in ["Plan", "Think", "Work"]
        # Start new block if task changed
        if current_block is None or current_block["task_id"] != task_id:
            if current_block is not None:
                blocks.append(current_block)
            # Get task path with history fallback
            task_path = get_task_path(task_id) or "No Task"
            current_block = {
                "task_id": task_id,
                "task_path": task_path,
                "start": switch_start,
                "end": switch_end,
                "duration": switch_duration,
                "active_seconds": switch_duration if is_active else 0,
                "idle_seconds": 0 if is_active else switch_duration
            }
        else:
            # Extend current block with this switch's span and time buckets.
            current_block["end"] = switch_end
            current_block["duration"] += switch_duration
            if is_active:
                current_block["active_seconds"] += switch_duration
            else:
                current_block["idle_seconds"] += switch_duration
    # Add final block
    if current_block is not None:
        blocks.append(current_block)
    # Filter out very short blocks and calculate active ratio
    filtered_blocks = []
    for block in blocks:
        if block["duration"] >= min_block_seconds:
            block["active_ratio"] = block["active_seconds"] / block["duration"] if block["duration"] > 0 else 0
            filtered_blocks.append(block)
    return filtered_blocks
def get_raw_switches(start, end, task=None):
    """
    Get all raw switch documents in the period, oldest first.

    ``task`` may be a comma-separated list of task ids to restrict to.
    Returns list of switches:
    [{
        'workspace': str,
        'task_id': str,
        'task_path': str,
        'date': datetime,
        'delta': int (seconds)
    }, ...]
    """
    query = {"date": {"$gte": start, "$lte": end}}
    if task:
        query["task"] = {"$in": task.split(",")}
    rows = []
    for doc in switches.find(query).sort("date", 1):
        task_id = doc.get("task")
        rows.append({
            "workspace": doc["workspace"],
            "task_id": task_id,
            # Resolve id to path (history fallback inside get_task_path).
            "task_path": get_task_path(task_id) or "No Task",
            # Stored dates are naive UTC; localize for display.
            "date": doc["date"].replace(tzinfo=utctz).astimezone(timezone),
            "delta": doc["delta"],
        })
    return rows
def get_period_totals(start, end, task=None):
    """Aggregate time per workspace in [start, end], plus Active/Idle totals.

    ``task`` may be a comma-separated list of task ids to restrict to.
    Returns [{"ws": label, "total": "HH:MM:SS"}, ...] with the workspace
    rows ordered Plan/Think/Work/Other/Away followed by Active and Idle
    summary rows, or [{"ws": "No Data", "total": ""}] when nothing matched.

    Boundary handling: the switch immediately preceding the period is
    pulled in via $lookup so its overlap counts, then its pre-period
    portion (start_delta) is subtracted; for already-finished periods the
    last switch's tail beyond ``end`` (via end_delta) is subtracted too.
    """
    task_query = {"$in": task.split(",")} if task else {}
    match_query = {"date": {"$gte": start, "$lte": end}}
    if task_query:
        match_query["task"] = task_query
    # Main pipeline: per-workspace delta totals, with the one switch just
    # before the period prepended to the in-range documents.
    pipeline = [
        {"$match": match_query},
        {"$sort": {"date": 1}},
        {
            "$group": {
                "_id": None,
                "documents_in_range": {"$push": "$$ROOT"},
                "first_doc": {"$first": "$$ROOT"},
                "last_doc": {"$last": "$$ROOT"},
            }
        },
        {
            "$lookup": {
                "from": "switch",
                "let": {"first_date": "$first_doc.date", "task": "$first_doc.task"},
                "pipeline": [
                    {
                        "$match": {
                            "$expr": {
                                "$and": [
                                    {
                                        "$lt": ["$date", "$$first_date"]
                                    },  # Only before the first date
                                    {
                                        "$eq": ["$task", "$$task"]
                                    },  # Must have the same task
                                ]
                            }
                        }
                    },
                    {"$sort": {"date": -1}},  # Get the most recent (closest) document
                    {"$limit": 1},  # Only the immediate previous document
                ],
                "as": "before_first",
            }
        },
        {
            "$project": {
                "documents": {
                    "$concatArrays": [
                        {"$ifNull": ["$before_first", []]},  # Add only if found
                        "$documents_in_range",
                    ]
                }
            }
        },
        {"$unwind": "$documents"},
        {"$replaceRoot": {"newRoot": "$documents"}},
        {
            "$group": {
                "_id": "$workspace",
                "total": {"$sum": "$delta"},
            }
        },
    ]
    results = list(switches.aggregate(pipeline))
    if not results:
        return [{"ws": "No Data", "total": ""}]
    # Second pipeline: just the boundary documents — the switch immediately
    # before the period (before_first) and the last in-range switch.
    pipeline_before_after = [
        # Match documents within the date range
        {"$match": match_query},
        {"$sort": {"date": 1}},
        {
            "$group": {
                "_id": None,
                "first_doc": {"$first": "$$ROOT"},
                "last_doc": {"$last": "$$ROOT"},
            }
        },
        # Lookup to get one document before the first document in the range
        {
            "$lookup": {
                "from": "switch",
                "let": {"first_date": "$first_doc.date", "task": "$first_doc.task"},
                "pipeline": [
                    {
                        "$match": {
                            "$expr": {
                                "$and": [
                                    {
                                        "$lt": ["$date", "$$first_date"]
                                    },  # Only before the first date
                                    {
                                        "$eq": ["$task", "$$task"]
                                    },  # Must have the same task
                                ]
                            }
                        }
                    },
                    {"$sort": {"date": -1}},  # Get the most recent (closest) document
                    {"$limit": 1},  # Only the immediate previous document
                ],
                "as": "before_first",
            }
        },
        {
            "$project": {
                "before_first": {
                    "$ifNull": [{"$arrayElemAt": ["$before_first", 0]}, ""]
                },
                "last_doc": "$last_doc",  # Include the last_doc from the matched period
            }
        },
    ]
    aux_results = list(switches.aggregate(pipeline_before_after))
    # Safety check: if aux_results is empty, return early with no data
    if not aux_results:
        return [{"ws": "No Data", "total": ""}]
    bfirst = aux_results[0]["before_first"]
    start_delta = 0
    if bfirst:
        # Stored dates are naive UTC; localize before comparing to ``start``.
        bfdate = bfirst["date"].replace(tzinfo=utctz)
        time_since_bfirst = round((start - bfdate.astimezone(timezone)).total_seconds())
        # Only apply start_delta if the before_first switch actually crosses into the period
        # If time_since_bfirst > bfirst["delta"], the switch ended before the period started
        if time_since_bfirst <= bfirst["delta"]:
            start_delta = time_since_bfirst
    ldoc = aux_results[0]["last_doc"]
    lastdate = ldoc["date"].replace(tzinfo=utctz)
    end_delta = round((end - lastdate.astimezone(timezone)).total_seconds())
    rows = []
    active_vs_idle = {"Active": 0, "Idle": 0}
    for result in results:
        # Trim the pre-period portion of the prepended before_first switch.
        if bfirst:
            if result["_id"] == bfirst["workspace"]:
                # Safety: ensure start_delta doesn't exceed total
                adjustment = min(start_delta, result["total"])
                result["total"] -= adjustment
        # For already-finished periods, trim the last switch's tail past ``end``.
        if end < now():
            if result["_id"] == ldoc["workspace"]:
                # Safety: ensure we don't subtract more than the total
                adjustment = ldoc["delta"] - end_delta
                safe_adjustment = min(adjustment, result["total"])
                result["total"] -= safe_adjustment
    for result in results:
        if result["total"] > 0:
            rows.append(
                {"ws": result["_id"], "total": convert_seconds(result["total"])}
            )
            if result["_id"] in ["Think", "Plan", "Work"]:
                active_vs_idle["Active"] += result["total"]
            if result["_id"] in ["Away", "Other"]:
                active_vs_idle["Idle"] += result["total"]
    # NOTE(review): list.index raises ValueError for any workspace name not
    # in ``order`` — confirm the workspace set is closed to these five.
    order = ["Plan", "Think", "Work", "Other", "Away", "Active", "Idle"]
    rows = sorted(rows, key=lambda x: order.index(x["ws"]))
    for k, v in active_vs_idle.items():
        rows.append({"ws": k, "total": convert_seconds(v)})
    return rows
# print(
# get_period_totals(
# datetime.today().replace(hour=0, minute=0, second=0, tzinfo=timezone)
# - timedelta(days=1),
# datetime.today().replace(hour=23, minute=59, second=59, tzinfo=timezone)
# - timedelta(days=1),
# # "ffbe198e",
# )
# )
# print(
# get_period_totals(
# datetime.today().replace(hour=0, minute=0, second=0, tzinfo=timezone),
# datetime.today().replace(hour=23, minute=59, second=59, tzinfo=timezone),
# "5fc751ec",
# )
# )