From 3f7667016925bb45ec3fd9352de00a45c2facd4c Mon Sep 17 00:00:00 2001 From: buenosairesam Date: Fri, 3 Apr 2026 09:28:03 -0300 Subject: [PATCH] ffmpeg fix --- cht/scrub/proxy.py | 4 +- cht/stream/ffmpeg.py | 24 +++++------- cht/stream/manager.py | 20 +++++++++- cht/telemetry.py | 88 +++++++++++++++++++++++++++++++++++++++++++ cht/window.py | 21 +++++++++-- 5 files changed, 138 insertions(+), 19 deletions(-) create mode 100644 cht/telemetry.py diff --git a/cht/scrub/proxy.py b/cht/scrub/proxy.py index aa2a356..341b655 100644 --- a/cht/scrub/proxy.py +++ b/cht/scrub/proxy.py @@ -15,7 +15,9 @@ import ffmpeg as ffmpeg_lib log = logging.getLogger(__name__) -PROXY_DIR = Path("/tmp/cht_proxy") +from cht.config import DATA_DIR + +PROXY_DIR = DATA_DIR / "proxies" PROXY_HEIGHT = 360 # pixels — low enough for speed, high enough to see content diff --git a/cht/stream/ffmpeg.py b/cht/stream/ffmpeg.py index 7da0394..c9e129d 100644 --- a/cht/stream/ffmpeg.py +++ b/cht/stream/ffmpeg.py @@ -72,27 +72,23 @@ def extract_scene_frames(input_path, output_dir, scene_threshold=0.10, meaningfully vs the previous frame. No periodic fallback so static content produces no spurious frames. - start_time/duration: applied via the select filter expression (NOT as -ss/-t - input options, which break h264 scene detection on MKV). + Uses -ss input seeking for O(1) startup regardless of file size. + pts_time in showinfo output is relative to the seek point. Returns (stdout, stderr) as decoded strings for timestamp parsing. """ scene_expr = f"gt(scene,{scene_threshold})" - # Add time range filter if specified (incremental processing) - time_conditions = [] - if start_time > 0: - time_conditions.append(f"gte(t,{start_time})") + # With -ss input seeking, t starts at 0 from the seek point. + # Only need end boundary (duration), start is handled by -ss. 
if duration is not None: - time_conditions.append(f"lte(t,{start_time + duration})") + scene_expr = f"({scene_expr})*lte(t,{duration})" - if time_conditions: - time_filter = "*".join(time_conditions) - select_expr = f"({scene_expr})*{time_filter}" - else: - select_expr = scene_expr + input_kwargs = {} + if start_time > 0: + input_kwargs["ss"] = start_time - stream = ffmpeg.input(str(input_path)) - stream = stream.filter("select", select_expr).filter("showinfo") + stream = ffmpeg.input(str(input_path), **input_kwargs) + stream = stream.filter("select", scene_expr).filter("showinfo") output = ( ffmpeg.output( diff --git a/cht/stream/manager.py b/cht/stream/manager.py index adac966..5487cda 100644 --- a/cht/stream/manager.py +++ b/cht/stream/manager.py @@ -68,6 +68,7 @@ class StreamManager: self._segment_offsets = {0: 0.0} # segment_index → global_offset self.scene_threshold = SCENE_THRESHOLD self.readonly = False # True when loaded from existing session + self.telemetry = None # set by window after start log.info("Session: %s", session_id) @classmethod @@ -306,6 +307,8 @@ class StreamManager: def _detect_scenes(self, start_time, end_time): """Run ffmpeg scene detection on a time range. 
Returns list of new frame entries.""" + import time as _time + t0 = _time.monotonic() duration = end_time - start_time start_number = self._next_frame_number() @@ -335,7 +338,8 @@ class StreamManager: continue pts_match = re.search(r"pts_time:\s*([\d.]+)", line) if pts_match: - pts_time = float(pts_match.group(1)) + # pts_time is relative to -ss seek point, add start_time for local offset + pts_time = float(pts_match.group(1)) + start_time frame_id = f"F{frame_num:04d}" frame_path = self.frames_dir / f"{frame_id}.jpg" if frame_path.exists(): @@ -350,6 +354,20 @@ class StreamManager: frame_num += 1 index_path.write_text(json.dumps(index, indent=2)) + + elapsed_ms = (_time.monotonic() - t0) * 1000 + tel = getattr(self, "telemetry", None) + if tel: + tel.metric("scene_detection", { + "start": start_time, "end": end_time, + "duration": duration, + "frames_found": len(new_frames), + "total_frames": len(index), + "threshold": self.scene_threshold, + "elapsed_ms": round(elapsed_ms), + "file_duration": self._estimate_safe_duration() or 0, + }) + return new_frames def capture_now(self, on_new_frames=None): diff --git a/cht/telemetry.py b/cht/telemetry.py new file mode 100644 index 0000000..fc4ecd1 --- /dev/null +++ b/cht/telemetry.py @@ -0,0 +1,88 @@ +"""Session telemetry — lightweight event/metric log for post-run analysis. + +Writes a JSON-lines file (one event per line) to the session directory. +Each event has a timestamp, type, and payload. Designed to be grep-friendly. 
+
+Usage:
+    tel = Telemetry(session_dir)
+    tel.event("scene_threshold_changed", {"from": 0.10, "to": 0.15})
+    tel.metric("scene_detection", {"start": 120.0, "end": 135.0, "frames_found": 3, "elapsed_ms": 1200})
+    tel.close()
+"""
+
+import json
+import logging
+import time
+from pathlib import Path
+
+log = logging.getLogger(__name__)
+
+
+class Telemetry:
+    """Append-only JSONL event/metric log that also mirrors logging output to session.log."""
+
+    def __init__(self, session_dir: Path):
+        self._path = session_dir / "telemetry.jsonl"
+        self._start = time.monotonic()  # reference point for the relative "t" field
+        self._wall_start = time.time()  # NOTE(review): never read in this file
+        self._file = None  # stays None if the open below fails; _write() is then a no-op
+        self._log_handler = None
+        try:
+            session_dir.mkdir(parents=True, exist_ok=True)
+            self._file = open(self._path, "a", encoding="utf-8")
+        except Exception as e:
+            log.warning("Telemetry init failed: %s", e)
+
+        # Also save full logs to session directory
+        try:
+            log_path = session_dir / "session.log"
+            handler = logging.FileHandler(str(log_path), mode="a")
+            handler.setLevel(logging.DEBUG)
+            fmt = "%(asctime)s %(levelname)-7s %(name)s: %(message)s"
+            handler.setFormatter(logging.Formatter(fmt, datefmt="%H:%M:%S"))
+            logging.getLogger().addHandler(handler)  # attached to the root logger
+            self._log_handler = handler
+        except Exception as e:
+            log.warning("Log file handler failed: %s", e)
+
+        self.event("session_start", {"session_dir": str(session_dir)})
+
+    def event(self, name: str, data: dict | None = None) -> None:
+        """Log a discrete event (setting change, mode switch, user action)."""
+        self._write("event", name, data or {})
+
+    def metric(self, name: str, data: dict) -> None:
+        """Log a measurement (processing time, frame count, etc)."""
+        self._write("metric", name, data)
+
+    def _write(self, kind: str, name: str, data: dict) -> None:
+        if not self._file:
+            return
+        entry = {
+            "t": round(time.monotonic() - self._start, 3),
+            "wall": round(time.time(), 3),
+            "kind": kind,
+            "name": name,
+            **data,  # NOTE: payload keys override the fields above on collision
+        }
+        try:
+            self._file.write(json.dumps(entry) + "\n")
+            self._file.flush()
+        except Exception as e:  # best-effort: report the failure, never raise into the caller
+            log.warning("Telemetry write failed (%s %s): %s", kind, name, e)
+
+    def close(self) -> None:
+        self.event("session_end", {})
+        if self._log_handler:
logging.getLogger().removeHandler(self._log_handler) + try: + self._log_handler.close() + except Exception: + pass + self._log_handler = None + if self._file: + try: + self._file.close() + except Exception: + pass + self._file = None diff --git a/cht/window.py b/cht/window.py index 74b72ef..998464c 100644 --- a/cht/window.py +++ b/cht/window.py @@ -28,6 +28,7 @@ from cht.ui.session_dialog import SessionDialog from cht.session import load_frame_index, load_segment_manifest, rebuild_manifest, global_time_to_segment from cht.scrub.manager import ProxyManager from cht.agent.runner import AgentRunner, check_claude_cli +from cht.telemetry import Telemetry log = logging.getLogger(__name__) @@ -42,6 +43,7 @@ class ChtWindow(Adw.ApplicationWindow): self._manifest = [] self._pending_scrub_global = 0.0 self._scrub_pending = False # throttle flag for scrub updates + self._telemetry = None # Core components self._timeline = Timeline() @@ -158,11 +160,17 @@ class ChtWindow(Adw.ApplicationWindow): def _on_scene_threshold(self, val): if self._lifecycle.stream_mgr: + old = self._lifecycle.stream_mgr.scene_threshold self._lifecycle.stream_mgr.scene_threshold = val + if self._telemetry: + self._telemetry.event("scene_threshold_changed", {"from": old, "to": val}) def _on_min_chunk_changed(self, panel, val): import cht.config + old = cht.config.TRANSCRIBE_MIN_CHUNK_S cht.config.TRANSCRIBE_MIN_CHUNK_S = val + if self._telemetry: + self._telemetry.event("min_chunk_changed", {"from": old, "to": val}) def _on_lines_per_group_changed(self, panel, val): import cht.config @@ -268,6 +276,8 @@ class ChtWindow(Adw.ApplicationWindow): self._connect_btn.add_css_class("destructive-action") mgr = self._lifecycle.start(session_id=session_id) + self._telemetry = Telemetry(mgr.session_dir) + mgr.telemetry = self._telemetry self._monitor.set_recording(mgr.recording_path) self._monitor.set_live_source(mgr.relay_url) @@ -285,12 +295,13 @@ class ChtWindow(Adw.ApplicationWindow): def _on_live_toggle(self): 
if self._timeline.state.live: - # Live → Scrub: don't load growing MKV, let user pick a segment + if self._telemetry: + self._telemetry.event("mode_switch", {"from": "live", "to": "scrub"}) self._timeline.toggle_live(live_player_pos=self._monitor.get_live_position()) - # Refresh manifest so scrub bar shows completed segments self._update_scrub_bar_manifest() else: - # Scrub → Live: restore recording path, refresh GUI, resume + if self._telemetry: + self._telemetry.event("mode_switch", {"from": "scrub", "to": "live"}) mgr = self._lifecycle.stream_mgr if mgr: self._monitor.set_recording(mgr.recording_path) @@ -442,6 +453,10 @@ class ChtWindow(Adw.ApplicationWindow): mgr = self._lifecycle.stream_mgr last_session_id = mgr.session_id if mgr and not mgr.readonly else None + if self._telemetry: + self._telemetry.close() + self._telemetry = None + self._lifecycle.stop() if self._proxy_mgr: