"""StreamManager: orchestrates ffmpeg for recording and scene detection.

Architecture:

    sender → TCP:4444 → ffmpeg (writes recording.ts)
    recording.ts → mpv (plays via Timeline)
    recording.ts → ffmpeg scene detection (periodic, incremental)
"""
import json
|
|
import logging
|
|
import re
|
|
import time
|
|
from pathlib import Path
|
|
from threading import Thread
|
|
|
|
from cht.config import (
|
|
STREAM_HOST,
|
|
STREAM_PORT,
|
|
RELAY_PORT,
|
|
SCENE_RELAY_PORT,
|
|
SCENE_THRESHOLD,
|
|
SESSIONS_DIR,
|
|
AUDIO_EXTRACT_INTERVAL,
|
|
AUDIO_SAFETY_MARGIN,
|
|
)
|
|
from cht.stream import ffmpeg as ff
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
def list_sessions():
    """Return (session_id, session_dir) tuples, newest session first."""
    if not SESSIONS_DIR.exists():
        return []
    candidates = sorted(SESSIONS_DIR.iterdir(), reverse=True)
    return [(entry.name, entry)
            for entry in candidates
            if entry.is_dir() and (entry / "frames").exists()]
|
def delete_sessions(session_ids):
    """Remove the on-disk directories for the given session IDs."""
    import shutil
    for sid in session_ids:
        target = SESSIONS_DIR / sid
        if target.exists() and target.is_dir():
            shutil.rmtree(target)
            log.info("Deleted session: %s", sid)
|
class StreamManager:
|
|
def __init__(self, session_id=None):
    """Create a session manager.

    Args:
        session_id: name of an existing session directory; defaults to a
            timestamp ("%Y%m%d_%H%M%S") for a fresh session.
    """
    if session_id is None:
        session_id = time.strftime("%Y%m%d_%H%M%S")
    self.session_id = session_id
    self.session_dir = SESSIONS_DIR / session_id
    self.stream_dir = self.session_dir / "stream"
    self.frames_dir = self.session_dir / "frames"
    self.transcript_dir = self.session_dir / "transcript"
    self.audio_dir = self.session_dir / "audio"
    self.agent_dir = self.session_dir / "agent"

    self._procs = {}  # name → ffmpeg child process
    self._threads = {}  # name → worker Thread
    self._stop_flags = set()  # "stop" added by stop_all() to end worker loops
    self._segment = 0  # index of the current recording segment
    self._segment_offsets = {0: 0.0}  # segment_index → global_offset
    self.scene_threshold = SCENE_THRESHOLD
    self.readonly = False  # True when loaded from existing session
    self.telemetry = None  # set by window after start
    log.info("Session: %s", session_id)
|
@classmethod
def from_existing(cls, session_id):
    """Load an existing session without starting any ffmpeg processes.

    Args:
        session_id: name of a directory under SESSIONS_DIR.

    Returns:
        A readonly StreamManager with offsets rebuilt from disk.

    Raises:
        FileNotFoundError: if no directory exists for session_id.
    """
    from cht.session import rebuild_manifest
    mgr = cls(session_id=session_id)
    if not mgr.session_dir.exists():
        raise FileNotFoundError(f"Session not found: {session_id}")
    mgr.readonly = True
    # Point _segment to last recording segment
    segments = mgr.recording_segments
    if segments:
        mgr._segment = len(segments) - 1
    mgr._rebuild_offsets()
    rebuild_manifest(mgr.session_dir)
    log.info("Loaded existing session: %s (%d segments, %d frames)",
             session_id, len(segments), mgr.frame_count)
    return mgr
|
@property
def current_global_offset(self) -> float:
    """Global time offset (seconds) of the current recording segment."""
    offsets = self._segment_offsets
    segment = self._segment
    return offsets[segment] if segment in offsets else 0.0
|
def _rebuild_offsets(self):
    """Recompute every segment's global offset by probing files on disk."""
    from cht.session import probe_duration
    self._segment_offsets = {}
    running = 0.0
    for idx, seg in enumerate(self.recording_segments):
        self._segment_offsets[idx] = running
        running += probe_duration(seg)
|
def _advance_segment_offset(self, completed_segment_path):
    """Record the global offset of the next segment once one finishes."""
    from cht.session import probe_duration
    seg_len = probe_duration(completed_segment_path)
    base = self._segment_offsets.get(self._segment, 0.0)
    self._segment_offsets[self._segment + 1] = base + seg_len
    log.info("Segment %d completed (%.1fs), next offset: %.1fs",
             self._segment, seg_len, base + seg_len)
|
@property
def frame_count(self):
    """Number of entries in frames/index.json (0 if missing or corrupt)."""
    path = self.frames_dir / "index.json"
    if not path.exists():
        return 0
    try:
        entries = json.loads(path.read_text())
    except Exception:
        return 0
    return len(entries)
|
def total_duration(self):
    """Probe total duration (seconds) across all segments (for completed sessions).

    For each segment, tries ffprobe's format-level duration, then stream
    durations (fragmented/empty_moov MP4 reports 0 at format level), and
    finally estimates from file size assuming ~500 kbit/s.

    Returns:
        Sum of per-segment durations; 0.0 when there are no segments.
    """
    # Bug fix: the ffmpeg wrapper was imported inside the loop on every
    # iteration; import once here and degrade gracefully if unavailable.
    try:
        import ffmpeg as ffmpeg_lib
    except Exception:
        ffmpeg_lib = None

    total = 0.0
    for seg in self.recording_segments:
        dur = 0.0
        if ffmpeg_lib is not None:
            try:
                info = ffmpeg_lib.probe(str(seg))
                dur = float(info.get("format", {}).get("duration", 0))
                if dur <= 0:
                    # Fragmented MP4: fall back to per-stream durations.
                    for s in info.get("streams", []):
                        sdur = float(s.get("duration", 0))
                        if sdur > 0:
                            dur = sdur
                            break
            except Exception:
                dur = 0.0
        if dur <= 0:
            # Size-based estimate: ~500 kbps → 62.5 KB/s
            dur = seg.stat().st_size / 65_000
        total += dur
    return total
|
def setup_dirs(self):
    """Create every session subdirectory (safe to call repeatedly)."""
    wanted = (self.stream_dir, self.frames_dir, self.transcript_dir,
              self.audio_dir, self.agent_dir)
    for directory in wanted:
        directory.mkdir(parents=True, exist_ok=True)
|
@property
def stream_url(self):
    """TCP listen URL the sender connects to."""
    url = f"tcp://{STREAM_HOST}:{STREAM_PORT}?listen"
    return url
|
@property
def relay_url(self):
    """Local UDP URL the recorder relays the stream to (for playback)."""
    url = f"udp://127.0.0.1:{RELAY_PORT}"
    return url
|
@property
def scene_relay_url(self):
    """Local UDP URL reserved for the scene-detection relay."""
    url = f"udp://127.0.0.1:{SCENE_RELAY_PORT}"
    return url
|
@property
def recording_path(self):
    """Path of the recording segment currently being written."""
    filename = f"recording_{self._segment:03d}.mp4"
    return self.stream_dir / filename
|
@property
def recording_segments(self):
    """All recording segments on disk, sorted into segment order."""
    matches = self.stream_dir.glob("recording_*.mp4")
    return sorted(matches)
|
# -- Recording --
|
|
|
|
def start_recorder(self):
    """Start ffmpeg to receive TCP stream, write to fMP4, and relay to UDP."""
    # Resume after any segments already on disk (resumed sessions).
    self._segment = len(self.recording_segments)
    self._rebuild_offsets()
    self._launch_recorder()
|
def restart_recorder(self):
    """Restart recorder into a new segment. Session stays alive."""
    old = self._procs.pop("recorder", None)
    if old:
        ff.stop_proc(old)
    # Order matters: recording_path must be read BEFORE _segment is
    # bumped — it names the just-completed segment, whose duration
    # determines the next segment's global offset.
    completed_path = self.recording_path
    self._advance_segment_offset(completed_path)
    self._segment += 1
    log.info("Restarting recorder → segment %d (offset %.1fs)",
             self._segment, self.current_global_offset)
    self._launch_recorder()
|
def recorder_alive(self):
    """Return True while the recorder process exists and has not exited."""
    proc = self._procs.get("recorder")
    if proc is None:
        return False
    return proc.poll() is None
|
def _launch_recorder(self):
    """Spawn the ffmpeg receive/record/relay process for the current segment."""
    graph = ff.receive_record_and_relay(
        self.stream_url, self.recording_path, self.relay_url,
    )
    proc = ff.run_async(graph, pipe_stderr=True)
    self._procs["recorder"] = proc
    log.info("Recorder: pid=%s → %s", proc.pid, self.recording_path)
    # Drain stderr so the child never blocks on a full pipe.
    self._start_stderr_reader("recorder", proc)
|
# -- Scene Detection --
|
|
|
|
def start_scene_detector(self, on_new_frames=None):
    """Periodically run scene detection on new portions of the recording.

    Spawns a daemon thread that polls the current segment once per second
    and runs incremental scene detection over the not-yet-processed time
    range. The loop exits when "stop" appears in self._stop_flags.

    Args:
        on_new_frames: callback(list of {id, timestamp, path}) for new frames
    """
    self._on_new_frames = on_new_frames

    def _detect():
        # Scan state: how far into the current segment we have processed,
        # which segment that refers to, and the threshold it was scanned at.
        processed_time = 0.0
        current_segment = None
        last_threshold = self.scene_threshold

        while "stop" not in self._stop_flags:
            time.sleep(1.0)

            # Threshold changed — reset to re-process recent content
            if self.scene_threshold != last_threshold:
                log.info("Threshold changed %.2f → %.2f, resetting",
                         last_threshold, self.scene_threshold)
                last_threshold = self.scene_threshold
                # Back up a bit to re-scan with new sensitivity
                processed_time = max(0.0, processed_time - 10)

            seg = self.recording_path
            if not seg.exists():
                continue

            # Recorder rolled over to a new segment: restart from zero.
            if seg != current_segment:
                current_segment = seg
                processed_time = 0.0
                log.info("Scene detector: switched to %s", seg.name)

            # Wait until the file has enough data to be worth probing.
            size = seg.stat().st_size
            if size < 100_000:
                continue

            safe_duration = self._estimate_safe_duration()
            if safe_duration is None or safe_duration <= 0:
                continue

            # Stay 1s behind the live edge; require ≥0.5s of new content.
            process_to = safe_duration - 1
            if process_to <= processed_time + 0.5:
                continue

            log.info("Scene detection: %.1fs → %.1fs", processed_time, process_to)
            new_frames = self._detect_scenes(
                start_time=processed_time,
                end_time=process_to,
            )

            if new_frames:
                log.info("Found %d new scene frames (total: %d)",
                         len(new_frames), self._next_frame_number() - 1)
                if self._on_new_frames:
                    self._on_new_frames(new_frames)

            processed_time = process_to

        log.info("Scene detector stopped")

    t = Thread(target=_detect, daemon=True, name="scene_detector")
    t.start()
    self._threads["scene_detector"] = t
|
def _estimate_safe_duration(self):
|
|
"""Estimate recording duration. Uses ffprobe, falls back to file size.
|
|
|
|
For fragmented MP4 (empty_moov), format-level duration is 0 so we
|
|
check stream duration from the last video stream instead.
|
|
"""
|
|
try:
|
|
import ffmpeg as ffmpeg_lib
|
|
info = ffmpeg_lib.probe(str(self.recording_path))
|
|
# Format duration works for non-fragmented; 0 for empty_moov fMP4
|
|
dur = float(info.get("format", {}).get("duration", 0))
|
|
if dur > 0:
|
|
return dur
|
|
# Fragmented MP4: check video stream duration
|
|
for stream in info.get("streams", []):
|
|
sdur = float(stream.get("duration", 0))
|
|
if sdur > 0:
|
|
return sdur
|
|
except Exception:
|
|
pass
|
|
|
|
# Fallback: rough estimate from file size (~500kbit/s typical for this stream)
|
|
try:
|
|
size = self.recording_path.stat().st_size
|
|
return size / 65_000 # ~500kbps → 62.5 KB/s
|
|
except Exception:
|
|
return None
|
|
|
|
def _next_frame_number(self):
|
|
"""Determine next frame number from the index (source of truth)."""
|
|
index_path = self.frames_dir / "index.json"
|
|
if index_path.exists():
|
|
index = json.loads(index_path.read_text())
|
|
return len(index) + 1
|
|
return 1
|
|
|
|
def _detect_scenes(self, start_time, end_time):
    """Run ffmpeg scene detection on a time range. Returns list of new frame entries.

    Frames are written to frames_dir as F####.jpg, numbered sequentially
    from the next index number; matching entries (with global timestamps)
    are appended to frames/index.json.

    Args:
        start_time: segment-local start of the range to scan (seconds).
        end_time: segment-local end of the range to scan (seconds).

    Returns:
        List of {"id", "timestamp", "path", "sent_to_agent"} dicts for
        frames actually written to disk; empty list on extraction failure.
    """
    import time as _time
    t0 = _time.monotonic()
    duration = end_time - start_time
    start_number = self._next_frame_number()

    try:
        _stdout, stderr = ff.extract_scene_frames(
            self.recording_path,
            self.frames_dir,
            scene_threshold=self.scene_threshold,
            start_number=start_number,
            start_time=start_time,
            duration=duration,
        )
    except Exception as e:
        log.error("Scene detection failed: %s", e)
        return []

    # Parse new frames from showinfo output — match each showinfo line
    # to the corresponding file ffmpeg wrote (sequential from start_number).
    # NOTE(review): assumes stderr is already decoded text — confirm
    # ff.extract_scene_frames returns str, not bytes.
    new_frames = []
    index_path = self.frames_dir / "index.json"
    index = json.loads(index_path.read_text()) if index_path.exists() else []

    # Convert segment-local pts to global session time.
    offset = self.current_global_offset
    frame_num = start_number
    for line in stderr.splitlines():
        if "showinfo" not in line:
            continue
        pts_match = re.search(r"pts_time:\s*([\d.]+)", line)
        if pts_match:
            pts_time = float(pts_match.group(1))
            frame_id = f"F{frame_num:04d}"
            frame_path = self.frames_dir / f"{frame_id}.jpg"
            # Only index frames ffmpeg actually wrote to disk.
            if frame_path.exists():
                entry = {
                    "id": frame_id,
                    "timestamp": pts_time + offset,
                    "path": str(frame_path),
                    "sent_to_agent": False,
                }
                index.append(entry)
                new_frames.append(entry)
            frame_num += 1

    index_path.write_text(json.dumps(index, indent=2))

    # Emit timing/volume metrics when a telemetry sink is attached.
    elapsed_ms = (_time.monotonic() - t0) * 1000
    tel = getattr(self, "telemetry", None)
    if tel:
        tel.metric("scene_detection", {
            "start": start_time, "end": end_time,
            "duration": duration,
            "frames_found": len(new_frames),
            "total_frames": len(index),
            "threshold": self.scene_threshold,
            "elapsed_ms": round(elapsed_ms),
            "file_duration": self._estimate_safe_duration() or 0,
        })

    return new_frames
|
def capture_now(self, on_new_frames=None):
    """Capture a single frame from the current recording position.

    Grabs the latest available frame (safe_duration - 1s) and adds it
    to the index. Runs in a thread to avoid blocking the UI.

    Args:
        on_new_frames: optional callback invoked with [entry] on success.
    """
    def _capture():
        safe_duration = self._estimate_safe_duration()
        if not safe_duration or safe_duration < 1:
            log.warning("capture_now: recording too short")
            return

        # Stay 1s behind the live edge, then map to global session time.
        local_timestamp = safe_duration - 1
        timestamp = local_timestamp + self.current_global_offset
        # NOTE(review): index.json is also read/written by the scene
        # detector thread — a concurrent write here could lose entries
        # or reuse a frame number; verify whether a lock is needed.
        index_path = self.frames_dir / "index.json"
        index = json.loads(index_path.read_text()) if index_path.exists() else []
        frame_num = len(index) + 1
        frame_id = f"F{frame_num:04d}"
        frame_path = self.frames_dir / f"{frame_id}.jpg"

        try:
            ff.extract_frame_at(self.recording_path, frame_path, local_timestamp)
        except Exception as e:
            log.error("capture_now failed: %s", e)
            return

        # ffmpeg can exit cleanly without producing output; check the file.
        if not frame_path.exists():
            log.warning("capture_now: frame not written")
            return

        entry = {
            "id": frame_id,
            "timestamp": timestamp,
            "path": str(frame_path),
            "sent_to_agent": False,
        }
        index.append(entry)
        index_path.write_text(json.dumps(index, indent=2))
        log.info("Manual capture: %s at %.1fs", frame_id, timestamp)

        if on_new_frames:
            on_new_frames([entry])

    Thread(target=_capture, daemon=True, name="capture_now").start()
|
# -- Audio Extraction --
|
|
|
|
def start_audio_extractor(self, on_new_audio=None):
    """Periodically extract audio from the growing recording as WAV chunks.

    Same incremental pattern as scene detector: polls recording, extracts
    new time range, calls back with (wav_path, start_time, duration).

    Args:
        on_new_audio: callback(wav_path, start_time, duration); also
            receives segment_path and local_start as keyword arguments.
    """
    self._on_new_audio = on_new_audio
    self.audio_dir.mkdir(parents=True, exist_ok=True)

    def _extract():
        # Per-segment scan state: time already extracted and chunk counter.
        processed_time = 0.0
        chunk_num = 0
        current_segment = None

        while "stop" not in self._stop_flags:
            time.sleep(AUDIO_EXTRACT_INTERVAL)

            seg = self.recording_path
            if not seg.exists():
                continue

            # Recorder rolled over to a new segment: restart numbering.
            if seg != current_segment:
                current_segment = seg
                processed_time = 0.0
                chunk_num = 0
                log.info("Audio extractor: switched to %s", seg.name)

            # Wait until the file has enough data to extract from.
            if seg.stat().st_size < 100_000:
                continue

            safe_duration = self._estimate_safe_duration()
            if safe_duration is None or safe_duration <= 0:
                continue

            # Stay behind the live edge; require ≥1s of new content.
            process_to = safe_duration - AUDIO_SAFETY_MARGIN
            if process_to <= processed_time + 1.0:
                continue

            chunk_duration = process_to - processed_time
            wav_path = self.audio_dir / f"chunk_{chunk_num:04d}.wav"

            try:
                ff.extract_audio_chunk(
                    seg, wav_path,
                    start_time=processed_time,
                    duration=chunk_duration,
                )
            except Exception as e:
                log.error("Audio extraction failed: %s", e)
                continue

            # Skip tiny/empty files (e.g. no audio track in the range).
            if wav_path.exists() and wav_path.stat().st_size > 100:
                global_start = processed_time + self.current_global_offset
                log.info("Audio chunk: %s (%.1fs → %.1fs, global %.1fs)",
                         wav_path.name, processed_time, process_to, global_start)
                if self._on_new_audio:
                    self._on_new_audio(
                        wav_path, global_start, chunk_duration,
                        segment_path=seg, local_start=processed_time,
                    )
                chunk_num += 1

            processed_time = process_to

        log.info("Audio extractor stopped")

    t = Thread(target=_extract, daemon=True, name="audio_extractor")
    t.start()
    self._threads["audio_extractor"] = t
|
# -- Lifecycle --
|
|
|
|
def stop_all(self):
    """Signal worker threads to exit and terminate all child processes."""
    log.info("Stopping all...")
    # Polling threads (scene detector, audio extractor) check this flag.
    self._stop_flags.add("stop")
    procs = self._procs
    for name in list(procs):
        log.info("Stopping %s", name)
        ff.stop_proc(procs[name])
    procs.clear()
|
def _start_stderr_reader(self, name, proc):
    """Drain a child process's stderr on a daemon thread, logging each line."""
    def _read():
        for raw in proc.stderr:
            text = raw.decode("utf-8", errors="replace").rstrip()
            if text:
                log.debug("[%s] %s", name, text)
        # Stream closed — the child has exited; record its return code.
        log.info("[%s] exited: %s", name, proc.poll())

    Thread(target=_read, daemon=True, name=f"{name}_stderr").start()