Compare commits

...

12 Commits

Author SHA1 Message Date
fdc34578a5 add readme 2026-05-07 13:08:50 -03:00
946234eb9e update docs 2026-05-06 11:51:43 -03:00
c8bb6c7581 remove boiler plate prompt 2026-05-06 09:06:23 -03:00
ea9dbf8772 proper tests 2026-04-10 18:29:58 -03:00
e906b0a963 restructure and test, pure python and rust transport both _work_ 2026-04-10 16:25:54 -03:00
9d3ff2c6ba wrap up before restructure 2026-04-10 15:40:56 -03:00
e9e1d14e6b normalize media pipeline at client boundary
- AudioParams.framing field: client declares "raw" or "adts"
- Client strips ADTS from audio before sending (strip_adts)
- Client does H.264 NAL inspection for keyframe detection (h264_is_keyframe)
- Server uses declared sample_rate/channels for ADTS synthesis instead of hardcoded 48kHz/stereo
- Server gates ADTS wrapping on framing field instead of per-packet sniffing

New backends only need to pipe output to demux_and_send() — server and Python unchanged.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-10 13:52:11 -03:00
e92ab933ce ctrl/sync.sh 2026-04-10 13:34:51 -03:00
27c0181d77 somewhat stable 2026-04-10 11:47:15 -03:00
e2ca18d120 improved livedelay 2026-04-10 02:56:04 -03:00
d83576a3ba good checkpoint 2026-04-10 02:24:09 -03:00
6b6bc64ab8 saving status before scene frame fix after rust change 2026-04-10 01:27:09 -03:00
51 changed files with 4142 additions and 394 deletions

2
.gitignore vendored
View File

@@ -7,3 +7,5 @@ __pycache__/
.pytest_cache/ .pytest_cache/
media/target/ media/target/
media/logs/ media/logs/
tests/fixtures/*.mp4
tests/fixtures/*.wav

11
README.md Normal file
View File

@@ -0,0 +1,11 @@
# Mitus
Meeting stream viewer with an embedded AI agent. Captures screen and audio from a Wayland source machine, streams it over TCP to a receiver with GPU-accelerated decode, and runs a Claude Code agent that watches the feed autonomously — transcribing audio, extracting frames on scene changes, and acting on user-defined rules. The agent panel shows a live log of what it observes and the actions it takes, while a thumbnail grid gives a visual timeline of the session. Primary use case: staying present in meetings without manually feeding context to Claude. It also provides summarization after the fact.
## Docs
```
cd docs && python3 -m http.server 8000
```
Then open <http://localhost:8000>.

View File

@@ -31,18 +31,6 @@ from cht.agent.base import (
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
SYSTEM_PROMPT = """You are an assistant integrated into CHT, a screen recording and analysis tool.
You help the user understand what happened during their recording session.
You have access to frame screenshots extracted from the recording. When frames are mentioned,
use the Read tool to view them. Frame timestamps are in seconds from the start of the recording.
You can use any available tools including WebFetch and WebSearch when the user asks you to
look something up. Use them freely — all tools are pre-authorized.
Your primary role is description and analysis, not code generation. Be concise and specific.
Focus on what's visible in the frames and what's in the transcript."""
MODELS = [ MODELS = [
"claude-sonnet-4-6", "claude-sonnet-4-6",
"claude-opus-4-6", "claude-opus-4-6",
@@ -144,7 +132,6 @@ class ClaudeSDKConnection:
options=ClaudeAgentOptions( options=ClaudeAgentOptions(
model=self._model, model=self._model,
cwd=cwd or ".", cwd=cwd or ".",
system_prompt=SYSTEM_PROMPT,
max_turns=self._max_turns, max_turns=self._max_turns,
permission_mode=self._permission_mode, permission_mode=self._permission_mode,
), ),

View File

@@ -31,10 +31,6 @@ from cht.agent.base import (
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
SYSTEM_PROMPT = """You are an assistant integrated into CHT, a screen recording and analysis tool.
You help the user understand what happened during their recording session.
Be concise and specific. Focus on what's visible in the provided frames."""
_PROVIDER_CONFIGS = { _PROVIDER_CONFIGS = {
"groq": ( "groq": (
"https://api.groq.com/openai/v1", "https://api.groq.com/openai/v1",
@@ -76,7 +72,7 @@ def _frame_to_base64(path) -> str | None:
def _messages_to_openai(messages: list[Message]) -> list[dict]: def _messages_to_openai(messages: list[Message]) -> list[dict]:
"""Convert structured messages to OpenAI chat format.""" """Convert structured messages to OpenAI chat format."""
result = [{"role": "system", "content": SYSTEM_PROMPT}] result: list[dict] = []
for msg in messages: for msg in messages:
if isinstance(msg, UserMessage): if isinstance(msg, UserMessage):

View File

@@ -1,3 +1,4 @@
import argparse
import logging import logging
import os import os
import signal import signal
@@ -55,6 +56,11 @@ class ChtApp(Adw.Application):
win = ChtWindow(application=self) win = ChtWindow(application=self)
win.present() win.present()
# Auto-connect for E2E testing: --auto-connect
# Delay gives the GUI time to fully render before starting the stream.
if os.environ.get("_CHT_AUTO_CONNECT") == "1" and not win._lifecycle.is_streaming:
GLib.timeout_add(2000, lambda: win._on_connect_clicked(None) or False)
_STDERR_SKIP = [b"eglExportDMABUFImage"] _STDERR_SKIP = [b"eglExportDMABUFImage"]
@@ -89,6 +95,16 @@ def _filter_stderr():
def main(): def main():
parser = argparse.ArgumentParser(description="CHT — Stream Viewer + Agent")
parser.add_argument("--auto-connect", action="store_true", help="Connect on startup")
parser.add_argument("--python", action="store_true", help="Use Python transport (default)")
parser.add_argument("--rust", action="store_true", help="Use Rust transport")
args, gtk_args = parser.parse_known_args()
# Store parsed options so do_activate can read them
os.environ["_CHT_AUTO_CONNECT"] = "1" if args.auto_connect else "0"
os.environ["_CHT_RUST_TRANSPORT"] = "1" if args.rust else "0"
_filter_stderr() _filter_stderr()
logging.basicConfig( logging.basicConfig(
level=logging.DEBUG, level=logging.DEBUG,
@@ -96,9 +112,10 @@ def main():
datefmt="%H:%M:%S", datefmt="%H:%M:%S",
) )
log = logging.getLogger("cht") log = logging.getLogger("cht")
log.info("CHT starting") log.info("CHT starting (transport=%s, auto_connect=%s)",
"rust" if args.rust else "python", args.auto_connect)
app = ChtApp() app = ChtApp()
return app.run(sys.argv) return app.run([sys.argv[0]] + gtk_args)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -187,7 +187,10 @@ def detect_scenes_from_pipe(scene_threshold=0.10, flush_frames=2, fps=30):
- stdout: MJPEG pipe (JPEG frames on scene change) - stdout: MJPEG pipe (JPEG frames on scene change)
- stderr: showinfo lines with pts_time timestamps - stderr: showinfo lines with pts_time timestamps
""" """
stream = ffmpeg.input("pipe:0", f="h264", framerate=fps, hwaccel="cuda") stream = ffmpeg.input(
"pipe:0", f="h264", framerate=fps, hwaccel="cuda",
fflags="nobuffer", probesize=32, analyzeduration=0,
)
scene_expr = f"gt(scene,{scene_threshold})" scene_expr = f"gt(scene,{scene_threshold})"
if flush_frames > 0: if flush_frames > 0:
mod_val = 1 + flush_frames mod_val = 1 + flush_frames

View File

@@ -1,6 +1,7 @@
"""Stream lifecycle — manages recording, scene detection, audio extraction, and transcription buffering.""" """Stream lifecycle — manages recording, scene detection, audio extraction, and transcription buffering."""
import logging import logging
import time
from threading import Thread from threading import Thread
from gi.repository import GLib from gi.repository import GLib
@@ -66,6 +67,7 @@ class StreamLifecycle:
""" """
self._streaming = True self._streaming = True
self._gone_live = False self._gone_live = False
self._start_monotonic = time.monotonic()
self._rust_transport = rust_transport self._rust_transport = rust_transport
if rust_transport: if rust_transport:
@@ -102,6 +104,28 @@ class StreamLifecycle:
from pathlib import Path from pathlib import Path
from cht.config import DATA_DIR from cht.config import DATA_DIR
marker = DATA_DIR / "active-session" marker = DATA_DIR / "active-session"
# If marker exists, check liveness via data/scene.sock (fixed path).
if marker.exists():
try:
session_dir = Path(marker.read_text().strip())
scene_sock = DATA_DIR / "scene.sock"
if session_dir.exists() and scene_sock.exists():
import socket
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
try:
s.connect(str(scene_sock))
s.close()
log.info("Rust session dir (already active): %s", session_dir)
return session_dir
except OSError:
log.info("Stale scene.sock, cleaning up")
scene_sock.unlink(missing_ok=True)
marker.unlink(missing_ok=True)
log.info("Cleared stale active-session marker")
except Exception:
marker.unlink(missing_ok=True)
elapsed = 0.0 elapsed = 0.0
while elapsed < timeout: while elapsed < timeout:
if marker.exists(): if marker.exists():
@@ -155,13 +179,16 @@ class StreamLifecycle:
GLib.idle_add(self._go_live_once) GLib.idle_add(self._go_live_once)
if self._stream_mgr: if self._stream_mgr:
self._stream_mgr.capture_now(on_new_frames=self._handle_new_scene_frames) self._stream_mgr.capture_now(on_new_frames=self._handle_new_scene_frames)
if self._stream_mgr:
self._stream_mgr.capture_now(on_new_frames=self._handle_new_scene_frames)
def _go_live_once(self): def _go_live_once(self):
if self._stream_mgr: if self._stream_mgr:
elapsed = time.monotonic() - self._start_monotonic
log.info("Going LIVE (startup delay elapsed)") log.info("Going LIVE (startup delay elapsed)")
self._timeline.go_live() self._timeline.go_live()
if self._stream_mgr.telemetry:
self._stream_mgr.telemetry.metric("first_live", {
"elapsed_s": round(elapsed, 2),
})
return False return False
def _tick_live(self): def _tick_live(self):

View File

@@ -58,7 +58,7 @@ class StreamManager:
self.agent_dir = self.session_dir / "agent" self.agent_dir = self.session_dir / "agent"
self.readonly = False self.readonly = False
self.telemetry = None self._telemetry = None
self.recorder = StreamRecorder(self.session_dir) self.recorder = StreamRecorder(self.session_dir)
self.processor = SessionProcessor(self.session_dir) self.processor = SessionProcessor(self.session_dir)
@@ -88,7 +88,7 @@ class StreamManager:
mgr.audio_dir = session_dir / "audio" mgr.audio_dir = session_dir / "audio"
mgr.agent_dir = session_dir / "agent" mgr.agent_dir = session_dir / "agent"
mgr.readonly = False mgr.readonly = False
mgr.telemetry = None mgr._telemetry = None
# No recorder — Rust server owns transport + recording. # No recorder — Rust server owns transport + recording.
mgr.recorder = None mgr.recorder = None
@@ -121,6 +121,16 @@ class StreamManager:
session_id, len(mgr.recorder.recording_segments), mgr.frame_count) session_id, len(mgr.recorder.recording_segments), mgr.frame_count)
return mgr return mgr
@property
def telemetry(self):
    """Return the telemetry sink currently attached to this manager (or None)."""
    return self._telemetry

@telemetry.setter
def telemetry(self, val):
    """Store the telemetry sink and mirror it onto the processor, if one exists."""
    self._telemetry = val
    processor = self.processor
    if processor:
        # The processor emits its own metrics, so it needs the same sink.
        processor._telemetry = val
# -- Recorder delegation -- # -- Recorder delegation --
@property @property
@@ -168,20 +178,27 @@ class StreamManager:
return self.recorder.alive() if self.recorder else True # Rust owns it return self.recorder.alive() if self.recorder else True # Rust owns it
def start_scene_detector(self, on_new_frames=None): def start_scene_detector(self, on_new_frames=None):
if self.recorder: # GUI callback always goes to the processor — it fires on_new_frames
self.recorder.set_on_new_scene_frames(on_new_frames) # after writing the JPEG to disk, regardless of how it got the frame.
else: self.processor.set_on_new_frames(on_new_frames)
self.processor.set_on_new_frames(on_new_frames) if not self.recorder:
# Rust transport: processor connects to scene.sock and runs its own ffmpeg.
self.processor.start_scene_detector(threshold=SCENE_THRESHOLD) self.processor.start_scene_detector(threshold=SCENE_THRESHOLD)
def capture_now(self, on_new_frames=None): def capture_now(self, on_new_frames=None):
self.processor.set_on_new_frames(on_new_frames) self.processor.set_on_new_frames(on_new_frames)
if self.recorder: if self.recorder:
self.recorder.capture_now(on_raw_frame=self.processor.on_captured_frame) self.recorder.capture_now(on_raw_frame=self.processor.on_captured_frame)
else:
# Rust mode: extract current frame directly from the growing fMP4.
self.processor.capture_now_from_file()
def update_scene_threshold(self, new_threshold: float): def update_scene_threshold(self, new_threshold: float):
if self.recorder: if self.recorder:
self.recorder.update_scene_threshold(new_threshold) self.recorder.update_scene_threshold(new_threshold)
else:
# Rust mode: restart scene detector with new threshold.
self.processor.restart_scene_detector(threshold=new_threshold)
# -- Processor delegation -- # -- Processor delegation --

View File

@@ -22,7 +22,7 @@ import socket
import time import time
from pathlib import Path from pathlib import Path
from queue import Queue, Empty from queue import Queue, Empty
from threading import Thread from threading import Thread, Event
from cht.config import ( from cht.config import (
AUDIO_EXTRACT_INTERVAL, AUDIO_EXTRACT_INTERVAL,
@@ -43,13 +43,15 @@ class SessionProcessor:
self.frames_dir = session_dir / "frames" self.frames_dir = session_dir / "frames"
self.audio_dir = session_dir / "audio" self.audio_dir = session_dir / "audio"
self._stop_flags: set[str] = set() self._stop_event = Event()
self._threads: dict[str, Thread] = {} self._threads: dict[str, Thread] = {}
self._on_new_frames = None self._on_new_frames = None
self._on_new_audio = None self._on_new_audio = None
self._last_scene_capture = 0.0
self._get_recording_path = None self._get_recording_path = None
self._get_current_global_offset = None self._get_current_global_offset = None
self._telemetry = None
def attach(self, get_recording_path, get_current_global_offset): def attach(self, get_recording_path, get_current_global_offset):
"""Wire up callbacks to query the recorder's current state.""" """Wire up callbacks to query the recorder's current state."""
@@ -74,6 +76,11 @@ class SessionProcessor:
self._append_frame_index(entry) self._append_frame_index(entry)
log.info("Scene frame: %s at %.1fs", frame_id, global_ts) log.info("Scene frame: %s at %.1fs", frame_id, global_ts)
if self._telemetry:
self._telemetry.metric("scene_frame", {
"id": frame_id, "global_ts": global_ts,
})
if self._on_new_frames: if self._on_new_frames:
self._on_new_frames([entry]) self._on_new_frames([entry])
@@ -86,6 +93,156 @@ class SessionProcessor:
"""Receive a manually captured frame. Write and index it.""" """Receive a manually captured frame. Write and index it."""
self.on_raw_frame(jpeg_bytes, global_ts) self.on_raw_frame(jpeg_bytes, global_ts)
def capture_now_from_file(self):
    """Extract the current frame from the growing fMP4 (Rust transport mode).

    Returns immediately. The work runs on a daemon thread named
    ``capture_now``: it probes the recording's duration with ffprobe,
    extracts a JPEG 0.5 s before the reported end, and passes the bytes to
    ``on_raw_frame`` with the current global offset added to the timestamp.
    Every failure is logged and ends the capture; nothing is raised to the
    caller.
    """
    import os as _os
    import subprocess
    import tempfile

    def _capture():
        seg = self._get_recording_path() if self._get_recording_path else None
        if not seg or not seg.exists():
            log.warning("capture_now: no recording file")
            return

        try:
            result = subprocess.run(
                ["ffprobe", "-v", "quiet", "-show_entries", "format=duration",
                 "-of", "default=noprint_wrappers=1:nokey=1", str(seg)],
                capture_output=True, text=True,
            )
            # Empty or non-numeric output raises ValueError and is handled below.
            duration = float(result.stdout.strip())
        except Exception as e:
            log.warning("capture_now: could not probe duration: %s", e)
            return

        if duration < 1:
            log.warning("capture_now: recording too short")
            return

        # Step back from the tip so the requested timestamp is already on disk.
        timestamp = max(0, duration - 0.5)

        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
            tmp_path = Path(tmp.name)
        try:
            ff.extract_frame_at(seg, tmp_path, timestamp)
            # NamedTemporaryFile has already created the (empty) file, so
            # existence alone proves nothing; an empty file means no frame
            # was written.
            if not tmp_path.exists() or tmp_path.stat().st_size == 0:
                log.warning("capture_now: frame not written")
                return
            jpeg_bytes = tmp_path.read_bytes()
        except Exception as e:
            log.error("capture_now failed: %s", e)
            return
        finally:
            # Best-effort cleanup of the scratch file.
            try:
                _os.unlink(tmp_path)
            except OSError:
                pass

        offset = self._get_current_global_offset() if self._get_current_global_offset else 0.0
        self.on_raw_frame(jpeg_bytes, timestamp + offset)

    Thread(target=_capture, daemon=True, name="capture_now").start()
def _capture_current_frame(self):
    """Grab a fresh frame from the tip of the recording file.

    Used when scene detection fires. The JPEG emitted by the scene filter
    is stale (buffered in the encoder), so the frame is pulled straight
    from the fMP4, 0.3 s before its probed end, and forwarded to
    ``on_raw_frame`` with the global offset applied.
    """
    import os as _os
    import tempfile

    recording = self._get_recording_path() if self._get_recording_path else None
    if not (recording and recording.exists()):
        return

    tip = self._probe_safe_duration(recording)
    if not tip or tip < 0.5:
        return

    local_ts = max(0, tip - 0.3)
    if self._get_current_global_offset:
        offset = self._get_current_global_offset()
    else:
        offset = 0.0

    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as handle:
        scratch = Path(handle.name)

    try:
        ff.extract_frame_at(recording, scratch, local_ts)
        if not scratch.exists() or scratch.stat().st_size == 0:
            return
        frame = scratch.read_bytes()
    except Exception as e:
        log.debug("Scene capture failed: %s", e)
        return
    finally:
        # Always remove the scratch file, whatever happened above.
        try:
            _os.unlink(scratch)
        except Exception:
            pass

    self.on_raw_frame(frame, local_ts + offset)
def _extract_scene_frame(self, rec_ts, global_ts, max_attempts=4, retry_delay=1.0):
    """Extract a frame from the recording at a specific timestamp.

    Called from the scene detector when showinfo fires. ``rec_ts`` has
    already been corrected for the offset between the detector's PTS and
    the recording's timeline.

    The fMP4 file lags ~2s behind real-time due to fragment boundaries, so
    the target timestamp may not be on disk yet; extraction is retried a
    few times before giving up.

    Args:
        rec_ts: Timestamp within the recording file to extract at.
        global_ts: Timestamp passed to ``on_raw_frame`` with the frame.
        max_attempts: Total extraction attempts (default 4).
        retry_delay: Seconds to sleep between attempts (default 1.0, i.e.
            up to ~3s of waiting with the default attempt count).

    Note:
        This blocks the calling thread for up to
        ``(max_attempts - 1) * retry_delay`` seconds.
    """
    import os as _os
    import tempfile

    seg = self._get_recording_path() if self._get_recording_path else None
    if not seg or not seg.exists():
        return

    for attempt in range(max_attempts):
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
            tmp_path = Path(tmp.name)

        jpeg_bytes = None
        try:
            ff.extract_frame_at(seg, tmp_path, rec_ts)
            if tmp_path.exists() and tmp_path.stat().st_size > 0:
                jpeg_bytes = tmp_path.read_bytes()
        except Exception as e:
            # Expected while the fragment holding rec_ts has not been flushed.
            log.debug("Scene extract attempt %d failed at rec_ts=%.3f: %s",
                      attempt, rec_ts, e)
        finally:
            # Best-effort cleanup of the scratch file.
            try:
                _os.unlink(tmp_path)
            except OSError:
                pass

        if jpeg_bytes is not None:
            log.info("Scene frame: rec_ts=%.3f global_ts=%.3f (attempt %d)",
                     rec_ts, global_ts, attempt)
            # Deliver outside the retry path: a failing consumer must not
            # trigger another extraction and a duplicate index entry.
            try:
                self.on_raw_frame(jpeg_bytes, global_ts)
            except Exception:
                log.exception("Scene frame delivery failed at global_ts=%.3f", global_ts)
            return

        # Recording file not ready yet — wait for fragments to flush.
        if attempt < max_attempts - 1:
            time.sleep(retry_delay)

    log.warning("Scene extract gave up at rec_ts=%.3f after retries", rec_ts)
def _wall_clock_offset(self):
    """Return seconds elapsed since session start, using the wall clock.

    The session dir name is the start time in ``YYYYmmdd_HHMMSS`` format
    and is compared against a naive ``datetime.now()``. This avoids fMP4
    probe lag, which underestimates by ~2s.

    If the name cannot be parsed, falls back to the probed duration of the
    current recording file.

    Returns:
        A non-negative float; 0.0 when neither source yields a value.
    """
    from datetime import datetime

    try:
        session_name = self.session_dir.name  # e.g. "20260410_020644"
        start_time = datetime.strptime(session_name, "%Y%m%d_%H%M%S")
        elapsed = (datetime.now() - start_time).total_seconds()
        return max(0.0, elapsed)
    except Exception as e:
        log.warning("Could not compute wall-clock offset: %s", e)

    # Fall back to fMP4 probe.
    seg = self._get_recording_path() if self._get_recording_path else None
    if not seg or not seg.exists():
        return 0.0
    # The probe may yield a falsy/None result (other callers in this class
    # guard for that); callers of this method compare and format the value
    # as a float, so normalise it here.
    return self._probe_safe_duration(seg) or 0.0
def restart_scene_detector(self, threshold):
    """Restart the scene detector with a new threshold.

    Stops the tracked ``scene_detector`` ffmpeg process, if there is one,
    then calls ``start_scene_detector`` with ``threshold``.

    Args:
        threshold: Scene-change threshold handed to ``start_scene_detector``.
    """
    # ``_procs`` may not have been created yet; ``stop()`` reads it with
    # getattr for the same reason, so do not assume it exists here.
    procs = getattr(self, "_procs", None)
    if procs and "scene_detector" in procs:
        ff.stop_proc(procs.pop("scene_detector"), timeout=2)

    # NOTE(review): the original comment assumed the previous detector thread
    # exits when its ffmpeg dies, but the reconnect loop in this class appears
    # to retry for as long as scene.sock exists — confirm that
    # start_scene_detector copes with a still-running detector thread, or the
    # old thread may reconnect with its old threshold.
    self.start_scene_detector(threshold=threshold)
# -- Frame index -- # -- Frame index --
@property @property
@@ -134,25 +291,30 @@ class SessionProcessor:
Retries on failure (e.g. ffmpeg dies from bad initial frames). Retries on failure (e.g. ffmpeg dies from bad initial frames).
The server buffers the latest keyframe so reconnects start clean. The server buffers the latest keyframe so reconnects start clean.
""" """
socket_path = self.session_dir / "stream" / "scene.sock" from cht.config import DATA_DIR
socket_path = DATA_DIR / "scene.sock"
# Wait for the socket to appear (server creates it on session start). # Wait for the socket to appear (server creates it on session start).
while "stop" not in self._stop_flags: while not self._stop_event.is_set():
if socket_path.exists(): if socket_path.exists():
break break
time.sleep(0.5) time.sleep(0.5)
if "stop" in self._stop_flags: if self._stop_event.is_set():
return return
while "stop" not in self._stop_flags: while not self._stop_event.is_set():
try: try:
self._run_scene_session(socket_path, threshold) self._run_scene_session(socket_path, threshold)
except Exception: except Exception:
log.exception("Scene detector error") log.exception("Scene detector error")
if "stop" in self._stop_flags: if self._stop_event.is_set():
break
# If the socket is gone, the session ended — don't retry.
if not socket_path.exists():
log.info("Scene detector: socket gone, session ended")
break break
log.info("Scene detector: reconnecting in 2s...") log.info("Scene detector: reconnecting in 2s...")
time.sleep(2.0) self._stop_event.wait(timeout=2.0)
log.info("Scene detector stopped") log.info("Scene detector stopped")
@@ -163,7 +325,7 @@ class SessionProcessor:
try: try:
sock.connect(str(socket_path)) sock.connect(str(socket_path))
except OSError as e: except OSError as e:
log.error("Scene detector: connect failed: %s", e) log.debug("Scene detector: connect failed: %s", e)
return return
log.info("Scene detector: connected, starting ffmpeg") log.info("Scene detector: connected, starting ffmpeg")
@@ -177,7 +339,7 @@ class SessionProcessor:
# Thread: socket → ffmpeg stdin # Thread: socket → ffmpeg stdin
def _feed_stdin(): def _feed_stdin():
try: try:
while "stop" not in self._stop_flags: while not self._stop_event.is_set():
data = sock.recv(65536) data = sock.recv(65536)
if not data: if not data:
break break
@@ -197,9 +359,22 @@ class SessionProcessor:
stdin_t = Thread(target=_feed_stdin, daemon=True, name="scene_stdin") stdin_t = Thread(target=_feed_stdin, daemon=True, name="scene_stdin")
stdin_t.start() stdin_t.start()
# Thread: ffmpeg stderr → parse showinfo timestamps # Compute time offset: detector PTS starts from 0 when it connects,
ts_queue = Queue() # but the recording has been running since session start.
offset = self._get_current_global_offset() if self._get_current_global_offset else 0.0 # recording_ts = detector_pts + pts_offset
#
# Use wall-clock time for accurate offset. The fMP4 file lags behind
# by ~2s due to fragment boundaries, so we can't extract at rec_ts
# immediately — _extract_scene_frame handles this by retrying.
pts_offset = self._wall_clock_offset()
global_offset = self._get_current_global_offset() if self._get_current_global_offset else 0.0
log.info("Scene detector: pts_offset=%.1f (wall-clock seconds since session start)",
pts_offset)
# Stderr thread: parse showinfo timestamps, apply flush dedup,
# extract frame from recording at corrected timestamp.
flush_window = (SCENE_FLUSH_FRAMES + 1) / 30.0
last_pts = [0.0] # mutable for thread
def _read_stderr(): def _read_stderr():
for raw in proc.stderr: for raw in proc.stderr:
@@ -209,7 +384,14 @@ class SessionProcessor:
if "showinfo" in line: if "showinfo" in line:
pts_match = re.search(r"pts_time:\s*([\d.]+)", line) pts_match = re.search(r"pts_time:\s*([\d.]+)", line)
if pts_match: if pts_match:
ts_queue.put(float(pts_match.group(1))) pts_time = float(pts_match.group(1))
if pts_time - last_pts[0] < flush_window:
log.debug("Skipping flush frame at pts=%.3f", pts_time)
continue
last_pts[0] = pts_time
# Extract frame from recording at corrected timestamp.
rec_ts = pts_time + pts_offset
self._extract_scene_frame(rec_ts, rec_ts + global_offset)
elif line.startswith("[") or "error" in line.lower() or "warning" in line.lower(): elif line.startswith("[") or "error" in line.lower() or "warning" in line.lower():
log.debug("[scene] %s", line) log.debug("[scene] %s", line)
log.debug("[scene] stderr closed") log.debug("[scene] stderr closed")
@@ -217,44 +399,14 @@ class SessionProcessor:
stderr_t = Thread(target=_read_stderr, daemon=True, name="scene_stderr") stderr_t = Thread(target=_read_stderr, daemon=True, name="scene_stderr")
stderr_t.start() stderr_t.start()
# Main: ffmpeg stdout → extract JPEG frames # Main: drain stdout to prevent ffmpeg from stalling.
last_pts = 0.0 # We don't use the JPEG data — frames come from the recording.
buf = b""
raw_fd = proc.stdout.fileno() raw_fd = proc.stdout.fileno()
while True: while os.read(raw_fd, 65536):
chunk = os.read(raw_fd, 65536) pass
if not chunk:
break
buf += chunk
while True:
soi = buf.find(b"\xff\xd8")
if soi < 0:
buf = b""
break
eoi = buf.find(b"\xff\xd9", soi + 2)
if eoi < 0:
buf = buf[soi:]
break
jpeg_data = buf[soi:eoi + 2]
buf = buf[eoi + 2:]
try:
pts_time = ts_queue.get(timeout=2.0)
except Empty:
log.warning("No timestamp for scene frame")
pts_time = 0.0
# Skip flush frames (within 100ms of previous = duplicate)
if pts_time - last_pts < 0.1:
log.debug("Skipping flush frame at pts=%.3f", pts_time)
continue
last_pts = pts_time
global_ts = pts_time + offset
self.on_raw_frame(jpeg_data, global_ts)
ff.stop_proc(proc, timeout=3) ff.stop_proc(proc, timeout=3)
log.info("Scene detector: ffmpeg exited (last_pts=%.1f)", last_pts) log.info("Scene detector: ffmpeg exited (last_pts=%.1f)", last_pts[0])
def start_audio_extractor(self, on_new_audio=None): def start_audio_extractor(self, on_new_audio=None):
"""Periodically extract audio from the growing fMP4 as WAV chunks.""" """Periodically extract audio from the growing fMP4 as WAV chunks."""
@@ -265,10 +417,16 @@ class SessionProcessor:
self._threads["audio_extractor"] = t self._threads["audio_extractor"] = t
def stop(self): def stop(self):
self._stop_flags.add("stop") self._stop_event.set()
for name, proc in getattr(self, "_procs", {}).items(): for name, proc in getattr(self, "_procs", {}).items():
ff.stop_proc(proc, timeout=3) ff.stop_proc(proc, timeout=3)
self._procs = {} if hasattr(self, "_procs") else {} self._procs = {}
# Join all threads so caller knows they're done before starting a new session
for name, t in list(self._threads.items()):
t.join(timeout=5)
if t.is_alive():
log.warning("Thread %s still alive after stop timeout", name)
self._threads.clear()
def _has_audio_stream(self, seg: Path) -> bool: def _has_audio_stream(self, seg: Path) -> bool:
try: try:
@@ -295,9 +453,7 @@ class SessionProcessor:
chunk_num = 0 chunk_num = 0
current_source = None current_source = None
while "stop" not in self._stop_flags: while not self._stop_event.wait(timeout=AUDIO_EXTRACT_INTERVAL):
time.sleep(AUDIO_EXTRACT_INTERVAL)
source = self._find_audio_source() source = self._find_audio_source()
if not source: if not source:
continue continue
@@ -315,6 +471,25 @@ class SessionProcessor:
if safe_duration is None or safe_duration <= 0: if safe_duration is None or safe_duration <= 0:
continue continue
# Raw AAC files (from Rust server) have no reliable duration metadata.
# ffprobe wildly overestimates (e.g. 1569s for a 50s session).
# Cap to wall-clock elapsed time as a sanity bound.
wall_elapsed = self._wall_clock_offset()
if wall_elapsed > 0 and safe_duration > wall_elapsed * 1.5:
log.debug("Audio: capping probed duration %.1fs to wall-clock %.1fs",
safe_duration, wall_elapsed)
safe_duration = wall_elapsed
# Fail-safe: processed_time can accumulate past the file if the
# source was recreated (e.g. server restarted same session).
if processed_time > safe_duration:
log.warning(
"Audio extractor: processed_time %.1fs > file duration %.1fs — resetting",
processed_time, safe_duration,
)
processed_time = 0.0
chunk_num = 0
process_to = safe_duration - AUDIO_SAFETY_MARGIN process_to = safe_duration - AUDIO_SAFETY_MARGIN
if process_to <= processed_time + 1.0: if process_to <= processed_time + 1.0:
continue continue
@@ -335,11 +510,19 @@ class SessionProcessor:
global_start = processed_time + offset global_start = processed_time + offset
log.info("Audio chunk: %s (%.1fs → %.1fs, global %.1fs)", log.info("Audio chunk: %s (%.1fs → %.1fs, global %.1fs)",
wav_path.name, processed_time, process_to, global_start) wav_path.name, processed_time, process_to, global_start)
if self._telemetry:
self._telemetry.metric("audio_chunk", {
"chunk": chunk_num, "start": processed_time,
"end": process_to, "global_start": global_start,
})
if self._on_new_audio: if self._on_new_audio:
self._on_new_audio( try:
wav_path, global_start, chunk_duration, self._on_new_audio(
segment_path=source, local_start=processed_time, wav_path, global_start, chunk_duration,
) segment_path=source, local_start=processed_time,
)
except Exception as e:
log.error("Audio callback failed: %s", e)
chunk_num += 1 chunk_num += 1
processed_time = process_to processed_time = process_to

View File

@@ -147,7 +147,7 @@ class MonitorWidget(Gtk.Box):
gl_area.make_current() gl_area.make_current()
self._live_player = Player() self._live_player = Player()
self._live_player.init_gl( self._live_player.init_gl(
update_callback=lambda: GLib.idle_add(self._live_gl.queue_render) update_callback=lambda: GLib.idle_add(self._live_gl.queue_render, priority=GLib.PRIORITY_HIGH)
) )
log.info("Live player created") log.info("Live player created")
if self._live_source_url and not self._live_loaded: if self._live_source_url and not self._live_loaded:
@@ -162,7 +162,7 @@ class MonitorWidget(Gtk.Box):
self._live_loaded = False self._live_loaded = False
def _on_live_render(self, gl_area, _ctx): def _on_live_render(self, gl_area, _ctx):
if not self._live_player: if not self._live_player or not self._live_loaded:
return True return True
fbo = ctypes.c_int(0) fbo = ctypes.c_int(0)
_libGL.glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, ctypes.byref(fbo)) _libGL.glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, ctypes.byref(fbo))
@@ -175,7 +175,7 @@ class MonitorWidget(Gtk.Box):
gl_area.make_current() gl_area.make_current()
self._review_player = Player() self._review_player = Player()
self._review_player.init_gl( self._review_player.init_gl(
update_callback=lambda: GLib.idle_add(self._review_gl.queue_render) update_callback=lambda: GLib.idle_add(self._review_gl.queue_render, priority=GLib.PRIORITY_HIGH)
) )
log.info("Review player created") log.info("Review player created")

View File

@@ -104,7 +104,8 @@ class Player:
"""Load a live stream URL with low-latency options.""" """Load a live stream URL with low-latency options."""
self._player["cache"] = "no" self._player["cache"] = "no"
self._player["demuxer-max-bytes"] = "512KiB" self._player["demuxer-max-bytes"] = "512KiB"
self._player["audio-buffer"] = 0.2 self._player["demuxer-readahead-secs"] = 0.5
self._player["audio-buffer"] = 0.1
log.info("mpv load_live: %s", url) log.info("mpv load_live: %s", url)
self._player.loadfile(str(url), mode="replace") self._player.loadfile(str(url), mode="replace")

View File

@@ -1,6 +1,7 @@
"""Main application window — wires Timeline to all components.""" """Main application window — wires Timeline to all components."""
import logging import logging
import os
from pathlib import Path from pathlib import Path
import gi import gi
@@ -46,6 +47,7 @@ class ChtWindow(Adw.ApplicationWindow):
self._pending_scrub_global = 0.0 self._pending_scrub_global = 0.0
self._scrub_pending = False # throttle flag for scrub updates self._scrub_pending = False # throttle flag for scrub updates
self._telemetry = None self._telemetry = None
self._threshold_timeout_id = None
# Core components # Core components
self._timeline = Timeline() self._timeline = Timeline()
@@ -161,11 +163,23 @@ class ChtWindow(Adw.ApplicationWindow):
) )
def _on_scene_threshold(self, val): def _on_scene_threshold(self, val):
if self._lifecycle.stream_mgr and not self._lifecycle.stream_mgr.readonly: if not (self._lifecycle.stream_mgr and not self._lifecycle.stream_mgr.readonly):
old = self._lifecycle.stream_mgr.scene_threshold return
self._lifecycle.stream_mgr.update_scene_threshold(val) if self._telemetry:
if self._telemetry: self._telemetry.event("scene_threshold_changed",
self._telemetry.event("scene_threshold_changed", {"from": old, "to": val}) {"from": self._lifecycle.stream_mgr.scene_threshold, "to": val})
# Debounce: wait 500ms after user stops dragging, then restart in background.
if self._threshold_timeout_id:
GLib.source_remove(self._threshold_timeout_id)
self._threshold_timeout_id = GLib.timeout_add(500, self._apply_threshold, val)
def _apply_threshold(self, val):
self._threshold_timeout_id = None
mgr = self._lifecycle.stream_mgr
if mgr and not mgr.readonly:
Thread(target=mgr.update_scene_threshold, args=(val,),
daemon=True, name="threshold_update").start()
return False # don't repeat
def _on_min_chunk_changed(self, panel, val): def _on_min_chunk_changed(self, panel, val):
import cht.config import cht.config
@@ -247,6 +261,9 @@ class ChtWindow(Adw.ApplicationWindow):
self._update_scrub_bar_manifest() self._update_scrub_bar_manifest()
self._populate_model_dropdown() self._populate_model_dropdown()
# Show "Continue" since there's an active session to resume
self._connect_btn.set_label("Continue")
# Load persisted agent conversation # Load persisted agent conversation
self._agent.load_from_session(mgr.session_dir) self._agent.load_from_session(mgr.session_dir)
if self._agent.thread.messages: if self._agent.thread.messages:
@@ -263,8 +280,11 @@ class ChtWindow(Adw.ApplicationWindow):
audio_dir = mgr.audio_dir audio_dir = mgr.audio_dir
audio_dir.mkdir(parents=True, exist_ok=True) audio_dir.mkdir(parents=True, exist_ok=True)
full_wav = audio_dir / "full.wav" full_wav = audio_dir / "full.wav"
# Rust transport writes audio to a separate file (fMP4 has no audio track).
aac_path = mgr.stream_dir / "audio.aac"
source = aac_path if aac_path.exists() else segments[0]
try: try:
ff.extract_audio_chunk(segments[0], full_wav) ff.extract_audio_chunk(source, full_wav)
self._waveform_engine.compute_full(full_wav) self._waveform_engine.compute_full(full_wav)
peaks = self._waveform_engine.peaks peaks = self._waveform_engine.peaks
bucket_dur = self._waveform_engine.bucket_duration bucket_dur = self._waveform_engine.bucket_duration
@@ -282,7 +302,8 @@ class ChtWindow(Adw.ApplicationWindow):
self._connect_btn.remove_css_class("suggested-action") self._connect_btn.remove_css_class("suggested-action")
self._connect_btn.add_css_class("destructive-action") self._connect_btn.add_css_class("destructive-action")
mgr = self._lifecycle.start(session_id=session_id, rust_transport=True) rust = os.environ.get("_CHT_RUST_TRANSPORT", "0") == "1"
mgr = self._lifecycle.start(session_id=session_id, rust_transport=rust)
if mgr is None: if mgr is None:
log.error("Failed to start stream — no cht-server session found") log.error("Failed to start stream — no cht-server session found")
self._connect_btn.set_label("Connect") self._connect_btn.set_label("Connect")
@@ -483,8 +504,19 @@ class ChtWindow(Adw.ApplicationWindow):
if self._proxy_mgr: if self._proxy_mgr:
self._proxy_mgr.cancel() self._proxy_mgr.cancel()
self._proxy_mgr = None self._proxy_mgr = None
self._manifest = []
self._connect_btn.set_label("Connect")
self._connect_btn.remove_css_class("destructive-action")
self._connect_btn.add_css_class("suggested-action")
if reload_session and last_session_id:
# Stop live player before transitioning to review mode
self._monitor.reset()
self._load_session(last_session_id)
return
# Full reset — only when not reloading
self._manifest = []
self._timeline.reset() self._timeline.reset()
self._timeline_controls.scrub_bar.set_manifest([]) self._timeline_controls.scrub_bar.set_manifest([])
self._monitor.reset() self._monitor.reset()
@@ -493,18 +525,10 @@ class ChtWindow(Adw.ApplicationWindow):
self._transcriber.reset() self._transcriber.reset()
self._agent.clear_history() self._agent.clear_history()
self._known_frames = set() self._known_frames = set()
self._frames_panel.clear() self._frames_panel.clear()
self._transcript_panel.clear() self._transcript_panel.clear()
self._connect_btn.set_label("Connect")
self._connect_btn.remove_css_class("destructive-action")
self._connect_btn.add_css_class("suggested-action")
self.set_title(APP_NAME) self.set_title(APP_NAME)
if reload_session and last_session_id:
GLib.idle_add(self._load_session, last_session_id)
def _on_close(self, *args): def _on_close(self, *args):
self.teardown() self.teardown()

372
ctrl/bench.py Normal file
View File

@@ -0,0 +1,372 @@
#!/usr/bin/env python3
"""Post-hoc session benchmark — extract timing metrics from session logs.
Usage:
python ctrl/bench.py --session data/sessions/20260410_160441
python ctrl/bench.py --latest # pick most recent session
python ctrl/bench.py --latest --json # machine-readable output
Parses telemetry.jsonl, session.log, frames/index.json, and (if present)
media/logs/server.log. No live session required — works on finished sessions.
Metrics:
M1 Full startup Connect → first transcript
M1a → first duration update
M1b → first scene frame
M1c → first audio chunk
M1d → first transcript
M5 Audio lag How far audio extraction trails real-time
M6 Transcript lag Approximated by the average duration of the audio handed to faster_whisper (plus transcript count)
M7 Frame throughput Scene frames per minute
M9 Recorder health Unexpected restarts / segment rotations
"""
import argparse
import json
import logging
import re
import sys
from datetime import datetime
from pathlib import Path
log = logging.getLogger("bench")
PROJECT_DIR = Path(__file__).resolve().parent.parent
DATA_DIR = PROJECT_DIR / "data"
SESSIONS_DIR = DATA_DIR / "sessions"
def parse_log_time(line: str) -> float | None:
"""Parse HH:MM:SS from session.log line → seconds since midnight."""
m = re.match(r"(\d{2}):(\d{2}):(\d{2})", line)
if m:
return int(m[1]) * 3600 + int(m[2]) * 60 + int(m[3])
return None
def load_telemetry(session_dir: Path) -> list[dict]:
    """Load ``telemetry.jsonl`` from *session_dir* as a list of event dicts.

    Best-effort parser: blank lines, lines that are not valid JSON, and
    lines whose JSON value is not an object are skipped, so callers can
    safely use ``entry.get(...)`` on every element.

    Returns an empty list when the file does not exist.
    """
    path = session_dir / "telemetry.jsonl"
    try:
        text = path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        return []

    entries = []
    for line in text.splitlines():
        if not line.strip():
            continue
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            # A partially written last line is expected if the session crashed.
            continue
        if isinstance(entry, dict):
            entries.append(entry)
    return entries
def load_session_log(session_dir: Path) -> list[str]:
    """Return the lines of ``session.log`` in *session_dir* (empty if missing).

    The log is decoded as UTF-8 regardless of the process locale (it
    contains non-ASCII characters such as "→"), and undecodable bytes are
    replaced rather than aborting the whole benchmark.
    """
    path = session_dir / "session.log"
    try:
        return path.read_text(encoding="utf-8", errors="replace").splitlines()
    except (FileNotFoundError, NotADirectoryError):
        return []
def load_frames_index(session_dir: Path) -> list[dict]:
    """Load ``frames/index.json`` from *session_dir*.

    Returns an empty list when the file is missing, is not valid JSON, or
    does not contain a JSON array at the top level — callers iterate the
    result and call ``len()`` on it, so anything else would mislead them.
    """
    path = session_dir / "frames" / "index.json"
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (FileNotFoundError, NotADirectoryError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return []
    return data if isinstance(data, list) else []
def find_first_line(lines: list[str], pattern: str) -> tuple[float | None, str | None]:
"""Find first line matching pattern. Returns (time_seconds, full_line)."""
for line in lines:
if pattern in line:
return parse_log_time(line), line
return None, None
def _elapsed_since(t_start: float | None, t: float | None) -> float | None:
    """Return seconds from *t_start* to *t*, or None if either is unknown.

    Log stamps carry time-of-day only, so a session that crosses midnight
    yields ``t < t_start``; the modulo folds that single rollover back into
    a positive elapsed time.
    """
    if t_start is None or t is None:
        return None
    return (t - t_start) % 86400


def extract_metrics(session_dir: Path) -> dict:
    """Derive benchmark metrics from the artefacts of a finished session.

    Reads ``telemetry.jsonl``, ``session.log`` and ``frames/index.json``
    under *session_dir* and returns a flat dict. Keys are only present when
    the underlying data was found, except ``session_duration_s``,
    ``M7_total_frames``, the ``M9_*`` counters, ``scene_mode`` and
    ``transport``, which are always set.

    All "start → X" timings are measured from the timestamp of the first
    line of ``session.log``.
    """
    tel = load_telemetry(session_dir)
    log_lines = load_session_log(session_dir)
    frames = load_frames_index(session_dir)
    metrics = {}

    # Session start: telemetry carries no wall clock, so the first log line
    # is the reference whether or not a "session_start" event was recorded.
    t_start = parse_log_time(log_lines[0]) if log_lines else None

    # Session end: the last "session_end" event wins.
    session_duration = None
    for entry in tel:
        if entry.get("name") == "session_end":
            session_duration = entry.get("t")
    metrics["session_duration_s"] = session_duration

    # M1a–M1d: start → first occurrence of each pipeline milestone.
    startup_markers = (
        ("M1a_first_duration_s", "Duration:"),
        ("M1b_first_scene_frame_s", "Scene frame:"),
        ("M1c_first_audio_chunk_s", "Audio chunk:"),
        ("M1d_first_transcript_s", "faster_whisper: Processing audio"),
    )
    for key, marker in startup_markers:
        t_marker, _ = find_first_line(log_lines, marker)
        elapsed = _elapsed_since(t_start, t_marker)
        if elapsed is not None:
            metrics[key] = elapsed

    # M1: full startup = start → first transcript.
    if "M1d_first_transcript_s" in metrics:
        metrics["M1_full_startup_s"] = metrics["M1d_first_transcript_s"]

    # Going LIVE time.
    t_live, _ = find_first_line(log_lines, "Going LIVE")
    going_live = _elapsed_since(t_start, t_live)
    if going_live is not None:
        metrics["going_live_s"] = going_live

    # M5: audio extraction lag = wall time elapsed when the chunk was logged
    # minus the chunk's end position in the stream.
    # Lines look like "Audio chunk: chunk_N (Xs → Ys, global Zs)".
    audio_chunk_re = re.compile(
        r"Audio chunk: \S+ \(([\d.]+)s → ([\d.]+)s, global ([\d.]+)s\)"
    )
    audio_lags = []
    for line in log_lines:
        m = audio_chunk_re.search(line)
        if not m:
            continue
        wall_elapsed = _elapsed_since(t_start, parse_log_time(line))
        if wall_elapsed is None:
            continue
        lag = wall_elapsed - float(m[2])
        if lag >= 0:
            audio_lags.append(lag)
    if audio_lags:
        metrics["M5_audio_lag_avg_s"] = round(sum(audio_lags) / len(audio_lags), 1)
        metrics["M5_audio_lag_max_s"] = round(max(audio_lags), 1)
        metrics["M5_audio_lag_min_s"] = round(min(audio_lags), 1)
        metrics["M5_audio_chunk_count"] = len(audio_lags)

    # M6: parsed from faster_whisper's "Processing audio with duration ..."
    # lines, i.e. the duration of the audio handed to whisper, in either
    # MM:SS.mmm or HH:MM:SS.mmm form.
    mmss_re = re.compile(
        r"faster_whisper: Processing audio with duration (\d+):([\d.]+)$"
    )
    hhmmss_re = re.compile(
        r"faster_whisper: Processing audio with duration (\d+):(\d+):([\d.]+)"
    )
    transcript_durations = []
    for line in log_lines:
        m = mmss_re.search(line)
        if m:
            transcript_durations.append(int(m[1]) * 60 + float(m[2]))
            continue
        m = hhmmss_re.search(line)
        if m:
            transcript_durations.append(int(m[1]) * 3600 + int(m[2]) * 60 + float(m[3]))
    if transcript_durations:
        metrics["M6_whisper_processing_avg_s"] = round(
            sum(transcript_durations) / len(transcript_durations), 1
        )
        metrics["M6_transcript_count"] = len(transcript_durations)

    # M7: frame throughput needs a known duration; the raw frame count does
    # not, so it is reported unconditionally.
    if frames and session_duration and session_duration > 0:
        minutes = session_duration / 60
        metrics["M7_frame_throughput_per_min"] = round(len(frames) / minutes, 1)
    metrics["M7_total_frames"] = len(frames)

    # M9: recorder health.
    metrics["M9_recorder_restarts"] = sum(
        1 for line in log_lines if "Recorder died" in line
    )
    metrics["M9_segment_rotations"] = sum(
        1 for line in log_lines if "Restarting recorder" in line
    )

    # Log markers used to classify scene-detection and transport modes.
    has_scene_relay = any("Scene detector: connecting" in line for line in log_lines)
    has_recorder_scene = any("Recorder+scene: pid=" in line for line in log_lines)
    has_run_async = any("run_async:" in line for line in log_lines)
    has_rust_marker = any(
        "Rust session dir" in line or "Attached to Rust session" in line
        for line in log_lines
    )

    # Scene detection mode.
    if has_scene_relay:
        metrics["scene_mode"] = "rust_relay"
    elif has_recorder_scene or has_run_async:
        metrics["scene_mode"] = "python_single_process"
    else:
        metrics["scene_mode"] = "unknown"

    # Transport mode: explicit log markers first, then file signatures
    # (Rust writes audio.aac separately, Python muxes audio into the fMP4).
    if has_rust_marker:
        metrics["transport"] = "rust"
    elif has_recorder_scene:
        metrics["transport"] = "python"
    elif (session_dir / "stream" / "audio.aac").exists():
        metrics["transport"] = "rust"
    elif has_run_async:
        metrics["transport"] = "python"
    else:
        metrics["transport"] = "unknown"

    return metrics
def print_report(session_dir: Path, metrics: dict):
    """Log the human-readable benchmark table for one session.

    Every known metric gets one row; metrics absent from *metrics* (or set
    to ``None``) are shown as ``-``.
    """
    banner = "=" * 60
    log.info(banner)
    log.info(" CHT Benchmark Report")
    log.info(" Session: %s", session_dir.name)
    log.info(" Transport: %s", metrics.get("transport", "?"))
    log.info(" Scene mode: %s", metrics.get("scene_mode", "?"))
    log.info(" Duration: %ss", metrics.get("session_duration_s", "?"))
    log.info(banner)

    # (metric code, row label, key in `metrics`, unit suffix)
    table = (
        ("M1", "Full startup", "M1_full_startup_s", "s"),
        ("M1a", " → first duration", "M1a_first_duration_s", "s"),
        ("M1b", " → first scene frame", "M1b_first_scene_frame_s", "s"),
        ("M1c", " → first audio chunk", "M1c_first_audio_chunk_s", "s"),
        ("M1d", " → first transcript", "M1d_first_transcript_s", "s"),
        ("", " → going live", "going_live_s", "s"),
        ("M5", "Audio lag (avg)", "M5_audio_lag_avg_s", "s"),
        ("M5", "Audio lag (max)", "M5_audio_lag_max_s", "s"),
        ("M5", "Audio chunks", "M5_audio_chunk_count", ""),
        ("M6", "Whisper processing (avg)", "M6_whisper_processing_avg_s", "s"),
        ("M6", "Transcripts produced", "M6_transcript_count", ""),
        ("M7", "Frame throughput", "M7_frame_throughput_per_min", "/min"),
        ("M7", "Total frames", "M7_total_frames", ""),
        ("M9", "Recorder restarts", "M9_recorder_restarts", ""),
        ("M9", "Segment rotations", "M9_segment_rotations", ""),
    )
    for code, label, key, unit in table:
        value = metrics.get(key)
        if value is None:
            log.info(" %4s %28s -", code, label)
            continue
        log.info(" %4s %28s %s%s", code, label, value, unit)
def compare_ground_truth(session_dir: Path, gt: dict) -> dict:
    """Match each ground-truth scene change to its nearest detected frame.

    *gt* is the parsed ground-truth JSON; its ``scenes`` entries provide
    ``timestamp_s``. Detected frames come from the session's frame index
    and provide ``timestamp``.

    Returns a summary dict (counts, per-scene matches, average signed
    delta, maximum absolute delta, and the number of scenes whose nearest
    frame is more than 10 s away), or ``{"error": ...}`` when either side
    is empty.
    """
    frames = load_frames_index(session_dir)
    gt_scenes = gt.get("scenes", [])
    if not (frames and gt_scenes):
        return {"error": "no frames or no ground truth scenes"}

    detected_ts = sorted(frame["timestamp"] for frame in frames)
    expected_ts = sorted(scene["timestamp_s"] for scene in gt_scenes)

    # Nearest detected frame per expected change; on a tie the earlier
    # frame wins because min() keeps the first minimum of the sorted list.
    matches = []
    for wanted in expected_ts:
        nearest = min(detected_ts, key=lambda seen: abs(seen - wanted))
        matches.append({
            "expected_s": wanted,
            "detected_s": nearest,
            "delta_s": round(nearest - wanted, 3),
        })

    deltas = [match["delta_s"] for match in matches if match["delta_s"] is not None]
    avg_delta = round(sum(deltas) / len(deltas), 3) if deltas else None
    max_delta = round(max(abs(d) for d in deltas), 3) if deltas else None
    missed = sum(
        1 for match in matches
        if match["delta_s"] is None or abs(match["delta_s"]) > 10
    )
    return {
        "expected_scenes": len(expected_ts),
        "detected_frames": len(detected_ts),
        "matches": matches,
        "avg_delta_s": avg_delta,
        "max_delta_s": max_delta,
        "missed": missed,
    }
def print_ground_truth_report(gt: dict):
    """Log the scene-detection-vs-ground-truth section of the report.

    *gt* is the dict produced by the ground-truth comparison: either the
    full summary or ``{"error": ...}``. Each match is tagged ``OK`` when
    its absolute delta is under 5 s and ``MISS`` otherwise; note that the
    summary's ``missed`` count uses a separate (10 s) threshold.
    """
    log.info("")
    log.info(" Scene detection vs ground truth:")
    if "error" in gt:
        # Surface why there is nothing to compare instead of printing zeros only.
        log.warning(" %s", gt["error"])
    log.info(" Expected scenes: %d", gt.get("expected_scenes", 0))
    log.info(" Detected frames: %d", gt.get("detected_frames", 0))
    if gt.get("avg_delta_s") is not None:
        log.info(" Avg detection delta: %ss", gt["avg_delta_s"])
        log.info(" Max detection delta: %ss", gt["max_delta_s"])
    if gt.get("missed", 0) > 0:
        log.warning(" Missed scenes: %d", gt["missed"])
    for m in gt.get("matches", []):
        status = "OK" if m["delta_s"] is not None and abs(m["delta_s"]) < 5 else "MISS"
        det = f"{m['detected_s']:.1f}s" if m["detected_s"] is not None else "---"
        # Signed format: a frame detected before the expected change prints
        # "-1.2s" rather than the malformed "+-1.2s".
        delta = f"{m['delta_s']:+.1f}s" if m["delta_s"] is not None else ""
        log.info(" %4s expected=%5.1fs detected=%s %s", status, m["expected_s"], det, delta)
def find_latest_session() -> Path | None:
    """Return the newest session directory that has a ``telemetry.jsonl``.

    Entries of ``SESSIONS_DIR`` are visited in reverse sorted order; the
    first directory containing telemetry wins. Returns ``None`` when the
    sessions directory is missing or holds no such session.
    """
    if not SESSIONS_DIR.exists():
        return None
    candidates = (
        entry
        for entry in sorted(SESSIONS_DIR.iterdir(), reverse=True)
        if entry.is_dir() and (entry / "telemetry.jsonl").exists()
    )
    return next(candidates, None)
def main():
    """CLI entry point: compute metrics for one session, print and save them.

    Exits with status 1 when no session can be resolved. The report is
    always written to ``data/bench/<session>.json``, in addition to the
    table or JSON printed to the console.
    """
    parser = argparse.ArgumentParser(description="CHT session benchmark")
    parser.add_argument("--session", type=Path, help="Path to session directory")
    parser.add_argument("--latest", action="store_true", help="Use most recent session")
    parser.add_argument("--json", action="store_true", help="Output JSON instead of table")
    parser.add_argument("--ground-truth", type=Path, help="Ground truth JSON for scene comparison")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-7s %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )

    if args.latest:
        session_dir = find_latest_session()
        if not session_dir:
            log.error("No sessions found")
            sys.exit(1)
    elif args.session:
        session_dir = args.session
    else:
        parser.print_help()
        sys.exit(1)

    if not session_dir.exists():
        log.error("Session not found: %s", session_dir)
        sys.exit(1)

    metrics = extract_metrics(session_dir)
    metrics["session_id"] = session_dir.name

    # Ground truth comparison. A requested-but-unusable file is reported
    # instead of being silently skipped, so a missing fixture is visible.
    if args.ground_truth:
        try:
            gt = json.loads(args.ground_truth.read_text(encoding="utf-8"))
        except FileNotFoundError:
            log.warning("Ground truth file not found, skipping comparison: %s",
                        args.ground_truth)
        except (OSError, ValueError) as exc:
            log.warning("Ground truth file unreadable (%s), skipping comparison: %s",
                        exc, args.ground_truth)
        else:
            metrics["ground_truth"] = compare_ground_truth(session_dir, gt)

    if args.json:
        sys.stdout.write(json.dumps(metrics, indent=2) + "\n")
    else:
        print_report(session_dir, metrics)
        if "ground_truth" in metrics:
            print_ground_truth_report(metrics["ground_truth"])

    # Save report
    bench_dir = DATA_DIR / "bench"
    bench_dir.mkdir(parents=True, exist_ok=True)
    report_path = bench_dir / f"{session_dir.name}.json"
    report_path.write_text(json.dumps(metrics, indent=2))
if __name__ == "__main__":
main()

164
ctrl/bench_delay.py Normal file
View File

@@ -0,0 +1,164 @@
#!/usr/bin/env python3
"""Live scene detection latency benchmark (M4).
Measures time from a triggered visual change on the sender to a new JPEG
appearing in the receiver's frames/ directory.
Usage (run on receiver, sender accessible via SSH):
python ctrl/bench_delay.py --session-dir data/sessions/CURRENT --sender mariano@sender
python ctrl/bench_delay.py --frames-dir data/sessions/CURRENT/frames --sender mariano@sender
How it works:
1. Records the current frame count in frames/index.json
2. SSH to sender, triggers a visual change (xdotool workspace switch)
3. Polls frames/index.json for a new entry (or watches via mtime)
4. Measures wall-clock difference = scene detection latency
For repeated measurements, use --repeat N with --interval S between triggers.
"""
import argparse
import json
import logging
import os
import subprocess
import sys
import time
from pathlib import Path
log = logging.getLogger("bench_delay")
def get_frame_count(frames_dir: Path) -> int:
    """Return the number of entries in ``frames_dir/index.json``.

    Returns 0 when the index is missing or cannot be parsed. The file is
    read directly rather than checked with ``exists()`` first: this runs in
    a polling loop while another process rewrites the index, so the file
    may vanish or be half-written between a check and the read.
    """
    index = frames_dir / "index.json"
    try:
        return len(json.loads(index.read_text(encoding="utf-8")))
    except (FileNotFoundError, ValueError):
        # ValueError covers json.JSONDecodeError (truncated write in progress).
        return 0
def get_latest_frame_mtime(frames_dir: Path) -> float:
    """Return the modification time of ``frames_dir/index.json``.

    Returns 0.0 when the index does not exist. ``stat()`` is attempted
    directly so a file removed between a check and the call cannot raise.
    """
    try:
        return (frames_dir / "index.json").stat().st_mtime
    except FileNotFoundError:
        return 0.0
def trigger_scene_change(sender: str, method: str = "workspace") -> float:
    """Trigger a visual change on the sender over SSH.

    *method* is ``"workspace"`` (xdotool key press) or ``"color"`` (brief
    fullscreen red xterm); any other value logs an error and exits.

    Returns the wall-clock time taken just before the SSH command is
    launched, so the latency derived from it includes SSH connection and
    command start-up time. A timed-out or failed SSH command is logged
    but does not raise.
    """
    if method == "workspace":
        # xdotool switch workspace — causes a full-screen visual change
        cmd = ["ssh", sender, "DISPLAY=:0 xdotool key super+Right"]
    elif method == "color":
        # Flash a fullscreen color using xterm (more dramatic change)
        cmd = ["ssh", sender,
               "DISPLAY=:0 bash -c 'xterm -fullscreen -bg red -e sleep 0.5 &'"]
    else:
        log.error("Unknown trigger method: %s", method)
        sys.exit(1)

    wall = time.time()
    try:
        result = subprocess.run(cmd, timeout=5, capture_output=True)
    except subprocess.TimeoutExpired:
        log.warning("SSH trigger timed out")
    else:
        # Without this, a broken trigger (bad host, missing xdotool, no
        # display) is indistinguishable from a slow scene detector.
        if result.returncode != 0:
            log.warning("SSH trigger failed (exit %d): %s", result.returncode,
                        result.stderr.decode(errors="replace").strip())
    return wall
def wait_for_new_frame(frames_dir: Path, initial_count: int,
timeout: float = 15.0, poll_interval: float = 0.1) -> float | None:
"""Wait for a new frame to appear. Returns wall-clock time when detected, or None."""
deadline = time.monotonic() + timeout
while time.monotonic() < deadline:
count = get_frame_count(frames_dir)
if count > initial_count:
return time.time()
time.sleep(poll_interval)
return None
def run_measurement(frames_dir: Path, sender: str, method: str) -> dict:
    """Perform one trigger → new-frame measurement.

    Returns a dict with ``trigger_wall``, ``latency_s`` and ``timed_out``;
    ``detected_wall`` is included only when a new frame was seen.
    """
    frames_before = get_frame_count(frames_dir)
    triggered_at = trigger_scene_change(sender, method)
    seen_at = wait_for_new_frame(frames_dir, frames_before)

    if seen_at is not None:
        return {
            "trigger_wall": triggered_at,
            "detected_wall": seen_at,
            "latency_s": round(seen_at - triggered_at, 3),
            "timed_out": False,
        }
    return {"trigger_wall": triggered_at, "latency_s": None, "timed_out": True}
def main():
    """CLI entry point: run repeated latency measurements and report them.

    Triggers ``--repeat`` scene changes, ``--interval`` seconds apart, and
    prints either a JSON document (``--json``) or a logged avg/min/max
    summary plus the timeout count.
    """
    parser = argparse.ArgumentParser(description="Scene detection latency benchmark")
    parser.add_argument("--frames-dir", type=Path, help="Path to frames/ directory")
    parser.add_argument("--session-dir", type=Path, help="Path to session directory")
    parser.add_argument("--sender", required=True, help="SSH target for sender (user@host)")
    parser.add_argument("--method", default="workspace", choices=["workspace", "color"],
                        help="How to trigger visual change")
    parser.add_argument("--repeat", type=int, default=3, help="Number of measurements")
    parser.add_argument("--interval", type=float, default=5.0, help="Seconds between triggers")
    parser.add_argument("--json", action="store_true", help="Output JSON")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-7s %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )

    # --session-dir takes precedence over --frames-dir.
    if args.session_dir:
        frames_dir = args.session_dir / "frames"
    elif args.frames_dir:
        frames_dir = args.frames_dir
    else:
        parser.error("Provide --frames-dir or --session-dir")
        return

    if not frames_dir.exists():
        log.error("Frames dir not found: %s", frames_dir)
        sys.exit(1)

    results = []
    for attempt in range(1, args.repeat + 1):
        if attempt > 1:
            time.sleep(args.interval)
        log.info("Trigger %d/%d...", attempt, args.repeat)
        outcome = run_measurement(frames_dir, args.sender, args.method)
        if outcome["timed_out"]:
            log.warning("TIMEOUT (no frame in 15s)")
        else:
            log.info(" latency: %ss", outcome["latency_s"])
        results.append(outcome)

    latencies = [r["latency_s"] for r in results if r["latency_s"] is not None]
    timeout_count = sum(1 for r in results if r["timed_out"])

    if args.json:
        summary = {
            "count": len(latencies),
            "avg_s": round(sum(latencies) / len(latencies), 3) if latencies else None,
            "min_s": round(min(latencies), 3) if latencies else None,
            "max_s": round(max(latencies), 3) if latencies else None,
            "timeouts": timeout_count,
        }
        print(json.dumps({"measurements": results, "summary": summary}, indent=2))
        return

    log.info("M4 Scene detection latency:")
    if latencies:
        log.info(" avg: %.1fs", sum(latencies) / len(latencies))
        log.info(" min: %.1fs", min(latencies))
        log.info(" max: %.1fs", max(latencies))
    if timeout_count:
        log.warning(" timeouts: %d/%d", timeout_count, len(results))
if __name__ == "__main__":
main()

25
ctrl/client.sh Executable file
View File

@@ -0,0 +1,25 @@
#!/bin/bash
# Start the client (sender) — Python or Rust mode.
#
# Usage:
# ctrl/client.sh --python [RECEIVER_IP] [PORT] # kmsgrab + mpegts (default port 4444)
# ctrl/client.sh --rust [server_addr] # Rust framed protocol (default mcrndeb:4447)
#
# Default: --python
set -euo pipefail
# Repository root, resolved relative to this script so it works from any cwd.
PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
MODE="python"
# Parse mode flag
# Only the first argument is inspected; it is consumed when it is a mode flag,
# and everything that remains is forwarded untouched to the selected client.
if [[ "${1:-}" == "--python" ]]; then
MODE="python"; shift
elif [[ "${1:-}" == "--rust" ]]; then
MODE="rust"; shift
fi
# exec replaces this shell, so the client's exit status becomes this script's.
if [ "$MODE" = "rust" ]; then
exec "$PROJECT_DIR/media/ctrl/client.sh" "$@"
else
# The Python sender is launched through sudo.
exec sudo python3 "$PROJECT_DIR/sender/stream_av.py" "$@"
fi

116
ctrl/e2e_test.sh Executable file
View File

@@ -0,0 +1,116 @@
#!/bin/bash
# E2E benchmark test — fully automated, run from the SENDER machine.
#
# Starts everything via SSH, captures test video, collects results.
#
# Usage:
#   ctrl/e2e_test.sh --python [--duration 30]
#   ctrl/e2e_test.sh --rust [--duration 30]
#
# Other options: --interval S, --receiver HOST, --play-delay S
set -euo pipefail
PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
cd "$PROJECT_DIR"

DURATION=30
INTERVAL=5
RECEIVER="mcrndeb"
RDIR="wdir/cht"
MODE="python"
PLAY_DELAY=3

while [[ $# -gt 0 ]]; do
    case $1 in
        --python) MODE="python"; shift ;;
        --rust) MODE="rust"; shift ;;
        --duration) DURATION="$2"; shift 2 ;;
        --interval) INTERVAL="$2"; shift 2 ;;
        --receiver) RECEIVER="$2"; shift 2 ;;
        --play-delay) PLAY_DELAY="$2"; shift 2 ;;
        *) echo "Unknown arg: $1"; exit 1 ;;
    esac
done

VIDEO="tests/fixtures/test_scene_${DURATION}s.mp4"
GROUND_TRUTH="tests/fixtures/test_scene_${DURATION}s_ground_truth.json"

# Generate test video if needed
if [ ! -f "$VIDEO" ]; then
    echo "=== Generating test video ==="
    python3 ctrl/gen_test_video.py --duration "$DURATION" --interval "$INTERVAL"
fi

# pkill takes an extended regex, where alternation is a bare "|" (a
# backslash-escaped "\|" would match a literal pipe and never hit anything).
# The bracketed last letters keep the pattern from matching the command line
# of the "sudo pkill" invocation itself.
SENDER_PATTERN='stream_a[v]|cht-clien[t]'

# PIDs to clean up
PIDS=()
MPV_PID=""

cleanup() {
    echo "=== Cleaning up ==="
    # The ${arr[@]+...} form is safe under `set -u` even if PIDS is still empty.
    for pid in ${PIDS[@]+"${PIDS[@]}"}; do
        kill "$pid" 2>/dev/null || true
    done
    # Kill sudo'd client
    sudo pkill -f "$SENDER_PATTERN" 2>/dev/null || true
    # Stop remote processes
    ssh "$RECEIVER" "pkill -f 'cht-server|cht.app' 2>/dev/null" || true
    wait 2>/dev/null
}
trap cleanup EXIT INT TERM

echo "=== E2E test: $MODE mode, ${DURATION}s ==="

# Step 1: Start receiver side (on mcrndeb via SSH)
# NOTE(review): the transport is selected here via CHT_RUST_TRANSPORT;
# presumably ctrl/app.sh maps it to whatever variable the app reads — confirm.
if [ "$MODE" = "rust" ]; then
    echo "--- Starting Rust server on $RECEIVER ---"
    ssh -tt "$RECEIVER" "cd $RDIR && ctrl/server.sh" &
    PIDS+=($!)
    sleep 2
    echo "--- Starting app on $RECEIVER (rust transport, auto-connect) ---"
    ssh -tt "$RECEIVER" "cd $RDIR && CHT_AUTO_CONNECT=1 CHT_RUST_TRANSPORT=1 ctrl/app.sh" &
    PIDS+=($!)
    sleep 3
else
    echo "--- Starting app on $RECEIVER (python transport, auto-connect) ---"
    ssh -tt "$RECEIVER" "cd $RDIR && CHT_AUTO_CONNECT=1 CHT_RUST_TRANSPORT=0 ctrl/app.sh" &
    PIDS+=($!)
    sleep 3
fi

# Step 2: Play test video fullscreen on sender
echo "--- Playing test video fullscreen ---"
mpv --fullscreen --loop-file=inf --no-terminal "$VIDEO" &
MPV_PID=$!
PIDS+=("$MPV_PID")
sleep "$PLAY_DELAY"

# Step 3: Start client (sender)
echo "--- Starting $MODE client → $RECEIVER ---"
if [ "$MODE" = "rust" ]; then
    ctrl/client.sh --rust "${RECEIVER}:4447" &
else
    ctrl/client.sh --python "$RECEIVER" &
fi
PIDS+=($!)

# Step 4: Wait for capture + processing
WAIT=$(( DURATION + 15 ))
echo "--- Waiting ${WAIT}s for capture + processing ---"
sleep "$WAIT"

# Step 5: Stop sender side
echo "--- Stopping sender ---"
sudo pkill -f "$SENDER_PATTERN" 2>/dev/null || true
# Stop mpv by its recorded PID; the last PIDS entry is the client, not mpv.
kill "$MPV_PID" 2>/dev/null || true
sleep 2

# Step 6: Stop receiver side
echo "--- Stopping receiver ---"
ssh "$RECEIVER" "pkill -f 'cht.app' 2>/dev/null" || true
sleep 2
ssh "$RECEIVER" "pkill -f 'cht-server' 2>/dev/null" || true
sleep 1

# Step 7: Benchmark
echo ""
echo "=== Benchmark results ($MODE) ==="
ssh "$RECEIVER" "cd $RDIR && python3 ctrl/bench.py --latest --ground-truth $GROUND_TRUTH"
echo ""
echo "JSON: ssh $RECEIVER 'cd $RDIR && python3 ctrl/bench.py --latest --json --ground-truth $GROUND_TRUTH'"

148
ctrl/gen_test_video.py Executable file
View File

@@ -0,0 +1,148 @@
#!/usr/bin/env python3
"""Generate a test video with known scene changes and audio markers.
The video is played fullscreen on the sender while kmsgrab captures it —
simulating a meeting with deterministic, reproducible content.
- Scene changes: solid color blocks every INTERVAL seconds
- Overlay: large elapsed-seconds counter for visual sync measurement
- Audio: sine tone (changes frequency each scene for transcription ground truth)
Outputs:
tests/fixtures/test_scene_30s.mp4
tests/fixtures/test_scene_30s_ground_truth.json
Usage:
python ctrl/gen_test_video.py [--duration 30] [--interval 5]
"""
import argparse
import json
import logging
import subprocess
import sys
from pathlib import Path
log = logging.getLogger("gen_test_video")
PROJECT_DIR = Path(__file__).resolve().parent.parent
FIXTURES_DIR = PROJECT_DIR / "tests" / "fixtures"
# Scene colors (RGB hex)
COLORS = ["FF0000", "0000FF", "00FF00", "FFFF00", "FF00FF", "00FFFF"]
# Speech sample (Harvard sentences, public domain, Open Speech Repository)
SPEECH_SAMPLE = FIXTURES_DIR / "test_speech_harvard.wav"
SPEECH_URL = "http://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0010_8k.wav"
def ensure_speech_sample():
    """Download the speech sample into the fixtures dir if it is not there.

    The download goes to a temporary ``.part`` file that is renamed into
    place only on success, so an interrupted transfer cannot leave a
    truncated ``SPEECH_SAMPLE`` that later runs would treat as valid.

    Raises whatever ``urllib.request.urlretrieve`` raises on network failure.
    """
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    if SPEECH_SAMPLE.exists():
        return
    log.info("Downloading speech sample from Open Speech Repository...")
    import urllib.request

    partial = SPEECH_SAMPLE.with_name(SPEECH_SAMPLE.name + ".part")
    try:
        urllib.request.urlretrieve(SPEECH_URL, partial)
        partial.replace(SPEECH_SAMPLE)
    finally:
        # No-op after a successful rename; removes the leftover on failure.
        partial.unlink(missing_ok=True)
    log.info("Saved: %s", SPEECH_SAMPLE)
def generate(duration: int, interval: int):
    """Render the test video and its ground-truth JSON into the fixtures dir.

    The video is a sequence of solid-colour scenes, each *interval* seconds
    long, with an elapsed-seconds overlay; the audio is the speech sample
    looped to *duration*. When *duration* is not a multiple of *interval*
    the final scene is shortened so the video still lasts *duration* seconds.

    Returns ``(video_path, truth_path)``. Exits the process with status 1
    if ffmpeg fails.

    Raises:
        ValueError: if *duration* or *interval* is not positive.
    """
    if duration <= 0 or interval <= 0:
        raise ValueError("duration and interval must be positive")

    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    ensure_speech_sample()
    video_path = FIXTURES_DIR / f"test_scene_{duration}s.mp4"
    truth_path = FIXTURES_DIR / f"test_scene_{duration}s_ground_truth.json"

    # Ceiling division: a trailing partial interval is still a scene.
    num_scenes = (duration + interval - 1) // interval
    nc = len(COLORS)

    # Video: colored segments with timer overlay, concatenated.
    # Audio: speech sample looped to fill duration (real speech for whisper testing).
    filter_parts = []
    for i in range(num_scenes):
        color = COLORS[i % nc]
        offset = i * interval
        seg_dur = min(interval, duration - offset)  # last scene may be shorter
        filter_parts.append(
            f"color=c=0x{color}:s=1920x1080:d={seg_dur}:r=30,"
            f"drawtext=text='%{{eif\\:t+{offset}\\:d}}s':"
            f"fontsize=200:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:"
            f"borderw=6:bordercolor=black"
            f"[v{i}]"
        )
    v_inputs = "".join(f"[v{i}]" for i in range(num_scenes))
    filter_parts.append(f"{v_inputs}concat=n={num_scenes}:v=1:a=0[vout]")

    # Audio: speech sample is input 1 (input 0 is the lavfi dummy)
    filter_parts.append(
        f"[1:a]aresample=48000,aloop=loop=-1:size=48000*{duration},"
        f"atrim=0:{duration},volume=0.8[aout]"
    )
    filter_complex = ";\n".join(filter_parts)

    cmd = [
        "ffmpeg", "-y",
        # Global options go before inputs/outputs; placed after the output
        # file, ffmpeg reports them as trailing options.
        "-hide_banner", "-loglevel", "warning",
        "-f", "lavfi", "-i", "anullsrc",  # dummy (video segments come from filter)
        "-i", str(SPEECH_SAMPLE),  # speech audio input
        "-filter_complex", filter_complex,
        "-map", "[vout]", "-map", "[aout]",
        "-c:v", "libx264", "-preset", "ultrafast", "-crf", "18",
        "-g", "30", "-keyint_min", "30",
        "-c:a", "aac", "-b:a", "128k",
        "-t", str(duration),
        str(video_path),
    ]
    log.info("Generating %ds test video (%d scenes, %ds interval, speech audio)", duration, num_scenes, interval)
    log.info("Output: %s", video_path)
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        log.error("ffmpeg failed:\n%s", result.stderr)
        sys.exit(1)
    log.info("Video generated: %s", video_path)

    # Write ground truth: one entry per scene, stamped at the scene's start.
    scenes = [
        {
            "scene_index": i,
            "timestamp_s": i * interval,
            "color_hex": COLORS[i % nc],
        }
        for i in range(num_scenes)
    ]
    truth = {
        "duration_s": duration,
        "interval_s": interval,
        "num_scenes": num_scenes,
        "video_path": str(video_path),
        "scenes": scenes,
    }
    truth_path.write_text(json.dumps(truth, indent=2))
    log.info("Ground truth: %s", truth_path)
    return video_path, truth_path
def main():
    """CLI entry point: configure logging, parse options, build the fixture."""
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-7s %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )
    cli = argparse.ArgumentParser(description="Generate CHT test video")
    cli.add_argument("--duration", type=int, default=30, help="Video duration in seconds")
    cli.add_argument("--interval", type=int, default=5, help="Seconds between scene changes")
    options = cli.parse_args()
    generate(options.duration, options.interval)
if __name__ == "__main__":
main()

View File

@@ -1,8 +0,0 @@
#!/bin/bash
# Start the sender on this machine
# Usage: ./sender.sh RECEIVER_IP [PORT]
set -euo pipefail
PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
exec sudo "$PROJECT_DIR/sender/stream_av.sh" "$@"

8
ctrl/server.sh Executable file
View File

@@ -0,0 +1,8 @@
#!/bin/bash
# Start the Rust media server (receiver).
# Thin wrapper around media/ctrl/server.sh.
# Usage: ctrl/server.sh [port]
set -euo pipefail
# Repository root, resolved relative to this script so it works from any cwd.
PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
# exec replaces this shell; all arguments are forwarded unchanged.
exec "$PROJECT_DIR/media/ctrl/server.sh" "$@"

View File

@@ -7,10 +7,17 @@ REMOTE="${1:-mariano@mcrndeb}"
REMOTE_PATH="${2:-~/wdir/cht/}" REMOTE_PATH="${2:-~/wdir/cht/}"
PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)" PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
# Ask git directly what it ignores — more reliable than rsync's .gitignore parsing,
# and correctly reflects the current branch after a checkout.
EXCLUDE_FILE=$(mktemp)
trap "rm -f '$EXCLUDE_FILE'" EXIT
git -C "$PROJECT_DIR" ls-files --others --ignored --exclude-standard --directory \
> "$EXCLUDE_FILE" 2>/dev/null || true
rsync -avz --delete \ rsync -avz --delete \
--exclude='.git/' \ --exclude='.git/' \
--exclude='media/target/' \ --exclude='data/' \
--exclude='media/logs/' \ --exclude-from="$EXCLUDE_FILE" \
--filter=':- .gitignore' \
"$PROJECT_DIR/" \ "$PROJECT_DIR/" \
"${REMOTE}:${REMOTE_PATH}" "${REMOTE}:${REMOTE_PATH}"

34
docs/README.md Normal file
View File

@@ -0,0 +1,34 @@
# Mitus — Documentation
## View
```
cd docs && python3 -m http.server 8000
```
Then open <http://localhost:8000> in a browser.
## Re-render diagrams
After editing any `graphs/*.dot` file:
```
./render.sh
```
Requires `graphviz` (`sudo apt install graphviz`).
## Layout
```
docs/
├── index.html main page (overview, diagrams, walkthroughs)
├── viewer.html pan/zoom viewer for individual SVGs
├── render.sh regenerate all SVGs from .dot sources
└── graphs/
├── system.{dot,svg} top-level architecture
├── python_pipeline.{dot,svg} Python transport (default)
├── rust_client.{dot,svg} Rust client (sender)
├── rust_server.{dot,svg} Rust server (receiver)
└── crates.{dot,svg} Rust workspace crates
```

View File

@@ -1,4 +1,4 @@
// Cargo workspace crate dependency graph // Mitus — Rust transport workspace (media/) crate dependency graph
digraph crates { digraph crates {
graph [fontname="monospace" bgcolor="#1e1e2e" pad="0.5"] graph [fontname="monospace" bgcolor="#1e1e2e" pad="0.5"]
node [fontname="monospace" fontcolor="#cdd6f4" style=filled shape=box node [fontname="monospace" fontcolor="#cdd6f4" style=filled shape=box
@@ -21,7 +21,7 @@ digraph crates {
client [label="cht-client [sender, Wayland]\n─────────────────────────────\nbackends/subprocess.rs ffmpeg CLI + PulseAudio\n NUT demux → EncodedPacket\nbackends/mod.rs Backend enum\ncapture.rs KmsCapture (direct backend)\nencoder.rs VaapiEncoder + MediaType\npipeline.rs capture→encode thread\nmain.rs wait_for_server, transport,\n YYYYMMDD_HHMMSS session IDs" client [label="cht-client [sender, Wayland]\n─────────────────────────────\nbackends/subprocess.rs ffmpeg CLI + PulseAudio\n NUT demux → EncodedPacket\nbackends/mod.rs Backend enum\ncapture.rs KmsCapture (direct backend)\nencoder.rs VaapiEncoder + MediaType\npipeline.rs capture→encode thread\nmain.rs wait_for_server, transport,\n YYYYMMDD_HHMMSS session IDs"
fillcolor="#1e2d3e" color="#89b4fa"] fillcolor="#1e2d3e" color="#89b4fa"]
server [label="cht-server [receiver, mcrndeb]\n─────────────────────────────\nmain.rs TCP listener\n routes Video/Audio/Control\nsession.rs ffmpeg subprocess:\n fMP4 + UDP relay\n ADTS audio writer\n Scene relay (Unix socket)\n keyframe buffering" server [label="cht-server [receiver, mcrn]\n─────────────────────────────\nmain.rs TCP listener\n routes Video/Audio/Control\nsession.rs ffmpeg subprocess:\n fMP4 + UDP relay\n ADTS audio writer\n Scene relay (Unix socket)\n keyframe buffering"
fillcolor="#1e2d3e" color="#89b4fa"] fillcolor="#1e2d3e" color="#89b4fa"]
// Deps // Deps

View File

@@ -170,7 +170,7 @@
<g id="node10" class="node"> <g id="node10" class="node">
<title>server</title> <title>server</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="1119.22,-417.07 760.42,-417.07 760.42,-247.43 1119.22,-247.43 1119.22,-417.07"/> <polygon fill="#1e2d3e" stroke="#89b4fa" points="1119.22,-417.07 760.42,-417.07 760.42,-247.43 1119.22,-247.43 1119.22,-417.07"/>
<text xml:space="preserve" text-anchor="middle" x="939.82" y="-396.57" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht&#45;server &#160;[receiver, mcrndeb]</text> <text xml:space="preserve" text-anchor="middle" x="939.82" y="-396.57" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht&#45;server &#160;[receiver, mcrn]</text>
<text xml:space="preserve" text-anchor="middle" x="939.82" y="-379.32" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────────────────────</text> <text xml:space="preserve" text-anchor="middle" x="939.82" y="-379.32" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────────────────────</text>
<text xml:space="preserve" text-anchor="middle" x="939.82" y="-362.07" font-family="monospace" font-size="14.00" fill="#cdd6f4">main.rs &#160;&#160;&#160;&#160;&#160;&#160;TCP listener</text> <text xml:space="preserve" text-anchor="middle" x="939.82" y="-362.07" font-family="monospace" font-size="14.00" fill="#cdd6f4">main.rs &#160;&#160;&#160;&#160;&#160;&#160;TCP listener</text>
<text xml:space="preserve" text-anchor="middle" x="939.82" y="-344.82" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;routes Video/Audio/Control</text> <text xml:space="preserve" text-anchor="middle" x="939.82" y="-344.82" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;routes Video/Audio/Control</text>

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 18 KiB

View File

@@ -0,0 +1,86 @@
// Mitus — Python transport pipeline (default mode, --python or no flag)
// Sender bash script wraps ffmpeg CLI; receiver runs ffmpeg in-process via Python.
digraph python_pipeline {
graph [fontname="monospace" bgcolor="#1e1e2e" rankdir=TB pad="0.6" splines=polyline]
node [fontname="monospace" fontcolor="#cdd6f4" style=filled shape=box
fillcolor="#313244" color="#585b70" margin="0.25,0.12"]
edge [color="#585b70" fontname="monospace" fontcolor="#a6adc8"]
// Hardware / OS
drm [label="/dev/dri/card0\n(KMS scanout)" shape=cylinder fillcolor="#1e3a2f" color="#a6e3a1"]
pulse [label="PulseAudio\n─────────────\nmonitor: default sink\nmic: webcam (C922)" shape=cylinder fillcolor="#1e3a2f" color="#a6e3a1"]
net [label="TCP :4444\nmpegts" shape=parallelogram fillcolor="#1e2a3e" color="#89b4fa"]
subgraph cluster_sender {
label="Sender — sender/stream_av.sh" fontcolor="#a6adc8" color="#45475a" fontname="monospace"
watchdog [label="watchdog loop\n─────────────\nffmpeg restart on stall\n(total_size or frame stuck > 10s)\nimmediate restart on\nDRM plane format change"
fillcolor="#2d2038" color="#cba6f7"]
ffmpeg_send [label="ffmpeg CLI\n─────────────\nkmsgrab → hwmap=vaapi\nscale_vaapi 1920x1080 nv12\nh264_vaapi (qp=20, gop=30, no B-frames)\namix(monitor, mic) → aac 128k\nmpegts → TCP"
fillcolor="#1e2d3e" color="#89b4fa"]
}
subgraph cluster_recorder {
label="StreamRecorder — cht/stream/recorder.py" fontcolor="#a6adc8" color="#45475a" fontname="monospace"
ffmpeg_recv [label="ffmpeg listener\n─────────────\nlisten=1 on TCP :4444\n→ 3 outputs:\n fragmented MP4 (recording_*.mp4)\n UDP :4445 (mpegts → mpv)\n stdout pipe (showinfo)"
fillcolor="#1e2d3e" color="#89b4fa"]
scene_pipe [label="scene-detect parser\n─────────────\nreads stdout pipe\nshowinfo → scene timestamps\nemits raw_frame(jpeg, ts)"
fillcolor="#2d2038" color="#cba6f7"]
}
subgraph cluster_processor {
label="SessionProcessor — cht/stream/processor.py" fontcolor="#a6adc8" color="#45475a" fontname="monospace"
frame_writer [label="frame writer\n─────────────\nwrites JPEG to frames/\nappends to index.json\nfires on_new_frames(ts, path)"
fillcolor="#2d2038" color="#cba6f7"]
audio_extract [label="audio extractor\n─────────────\npolls fMP4 for new audio\nffmpeg → 16 kHz mono WAV\nchunks for transcription"
fillcolor="#2d2038" color="#cba6f7"]
tracker [label="RecordingTracker\n─────────────\nffprobe duration\nsums segments\nfeeds timeline UI"
fillcolor="#2d2038" color="#cba6f7"]
}
transcriber [label="TranscriberEngine\n─────────────\ncht/transcriber/engine.py\nfaster-whisper (CUDA)\ngrouped segments → transcript.json"
fillcolor="#2d2038" color="#cba6f7"]
gui [label="Mitus GUI (GTK4)\n─────────────\nMonitor (mpv UDP)\nScrub bar · Frames · Transcript\nAgent input/output"
fillcolor="#2d2038" color="#cba6f7"]
// Outputs
fmp4 [label="stream/\nrecording_*.mp4" shape=folder fillcolor="#2a2a3e" color="#585b70"]
udp [label="UDP :4445\n→ mpv" shape=parallelogram fillcolor="#2a2a3e" color="#585b70"]
frames [label="frames/\nindex.json + *.jpg" shape=folder fillcolor="#2a2a3e" color="#585b70"]
audio [label="audio/\nchunk_*.wav" shape=folder fillcolor="#2a2a3e" color="#585b70"]
txt [label="transcript.json" shape=folder fillcolor="#2a2a3e" color="#585b70"]
// Flow — sender
drm -> ffmpeg_send [label="kmsgrab"]
pulse -> ffmpeg_send [label="-f pulse"]
watchdog -> ffmpeg_send [style=dashed label="restart"]
ffmpeg_send -> net
// Flow — recorder
net -> ffmpeg_recv [label="mpegts"]
ffmpeg_recv -> fmp4
ffmpeg_recv -> udp
ffmpeg_recv -> scene_pipe [label="stdout"]
udp -> gui [label="live\nmonitor"]
// Flow — processor
scene_pipe -> frame_writer [label="raw_frame"]
frame_writer -> frames
fmp4 -> audio_extract [label="poll" style=dashed]
audio_extract -> audio
audio -> transcriber [label="WAV"]
transcriber -> txt
fmp4 -> tracker [label="ffprobe" style=dashed]
tracker -> gui [label="duration"]
// Flow — GUI
frames -> gui
txt -> gui
}

View File

@@ -0,0 +1,308 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 14.1.2 (0)
-->
<!-- Title: python_pipeline Pages: 1 -->
<svg width="1067pt" height="1624pt"
viewBox="0.00 0.00 1067.00 1624.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(43.2 1580.56)">
<title>python_pipeline</title>
<polygon fill="#1e1e2e" stroke="none" points="-43.2,43.2 -43.2,-1580.56 1024.25,-1580.56 1024.25,43.2 -43.2,43.2"/>
<g id="clust1" class="cluster">
<title>cluster_sender</title>
<polygon fill="#1e1e2e" stroke="#45475a" points="159.75,-1176.05 159.75,-1529.36 533.75,-1529.36 533.75,-1176.05 159.75,-1176.05"/>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1512.06" font-family="monospace" font-size="14.00" fill="#a6adc8">Sender — sender/stream_av.sh</text>
</g>
<g id="clust2" class="cluster">
<title>cluster_recorder</title>
<polygon fill="#1e1e2e" stroke="#45475a" points="174.75,-664.16 174.75,-1000.24 512.75,-1000.24 512.75,-664.16 174.75,-664.16"/>
<text xml:space="preserve" text-anchor="middle" x="343.75" y="-982.94" font-family="monospace" font-size="14.00" fill="#a6adc8">StreamRecorder — cht/stream/recorder.py</text>
</g>
<g id="clust3" class="cluster">
<title>cluster_processor</title>
<polygon fill="#1e1e2e" stroke="#45475a" points="135.75,-484.12 135.75,-628.9 873.75,-628.9 873.75,-484.12 135.75,-484.12"/>
<text xml:space="preserve" text-anchor="middle" x="504.75" y="-611.6" font-family="monospace" font-size="14.00" fill="#a6adc8">SessionProcessor — cht/stream/processor.py</text>
</g>
<!-- drm -->
<g id="node1" class="node">
<title>drm</title>
<path fill="#1e3a2f" stroke="#a6e3a1" d="M151.5,-1464.85C151.5,-1468.42 117.55,-1471.32 75.75,-1471.32 33.95,-1471.32 0,-1468.42 0,-1464.85 0,-1464.85 0,-1406.59 0,-1406.59 0,-1403.02 33.95,-1400.12 75.75,-1400.12 117.55,-1400.12 151.5,-1403.02 151.5,-1406.59 151.5,-1406.59 151.5,-1464.85 151.5,-1464.85"/>
<path fill="none" stroke="#a6e3a1" d="M151.5,-1464.85C151.5,-1461.27 117.55,-1458.37 75.75,-1458.37 33.95,-1458.37 0,-1461.27 0,-1464.85"/>
<text xml:space="preserve" text-anchor="middle" x="75.75" y="-1439.67" font-family="monospace" font-size="14.00" fill="#cdd6f4">/dev/dri/card0</text>
<text xml:space="preserve" text-anchor="middle" x="75.75" y="-1422.42" font-family="monospace" font-size="14.00" fill="#cdd6f4">(KMS scanout)</text>
</g>
<!-- ffmpeg_send -->
<g id="node5" class="node">
<title>ffmpeg_send</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="525.62,-1322.08 167.88,-1322.08 167.88,-1184.05 525.62,-1184.05 525.62,-1322.08"/>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1300.14" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg CLI</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1282.89" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1265.64" font-family="monospace" font-size="14.00" fill="#cdd6f4">kmsgrab → hwmap=vaapi</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1248.39" font-family="monospace" font-size="14.00" fill="#cdd6f4">scale_vaapi 1920x1080 nv12</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1231.14" font-family="monospace" font-size="14.00" fill="#cdd6f4">h264_vaapi (qp=20, gop=30, no B&#45;frames)</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1213.89" font-family="monospace" font-size="14.00" fill="#cdd6f4">amix(monitor, mic) → aac 128k</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1196.64" font-family="monospace" font-size="14.00" fill="#cdd6f4">mpegts → TCP</text>
</g>
<!-- drm&#45;&gt;ffmpeg_send -->
<g id="edge1" class="edge">
<title>drm&#45;&gt;ffmpeg_send</title>
<path fill="none" stroke="#585b70" d="M122.29,-1400.17C139.89,-1387.11 155.75,-1375.33 155.75,-1375.33 155.75,-1375.33 189.94,-1353.62 229.39,-1328.57"/>
<polygon fill="#585b70" stroke="#585b70" points="231.13,-1331.62 237.69,-1323.31 227.37,-1325.71 231.13,-1331.62"/>
<text xml:space="preserve" text-anchor="middle" x="235.85" y="-1344.03" font-family="monospace" font-size="14.00" fill="#a6adc8">kmsgrab</text>
</g>
<!-- pulse -->
<g id="node2" class="node">
<title>pulse</title>
<path fill="#1e3a2f" stroke="#a6e3a1" d="M751.38,-1484.25C751.38,-1490.2 704.48,-1495.04 646.75,-1495.04 589.02,-1495.04 542.12,-1490.2 542.12,-1484.25 542.12,-1484.25 542.12,-1387.19 542.12,-1387.19 542.12,-1381.24 589.02,-1376.4 646.75,-1376.4 704.48,-1376.4 751.38,-1381.24 751.38,-1387.19 751.38,-1387.19 751.38,-1484.25 751.38,-1484.25"/>
<path fill="none" stroke="#a6e3a1" d="M751.38,-1484.25C751.38,-1478.3 704.48,-1473.47 646.75,-1473.47 589.02,-1473.47 542.12,-1478.3 542.12,-1484.25"/>
<text xml:space="preserve" text-anchor="middle" x="646.75" y="-1456.92" font-family="monospace" font-size="14.00" fill="#cdd6f4">PulseAudio</text>
<text xml:space="preserve" text-anchor="middle" x="646.75" y="-1439.67" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="646.75" y="-1422.42" font-family="monospace" font-size="14.00" fill="#cdd6f4">monitor: default sink</text>
<text xml:space="preserve" text-anchor="middle" x="646.75" y="-1405.17" font-family="monospace" font-size="14.00" fill="#cdd6f4">mic: webcam (C922)</text>
</g>
<!-- pulse&#45;&gt;ffmpeg_send -->
<g id="edge2" class="edge">
<title>pulse&#45;&gt;ffmpeg_send</title>
<path fill="none" stroke="#585b70" d="M555.11,-1379.54C528.25,-1363.36 498.43,-1345.41 469.95,-1328.25"/>
<polygon fill="#585b70" stroke="#585b70" points="472.01,-1325.41 461.64,-1323.25 468.4,-1331.41 472.01,-1325.41"/>
<text xml:space="preserve" text-anchor="middle" x="547.71" y="-1344.03" font-family="monospace" font-size="14.00" fill="#a6adc8">&#45;f pulse</text>
</g>
<!-- net -->
<g id="node3" class="node">
<title>net</title>
<polygon fill="#1e2a3e" stroke="#89b4fa" points="461.05,-1147.05 279.22,-1147.05 232.45,-1043.49 414.28,-1043.49 461.05,-1147.05"/>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1099.22" font-family="monospace" font-size="14.00" fill="#cdd6f4">TCP :4444</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1081.97" font-family="monospace" font-size="14.00" fill="#cdd6f4">mpegts</text>
</g>
<!-- ffmpeg_recv -->
<g id="node6" class="node">
<title>ffmpeg_recv</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="505,-966.99 188.5,-966.99 188.5,-828.96 505,-828.96 505,-966.99"/>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-945.05" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg listener</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-927.8" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-910.55" font-family="monospace" font-size="14.00" fill="#cdd6f4">listen=1 on TCP :4444</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-893.3" font-family="monospace" font-size="14.00" fill="#cdd6f4">→ 2 outputs:</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-876.05" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;fragmented MP4 (recording_*.mp4)</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-858.8" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;UDP :4445 (mpegts → mpv)</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-841.55" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;stdout pipe (showinfo)</text>
</g>
<!-- net&#45;&gt;ffmpeg_recv -->
<g id="edge5" class="edge">
<title>net&#45;&gt;ffmpeg_recv</title>
<path fill="none" stroke="#585b70" d="M346.75,-1043.21C346.75,-1023.48 346.75,-1000.44 346.75,-978.64"/>
<polygon fill="#585b70" stroke="#585b70" points="350.25,-978.88 346.75,-968.88 343.25,-978.88 350.25,-978.88"/>
<text xml:space="preserve" text-anchor="middle" x="371.5" y="-1012.19" font-family="monospace" font-size="14.00" fill="#a6adc8">mpegts</text>
</g>
<!-- watchdog -->
<g id="node4" class="node">
<title>watchdog</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="500.88,-1496.11 192.62,-1496.11 192.62,-1375.33 500.88,-1375.33 500.88,-1496.11"/>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1474.17" font-family="monospace" font-size="14.00" fill="#cdd6f4">watchdog loop</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1456.92" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1439.67" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg restart on stall</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1422.42" font-family="monospace" font-size="14.00" fill="#cdd6f4">(total_size or frame stuck &gt; 10s)</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1405.17" font-family="monospace" font-size="14.00" fill="#cdd6f4">immediate restart on</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1387.92" font-family="monospace" font-size="14.00" fill="#cdd6f4">DRM plane format change</text>
</g>
<!-- watchdog&#45;&gt;ffmpeg_send -->
<g id="edge3" class="edge">
<title>watchdog&#45;&gt;ffmpeg_send</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M346.75,-1375.07C346.75,-1361.9 346.75,-1347.74 346.75,-1333.92"/>
<polygon fill="#585b70" stroke="#585b70" points="350.25,-1333.93 346.75,-1323.93 343.25,-1333.93 350.25,-1333.93"/>
<text xml:space="preserve" text-anchor="middle" x="375.62" y="-1344.03" font-family="monospace" font-size="14.00" fill="#a6adc8">restart</text>
</g>
<!-- ffmpeg_send&#45;&gt;net -->
<g id="edge4" class="edge">
<title>ffmpeg_send&#45;&gt;net</title>
<path fill="none" stroke="#585b70" d="M346.75,-1183.73C346.75,-1175.4 346.75,-1166.91 346.75,-1158.67"/>
<polygon fill="#585b70" stroke="#585b70" points="350.25,-1158.73 346.75,-1148.73 343.25,-1158.73 350.25,-1158.73"/>
</g>
<!-- scene_pipe -->
<g id="node7" class="node">
<title>scene_pipe</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="458.12,-775.69 199.38,-775.69 199.38,-672.16 458.12,-672.16 458.12,-775.69"/>
<text xml:space="preserve" text-anchor="middle" x="328.75" y="-753.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">scene&#45;detect parser</text>
<text xml:space="preserve" text-anchor="middle" x="328.75" y="-736.5" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="328.75" y="-719.25" font-family="monospace" font-size="14.00" fill="#cdd6f4">reads stdout pipe</text>
<text xml:space="preserve" text-anchor="middle" x="328.75" y="-702" font-family="monospace" font-size="14.00" fill="#cdd6f4">showinfo → scene timestamps</text>
<text xml:space="preserve" text-anchor="middle" x="328.75" y="-684.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">emits raw_frame(jpeg, ts)</text>
</g>
<!-- ffmpeg_recv&#45;&gt;scene_pipe -->
<g id="edge8" class="edge">
<title>ffmpeg_recv&#45;&gt;scene_pipe</title>
<path fill="none" stroke="#585b70" d="M339.58,-828.48C338.15,-814.79 336.65,-800.49 335.25,-787.03"/>
<polygon fill="#585b70" stroke="#585b70" points="338.77,-787.04 334.25,-777.46 331.81,-787.77 338.77,-787.04"/>
<text xml:space="preserve" text-anchor="middle" x="362.5" y="-797.66" font-family="monospace" font-size="14.00" fill="#a6adc8">stdout</text>
</g>
<!-- fmp4 -->
<g id="node13" class="node">
<title>fmp4</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="680.62,-749.82 677.62,-753.82 656.62,-753.82 653.62,-749.82 520.88,-749.82 520.88,-698.04 680.62,-698.04 680.62,-749.82"/>
<text xml:space="preserve" text-anchor="middle" x="600.75" y="-727.88" font-family="monospace" font-size="14.00" fill="#cdd6f4">stream/</text>
<text xml:space="preserve" text-anchor="middle" x="600.75" y="-710.63" font-family="monospace" font-size="14.00" fill="#cdd6f4">recording_*.mp4</text>
</g>
<!-- ffmpeg_recv&#45;&gt;fmp4 -->
<g id="edge6" class="edge">
<title>ffmpeg_recv&#45;&gt;fmp4</title>
<path fill="none" stroke="#585b70" d="M447.87,-828.48C484.57,-803.62 524.28,-776.73 554.03,-756.57"/>
<polygon fill="#585b70" stroke="#585b70" points="555.75,-759.63 562.07,-751.13 551.83,-753.84 555.75,-759.63"/>
</g>
<!-- udp -->
<g id="node14" class="node">
<title>udp</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="981.05,-775.71 799.22,-775.71 752.45,-672.15 934.28,-672.15 981.05,-775.71"/>
<text xml:space="preserve" text-anchor="middle" x="866.75" y="-727.88" font-family="monospace" font-size="14.00" fill="#cdd6f4">UDP :4445</text>
<text xml:space="preserve" text-anchor="middle" x="866.75" y="-710.63" font-family="monospace" font-size="14.00" fill="#cdd6f4">→ mpv</text>
</g>
<!-- ffmpeg_recv&#45;&gt;udp -->
<g id="edge7" class="edge">
<title>ffmpeg_recv&#45;&gt;udp</title>
<path fill="none" stroke="#585b70" d="M505.24,-844.54C594.36,-815.05 702.36,-779.32 776.98,-754.63"/>
<polygon fill="#585b70" stroke="#585b70" points="777.91,-758.01 786.31,-751.55 775.71,-751.36 777.91,-758.01"/>
</g>
<!-- frame_writer -->
<g id="node8" class="node">
<title>frame_writer</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="419.38,-595.65 144.12,-595.65 144.12,-492.12 419.38,-492.12 419.38,-595.65"/>
<text xml:space="preserve" text-anchor="middle" x="281.75" y="-573.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">frame writer</text>
<text xml:space="preserve" text-anchor="middle" x="281.75" y="-556.46" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="281.75" y="-539.21" font-family="monospace" font-size="14.00" fill="#cdd6f4">writes JPEG to frames/</text>
<text xml:space="preserve" text-anchor="middle" x="281.75" y="-521.96" font-family="monospace" font-size="14.00" fill="#cdd6f4">appends to index.json</text>
<text xml:space="preserve" text-anchor="middle" x="281.75" y="-504.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">fires on_new_frames(ts, path)</text>
</g>
<!-- scene_pipe&#45;&gt;frame_writer -->
<g id="edge10" class="edge">
<title>scene_pipe&#45;&gt;frame_writer</title>
<path fill="none" stroke="#585b70" d="M315.28,-671.91C309.92,-651.61 303.71,-628.06 298.09,-606.8"/>
<polygon fill="#585b70" stroke="#585b70" points="301.54,-606.15 295.61,-597.38 294.77,-607.94 301.54,-606.15"/>
<text xml:space="preserve" text-anchor="middle" x="346.58" y="-640.85" font-family="monospace" font-size="14.00" fill="#a6adc8">raw_frame</text>
</g>
<!-- frames -->
<g id="node15" class="node">
<title>frames</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="464,-192.31 461,-196.31 440,-196.31 437,-192.31 279.5,-192.31 279.5,-140.53 464,-140.53 464,-192.31"/>
<text xml:space="preserve" text-anchor="middle" x="371.75" y="-170.37" font-family="monospace" font-size="14.00" fill="#cdd6f4">frames/</text>
<text xml:space="preserve" text-anchor="middle" x="371.75" y="-153.12" font-family="monospace" font-size="14.00" fill="#cdd6f4">index.json + *.jpg</text>
</g>
<!-- frame_writer&#45;&gt;frames -->
<g id="edge11" class="edge">
<title>frame_writer&#45;&gt;frames</title>
<path fill="none" stroke="#585b70" d="M293.98,-491.86C312.47,-414.71 347.09,-270.31 363.15,-203.29"/>
<polygon fill="#585b70" stroke="#585b70" points="366.47,-204.47 365.39,-193.93 359.66,-202.84 366.47,-204.47"/>
</g>
<!-- audio_extract -->
<g id="node9" class="node">
<title>audio_extract</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="671.75,-595.65 437.75,-595.65 437.75,-492.12 671.75,-492.12 671.75,-595.65"/>
<text xml:space="preserve" text-anchor="middle" x="554.75" y="-573.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">audio extractor</text>
<text xml:space="preserve" text-anchor="middle" x="554.75" y="-556.46" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="554.75" y="-539.21" font-family="monospace" font-size="14.00" fill="#cdd6f4">polls fMP4 for new audio</text>
<text xml:space="preserve" text-anchor="middle" x="554.75" y="-521.96" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg → 16 kHz mono WAV</text>
<text xml:space="preserve" text-anchor="middle" x="554.75" y="-504.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">chunks for transcription</text>
</g>
<!-- audio -->
<g id="node16" class="node">
<title>audio</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="620.12,-455.12 617.12,-459.12 596.12,-459.12 593.12,-455.12 493.38,-455.12 493.38,-403.34 620.12,-403.34 620.12,-455.12"/>
<text xml:space="preserve" text-anchor="middle" x="556.75" y="-433.18" font-family="monospace" font-size="14.00" fill="#cdd6f4">audio/</text>
<text xml:space="preserve" text-anchor="middle" x="556.75" y="-415.93" font-family="monospace" font-size="14.00" fill="#cdd6f4">chunk_*.wav</text>
</g>
<!-- audio_extract&#45;&gt;audio -->
<g id="edge13" class="edge">
<title>audio_extract&#45;&gt;audio</title>
<path fill="none" stroke="#585b70" d="M555.66,-491.83C555.8,-483.48 555.96,-474.97 556.1,-467.05"/>
<polygon fill="#585b70" stroke="#585b70" points="559.59,-467.16 556.27,-457.1 552.6,-467.03 559.59,-467.16"/>
</g>
<!-- tracker -->
<g id="node10" class="node">
<title>tracker</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="865.88,-595.65 689.62,-595.65 689.62,-492.12 865.88,-492.12 865.88,-595.65"/>
<text xml:space="preserve" text-anchor="middle" x="777.75" y="-573.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">RecordingTracker</text>
<text xml:space="preserve" text-anchor="middle" x="777.75" y="-556.46" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="777.75" y="-539.21" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffprobe duration</text>
<text xml:space="preserve" text-anchor="middle" x="777.75" y="-521.96" font-family="monospace" font-size="14.00" fill="#cdd6f4">sums segments</text>
<text xml:space="preserve" text-anchor="middle" x="777.75" y="-504.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">feeds timeline UI</text>
</g>
<!-- gui -->
<g id="node12" class="node">
<title>gui</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="860.62,-103.53 568.88,-103.53 568.88,0 860.62,0 860.62,-103.53"/>
<text xml:space="preserve" text-anchor="middle" x="714.75" y="-81.59" font-family="monospace" font-size="14.00" fill="#cdd6f4">Mitus GUI (GTK4)</text>
<text xml:space="preserve" text-anchor="middle" x="714.75" y="-64.34" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="714.75" y="-47.09" font-family="monospace" font-size="14.00" fill="#cdd6f4">Monitor (mpv UDP)</text>
<text xml:space="preserve" text-anchor="middle" x="714.75" y="-29.84" font-family="monospace" font-size="14.00" fill="#cdd6f4">Scrub bar · Frames · Transcript</text>
<text xml:space="preserve" text-anchor="middle" x="714.75" y="-12.59" font-family="monospace" font-size="14.00" fill="#cdd6f4">Agent input/output</text>
</g>
<!-- tracker&#45;&gt;gui -->
<g id="edge17" class="edge">
<title>tracker&#45;&gt;gui</title>
<path fill="none" stroke="#585b70" d="M771.41,-491.83C767.7,-462.02 763.75,-430.23 763.75,-430.23 763.75,-430.23 763.75,-430.23 763.75,-165.42 763.75,-165.42 752.97,-140.64 741.53,-114.33"/>
<polygon fill="#585b70" stroke="#585b70" points="744.76,-112.98 737.56,-105.21 738.34,-115.77 744.76,-112.98"/>
<text xml:space="preserve" text-anchor="middle" x="796.75" y="-276.4" font-family="monospace" font-size="14.00" fill="#a6adc8">duration</text>
</g>
<!-- transcriber -->
<g id="node11" class="node">
<title>transcriber</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="726,-332.84 409.5,-332.84 409.5,-229.31 726,-229.31 726,-332.84"/>
<text xml:space="preserve" text-anchor="middle" x="567.75" y="-310.9" font-family="monospace" font-size="14.00" fill="#cdd6f4">TranscriberEngine</text>
<text xml:space="preserve" text-anchor="middle" x="567.75" y="-293.65" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="567.75" y="-276.4" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht/transcriber/engine.py</text>
<text xml:space="preserve" text-anchor="middle" x="567.75" y="-259.15" font-family="monospace" font-size="14.00" fill="#cdd6f4">faster&#45;whisper (CUDA)</text>
<text xml:space="preserve" text-anchor="middle" x="567.75" y="-241.9" font-family="monospace" font-size="14.00" fill="#cdd6f4">grouped segments → transcript.json</text>
</g>
<!-- txt -->
<g id="node17" class="node">
<title>txt</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="691.62,-184.42 688.62,-188.42 667.62,-188.42 664.62,-184.42 531.88,-184.42 531.88,-148.42 691.62,-148.42 691.62,-184.42"/>
<text xml:space="preserve" text-anchor="middle" x="611.75" y="-161.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">transcript.json</text>
</g>
<!-- transcriber&#45;&gt;txt -->
<g id="edge15" class="edge">
<title>transcriber&#45;&gt;txt</title>
<path fill="none" stroke="#585b70" d="M587.69,-229.02C592.18,-217.52 596.79,-205.72 600.76,-195.56"/>
<polygon fill="#585b70" stroke="#585b70" points="603.99,-196.9 604.37,-186.31 597.47,-194.35 603.99,-196.9"/>
</g>
<!-- fmp4&#45;&gt;audio_extract -->
<g id="edge12" class="edge">
<title>fmp4&#45;&gt;audio_extract</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M594.26,-697.8C588.16,-674.22 578.81,-638 570.73,-606.73"/>
<polygon fill="#585b70" stroke="#585b70" points="574.2,-606.17 568.31,-597.37 567.42,-607.92 574.2,-606.17"/>
<text xml:space="preserve" text-anchor="middle" x="598.37" y="-640.85" font-family="monospace" font-size="14.00" fill="#a6adc8">poll</text>
</g>
<!-- fmp4&#45;&gt;tracker -->
<g id="edge16" class="edge">
<title>fmp4&#45;&gt;tracker</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M625.73,-697.8C649.89,-673.5 687.4,-635.77 719.11,-603.87"/>
<polygon fill="#585b70" stroke="#585b70" points="721.4,-606.53 725.97,-596.97 716.43,-601.6 721.4,-606.53"/>
<text xml:space="preserve" text-anchor="middle" x="712.82" y="-640.85" font-family="monospace" font-size="14.00" fill="#a6adc8">ffprobe</text>
</g>
<!-- udp&#45;&gt;gui -->
<g id="edge9" class="edge">
<title>udp&#45;&gt;gui</title>
<path fill="none" stroke="#585b70" d="M874.49,-671.91C882.47,-619.3 893.75,-544.88 893.75,-544.88 893.75,-544.88 893.75,-544.88 893.75,-165.42 893.75,-165.42 850.06,-137.93 805.85,-110.1"/>
<polygon fill="#585b70" stroke="#585b70" points="807.74,-107.15 797.41,-104.79 804.01,-113.08 807.74,-107.15"/>
<text xml:space="preserve" text-anchor="middle" x="922.62" y="-372.04" font-family="monospace" font-size="14.00" fill="#a6adc8">live</text>
<text xml:space="preserve" text-anchor="middle" x="922.62" y="-354.79" font-family="monospace" font-size="14.00" fill="#a6adc8">monitor</text>
</g>
<!-- frames&#45;&gt;gui -->
<g id="edge18" class="edge">
<title>frames&#45;&gt;gui</title>
<path fill="none" stroke="#585b70" d="M448.73,-140.14C481.04,-129.52 519.87,-116.77 557.75,-104.33"/>
<polygon fill="#585b70" stroke="#585b70" points="558.56,-107.75 566.97,-101.3 556.37,-101.1 558.56,-107.75"/>
</g>
<!-- audio&#45;&gt;transcriber -->
<g id="edge14" class="edge">
<title>audio&#45;&gt;transcriber</title>
<path fill="none" stroke="#585b70" d="M558.64,-403.11C559.86,-386.92 561.5,-365.12 563.05,-344.48"/>
<polygon fill="#585b70" stroke="#585b70" points="566.53,-344.92 563.79,-334.69 559.55,-344.4 566.53,-344.92"/>
<text xml:space="preserve" text-anchor="middle" x="574.62" y="-363.41" font-family="monospace" font-size="14.00" fill="#a6adc8">WAV</text>
</g>
<!-- txt&#45;&gt;gui -->
<g id="edge19" class="edge">
<title>txt&#45;&gt;gui</title>
<path fill="none" stroke="#585b70" d="M627.39,-148.31C636.43,-138.42 648.4,-125.33 660.57,-112.02"/>
<polygon fill="#585b70" stroke="#585b70" points="662.96,-114.59 667.13,-104.85 657.8,-109.87 662.96,-114.59"/>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 24 KiB

View File

@@ -1,10 +1,10 @@
// Client pipeline data flow // Mitus — Rust client (sender) pipeline — media/client/
// Sender machine (Wayland, VAAPI GPU) // Sender machine (Wayland, VAAPI GPU)
digraph client_pipeline { digraph rust_client {
graph [fontname="monospace" bgcolor="#1e1e2e" rankdir=TB pad="0.6" splines=polyline] graph [fontname="monospace" bgcolor="#1e1e2e" rankdir=TB pad="0.6" splines=polyline]
node [fontname="monospace" fontcolor="#cdd6f4" style=filled shape=box node [fontname="monospace" fontcolor="#cdd6f4" style=filled shape=box
fillcolor="#313244" color="#585b70" margin="0.25,0.12"] fillcolor="#313244" color="#585b70" margin="0.25,0.12"]
edge [color="#585b70" fontname="monospace" fontcolor="#a6adc8" labelfontname="monospace"] edge [color="#585b70" fontname="monospace" fontcolor="#a6adc8"]
// Hardware // Hardware
drm [label="/dev/dri/card0\n(KMS scanout)" shape=cylinder fillcolor="#1e3a2f" color="#a6e3a1"] drm [label="/dev/dri/card0\n(KMS scanout)" shape=cylinder fillcolor="#1e3a2f" color="#a6e3a1"]

View File

@@ -3,11 +3,11 @@
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 14.1.2 (0) <!-- Generated by graphviz version 14.1.2 (0)
--> -->
<!-- Title: client_pipeline Pages: 1 --> <!-- Title: rust_client Pages: 1 -->
<svg width="1291pt" height="1237pt" <svg width="1291pt" height="1237pt"
viewBox="0.00 0.00 1291.00 1237.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> viewBox="0.00 0.00 1291.00 1237.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(43.2 1194.19)"> <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(43.2 1194.19)">
<title>client_pipeline</title> <title>rust_client</title>
<polygon fill="#1e1e2e" stroke="none" points="-43.2,43.2 -43.2,-1194.19 1248.2,-1194.19 1248.2,43.2 -43.2,43.2"/> <polygon fill="#1e1e2e" stroke="none" points="-43.2,43.2 -43.2,-1194.19 1248.2,-1194.19 1248.2,43.2 -43.2,43.2"/>
<g id="clust1" class="cluster"> <g id="clust1" class="cluster">
<title>cluster_main</title> <title>cluster_main</title>

Before

Width:  |  Height:  |  Size: 21 KiB

After

Width:  |  Height:  |  Size: 21 KiB

View File

@@ -1,6 +1,6 @@
// Server pipeline — current implementation // Mitus — Rust server (receiver) pipeline — media/server/
// Receiver machine (mcrndeb: X11, RTX 3080, NVDEC) // Receiver machine (mcrn: X11, RTX 3080, NVDEC)
digraph server_pipeline { digraph rust_server {
graph [fontname="monospace" bgcolor="#1e1e2e" rankdir=TB pad="0.6" splines=polyline] graph [fontname="monospace" bgcolor="#1e1e2e" rankdir=TB pad="0.6" splines=polyline]
node [fontname="monospace" fontcolor="#cdd6f4" style=filled shape=box node [fontname="monospace" fontcolor="#cdd6f4" style=filled shape=box
fillcolor="#313244" color="#585b70" margin="0.25,0.12"] fillcolor="#313244" color="#585b70" margin="0.25,0.12"]

View File

@@ -3,11 +3,11 @@
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 14.1.2 (0) <!-- Generated by graphviz version 14.1.2 (0)
--> -->
<!-- Title: server_pipeline Pages: 1 --> <!-- Title: rust_server Pages: 1 -->
<svg width="1429pt" height="1141pt" <svg width="1429pt" height="1141pt"
viewBox="0.00 0.00 1429.00 1141.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> viewBox="0.00 0.00 1429.00 1141.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(43.2 1097.94)"> <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(43.2 1097.94)">
<title>server_pipeline</title> <title>rust_server</title>
<polygon fill="#1e1e2e" stroke="none" points="-43.2,43.2 -43.2,-1097.94 1385.33,-1097.94 1385.33,43.2 -43.2,43.2"/> <polygon fill="#1e1e2e" stroke="none" points="-43.2,43.2 -43.2,-1097.94 1385.33,-1097.94 1385.33,43.2 -43.2,43.2"/>
<g id="clust1" class="cluster"> <g id="clust1" class="cluster">
<title>cluster_rust</title> <title>cluster_rust</title>

Before

Width:  |  Height:  |  Size: 21 KiB

After

Width:  |  Height:  |  Size: 21 KiB

77
docs/graphs/system.dot Normal file
View File

@@ -0,0 +1,77 @@
// Mitus — top-level architecture
// Sender (Wayland, VAAPI) → network → Receiver (X11, NVDEC/NVENC) → Mitus GUI app
// The two transport modes (Python/mpegts and Rust/WirePacket) share the same recording layout and the same GUI.
digraph system {
// Graph-wide defaults: dark background, left-to-right ranks (rankdir=LR), polyline edge routing.
graph [fontname="monospace" bgcolor="#1e1e2e" rankdir=LR pad="0.6" splines=polyline nodesep=0.5 ranksep=0.8]
// Default node look: filled monospace boxes. Individual nodes override fillcolor/color/shape below.
node [fontname="monospace" fontcolor="#cdd6f4" style=filled shape=box
fillcolor="#313244" color="#585b70" margin="0.25,0.14"]
// Default edge look: grey; the per-transport edges below override the color.
edge [color="#585b70" fontname="monospace" fontcolor="#a6adc8"]
// Sender side: one capture node per transport.
// capture_py uses the same colors as the legend's "Python" sample, capture_rs those of the "Rust" sample.
subgraph cluster_sender {
label="Sender machine — Wayland, VAAPI GPU" fontcolor="#a6adc8" color="#45475a" fontname="monospace"
capture_py [label="kmsgrab + PulseAudio\n─────────────\nsender/stream_av.sh\nffmpeg CLI · h264_vaapi · AAC\nmpegts over TCP" fillcolor="#2d2038" color="#cba6f7"]
capture_rs [label="cht-client (Rust)\n─────────────\nmedia/client/\nffmpeg subprocess (subprocess backend)\nNUT demux → mpsc → WirePacket TCP" fillcolor="#1e2d3e" color="#89b4fa"]
}
// Network hop: one TCP endpoint per transport (:4444 mpegts, :4447 WirePacket framing).
subgraph cluster_net {
label="Network" fontcolor="#a6adc8" color="#45475a" fontname="monospace"
net_py [label="TCP :4444\nmpegts" shape=parallelogram fillcolor="#1e2a3e" color="#89b4fa"]
net_rs [label="TCP :4447\nWirePacket framing" shape=parallelogram fillcolor="#1e2a3e" color="#89b4fa"]
}
// Receiver side: a recorder per transport, then the shared processor, transcriber, GUI, agent,
// and the per-session directory (store) they connect to via the edges below.
subgraph cluster_receiver {
label="Receiver (mcrn) — X11, NVENC/NVDEC GPU" fontcolor="#a6adc8" color="#45475a" fontname="monospace"
recorder_py [label="StreamRecorder (Python)\n─────────────\ncht/stream/recorder.py\nffmpeg listener · TCP receive\nfMP4 writer · UDP relay\nstdout-pipe scene detect"
fillcolor="#2d2038" color="#cba6f7"]
recorder_rs [label="cht-server (Rust)\n─────────────\nmedia/server/\nWirePacket router\nfMP4 + UDP relay (ffmpeg)\nADTS audio writer\nUnix-socket scene relay"
fillcolor="#1e2d3e" color="#89b4fa"]
processor [label="SessionProcessor (Python)\n─────────────\ncht/stream/processor.py\nfMP4 → audio.wav (ffmpeg)\nchunked WAVs for transcribe\n[Rust mode: scene detect via\nUnix socket → ffmpeg pipe]"
fillcolor="#2d2038" color="#cba6f7"]
transcriber [label="Transcriber\n─────────────\ncht/transcriber/engine.py\nfaster-whisper · CUDA\nsegment grouping"
fillcolor="#2d2038" color="#cba6f7"]
gui [label="Mitus GUI (GTK4 + libadwaita)\n─────────────\ncht/window.py · cht/ui/*\nMonitor (mpv UDP) · Scrub bar\nFrames panel · Transcript panel\nAgent input/output"
fillcolor="#2d2038" color="#cba6f7"]
agent [label="Agent runner\n─────────────\ncht/agent/*\nClaude SDK · OpenAI/Groq\n@F frame refs · @T transcript refs"
fillcolor="#2d2038" color="#cba6f7"]
// Folder-shaped node: the session directory contents listed in the label.
store [label="data/<session_id>/\n─────────────\nstream/recording_*.mp4\nstream/audio.aac (Rust mode)\nframes/*.jpg + index.json\naudio/chunk_*.wav\ntranscript.json · thread.json"
shape=folder fillcolor="#2a2a3e" color="#585b70"]
}
// Python transport flow
capture_py -> net_py [color="#cba6f7"]
net_py -> recorder_py [color="#cba6f7"]
recorder_py -> store [color="#cba6f7"]
recorder_py -> processor [label="raw scene\nframes" color="#cba6f7"]
// Rust transport flow
capture_rs -> net_rs [color="#89b4fa"]
net_rs -> recorder_rs [color="#89b4fa"]
recorder_rs -> store [color="#89b4fa"]
// The scene feed is the only edge drawn dashed green; its label names scene.sock carrying H.264.
recorder_rs -> processor [label="scene.sock\n(H.264)" style=dashed color="#a6e3a1"]
// Shared downstream
store -> processor [style=dashed]
processor -> transcriber [label="WAV chunks"]
transcriber -> store [label="transcript.json"]
store -> gui [label="files + watchers"]
gui -> agent [label="@-mentions"]
agent -> store [label="thread.json" style=dashed]
// Legend
// Sample nodes only: no edges attach them to the rest of the graph.
subgraph cluster_legend {
label="Legend" fontcolor="#a6adc8" color="#585b70" fontname="monospace"
l_py [label="Python" fillcolor="#2d2038" color="#cba6f7"]
l_rs [label="Rust" fillcolor="#1e2d3e" color="#89b4fa"]
l_io [label="I/O · network" shape=parallelogram fillcolor="#1e2a3e" color="#89b4fa"]
l_fs [label="filesystem" shape=folder fillcolor="#2a2a3e" color="#585b70"]
}
}

262
docs/graphs/system.svg Normal file
View File

@@ -0,0 +1,262 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 14.1.2 (0)
-->
<!-- Title: system Pages: 1 -->
<svg width="3430pt" height="767pt"
viewBox="0.00 0.00 3430.00 767.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(43.2 724.2)">
<title>system</title>
<polygon fill="#1e1e2e" stroke="none" points="-43.2,43.2 -43.2,-724.2 3386.51,-724.2 3386.51,43.2 -43.2,43.2"/>
<g id="clust1" class="cluster">
<title>cluster_sender</title>
<polygon fill="#1e1e2e" stroke="#45475a" points="8,-40 8,-329 373.5,-329 373.5,-40 8,-40"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-311.7" font-family="monospace" font-size="14.00" fill="#a6adc8">Sender machine — Wayland, VAAPI GPU</text>
</g>
<g id="clust2" class="cluster">
<title>cluster_net</title>
<polygon fill="#1e1e2e" stroke="#45475a" points="416.5,-37 416.5,-333 815.06,-333 815.06,-37 416.5,-37"/>
<text xml:space="preserve" text-anchor="middle" x="615.78" y="-315.7" font-family="monospace" font-size="14.00" fill="#a6adc8">Network</text>
</g>
<g id="clust3" class="cluster">
<title>cluster_receiver</title>
<polygon fill="#1e1e2e" stroke="#45475a" points="858.06,-8 858.06,-349 3335.31,-349 3335.31,-8 858.06,-8"/>
<text xml:space="preserve" text-anchor="middle" x="2096.69" y="-331.7" font-family="monospace" font-size="14.00" fill="#a6adc8">Receiver (mcrn) — X11, NVENC/NVDEC GPU</text>
</g>
<g id="clust4" class="cluster">
<title>cluster_legend</title>
<polygon fill="#1e1e2e" stroke="#585b70" points="34.24,-337 34.24,-673 347.26,-673 347.26,-337 34.24,-337"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-655.7" font-family="monospace" font-size="14.00" fill="#a6adc8">Legend</text>
</g>
<!-- capture_py -->
<g id="node1" class="node">
<title>capture_py</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="328.38,-296.2 53.12,-296.2 53.12,-189.8 328.38,-189.8 328.38,-296.2"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-272.82" font-family="monospace" font-size="14.00" fill="#cdd6f4">kmsgrab + PulseAudio</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-255.57" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-238.32" font-family="monospace" font-size="14.00" fill="#cdd6f4">sender/stream_av.sh</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-221.07" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg CLI · h264_vaapi · AAC</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-203.82" font-family="monospace" font-size="14.00" fill="#cdd6f4">mpegts over TCP</text>
</g>
<!-- net_py -->
<g id="node3" class="node">
<title>net_py</title>
<polygon fill="#1e2a3e" stroke="#89b4fa" points="730.08,-299.66 548.25,-299.66 501.48,-190.34 683.31,-190.34 730.08,-299.66"/>
<text xml:space="preserve" text-anchor="middle" x="615.78" y="-248.95" font-family="monospace" font-size="14.00" fill="#cdd6f4">TCP :4444</text>
<text xml:space="preserve" text-anchor="middle" x="615.78" y="-231.7" font-family="monospace" font-size="14.00" fill="#cdd6f4">mpegts</text>
</g>
<!-- capture_py&#45;&gt;net_py -->
<g id="edge1" class="edge">
<title>capture_py&#45;&gt;net_py</title>
<path fill="none" stroke="#cba6f7" d="M328.8,-243.65C388.75,-243.93 457.83,-244.26 513.09,-244.52"/>
<polygon fill="#cba6f7" stroke="#cba6f7" points="512.77,-248.02 522.79,-244.57 512.8,-241.02 512.77,-248.02"/>
</g>
<!-- capture_rs -->
<g id="node2" class="node">
<title>capture_rs</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="365.5,-154.2 16,-154.2 16,-47.8 365.5,-47.8 365.5,-154.2"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-130.82" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht&#45;client (Rust)</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-113.58" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-96.33" font-family="monospace" font-size="14.00" fill="#cdd6f4">media/client/</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-79.08" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg subprocess (subprocess backend)</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-61.83" font-family="monospace" font-size="14.00" fill="#cdd6f4">NUT demux → mpsc → WirePacket TCP</text>
</g>
<!-- net_rs -->
<g id="node4" class="node">
<title>net_rs</title>
<polygon fill="#1e2a3e" stroke="#89b4fa" points="807.06,-154.66 502.78,-154.66 424.5,-45.34 728.79,-45.34 807.06,-154.66"/>
<text xml:space="preserve" text-anchor="middle" x="615.78" y="-103.95" font-family="monospace" font-size="14.00" fill="#cdd6f4">TCP :4447</text>
<text xml:space="preserve" text-anchor="middle" x="615.78" y="-86.7" font-family="monospace" font-size="14.00" fill="#cdd6f4">WirePacket framing</text>
</g>
<!-- capture_rs&#45;&gt;net_rs -->
<g id="edge5" class="edge">
<title>capture_rs&#45;&gt;net_rs</title>
<path fill="none" stroke="#89b4fa" d="M365.95,-100.59C394.5,-100.52 423.99,-100.45 452.24,-100.38"/>
<polygon fill="#89b4fa" stroke="#89b4fa" points="451.91,-103.89 461.91,-100.36 451.9,-96.89 451.91,-103.89"/>
</g>
<!-- recorder_py -->
<g id="node5" class="node">
<title>recorder_py</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="1141.31,-315.83 866.06,-315.83 866.06,-192.17 1141.31,-192.17 1141.31,-315.83"/>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-292.45" font-family="monospace" font-size="14.00" fill="#cdd6f4">StreamRecorder (Python)</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-275.2" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-257.95" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht/stream/recorder.py</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-240.7" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg listener · TCP receive</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-223.45" font-family="monospace" font-size="14.00" fill="#cdd6f4">fMP4 writer · UDP relay</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-206.2" font-family="monospace" font-size="14.00" fill="#cdd6f4">stdout&#45;pipe scene detect</text>
</g>
<!-- net_py&#45;&gt;recorder_py -->
<g id="edge2" class="edge">
<title>net_py&#45;&gt;recorder_py</title>
<path fill="none" stroke="#cba6f7" d="M707.75,-247.12C751.47,-248.14 805,-249.39 854.39,-250.54"/>
<polygon fill="#cba6f7" stroke="#cba6f7" points="854.26,-254.04 864.34,-250.77 854.43,-247.04 854.26,-254.04"/>
</g>
<!-- recorder_rs -->
<g id="node6" class="node">
<title>recorder_rs</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="1124.81,-156.45 882.56,-156.45 882.56,-15.55 1124.81,-15.55 1124.81,-156.45"/>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-133.07" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht&#45;server (Rust)</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-115.83" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-98.58" font-family="monospace" font-size="14.00" fill="#cdd6f4">media/server/</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-81.33" font-family="monospace" font-size="14.00" fill="#cdd6f4">WirePacket router</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-64.08" font-family="monospace" font-size="14.00" fill="#cdd6f4">fMP4 + UDP relay (ffmpeg)</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-46.83" font-family="monospace" font-size="14.00" fill="#cdd6f4">ADTS audio writer</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-29.57" font-family="monospace" font-size="14.00" fill="#cdd6f4">Unix&#45;socket scene relay</text>
</g>
<!-- net_rs&#45;&gt;recorder_rs -->
<g id="edge6" class="edge">
<title>net_rs&#45;&gt;recorder_rs</title>
<path fill="none" stroke="#89b4fa" d="M764.6,-94.64C799.57,-93.37 836.63,-92.02 870.83,-90.78"/>
<polygon fill="#89b4fa" stroke="#89b4fa" points="870.94,-94.28 880.8,-90.42 870.68,-87.29 870.94,-94.28"/>
</g>
<!-- processor -->
<g id="node7" class="node">
<title>processor</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="1548.81,-219.45 1281.81,-219.45 1281.81,-78.55 1548.81,-78.55 1548.81,-219.45"/>
<text xml:space="preserve" text-anchor="middle" x="1415.31" y="-196.07" font-family="monospace" font-size="14.00" fill="#cdd6f4">SessionProcessor (Python)</text>
<text xml:space="preserve" text-anchor="middle" x="1415.31" y="-178.82" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1415.31" y="-161.57" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht/stream/processor.py</text>
<text xml:space="preserve" text-anchor="middle" x="1415.31" y="-144.32" font-family="monospace" font-size="14.00" fill="#cdd6f4">fMP4 → audio.wav (ffmpeg)</text>
<text xml:space="preserve" text-anchor="middle" x="1415.31" y="-127.08" font-family="monospace" font-size="14.00" fill="#cdd6f4">chunked WAVs for transcribe</text>
<text xml:space="preserve" text-anchor="middle" x="1415.31" y="-109.83" font-family="monospace" font-size="14.00" fill="#cdd6f4">[Rust mode: scene detect via</text>
<text xml:space="preserve" text-anchor="middle" x="1415.31" y="-92.58" font-family="monospace" font-size="14.00" fill="#cdd6f4">Unix socket → ffmpeg pipe]</text>
</g>
<!-- recorder_py&#45;&gt;processor -->
<g id="edge4" class="edge">
<title>recorder_py&#45;&gt;processor</title>
<path fill="none" stroke="#cba6f7" d="M1141.66,-218.89C1183.07,-208.27 1228.67,-196.59 1270.52,-185.86"/>
<polygon fill="#cba6f7" stroke="#cba6f7" points="1271.36,-189.26 1280.18,-183.38 1269.62,-182.48 1271.36,-189.26"/>
<text xml:space="preserve" text-anchor="middle" x="1211.56" y="-232.1" font-family="monospace" font-size="14.00" fill="#a6adc8">raw scene</text>
<text xml:space="preserve" text-anchor="middle" x="1211.56" y="-214.85" font-family="monospace" font-size="14.00" fill="#a6adc8">frames</text>
</g>
<!-- store -->
<g id="node11" class="node">
<title>store</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="2388.56,-233.45 2385.56,-237.45 2364.56,-237.45 2361.56,-233.45 2113.31,-233.45 2113.31,-92.55 2388.56,-92.55 2388.56,-233.45"/>
<text xml:space="preserve" text-anchor="middle" x="2250.94" y="-210.07" font-family="monospace" font-size="14.00" fill="#cdd6f4">data/&lt;session_id&gt;/</text>
<text xml:space="preserve" text-anchor="middle" x="2250.94" y="-192.82" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="2250.94" y="-175.57" font-family="monospace" font-size="14.00" fill="#cdd6f4">stream/recording_*.mp4</text>
<text xml:space="preserve" text-anchor="middle" x="2250.94" y="-158.32" font-family="monospace" font-size="14.00" fill="#cdd6f4">stream/audio.aac (Rust mode)</text>
<text xml:space="preserve" text-anchor="middle" x="2250.94" y="-141.07" font-family="monospace" font-size="14.00" fill="#cdd6f4">frames/*.jpg + index.json</text>
<text xml:space="preserve" text-anchor="middle" x="2250.94" y="-123.83" font-family="monospace" font-size="14.00" fill="#cdd6f4">audio/chunk_*.wav</text>
<text xml:space="preserve" text-anchor="middle" x="2250.94" y="-106.58" font-family="monospace" font-size="14.00" fill="#cdd6f4">transcript.json · thread.json</text>
</g>
<!-- recorder_py&#45;&gt;store -->
<g id="edge3" class="edge">
<title>recorder_py&#45;&gt;store</title>
<path fill="none" stroke="#cba6f7" d="M1141.45,-253.45C1198.97,-253.22 1252.81,-253 1252.81,-253 1252.81,-253 1548.81,-237 1548.81,-237 1548.81,-237 1931.56,-211 1931.56,-211 1931.56,-211 2084.31,-200 2084.31,-200 2084.31,-200 2091.16,-198.47 2102.08,-196.03"/>
<polygon fill="#cba6f7" stroke="#cba6f7" points="2102.69,-199.48 2111.68,-193.89 2101.16,-192.65 2102.69,-199.48"/>
</g>
<!-- recorder_rs&#45;&gt;processor -->
<g id="edge8" class="edge">
<title>recorder_rs&#45;&gt;processor</title>
<path fill="none" stroke="#a6e3a1" stroke-dasharray="5,2" d="M1125.12,-98.86C1188.33,-105.61 1252.81,-112.5 1252.81,-112.5 1252.81,-112.5 1259.59,-114.03 1270.37,-116.47"/>
<polygon fill="#a6e3a1" stroke="#a6e3a1" points="1269.31,-119.82 1279.84,-118.61 1270.85,-112.99 1269.31,-119.82"/>
<text xml:space="preserve" text-anchor="middle" x="1211.56" y="-133.7" font-family="monospace" font-size="14.00" fill="#a6adc8">scene.sock</text>
<text xml:space="preserve" text-anchor="middle" x="1211.56" y="-116.45" font-family="monospace" font-size="14.00" fill="#a6adc8">(H.264)</text>
</g>
<!-- recorder_rs&#45;&gt;store -->
<g id="edge7" class="edge">
<title>recorder_rs&#45;&gt;store</title>
<path fill="none" stroke="#89b4fa" d="M1125.03,-75.14C1199.82,-68.4 1281.81,-61 1281.81,-61 1281.81,-61 1689.31,-59 1689.31,-59 1689.31,-59 1931.56,-59 1931.56,-59 1931.56,-59 2018.95,-87.55 2102.16,-114.73"/>
<polygon fill="#89b4fa" stroke="#89b4fa" points="2101.04,-118.04 2111.63,-117.82 2103.21,-111.39 2101.04,-118.04"/>
</g>
<!-- transcriber -->
<g id="node8" class="node">
<title>transcriber</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="1931.56,-183.2 1689.31,-183.2 1689.31,-76.8 1931.56,-76.8 1931.56,-183.2"/>
<text xml:space="preserve" text-anchor="middle" x="1810.44" y="-159.82" font-family="monospace" font-size="14.00" fill="#cdd6f4">Transcriber</text>
<text xml:space="preserve" text-anchor="middle" x="1810.44" y="-142.57" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1810.44" y="-125.33" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht/transcriber/engine.py</text>
<text xml:space="preserve" text-anchor="middle" x="1810.44" y="-108.08" font-family="monospace" font-size="14.00" fill="#cdd6f4">faster&#45;whisper · CUDA</text>
<text xml:space="preserve" text-anchor="middle" x="1810.44" y="-90.83" font-family="monospace" font-size="14.00" fill="#cdd6f4">segment grouping</text>
</g>
<!-- processor&#45;&gt;transcriber -->
<g id="edge10" class="edge">
<title>processor&#45;&gt;transcriber</title>
<path fill="none" stroke="#585b70" d="M1549.13,-142.58C1590.57,-140.58 1636.24,-138.37 1677.61,-136.37"/>
<polygon fill="#585b70" stroke="#585b70" points="1677.54,-139.88 1687.36,-135.9 1677.2,-132.89 1677.54,-139.88"/>
<text xml:space="preserve" text-anchor="middle" x="1619.06" y="-144.59" font-family="monospace" font-size="14.00" fill="#a6adc8">WAV chunks</text>
</g>
<!-- transcriber&#45;&gt;store -->
<g id="edge11" class="edge">
<title>transcriber&#45;&gt;store</title>
<path fill="none" stroke="#585b70" d="M1931.84,-139.06C1984.38,-143.02 2046.5,-147.69 2101.83,-151.85"/>
<polygon fill="#585b70" stroke="#585b70" points="2101.42,-155.33 2111.66,-152.59 2101.95,-148.35 2101.42,-155.33"/>
<text xml:space="preserve" text-anchor="middle" x="2022.44" y="-154.38" font-family="monospace" font-size="14.00" fill="#a6adc8">transcript.json</text>
</g>
<!-- gui -->
<g id="node9" class="node">
<title>gui</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="2870.31,-244.83 2578.56,-244.83 2578.56,-121.17 2870.31,-121.17 2870.31,-244.83"/>
<text xml:space="preserve" text-anchor="middle" x="2724.44" y="-221.45" font-family="monospace" font-size="14.00" fill="#cdd6f4">Mitus GUI (GTK4 + libadwaita)</text>
<text xml:space="preserve" text-anchor="middle" x="2724.44" y="-204.2" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="2724.44" y="-186.95" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht/window.py · cht/ui/*</text>
<text xml:space="preserve" text-anchor="middle" x="2724.44" y="-169.7" font-family="monospace" font-size="14.00" fill="#cdd6f4">Monitor (mpv UDP) · Scrub bar</text>
<text xml:space="preserve" text-anchor="middle" x="2724.44" y="-152.45" font-family="monospace" font-size="14.00" fill="#cdd6f4">Frames panel · Transcript panel</text>
<text xml:space="preserve" text-anchor="middle" x="2724.44" y="-135.2" font-family="monospace" font-size="14.00" fill="#cdd6f4">Agent input/output</text>
</g>
<!-- agent -->
<g id="node10" class="node">
<title>agent</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="3327.31,-161.2 3010.81,-161.2 3010.81,-54.8 3327.31,-54.8 3327.31,-161.2"/>
<text xml:space="preserve" text-anchor="middle" x="3169.06" y="-137.82" font-family="monospace" font-size="14.00" fill="#cdd6f4">Agent runner</text>
<text xml:space="preserve" text-anchor="middle" x="3169.06" y="-120.58" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="3169.06" y="-103.33" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht/agent/*</text>
<text xml:space="preserve" text-anchor="middle" x="3169.06" y="-86.08" font-family="monospace" font-size="14.00" fill="#cdd6f4">Claude SDK · OpenAI/Groq</text>
<text xml:space="preserve" text-anchor="middle" x="3169.06" y="-68.83" font-family="monospace" font-size="14.00" fill="#cdd6f4">@F frame refs · @T transcript refs</text>
</g>
<!-- gui&#45;&gt;agent -->
<g id="edge13" class="edge">
<title>gui&#45;&gt;agent</title>
<path fill="none" stroke="#585b70" d="M2870.68,-158.39C2911.74,-151.43 2956.79,-143.8 2999.13,-136.63"/>
<polygon fill="#585b70" stroke="#585b70" points="2999.64,-140.09 3008.91,-134.97 2998.47,-133.19 2999.64,-140.09"/>
<text xml:space="preserve" text-anchor="middle" x="2940.56" y="-156.17" font-family="monospace" font-size="14.00" fill="#a6adc8">@&#45;mentions</text>
</g>
<!-- agent&#45;&gt;store -->
<g id="edge14" class="edge">
<title>agent&#45;&gt;store</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M3010.47,-96.22C2939.05,-90.89 2870.31,-85.75 2870.31,-85.75 2870.31,-85.75 2578.56,-85.75 2578.56,-85.75 2578.56,-85.75 2486.47,-107.53 2400.08,-127.96"/>
<polygon fill="#585b70" stroke="#585b70" points="2399.43,-124.52 2390.51,-130.23 2401.05,-131.33 2399.43,-124.52"/>
<text xml:space="preserve" text-anchor="middle" x="2724.44" y="-89.7" font-family="monospace" font-size="14.00" fill="#a6adc8">thread.json</text>
</g>
<!-- store&#45;&gt;processor -->
<g id="edge9" class="edge">
<title>store&#45;&gt;processor</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M2113.07,-179.34C2026.78,-189.64 1931.56,-201 1931.56,-201 1931.56,-201 1689.31,-201 1689.31,-201 1689.31,-201 1625.9,-188.92 1560.2,-176.41"/>
<polygon fill="#585b70" stroke="#585b70" points="1561.18,-173.03 1550.7,-174.6 1559.87,-179.91 1561.18,-173.03"/>
</g>
<!-- store&#45;&gt;gui -->
<g id="edge12" class="edge">
<title>store&#45;&gt;gui</title>
<path fill="none" stroke="#585b70" d="M2388.95,-168.81C2444.64,-171.17 2509.32,-173.92 2566.86,-176.36"/>
<polygon fill="#585b70" stroke="#585b70" points="2566.62,-179.85 2576.76,-176.78 2566.92,-172.86 2566.62,-179.85"/>
<text xml:space="preserve" text-anchor="middle" x="2483.56" y="-179.33" font-family="monospace" font-size="14.00" fill="#a6adc8">files + watchers</text>
</g>
<!-- l_py -->
<g id="node12" class="node">
<title>l_py</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="233.5,-382.7 148,-382.7 148,-345.3 233.5,-345.3 233.5,-382.7"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-359.32" font-family="monospace" font-size="14.00" fill="#cdd6f4">Python</text>
</g>
<!-- l_rs -->
<g id="node13" class="node">
<title>l_rs</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="225.25,-455.7 156.25,-455.7 156.25,-418.3 225.25,-418.3 225.25,-455.7"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-432.32" font-family="monospace" font-size="14.00" fill="#cdd6f4">Rust</text>
</g>
<!-- l_io -->
<g id="node14" class="node">
<title>l_io</title>
<polygon fill="#1e2a3e" stroke="#89b4fa" points="339.26,-566.41 103.01,-566.41 42.24,-491.59 278.49,-491.59 339.26,-566.41"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-524.33" font-family="monospace" font-size="14.00" fill="#cdd6f4">I/O · network</text>
</g>
<!-- l_fs -->
<g id="node15" class="node">
<title>l_fs</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="250,-639.71 247,-643.71 226,-643.71 223,-639.71 131.5,-639.71 131.5,-602.29 250,-602.29 250,-639.71"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-616.33" font-family="monospace" font-size="14.00" fill="#cdd6f4">filesystem</text>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 21 KiB

581
docs/index.html Normal file
View File

@@ -0,0 +1,581 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Mitus — Architecture</title>
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap');
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
background: #1e1e2e;
color: #cdd6f4;
font-family: 'Inter', sans-serif;
line-height: 1.6;
height: 100vh;
overflow: hidden;
display: flex;
flex-direction: column;
}
header {
padding: 16px 24px;
border-bottom: 1px solid #313244;
display: flex;
align-items: baseline;
gap: 16px;
flex-shrink: 0;
}
header h1 {
font-family: 'JetBrains Mono', monospace;
font-size: 22px;
font-weight: 600;
letter-spacing: 3px;
color: #89b4fa;
}
header .subtitle {
font-size: 13px;
color: #6c7086;
letter-spacing: 1px;
text-transform: uppercase;
}
.layout {
display: flex;
flex: 1;
min-height: 0;
}
nav {
display: flex;
flex-direction: column;
width: 220px;
flex-shrink: 0;
background: #181825;
border-right: 1px solid #313244;
padding: 8px 0;
overflow-y: auto;
}
nav a {
padding: 10px 20px;
font-family: 'JetBrains Mono', monospace;
font-size: 12px;
color: #a6adc8;
text-decoration: none;
border-left: 2px solid transparent;
cursor: pointer;
transition: all 0.15s;
}
nav a:hover { color: #cdd6f4; background: #313244; }
nav a.active { color: #89b4fa; border-left-color: #89b4fa; background: #1e2d3e; }
nav .group {
font-family: 'JetBrains Mono', monospace;
font-size: 10px;
color: #585b70;
letter-spacing: 1px;
text-transform: uppercase;
padding: 16px 20px 6px;
}
main {
flex: 1;
overflow: auto;
padding: 32px 48px;
}
.graph-section {
display: none;
animation: fadeIn 0.2s ease;
}
.graph-section.active { display: block; }
@keyframes fadeIn {
from { opacity: 0; }
to { opacity: 1; }
}
.graph-section h2 {
font-family: 'JetBrains Mono', monospace;
font-size: 15px;
font-weight: 500;
color: #a6adc8;
margin-bottom: 8px;
letter-spacing: 1px;
}
.graph-section p {
font-size: 13px;
color: #6c7086;
margin-bottom: 24px;
max-width: 800px;
}
.graph-container {
background: #11111b;
border: 1px solid #313244;
padding: 24px;
overflow: auto;
}
.graph-container a { display: block; }
.graph-container img { max-width: 100%; height: auto; }
.legend {
display: flex;
gap: 24px;
margin-top: 16px;
font-size: 11px;
font-family: 'JetBrains Mono', monospace;
color: #6c7086;
}
.legend span::before {
content: '';
display: inline-block;
width: 8px;
height: 8px;
margin-right: 6px;
border-radius: 50%;
}
.legend .python::before { background: #cba6f7; }
.legend .rust::before { background: #89b4fa; }
.legend .hw::before { background: #a6e3a1; }
.legend .fs::before { background: #585b70; }
/* Repo tree */
.tree-container {
background: #11111b;
border: 1px solid #313244;
padding: 24px;
overflow: auto;
}
.repo-tree {
font-family: 'JetBrains Mono', monospace;
font-size: 13px;
line-height: 1.7;
color: #a6adc8;
}
.t-root { color: #89b4fa; font-weight: 600; font-size: 15px; }
.t-dir { color: #cdd6f4; font-weight: 500; }
.t-rust { color: #89b4fa; font-weight: 500; }
.t-py { color: #cba6f7; font-weight: 500; }
.t-comment { color: #6c7086; }
/* Prose sections */
.graph-section h3 {
font-family: 'JetBrains Mono', monospace;
font-size: 13px;
font-weight: 500;
color: #cdd6f4;
letter-spacing: 1px;
margin: 32px 0 10px;
text-transform: uppercase;
}
.prose { max-width: 820px; }
.prose p {
font-size: 14px;
color: #a6adc8;
margin-bottom: 14px;
line-height: 1.7;
}
.prose p b { color: #cdd6f4; font-weight: 600; }
.prose code {
font-family: 'JetBrains Mono', monospace;
font-size: 12px;
color: #89b4fa;
background: #181825;
padding: 1px 5px;
border-radius: 3px;
}
.prose pre {
background: #11111b;
border: 1px solid #313244;
padding: 14px 16px;
margin: 8px 0 18px;
border-radius: 4px;
overflow-x: auto;
}
.prose pre code {
background: transparent;
padding: 0;
color: #cdd6f4;
font-size: 12px;
}
.prose ul {
margin: 8px 0 16px 20px;
font-size: 14px;
color: #a6adc8;
line-height: 1.7;
}
.prose ul li { margin-bottom: 6px; }
.prose .note {
border-left: 3px solid #f9e2af;
background: #2a2a3e;
padding: 10px 14px;
margin: 12px 0 18px;
font-size: 13px;
color: #cdd6f4;
}
.cmp-table {
width: 100%;
border-collapse: collapse;
font-size: 13px;
margin: 8px 0 20px;
border: 1px solid #313244;
}
.cmp-table th {
text-align: left;
background: #181825;
color: #a6adc8;
font-family: 'JetBrains Mono', monospace;
font-size: 11px;
letter-spacing: 1px;
padding: 10px 14px;
border-bottom: 1px solid #313244;
}
.cmp-table td {
padding: 10px 14px;
color: #a6adc8;
border-bottom: 1px solid #313244;
vertical-align: top;
}
.cmp-table tr:last-child td { border-bottom: none; }
/* Mobile */
.menu-toggle {
display: none;
background: transparent;
border: 1px solid #313244;
color: #cdd6f4;
padding: 6px 10px;
font-family: 'JetBrains Mono', monospace;
font-size: 14px;
cursor: pointer;
line-height: 1;
margin-left: auto;
}
.menu-toggle:hover { background: #313244; }
.nav-backdrop {
display: none;
position: absolute;
inset: 0;
background: rgba(0, 0, 0, 0.5);
z-index: 10;
}
.layout.nav-open .nav-backdrop { display: block; }
@media (max-width: 720px) {
header { padding: 10px 12px; gap: 8px; }
header h1 { font-size: 16px; letter-spacing: 1px; }
header .subtitle { display: none; }
.menu-toggle { display: inline-block; }
.layout { position: relative; }
nav {
position: absolute;
left: 0; top: 0; bottom: 0;
width: 220px;
z-index: 20;
transform: translateX(-100%);
transition: transform 0.2s ease;
box-shadow: 2px 0 8px rgba(0, 0, 0, 0.5);
}
.layout.nav-open nav { transform: translateX(0); }
main { padding: 16px; }
.graph-section h2 { font-size: 13px; }
.prose p, .prose ul { font-size: 13px; }
}
</style>
</head>
<body>
<header>
<h1>MITUS</h1>
<span class="subtitle">Stream viewer + agent — architecture</span>
<button class="menu-toggle" onclick="toggleNav()" aria-label="Toggle navigation">☰</button>
</header>
<div class="layout">
<div class="nav-backdrop" onclick="toggleNav()"></div>
<nav>
<div class="group">Overview</div>
<a class="active" onclick="show('overview')">Goal &amp; walkthrough</a>
<a onclick="show('usage')">Usage</a>
<a onclick="show('system')">System</a>
<div class="group">Transports</div>
<a onclick="show('python')">Python pipeline</a>
<a onclick="show('rust_client')">Rust client</a>
<a onclick="show('rust_server')">Rust server</a>
<a onclick="show('crates')">Rust crates</a>
<div class="group">Reference</div>
<a onclick="show('repo')">Repository</a>
<a onclick="show('notes')">Design notes</a>
</nav>
<main>
<section id="overview" class="graph-section active">
<h2>GOAL &amp; WALKTHROUGH</h2>
<p>Mitus records a remote desktop, transcribes its audio, extracts scene-change frames, and exposes both to an LLM agent for ad-hoc Q&amp;A.</p>
<div class="prose">
<h3>What it is</h3>
<p>A two-machine setup: the <b>sender</b> (a Wayland desktop) captures screen + audio and ships an encoded stream to the <b>receiver</b>. The receiver records to disk, runs scene detection on the live feed to extract per-event JPEG frames, transcribes the audio, and presents the result in a GTK4 GUI. The GUI doubles as an LLM client: select a frame or transcript span, hit Enter, and an agent (Claude SDK or any OpenAI-compatible endpoint) answers using the selected media as context.</p>
<h3>Why the split</h3>
<p>Capture wants Wayland + a VAAPI-friendly GPU; analysis wants CUDA for both faster-whisper and ffmpeg scene detection. Different machines, different drivers — the network stream is the seam. The receiver also runs the GUI because the recordings are stored locally and the agent talks to large frames as files, not blobs over a wire.</p>
<h3>Two transport modes</h3>
<p>Both modes produce the <b>same on-disk session layout</b> (<code>data/&lt;session_id&gt;/stream/</code>, <code>frames/</code>, <code>audio/</code>, <code>transcript.json</code>) so the GUI doesn't care which path the bytes took. The choice is a CLI flag.</p>
<ul>
<li><b>Python (default).</b> Sender is a bash watchdog wrapping <code>ffmpeg</code> CLI. Receiver is <code>cht/stream/recorder.py</code>: an <code>ffmpeg</code> listener that writes fragmented MP4 + relays UDP to <code>mpv</code> + emits scene frames out of a <code>showinfo</code> stdout pipe. Simple and all in one process, but every restart costs a few seconds.</li>
<li><b>Rust (<code>--rust</code>).</b> A standalone Rust workspace under <code>media/</code>: <code>cht-client</code> on the sender, <code>cht-server</code> on the receiver. Wire protocol is a typed <code>WirePacket</code> framing instead of raw mpegts. Scene detection still runs in Python via a Unix-socket relay from the server. Connect time drops from ~20s to ~3s; session reload from disk is 1–2s.</li>
</ul>
<div class="note">The <code>media/</code> directory holds the Rust transport. While both modes coexist, that name is a misnomer — a future rename is planned. For now, "Rust transport" and "<code>media/</code>" mean the same thing.</div>
<h3>What the agent sees</h3>
<p>Two reference syntaxes resolve to media when sent: <code>@F0001</code>–<code>@F0042</code> for frames, <code>@T0001</code>–<code>@T0010</code> for transcript segments. Single-word verbs <code>describe</code> and <code>answer</code> are sent verbatim — no system prompt, no boilerplate. If you want detail, you type it. The agent runner injects only the referenced frame paths and transcript text alongside the user message.</p>
</div>
</section>
<section id="usage" class="graph-section">
<h2>USAGE</h2>
<p>How to start a session — sender side, receiver side, both transports.</p>
<div class="prose">
<p>Both <code>ctrl/client.sh</code> and <code>ctrl/app.sh</code> take a transport flag — <code>--python</code> (default) or <code>--rust</code>. The <code>ctrl/</code> wrappers are the entrypoints; <code>media/ctrl/*</code> and <code>sender/stream_av.py</code> are implementation details they dispatch to.</p>
<h3>Receiver (mcrn) — GUI</h3>
<p><b>Python transport (default):</b></p>
<pre><code>./ctrl/app.sh --python</code></pre>
<p><b>Rust transport:</b></p>
<pre><code>./ctrl/server.sh # cht-server on TCP :4447 (Rust mode only)
./ctrl/app.sh --rust</code></pre>
<p>Python mode does its own TCP listening inside the GUI process — no separate server step.</p>
<h3>Sender</h3>
<p><b>Python transport:</b></p>
<pre><code>./ctrl/client.sh --python [RECEIVER_IP] [PORT] # default port 4444</code></pre>
<p>(Runs <code>sudo python3 sender/stream_av.py</code> under the hood — <code>sudo</code> is required for <code>kmsgrab</code>.)</p>
<p><b>Rust transport:</b></p>
<pre><code>./ctrl/client.sh --rust [server_addr] # default mcrndeb:4447</code></pre>
<h3>Sync</h3>
<p>Both machines share the same source tree; <code>ctrl/sync.sh</code> rsyncs from the dev host to <code>mcrndeb</code>. The receiver's filesystem is also bind-mounted at <code>~/mcrn</code> on the dev host for quick file access.</p>
<h3>Inside the GUI</h3>
<ul>
<li><b>Frames panel</b> — click to select; <code>←/→</code> navigate.</li>
<li><b>Transcript panel</b> — click to select; <code>↑/↓</code> navigate; <code>Shift</code> to extend.</li>
<li><b>Enter</b> — sends <code>answer</code> + selected refs to the agent.</li>
<li><b>Describe / Answer</b> buttons — same idea, single-word verb prepended.</li>
<li><b>Agent input</b> — type freely; <code>@F1-3</code> and <code>@T5</code> attach refs.</li>
<li><b>Esc</b> — clear selection. <b>Del</b> — clear agent output.</li>
<li><b>Ctrl+R</b> — manual segment cut.</li>
</ul>
<h3>Agent provider</h3>
<p>Resolution order in <code>cht/agent/runner.py</code>:</p>
<ul>
<li><code>GROQ_API_KEY</code> → OpenAI-compatible client against Groq.</li>
<li><code>OPENAI_API_KEY</code> → OpenAI / OpenAI-compatible.</li>
<li>(default) → Claude Code SDK using your local CC subscription.</li>
</ul>
</div>
</section>
<section id="system" class="graph-section">
<h2>SYSTEM ARCHITECTURE</h2>
<p>End-to-end view: sender capture → network → receiver record + analyse → GUI + agent. Both transports converge on the same on-disk session layout.</p>
<div class="graph-container">
<a href="viewer.html?src=graphs/system.svg"><img src="graphs/system.svg" alt="System architecture"></a>
</div>
<div class="legend">
<span class="python">Python</span>
<span class="rust">Rust</span>
<span class="hw">Hardware / external</span>
<span class="fs">Filesystem</span>
</div>
</section>
<section id="python" class="graph-section">
<h2>PYTHON PIPELINE</h2>
<p>Default mode. Bash + ffmpeg CLI on the sender; <code>StreamRecorder</code> + <code>SessionProcessor</code> in <code>cht/stream/</code> on the receiver. Scene detection rides the recorder's <code>ffmpeg</code> stdout pipe — sub-second latency, no extra process.</p>
<div class="graph-container">
<a href="viewer.html?src=graphs/python_pipeline.svg"><img src="graphs/python_pipeline.svg" alt="Python pipeline"></a>
</div>
<div class="legend">
<span class="python">Python module</span>
<span class="rust">External binary (ffmpeg)</span>
<span class="hw">Hardware / OS source</span>
<span class="fs">Filesystem output</span>
</div>
</section>
<section id="rust_client" class="graph-section">
<h2>RUST CLIENT — sender</h2>
<p><code>media/client/</code> — replaces <code>sender/stream_av.sh</code> when running with <code>--rust</code>. Two backends: subprocess (default, wraps ffmpeg CLI) and an experimental direct VAAPI capture/encoder.</p>
<div class="graph-container">
<a href="viewer.html?src=graphs/rust_client.svg"><img src="graphs/rust_client.svg" alt="Rust client pipeline"></a>
</div>
</section>
<section id="rust_server" class="graph-section">
<h2>RUST SERVER — receiver</h2>
<p><code>media/server/</code> — replaces <code>StreamRecorder</code> when running with <code>--rust</code>. TCP listener with a typed <code>WirePacket</code> framing; routes Video/Audio/Control packets to ffmpeg recording, ADTS audio, and a Unix-socket scene relay.</p>
<div class="graph-container">
<a href="viewer.html?src=graphs/rust_server.svg"><img src="graphs/rust_server.svg" alt="Rust server pipeline"></a>
</div>
</section>
<section id="crates" class="graph-section">
<h2>RUST CRATES</h2>
<p>Cargo workspace under <code>media/</code>: three crates (<code>cht-common</code>, <code>cht-client</code>, <code>cht-server</code>) and their external deps. Designed to be reusable as a standalone tool — <code>mpr</code> is expected to depend on it too.</p>
<div class="graph-container">
<a href="viewer.html?src=graphs/crates.svg"><img src="graphs/crates.svg" alt="Rust crates"></a>
</div>
</section>
<section id="repo" class="graph-section">
<h2>REPOSITORY STRUCTURE</h2>
<p>Top-level layout. Python app under <code>cht/</code>; Rust transport under <code>media/</code>; sender bash under <code>sender/</code>; ops scripts under <code>ctrl/</code>.</p>
<div class="tree-container">
<pre class="repo-tree"><span class="t-root">cht/</span>
├── <span class="t-py">cht/</span> <span class="t-comment">Python app (GTK4 GUI, recording, transcribe, agent)</span>
│ ├── app.py · window.py <span class="t-comment">entrypoint + main window</span>
│ ├── config.py · session.py <span class="t-comment">app config, session manifest</span>
│ ├── stream/ <span class="t-comment">recorder · processor · tracker · lifecycle · ffmpeg helpers</span>
│ ├── audio/ <span class="t-comment">waveform engine</span>
│ ├── transcriber/ <span class="t-comment">faster-whisper engine</span>
│ ├── scrub/ <span class="t-comment">proxy manager (scrub-mode preview)</span>
│ ├── index/ <span class="t-comment">frame index helpers</span>
│ ├── agent/ <span class="t-comment">runner · base · tools · claude_sdk_connection · openai_connection</span>
│ └── ui/ <span class="t-comment">timeline · monitor · scrub_bar · frames_panel · transcript_panel</span>
<span class="t-comment">agent_input · agent_output · markdown · keyboard · mpv · waveform</span>
├── <span class="t-rust">media/</span> <span class="t-comment">Rust transport workspace (Cargo) — to be renamed once both modes coexist</span>
│ ├── common/ <span class="t-comment">cht-common — WirePacket, ControlMessage, logging</span>
│ ├── client/ <span class="t-comment">cht-client — sender (Wayland, VAAPI)</span>
│ ├── server/ <span class="t-comment">cht-server — receiver (TCP listener, ffmpeg fan-out)</span>
│ └── ctrl/ <span class="t-comment">build.sh · client.sh · server.sh</span>
├── <span class="t-dir">sender/</span> <span class="t-comment">Python-mode sender — stream_av.sh (bash watchdog around ffmpeg CLI)</span>
├── <span class="t-dir">ctrl/</span> <span class="t-comment">app.sh · server.sh · client.sh · sync.sh · bench.py · e2e_test.sh</span>
├── <span class="t-dir">tests/</span> <span class="t-comment">pytest suites — config · ffmpeg · manager · processor · timeline · tracker</span>
├── <span class="t-dir">data/</span> <span class="t-comment">runtime — sessions, active-session pointer (gitignored)</span>
├── <span class="t-dir">logs/</span> <span class="t-comment">runtime logs (gitignored)</span>
├── <span class="t-dir">docs/</span> <span class="t-comment">this site — index.html · viewer.html · graphs/ · render.sh</span>
└── pyproject.toml · uv.lock <span class="t-comment">Python deps via uv</span></pre>
</div>
</section>
<section id="notes" class="graph-section">
<h2>DESIGN NOTES</h2>
<p>Why some non-obvious choices look the way they do.</p>
<div class="prose">
<h3>Same on-disk layout from both transports</h3>
<p>The GUI, transcript, scene index, and agent never branch on transport mode — they only read files. The recording layout is the contract; the network protocol underneath is replaceable. This is what made the Rust port feasible without rewriting the analysis side.</p>
<h3>Scene detection lives in the recorder, not the processor</h3>
<p>In Python mode, scene-change frames come straight off the recorder's <code>ffmpeg</code> stdout pipe — sub-second, single process. Polling the fragmented MP4 from a separate process would add 3–5 s of disk-IPC latency. In Rust mode the same property is approximated by relaying raw H.264 over <code>scene.sock</code> to a separate ffmpeg, but that relay turns out to be the source of most current scene-detection pain (see <i>The scene detection saga</i> below).</p>
<h3>Why bother with the Rust port</h3>
<p>Two measured wins drove the work: connect time dropped from ~20 s (CLI ffmpeg startup + mpegts negotiation) to ~3 s (typed handshake), and session reload from disk dropped to 1–2 s. The Python recorder still works fine for development; the Rust path matters when you reconnect a lot.</p>
<h3>One-word verbs, no system prompt</h3>
<p>Pressing Enter sends <code>answer</code> + selected refs verbatim. There is no system prompt and no instruction template wrapping the message. If a question needs detail, the user types it — the model sees exactly what you'd see, not a contract you'd have to debug.</p>
<h3>Subprocess backend over a custom encoder</h3>
<p>The Rust client wraps the same <code>ffmpeg</code> CLI the Python sender uses, demuxes its NUT output in-process, and ships <code>EncodedPacket</code>s. Less code to own than a direct VAAPI encode path, and it inherits ffmpeg's robustness around odd Wayland/DRM transitions. The direct VAAPI backend exists but is experimental.</p>
<h3>Sender as a watchdog, not a daemon</h3>
<p>Python-mode <code>stream_av.sh</code> is a bash loop that restarts <code>ffmpeg</code> on stall (no progress for 10 s) and restarts immediately on the DRM-plane format change that fullscreen apps trigger. Cheaper and more reliable than building stall detection into a long-lived process.</p>
<h3>Struggles — the scene detection saga</h3>
<p>Scene detection is the part of the system that has fought back the hardest. The short version: <b>scene detection wants to live in the same ffmpeg process that does the decoding</b>, and every architecture change has had to relearn that.</p>
<h3>1. The "one behind" bug and the flush trick</h3>
<p>Original Python pipeline ran scene detection as a branch of the same <code>ffmpeg</code> that records: <code>select='gt(scene,T)'</code> → <code>showinfo</code> → MJPEG. The MJPEG encoder + muxer holds the selected frame in its internal buffer until <i>another</i> selected frame pushes it out — so the JPEG you receive at time <i>T</i> is actually the previous scene change, not the current one. Classic "one behind".</p>
<p>Workaround: a flush trick — select extra adjacent frames after each scene change so the real frame gets pushed through immediately (<code>SCENE_FLUSH_FRAMES</code>, see <code>cht/config.py</code>, used in <code>cht/stream/ffmpeg.py</code> :: <code>receive_record_relay_and_detect</code>). Worked reliably <b>only because everything was in one ffmpeg process</b>.</p>
<h3>2. The Rust relay broke it</h3>
<p>When transport moved to Rust, the recorder split into two processes: Rust-side ffmpeg writes fMP4 + UDP, and a separate Python-side ffmpeg consumes raw H.264 from <code>scene.sock</code> for scene detection. Two new failure modes appeared:</p>
<ul>
<li><b>The flush trick stopped flushing.</b> The MJPEG encoder behaves differently in a standalone pipe-fed ffmpeg vs. as a branch of a multi-output process — adjacent extra frames no longer reliably push the previous selection through.</li>
<li><b>Decoder corruption from dropped packets.</b> The Rust relay uses <code>try_send</code> with a 100 ms socket write timeout (<code>media/server/src/session.rs</code>). On any backpressure the relay drops H.264 packets, which corrupts the downstream decoder until the next keyframe — and missed keyframes mean missed scene detections.</li>
</ul>
<h3>3. Three dead ends</h3>
<ul>
<li><b>fMP4-tip extraction.</b> Trigger on showinfo, then extract the frame from the just-written fragmented MP4. Fragments only finalize at keyframe boundaries (~2 s with GOP 30), so <code>ffprobe</code> reports stale duration and the extracted frame comes from the <i>previous</i> scene.</li>
<li><b>Single Rust ffmpeg with mixed outputs.</b> The clean fix would be one ffmpeg in Rust doing record (<code>-c:v copy</code>) + relay (<code>-c:v copy</code>) + scene detect (decode + filter). It doesn't work — ffmpeg won't mix <code>-c:v copy</code> outputs with <code>-filter_complex</code> on a pipe input under <code>-hwaccel cuda</code>.</li>
<li><b>Tighter retry intervals on the extractor.</b> Dropping retry from 1 s to 0.3 s made things <i>worse</i> — concurrent ffmpeg processes thrashing the GPU rather than completing.</li>
</ul>
<h3>4. Where it actually landed</h3>
<p>Current working approach (Rust mode): the relay-fed scene detector fires <code>showinfo</code> with a timestamp, then Python extracts the frame from the recording file at <i>that</i> timestamp, with a wall-clock offset computed from the session-dir name. Reliable frames; ~1 s latency per scene from fMP4 fragment lag plus the per-extract ffmpeg spawn (~0.5 s). It's the system limping along until the proper fix lands. See <code>def/10-scene-detect-to-rust.md</code> and <code>def/ISSUES.md</code> R1, R3 for the full record.</p>
<div class="note"><b>Lesson.</b> The flush hack is a dead end in any pipe-fed context. Don't try to make it work over relay — move scene detection back into the same process that has the decoded frames. That's the only configuration that has ever been quiet.</div>
<h3>Future work</h3>
<h4 style="font-family:'JetBrains Mono',monospace;font-size:12px;color:#a6adc8;letter-spacing:1px;margin:20px 0 6px">Near term — scene detection as a 3rd output of the Rust server's ffmpeg</h4>
<p>Spec: <code>def/10-scene-detect-to-rust.md</code>. Add a third branch to the existing ffmpeg the Rust server already runs:</p>
<ul>
<li>Output 1: <code>-c:v copy</code> → fMP4 (unchanged)</li>
<li>Output 2: <code>-c:v copy</code> → UDP relay (unchanged)</li>
<li>Output 3: CUDA decode → <code>select='gt(scene,T)'</code> → <code>showinfo</code> → MJPEG out a second pipe / second Unix socket</li>
</ul>
<p>This restores the single-process invariant — scene detection sees the same decoded frames as the recording branch, the flush behavior matches, no relay packet drops. Removes <code>detect_scenes_from_pipe()</code> in <code>cht/stream/ffmpeg.py</code>, the stdin-feeder thread in <code>cht/stream/processor.py</code>, and <code>scene_relay_task</code> in <code>media/server/src/session.rs</code>.</p>
<p>Adjacent improvements once that lands:</p>
<ul>
<li><b>Long-running extractor.</b> Keep one ffmpeg open and pipe seek commands rather than spawning per frame — eliminates the ~0.5 s startup hit.</li>
<li><b>PTS on the wire.</b> Have the Rust server send recording PTS alongside scene events so Python doesn't have to guess a wall-clock offset from the session-dir name (which is also why the first scene frame currently lands 7–10 s late in Rust mode — <code>def/ISSUES.md</code> R1).</li>
</ul>
<h4 style="font-family:'JetBrains Mono',monospace;font-size:12px;color:#a6adc8;letter-spacing:1px;margin:20px 0 6px">End goal — in-process libav filter graph</h4>
<p>Spec: <code>def/09-media-transport.md</code>. Rust server decodes via NVDEC, runs the scene filter in-process via the libav API, and writes JPEGs directly. No ffmpeg subprocess, no pipe, no relay, no extraction — scene-to-frame latency drops to near zero. The 3rd-output step above is the bridge: same single-process discipline, easier to land, and a clean rewrite target once it works.</p>
<p>Other items deferred to that broader port:</p>
<ul>
<li><b>Frame buffer / fast scrub.</b> GPU ring buffer of the last N decoded frames exposed over shared memory to the Python scrub UI — replaces the mpv proxy MJPEG hack (see <code>def/07-scrub-perf-ceiling.md</code>).</li>
<li><b>Typed control protocol.</b> The current <code>WirePacket</code> framing covers session lifecycle but not parameter changes; spec 09 sketches a control-message channel for things like live <code>scene_threshold</code> updates and reconnect-with-PTS.</li>
<li><b>Audio in the live UDP relay.</b> Rust mode currently has no audio in the live monitor (<code>def/ISSUES.md</code> R2) because the server's ffmpeg only takes video on its stdin. Resolved naturally once the server's ffmpeg also receives the audio track.</li>
</ul>
</div>
</section>
</main>
</div>
<script>
// Switch the visible documentation section and move the sidebar highlight.
// `id` is the element id of the <section class="graph-section"> to display.
function show(id) {
  // Resolve the target before touching any state, so an unknown id leaves the
  // current section visible instead of clearing every `active` class and then
  // throwing on a null element.
  var target = document.getElementById(id);
  if (!target) return;
  document.querySelectorAll('.graph-section').forEach(s => s.classList.remove('active'));
  document.querySelectorAll('nav a').forEach(a => a.classList.remove('active'));
  target.classList.add('active');
  // Sidebar links carry no id; they are matched by their inline onclick text.
  var navLink = document.querySelector('nav a[onclick="show(\'' + id + '\')"]');
  if (navLink) navLink.classList.add('active');
  // Picking a section also dismisses the slide-in nav used on narrow screens.
  document.querySelector('.layout').classList.remove('nav-open');
}
// Open or close the slide-in navigation by flipping `nav-open` on the layout wrapper.
function toggleNav() {
  var layout = document.querySelector('.layout');
  layout.classList.toggle('nav-open');
}
</script>
</body>
</html>

View File

@@ -1,21 +1,18 @@
#!/bin/bash #!/bin/bash
# Re-render all Graphviz diagrams to SVG. # Re-render all Graphviz diagrams to SVG.
# Run this after each phase when .dot files are updated. # Run after editing any .dot file under docs/graphs/.
# Usage: ./docs.sh # Usage: ./render.sh
set -euo pipefail set -euo pipefail
DOCS_DIR="$(cd "$(dirname "$0")/../docs" && pwd)" GRAPHS_DIR="$(cd "$(dirname "$0")/graphs" && pwd)"
if ! command -v dot &>/dev/null; then if ! command -v dot &>/dev/null; then
echo "graphviz not found — install with: sudo apt install graphviz" >&2 echo "graphviz not found — install with: sudo apt install graphviz" >&2
exit 1 exit 1
fi fi
for f in "$DOCS_DIR"/*.dot; do for f in "$GRAPHS_DIR"/*.dot; do
svg="${f%.dot}.svg" svg="${f%.dot}.svg"
echo "==> $(basename "$f")$(basename "$svg")" echo "==> $(basename "$f")$(basename "$svg")"
dot -Tsvg "$f" -o "$svg" dot -Tsvg "$f" -o "$svg"
done done
echo "==> done. Serving at http://localhost:9099 (ctrl-c to stop)"
cd "$DOCS_DIR" && python3 -m http.server 9099

97
docs/viewer.html Normal file
View File

@@ -0,0 +1,97 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Graph Viewer</title>
<style>
* { margin: 0; padding: 0; }
body {
background: #1e1e2e;
overflow: hidden;
width: 100vw;
height: 100vh;
}
#container {
width: 100vw;
height: 100vh;
overflow: hidden;
cursor: grab;
}
#container.dragging { cursor: grabbing; }
img {
transform-origin: 0 0;
user-select: none;
-webkit-user-drag: none;
}
</style>
</head>
<body>
<div id="container">
<img id="img" />
</div>
<script>
// Pan/zoom viewer for a single image whose URL arrives in the `src` query parameter.
var src = new URLSearchParams(location.search).get('src');
var img = document.getElementById('img');
var container = document.getElementById('container');
// View state: uniform zoom factor plus the pixel offset of the image's top-left corner.
var scale = 1;
var x = 0, y = 0;
// Drag state: pointer position and pan offset captured at mousedown.
var dragging = false;
var startX, startY, startPanX, startPanY;

// Write the current pan/zoom state to the image as a CSS transform.
function apply() {
  img.style.transform = 'translate(' + x + 'px,' + y + 'px) scale(' + scale + ')';
}

// Scale the image to 95% of the largest size that fits the window and centre it.
function fit() {
  // With no decoded image the natural size is 0; dividing by it would leave
  // Infinity/NaN in the view state, so keep the current view instead.
  if (!img.naturalWidth || !img.naturalHeight) return;
  var sw = window.innerWidth / img.naturalWidth;
  var sh = window.innerHeight / img.naturalHeight;
  scale = Math.min(sw, sh) * 0.95;
  x = (window.innerWidth - img.naturalWidth * scale) / 2;
  y = (window.innerHeight - img.naturalHeight * scale) / 2;
  apply();
}

img.onload = fit;
// Assigning a missing parameter (null) would request the literal URL "null".
if (src) img.src = src;

// Wheel zooms about the cursor: the point under the pointer stays fixed on screen.
container.addEventListener('wheel', function(e) {
  e.preventDefault();
  var factor = e.deltaY < 0 ? 1.12 : 0.89;
  var rect = container.getBoundingClientRect();
  var mx = e.clientX - rect.left;
  var my = e.clientY - rect.top;
  x = mx - (mx - x) * factor;
  y = my - (my - y) * factor;
  scale *= factor;
  apply();
}, { passive: false }); // non-passive so preventDefault() can stop page scrolling

// Primary-button drag pans the image.
container.addEventListener('mousedown', function(e) {
  if (e.button !== 0) return;
  dragging = true;
  startX = e.clientX;
  startY = e.clientY;
  startPanX = x;
  startPanY = y;
  container.classList.add('dragging');
  e.preventDefault();
});

// Move/up are bound on window so a drag keeps tracking outside the container.
window.addEventListener('mousemove', function(e) {
  if (!dragging) return;
  x = startPanX + (e.clientX - startX);
  y = startPanY + (e.clientY - startY);
  apply();
});

window.addEventListener('mouseup', function() {
  dragging = false;
  container.classList.remove('dragging');
});

// Double-click resets to the fitted, centred view.
container.addEventListener('dblclick', fit);
</script>
</body>
</html>

366
logs/client.log Normal file

File diff suppressed because one or more lines are too long

View File

@@ -1,13 +1,16 @@
//! Subprocess backend: spawn ffmpeg CLI for capture+encode. //! Subprocess backend: spawn ffmpeg CLI for capture+encode.
//! //!
//! Spawns ffmpeg with the same hardware pipeline as `stream_av.sh`: //! Spawns ffmpeg with the same hardware pipeline as `stream_av.sh`:
//! kmsgrab → hwmap=derive_device=vaapi → scale_vaapi → h264_vaapi //! kmsgrab -vblank_source vsync → hwmap=derive_device=vaapi → scale_vaapi → h264_vaapi
//! + PulseAudio desktop audio + mic → amix → AAC //! + PulseAudio desktop audio + mic → amix → AAC
//! //!
//! -vblank_source vsync forces a frame grab on every display vblank, regardless
//! of page flips. Without it, kmsgrab only grabs when the compositor flips a
//! new buffer — a static/slow screen yields 1fps.
//!
//! ffmpeg outputs NUT format to stdout. We demux that pipe with ffmpeg-next //! ffmpeg outputs NUT format to stdout. We demux that pipe with ffmpeg-next
//! to get proper AVPackets (keyframe flags, timestamps) without parsing //! to get proper AVPackets (keyframe flags, timestamps) without parsing
//! bytestreams. NUT is lighter than mpegts — no TS overhead, exact packet //! bytestreams.
//! metadata in the container layer.
use std::os::fd::AsRawFd; use std::os::fd::AsRawFd;
use std::os::unix::io::RawFd; use std::os::unix::io::RawFd;
@@ -20,6 +23,48 @@ use tracing::{error, info, warn};
use crate::encoder::{EncodedPacket, MediaType}; use crate::encoder::{EncodedPacket, MediaType};
/// Report whether a start-code-delimited H.264 buffer contains an IDR slice.
///
/// Walks the buffer looking for 3-byte (`00 00 01`) or 4-byte (`00 00 00 01`)
/// start codes and inspects the byte that follows each one. The low five bits
/// of that byte are the NAL unit type; type 5 is an IDR slice. Returns `true`
/// on the first IDR NAL found, `false` if the scan finishes without one.
pub fn h264_is_keyframe(data: &[u8]) -> bool {
    const NAL_TYPE_MASK: u8 = 0x1F;
    const NAL_TYPE_IDR: u8 = 5;

    let len = data.len();
    let mut pos = 0;
    while pos + 3 < len {
        // Every start code begins with two zero bytes; otherwise advance one byte.
        if data[pos] != 0 || data[pos + 1] != 0 {
            pos += 1;
            continue;
        }

        // Distance from `pos` to the NAL header byte when a start code begins here.
        let header_offset = match data[pos + 2] {
            1 => Some(3),
            0 if pos + 4 < len && data[pos + 3] == 1 => Some(4),
            _ => None,
        };

        match header_offset {
            Some(offset) => {
                if (data[pos + offset] & NAL_TYPE_MASK) == NAL_TYPE_IDR {
                    return true;
                }
                // Resume scanning just past the NAL header byte.
                pos += offset + 1;
            }
            None => pos += 1,
        }
    }
    false
}
/// Return the AAC payload of `data` with a leading ADTS header removed.
///
/// A buffer is treated as ADTS-framed when it is at least 7 bytes long, its
/// first byte is `0xFF` and the high nibble of its second byte is `0xF`. The
/// header is 9 bytes when the lowest bit of the second byte (protection_absent)
/// is 0, meaning a CRC follows, and 7 bytes otherwise. Input that does not
/// look like ADTS, or that is no longer than its header, is returned unchanged
/// as an owned copy.
fn strip_adts(data: &[u8]) -> Vec<u8> {
    const HEADER_LEN: usize = 7;
    const HEADER_LEN_WITH_CRC: usize = 9;

    let looks_like_adts =
        data.len() >= HEADER_LEN && data[0] == 0xFF && (data[1] & 0xF0) == 0xF0;
    if !looks_like_adts {
        return data.to_vec();
    }

    // protection_absent == 0 means the header carries a 2-byte CRC.
    let header_len = if (data[1] & 0x01) == 0 {
        HEADER_LEN_WITH_CRC
    } else {
        HEADER_LEN
    };

    let payload = if data.len() > header_len {
        &data[header_len..]
    } else {
        data
    };
    payload.to_vec()
}
pub struct SubprocessConfig { pub struct SubprocessConfig {
pub device: String, pub device: String,
pub fps: u32, pub fps: u32,
@@ -66,6 +111,23 @@ pub fn run(
// Keep stdout alive for the duration of demuxing. // Keep stdout alive for the duration of demuxing.
let _stdout_guard = stdout; let _stdout_guard = stdout;
// Watch for stop flag on a separate thread and kill ffmpeg to unblock
// the packet iterator (which is a blocking read on the pipe fd).
let stop_watcher = stop.clone();
let child_pid = child.id();
std::thread::Builder::new()
.name("ffmpeg-stop-watcher".into())
.spawn(move || {
while !stop_watcher.load(Ordering::Relaxed) {
std::thread::sleep(std::time::Duration::from_millis(100));
}
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;
let _ = kill(Pid::from_raw(child_pid as i32), Signal::SIGINT);
info!("Stop watcher: sent SIGINT to ffmpeg pid={child_pid}");
})
.expect("spawn stop watcher");
let result = demux_and_send(fd, packet_tx, stop, &mut child); let result = demux_and_send(fd, packet_tx, stop, &mut child);
// Clean up subprocess regardless of result. // Clean up subprocess regardless of result.
@@ -129,6 +191,9 @@ fn detect_default_source(pulse_server: &str) -> Option<String> {
fn spawn_ffmpeg(cfg: &SubprocessConfig) -> Result<Child> { fn spawn_ffmpeg(cfg: &SubprocessConfig) -> Result<Child> {
let audio = detect_audio_sources(); let audio = detect_audio_sources();
// fps filter after scale_vaapi pads/duplicates frames to fill gaps when
// kmsgrab captures fewer frames than the target rate (e.g. compositor
// skips flips on static content). Keeps the output stream at a stable fps.
let filter = format!( let filter = format!(
"hwmap=derive_device=vaapi,scale_vaapi=w={}:h={}:format=nv12,fps={}", "hwmap=derive_device=vaapi,scale_vaapi=w={}:h={}:format=nv12,fps={}",
cfg.width, cfg.height, cfg.fps, cfg.width, cfg.height, cfg.fps,
@@ -139,7 +204,10 @@ fn spawn_ffmpeg(cfg: &SubprocessConfig) -> Result<Child> {
"-init_hw_device".into(), format!("drm=drm:{}", cfg.device), "-init_hw_device".into(), format!("drm=drm:{}", cfg.device),
"-init_hw_device".into(), "vaapi=va@drm".into(), "-init_hw_device".into(), "vaapi=va@drm".into(),
// Video input (kmsgrab) // Video input (kmsgrab)
"-thread_queue_size".into(), "64".into(), // -vblank_source vsync: grab on every display vblank, not just page flips.
// Without this, a static screen (e.g. talking-head meeting) gives 1fps
// because the compositor rarely flips a new buffer.
"-thread_queue_size".into(), "512".into(),
"-device".into(), cfg.device.clone(), "-device".into(), cfg.device.clone(),
"-f".into(), "kmsgrab".into(), "-f".into(), "kmsgrab".into(),
"-framerate".into(), cfg.fps.to_string(), "-framerate".into(), cfg.fps.to_string(),
@@ -298,9 +366,14 @@ fn demux_and_send(
} }
} else if let Some((audio_idx, audio_tb_num, audio_tb_den)) = audio_info { } else if let Some((audio_idx, audio_tb_num, audio_tb_den)) = audio_info {
if stream_idx == audio_idx { if stream_idx == audio_idx {
// Strip ADTS header if present — normalize to raw AAC on the wire.
// mpegts backends (e.g. gpu-screen-recorder) wrap AAC in ADTS;
// NUT (ffmpeg) sends raw AAC. Stripping here makes the wire
// format consistent regardless of capture backend.
let audio_data = strip_adts(&data);
let encoded = EncodedPacket { let encoded = EncodedPacket {
media_type: MediaType::Audio, media_type: MediaType::Audio,
data, data: audio_data,
pts: packet.pts().unwrap_or(0), pts: packet.pts().unwrap_or(0),
dts: packet.dts().unwrap_or(0), dts: packet.dts().unwrap_or(0),
keyframe: packet.is_key(), keyframe: packet.is_key(),

View File

@@ -58,6 +58,7 @@ async fn main() -> Result<()> {
sample_rate: 48000, sample_rate: 48000,
channels: 2, channels: 2,
codec: "aac".into(), codec: "aac".into(),
framing: "raw".into(),
}, },
}; };
protocol::write_packet(&mut writer, &session_start.to_wire_packet()?).await?; protocol::write_packet(&mut writer, &session_start.to_wire_packet()?).await?;

View File

@@ -10,3 +10,6 @@ serde_json = { workspace = true }
tracing = { workspace = true } tracing = { workspace = true }
tracing-subscriber = { workspace = true } tracing-subscriber = { workspace = true }
anyhow = { workspace = true } anyhow = { workspace = true }
[dev-dependencies]
tokio = { workspace = true, features = ["macros", "rt"] }

View File

@@ -166,6 +166,14 @@ pub struct AudioParams {
pub sample_rate: u32, pub sample_rate: u32,
pub channels: u16, pub channels: u16,
pub codec: String, pub codec: String,
/// Audio framing on the wire: "raw" (no container headers) or "adts".
/// Default "raw" — client strips ADTS before sending.
#[serde(default = "default_framing")]
pub framing: String,
}
fn default_framing() -> String {
"raw".into()
} }
impl ControlMessage { impl ControlMessage {
@@ -231,6 +239,7 @@ mod tests {
sample_rate: 48000, sample_rate: 48000,
channels: 2, channels: 2,
codec: "aac".into(), codec: "aac".into(),
framing: "raw".into(),
}, },
}; };
let wire = msg.to_wire_packet().unwrap(); let wire = msg.to_wire_packet().unwrap();
@@ -247,6 +256,117 @@ mod tests {
} }
} }
// -- P1: Audio packet round-trip --
#[test]
fn audio_packet_round_trip() {
    // Serialize a non-keyframe audio header and parse it straight back.
    let encoded = PacketHeader {
        packet_type: PacketType::Audio,
        flags: 0,
        length: 256,
        timestamp_ns: 500_000_000,
    }
    .to_bytes();
    let parsed = PacketHeader::from_bytes(&encoded).unwrap();

    // Every field must survive the trip, and flags == 0 must not read as a keyframe.
    assert_eq!(parsed.packet_type, PacketType::Audio);
    assert!(!parsed.is_keyframe());
    assert_eq!(parsed.length, 256);
    assert_eq!(parsed.timestamp_ns, 500_000_000);
}
// -- P1: Header validation and partial / truncated reads --
#[test]
fn header_rejects_unknown_packet_type() {
    // Byte 0 carries the packet type; 99 is not one of the known values (0, 1, 2).
    let mut raw = [0u8; HEADER_SIZE];
    raw[0] = 99;
    let parsed = PacketHeader::from_bytes(&raw);
    assert!(parsed.is_err());
}
#[test]
fn header_at_max_payload_size_is_accepted() {
    // Bytes 4..8 hold the little-endian payload length; exactly MAX_PAYLOAD_SIZE must parse.
    let mut raw = [0u8; HEADER_SIZE];
    let max_len = MAX_PAYLOAD_SIZE.to_le_bytes();
    raw[4..8].copy_from_slice(&max_len);
    let parsed = PacketHeader::from_bytes(&raw);
    assert!(parsed.is_ok());
}
#[tokio::test]
async fn read_packet_truncated_header_returns_error() {
    // Only 8 bytes are available (half a header), so reading the header must fail.
    let truncated = [0u8; 8];
    let source: &[u8] = &truncated;
    let mut reader = tokio::io::BufReader::new(source);
    let outcome = read_packet(&mut reader).await;
    assert!(outcome.is_err(), "expected error on truncated header");
}
#[tokio::test]
async fn read_packet_truncated_payload_returns_error() {
    // The header promises 100 payload bytes but the stream ends after 50.
    let header_bytes = PacketHeader {
        packet_type: PacketType::Video,
        flags: FLAG_KEYFRAME,
        length: 100,
        timestamp_ns: 0,
    }
    .to_bytes();

    let mut stream = Vec::new();
    stream.extend_from_slice(&header_bytes);
    stream.extend_from_slice(&[0u8; 50]);

    let mut reader = tokio::io::BufReader::new(stream.as_slice());
    let outcome = read_packet(&mut reader).await;
    assert!(outcome.is_err(), "expected error on truncated payload");
}
#[tokio::test]
async fn write_then_read_round_trip() {
    // 128 distinct bytes so any reordering or truncation shows up in the comparison.
    let body: Vec<u8> = (0u8..128).collect();
    let header = PacketHeader {
        packet_type: PacketType::Audio,
        flags: 0,
        length: body.len() as u32,
        timestamp_ns: 999_999_999,
    };
    let outgoing = WirePacket {
        header,
        payload: body.clone(),
    };

    // Write into an in-memory buffer, then read the same bytes back.
    let mut wire = Vec::new();
    write_packet(&mut wire, &outgoing).await.unwrap();
    let mut reader = tokio::io::BufReader::new(wire.as_slice());
    let incoming = read_packet(&mut reader).await.unwrap();

    assert_eq!(incoming.header.packet_type, PacketType::Audio);
    assert_eq!(incoming.header.timestamp_ns, 999_999_999);
    assert_eq!(incoming.payload, body);
}
// -- P1: Timestamp boundary values --
#[test]
fn zero_timestamp_is_valid() {
    // A keyframe header with an empty payload and timestamp 0 must parse and keep the 0.
    let encoded = PacketHeader {
        packet_type: PacketType::Video,
        flags: FLAG_KEYFRAME,
        length: 0,
        timestamp_ns: 0,
    }
    .to_bytes();
    let parsed = PacketHeader::from_bytes(&encoded).unwrap();
    assert_eq!(parsed.timestamp_ns, 0);
}
#[test]
fn max_timestamp_round_trips() {
    // The largest representable timestamp must come back unchanged.
    let encoded = PacketHeader {
        packet_type: PacketType::Video,
        flags: 0,
        length: 0,
        timestamp_ns: u64::MAX,
    }
    .to_bytes();
    let parsed = PacketHeader::from_bytes(&encoded).unwrap();
    assert_eq!(parsed.timestamp_ns, u64::MAX);
}
#[test] #[test]
fn all_control_variants_serialize() { fn all_control_variants_serialize() {
let messages = vec![ let messages = vec![

View File

@@ -1,7 +1,7 @@
#!/bin/bash #!/bin/bash
# Build and run the media client (sender) # Build and run the media client (sender)
# Requires DRM master access — runs under sudo unless already root. # Requires DRM master access — runs under sudo unless already root.
# Usage: ./client.sh [server_addr] e.g. ./client.sh mcrndeb:4444 # Usage: ./client.sh [server_addr] e.g. ./client.sh mcrndeb:4447
set -euo pipefail set -euo pipefail
MEDIA_DIR="$(cd "$(dirname "$0")/.." && pwd)" MEDIA_DIR="$(cd "$(dirname "$0")/.." && pwd)"

View File

@@ -1,168 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Media Transport — Architecture</title>
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
display: flex;
height: 100vh;
font-family: monospace;
background: #1e1e2e;
color: #cdd6f4;
}
nav {
width: 220px;
min-width: 220px;
background: #181825;
border-right: 1px solid #313244;
display: flex;
flex-direction: column;
padding: 1rem 0;
}
nav h1 {
font-size: 0.75rem;
text-transform: uppercase;
letter-spacing: 0.1em;
color: #6c7086;
padding: 0 1rem 0.75rem;
border-bottom: 1px solid #313244;
margin-bottom: 0.5rem;
}
nav a {
display: block;
padding: 0.5rem 1rem;
color: #cdd6f4;
text-decoration: none;
font-size: 0.85rem;
border-left: 3px solid transparent;
transition: background 0.1s, border-color 0.1s;
}
nav a:hover { background: #313244; }
nav a.active { border-left-color: #89b4fa; color: #89b4fa; background: #1e2d3e; }
nav .subtitle {
font-size: 0.7rem;
color: #6c7086;
padding: 0 1rem;
margin-top: 0.25rem;
}
nav .phase-badge {
font-size: 0.65rem;
color: #a6e3a1;
float: right;
}
nav .section {
font-size: 0.65rem;
text-transform: uppercase;
letter-spacing: 0.08em;
color: #6c7086;
padding: 1rem 1rem 0.25rem;
}
main {
flex: 1;
display: flex;
flex-direction: column;
overflow: hidden;
}
header {
padding: 0.75rem 1.25rem;
background: #181825;
border-bottom: 1px solid #313244;
display: flex;
align-items: baseline;
gap: 0.75rem;
}
header h2 { font-size: 0.95rem; }
header .desc { font-size: 0.75rem; color: #6c7086; }
.viewer {
flex: 1;
overflow: auto;
padding: 1.5rem;
display: flex;
align-items: flex-start;
justify-content: center;
background: #1e1e2e;
}
.viewer object,
.viewer img {
max-width: 100%;
border-radius: 6px;
box-shadow: 0 4px 24px rgba(0,0,0,0.5);
}
.placeholder {
color: #6c7086;
font-size: 0.85rem;
margin-top: 4rem;
}
</style>
</head>
<body>
<nav>
<h1>Media Transport</h1>
<div class="section">Workspace</div>
<a href="#" data-svg="crates.svg" data-title="Crate Dependency Graph" data-desc="Workspace members and external deps">
Crate graph
</a>
<div class="section">Client (sender)</div>
<a href="#" data-svg="client-pipeline.svg" data-title="Client Pipeline" data-desc="KMS capture + PulseAudio → VAAPI H.264 + AAC → TCP transport">
Pipeline
</a>
<div class="section">Server (receiver)</div>
<a href="#" data-svg="server-pipeline.svg" data-title="Server Pipeline" data-desc="fMP4 recording, UDP live relay, scene detection (UDS → Python), audio extraction">
Pipeline
</a>
</nav>
<main>
<header>
<h2 id="title">Select a diagram</h2>
<span class="desc" id="desc"></span>
</header>
<div class="viewer" id="viewer">
<p class="placeholder">← pick a diagram from the sidebar</p>
</div>
</main>
<script>
const viewer = document.getElementById('viewer');
const titleEl = document.getElementById('title');
const descEl = document.getElementById('desc');
document.querySelectorAll('nav a').forEach(link => {
link.addEventListener('click', e => {
e.preventDefault();
document.querySelectorAll('nav a').forEach(l => l.classList.remove('active'));
link.classList.add('active');
titleEl.textContent = link.dataset.title;
descEl.textContent = link.dataset.desc;
// Use <object> so SVG internal text/links work
viewer.innerHTML = `<object type="image/svg+xml" data="${link.dataset.svg}"></object>`;
});
});
// Auto-select first
document.querySelector('nav a').click();
</script>
</body>
</html>

View File

@@ -98,9 +98,9 @@ async fn handle_client(
info!("control: {ctrl:?}"); info!("control: {ctrl:?}");
match ctrl { match ctrl {
ControlMessage::SessionStart { id, video, .. } => { ControlMessage::SessionStart { id, video, audio } => {
let s = tokio::task::block_in_place(|| { let s = tokio::task::block_in_place(|| {
Session::start(&id, &sessions_dir, video.fps) Session::start(&id, &sessions_dir, video.fps, &audio)
})?; })?;
session = Some(s); session = Some(s);
} }

View File

@@ -19,6 +19,7 @@ use std::process::{Child, ChildStdin, Command, Stdio};
use std::thread; use std::thread;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use cht_common::protocol::AudioParams;
use tokio::io::AsyncWriteExt; use tokio::io::AsyncWriteExt;
use tracing::{debug, info, warn}; use tracing::{debug, info, warn};
@@ -33,6 +34,28 @@ struct ScenePacket {
keyframe: bool, keyframe: bool,
} }
/// ADTS configuration derived from AudioParams at session start.
struct AdtsConfig {
    /// Whether to wrap audio with ADTS headers (false if client sends ADTS).
    wrap: bool,
    /// Sampling-frequency index written into the ADTS header
    /// (e.g. 3 for 48000 Hz, 4 for 44100 Hz).
    sr_idx: u8,
    /// Channel-configuration value written into the ADTS header,
    /// derived from the declared channel count.
    ch_cfg: u8,
}
impl AdtsConfig {
fn from_params(params: &AudioParams) -> Self {
let wrap = params.framing == "raw";
let sr_idx = match params.sample_rate {
96000 => 0, 88200 => 1, 64000 => 2, 48000 => 3,
44100 => 4, 32000 => 5, 24000 => 6, 22050 => 7,
16000 => 8, 12000 => 9, 11025 => 10, 8000 => 11,
_ => 3, // default 48kHz
};
let ch_cfg = params.channels.min(7) as u8;
Self { wrap, sr_idx, ch_cfg }
}
}
pub struct Session { pub struct Session {
#[allow(dead_code)] #[allow(dead_code)]
session_dir: PathBuf, session_dir: PathBuf,
@@ -40,13 +63,14 @@ pub struct Session {
ffmpeg: Child, ffmpeg: Child,
video_stdin: Option<ChildStdin>, video_stdin: Option<ChildStdin>,
audio_file: Option<File>, audio_file: Option<File>,
audio_config: AdtsConfig,
scene_tx: Option<tokio::sync::mpsc::Sender<ScenePacket>>, scene_tx: Option<tokio::sync::mpsc::Sender<ScenePacket>>,
#[allow(dead_code)] #[allow(dead_code)]
fps: u32, fps: u32,
} }
impl Session { impl Session {
pub fn start(session_id: &str, sessions_dir: &Path, fps: u32) -> Result<Self> { pub fn start(session_id: &str, sessions_dir: &Path, fps: u32, audio_params: &AudioParams) -> Result<Self> {
let active_session_file = sessions_dir let active_session_file = sessions_dir
.parent() .parent()
.unwrap_or(sessions_dir) .unwrap_or(sessions_dir)
@@ -63,6 +87,8 @@ impl Session {
let mut child = Command::new("ffmpeg") let mut child = Command::new("ffmpeg")
.args([ .args([
"-fflags", "nobuffer",
"-flags", "low_delay",
"-f", "h264", "-f", "h264",
"-framerate", &fps.to_string(), "-framerate", &fps.to_string(),
"-i", "pipe:0", "-i", "pipe:0",
@@ -75,6 +101,7 @@ impl Session {
// UDP relay for live display // UDP relay for live display
"-c:v", "copy", "-c:v", "copy",
"-f", "mpegts", "-f", "mpegts",
"-flush_packets", "1",
RELAY_URL, RELAY_URL,
"-hide_banner", "-loglevel", "warning", "-hide_banner", "-loglevel", "warning",
]) ])
@@ -109,8 +136,10 @@ impl Session {
None None
}); });
// Scene relay: Unix socket for Python scene detection. // Scene relay: Unix socket at data/scene.sock (fixed path).
let socket_path = stream_dir.join(SCENE_SOCKET_NAME); // Python always connects here — no need to discover per-session paths.
let data_dir = sessions_dir.parent().unwrap_or(sessions_dir);
let socket_path = data_dir.join(SCENE_SOCKET_NAME);
let (scene_tx, scene_rx) = tokio::sync::mpsc::channel(32); let (scene_tx, scene_rx) = tokio::sync::mpsc::channel(32);
info!("Scene relay: spawning for {}", socket_path.display()); info!("Scene relay: spawning for {}", socket_path.display());
tokio::spawn(scene_relay_task(socket_path, scene_rx)); tokio::spawn(scene_relay_task(socket_path, scene_rx));
@@ -129,6 +158,7 @@ impl Session {
ffmpeg: child, ffmpeg: child,
video_stdin: Some(video_stdin), video_stdin: Some(video_stdin),
audio_file, audio_file,
audio_config: AdtsConfig::from_params(audio_params),
scene_tx: Some(scene_tx), scene_tx: Some(scene_tx),
fps, fps,
}) })
@@ -147,9 +177,13 @@ impl Session {
pub fn write_audio(&mut self, data: &[u8]) -> Result<()> { pub fn write_audio(&mut self, data: &[u8]) -> Result<()> {
if let Some(f) = &mut self.audio_file { if let Some(f) = &mut self.audio_file {
// Wrap raw AAC frame with ADTS header so the file is playable/parseable. if self.audio_config.wrap {
// Assumes AAC-LC, 48kHz, stereo (matches client's encoder config). // Client sends raw AAC — wrap with ADTS using declared params.
write_adts_frame(f, data)?; write_adts_frame(f, data, &self.audio_config)?;
} else {
// Client sends ADTS-framed audio — write as-is.
f.write_all(data).context("write ADTS audio")?;
}
} }
Ok(()) Ok(())
} }
@@ -169,8 +203,13 @@ impl Session {
Ok(s) => info!("ffmpeg recorder exited: {s}"), Ok(s) => info!("ffmpeg recorder exited: {s}"),
Err(e) => warn!("ffmpeg recorder wait error: {e}"), Err(e) => warn!("ffmpeg recorder wait error: {e}"),
} }
// Clear the active session marker. // Clear the active session marker only if it still points to our session.
let _ = fs::remove_file(&self.active_session_file); // Another session may have overwritten it if the server restarted.
if let Ok(content) = fs::read_to_string(&self.active_session_file) {
if content.trim() == self.session_dir.to_str().unwrap_or("") {
let _ = fs::remove_file(&self.active_session_file);
}
}
} }
} }
@@ -285,27 +324,23 @@ async fn scene_relay_task(
/// Write a raw AAC frame wrapped in a 7-byte ADTS header. /// Write a raw AAC frame wrapped in a 7-byte ADTS header.
/// ///
/// Fixed params: AAC-LC profile, 48 kHz sample rate, 2 channels (stereo). /// Uses sample rate and channel count from the session's AudioParams
/// These match the client's `-c:a aac -b:a 128k` default config. /// rather than hardcoded values, so any backend can declare its format.
fn write_adts_frame(w: &mut impl Write, aac_data: &[u8]) -> Result<()> { fn write_adts_frame(w: &mut impl Write, aac_data: &[u8], cfg: &AdtsConfig) -> Result<()> {
// ADTS fixed header fields: const PROFILE_MINUS1: u8 = 1; // AAC-LC (object_type 2, stored as 2-1=1)
// profile: AAC-LC = 1 (stored as profile-1 = 0 in MPEG-4 ID mode)
// sample_rate: 48000 → index 3
// channels: 2 → channel_configuration 2
const PROFILE_MINUS1: u8 = 1; // AAC-LC
const SR_IDX: u8 = 3; // 48 kHz
const CH_CFG: u8 = 2; // stereo
let frame_len = (aac_data.len() + 7) as u16; // total ADTS frame = header + payload let sr_idx = cfg.sr_idx;
let ch_cfg = cfg.ch_cfg;
let frame_len = (aac_data.len() + 7) as u16;
let header: [u8; 7] = [ let header: [u8; 7] = [
// byte 0-1: syncword(12) | ID(1)=0(MPEG4) | layer(2)=0 | protection(1)=1(no CRC) // byte 0-1: syncword(12) | ID(1)=0(MPEG4) | layer(2)=0 | protection(1)=1(no CRC)
0xFF, 0xFF,
0xF1, 0xF1,
// byte 2: profile(2) | sr_idx(4) | private(1)=0 | ch_cfg[2](1) // byte 2: profile(2) | sr_idx(4) | private(1)=0 | ch_cfg[2](1)
(PROFILE_MINUS1 << 6) | (SR_IDX << 2) | ((CH_CFG >> 2) & 1), (PROFILE_MINUS1 << 6) | (sr_idx << 2) | ((ch_cfg >> 2) & 1),
// byte 3: ch_cfg[1:0](2) | orig(1)=0 | home(1)=0 | copyright_id(1)=0 | copyright_start(1)=0 | frame_len[12:11](2) // byte 3: ch_cfg[1:0](2) | orig(1)=0 | home(1)=0 | copyright_id(1)=0 | copyright_start(1)=0 | frame_len[12:11](2)
((CH_CFG & 3) << 6) | ((frame_len >> 11) as u8 & 0x03), ((ch_cfg & 3) << 6) | ((frame_len >> 11) as u8 & 0x03),
// byte 4: frame_len[10:3](8) // byte 4: frame_len[10:3](8)
((frame_len >> 3) & 0xFF) as u8, ((frame_len >> 3) & 0xFF) as u8,
// byte 5: frame_len[2:0](3) | buffer_fullness[10:6](5) // byte 5: frame_len[2:0](3) | buffer_fullness[10:6](5)

View File

@@ -0,0 +1,38 @@
{
"duration_s": 30,
"interval_s": 5,
"num_scenes": 6,
"video_path": "/home/mariano/wdir/cht/tests/fixtures/test_scene_30s.mp4",
"scenes": [
{
"scene_index": 0,
"timestamp_s": 0,
"color_hex": "FF0000"
},
{
"scene_index": 1,
"timestamp_s": 5,
"color_hex": "0000FF"
},
{
"scene_index": 2,
"timestamp_s": 10,
"color_hex": "00FF00"
},
{
"scene_index": 3,
"timestamp_s": 15,
"color_hex": "FFFF00"
},
{
"scene_index": 4,
"timestamp_s": 20,
"color_hex": "FF00FF"
},
{
"scene_index": 5,
"timestamp_s": 25,
"color_hex": "00FFFF"
}
]
}

1
tests/fixtures/test_speech.ogg vendored Normal file
View File

@@ -0,0 +1 @@
File not found: /v1/AUTH_mw/wikipedia-commons-local-public.22/2/22/En-us-United_States_Constitution-Article_1-Section_1.ogg

View File

@@ -10,8 +10,10 @@ from cht.config import (
STREAM_HOST, STREAM_HOST,
STREAM_PORT, STREAM_PORT,
SCENE_THRESHOLD, SCENE_THRESHOLD,
MAX_FRAME_INTERVAL, SCENE_FLUSH_FRAMES,
SEGMENT_DURATION, SEGMENT_DURATION,
AUDIO_EXTRACT_INTERVAL,
AUDIO_SAFETY_MARGIN,
) )
@@ -41,8 +43,13 @@ def test_scene_threshold_range():
assert 0 < SCENE_THRESHOLD < 1 assert 0 < SCENE_THRESHOLD < 1
def test_max_frame_interval_positive(): def test_scene_flush_frames_non_negative():
assert MAX_FRAME_INTERVAL > 0 assert SCENE_FLUSH_FRAMES >= 0
def test_audio_intervals_positive():
    """Both audio timing constants must be strictly positive."""
    assert 0 < AUDIO_EXTRACT_INTERVAL
    assert 0 < AUDIO_SAFETY_MARGIN
def test_segment_duration_positive(): def test_segment_duration_positive():

View File

@@ -86,6 +86,105 @@ class TestRunAsync:
assert "tcp://0.0.0.0:4444?listen" in " ".join(cmd) assert "tcp://0.0.0.0:4444?listen" in " ".join(cmd)
class TestReceiveRecordRelayAndDetect:
    """P0 regression: single-process pipeline with 3 outputs + scene detection."""

    @staticmethod
    def _compiled(tmp_path, **options):
        """Compile the receive/record/relay/detect graph into one command string."""
        graph = ff.receive_record_relay_and_detect(
            "tcp://0.0.0.0:4444?listen",
            tmp_path / "rec.mp4",
            "udp://127.0.0.1:4445",
            **options,
        )
        return " ".join(str(arg) for arg in graph.compile())

    def test_compiles_three_outputs(self, tmp_path):
        command = self._compiled(tmp_path)
        # fMP4 recording, UDP relay, and the MJPEG pipe used for scene detection.
        for expected in ("rec.mp4", "udp://127.0.0.1:4445", "pipe:1"):
            assert expected in command

    def test_fmp4_flags(self, tmp_path):
        command = self._compiled(tmp_path)
        assert "frag_keyframe" in command
        assert "empty_moov" in command

    def test_scene_filter_uses_threshold(self, tmp_path):
        command = self._compiled(tmp_path, scene_threshold=0.25)
        for expected in ("0.25", "scene", "showinfo"):
            assert expected in command

    def test_flush_expression_included_when_flush_frames_gt_0(self, tmp_path):
        """P0 regression: flush trick must be present to push real frame through buffer."""
        command = self._compiled(tmp_path, flush_frames=2)
        # Flush expression: eq(n,prev_selected_n+1)*mod(selected_n,N)
        assert "prev_selected_n" in command
        assert "mod" in command

    def test_no_flush_expression_when_flush_frames_zero(self, tmp_path):
        """P0 regression: flush=0 should produce a clean scene-only filter."""
        command = self._compiled(tmp_path, flush_frames=0)
        assert "prev_selected_n" not in command

    def test_flush_mod_value_matches_flush_frames(self, tmp_path):
        """P0 regression: mod value must be flush_frames+1 to prevent chaining.

        ffmpeg-python escapes commas in filtergraph as \\, so we check the
        escaped form in the compiled command.
        """
        for n in [1, 2, 3]:
            graph = ff.detect_scenes_from_pipe(flush_frames=n)
            command = " ".join(str(arg) for arg in graph.compile())
            # Commas in filter expressions are escaped as \, in filtergraph
            assert f"mod(selected_n\\,{n + 1})" in command
class TestDetectScenesFromPipe:
    """Checks on the compiled command of the stdin-fed scene detector."""

    @staticmethod
    def _compiled(**options):
        """Compile the pipe-based scene detector into one command string."""
        graph = ff.detect_scenes_from_pipe(**options)
        return " ".join(str(arg) for arg in graph.compile())

    def test_reads_from_stdin(self):
        assert "pipe:0" in self._compiled()

    def test_writes_mjpeg_to_stdout(self):
        command = self._compiled()
        assert "pipe:1" in command
        assert "mjpeg" in command

    def test_includes_flush_expression(self):
        assert "prev_selected_n" in self._compiled(flush_frames=2)

    def test_h264_input_format(self):
        assert "h264" in self._compiled()
class TestStopProc: class TestStopProc:
def test_sends_sigint_then_waits(self): def test_sends_sigint_then_waits(self):
proc = MagicMock() proc = MagicMock()

View File

@@ -1,7 +1,6 @@
"""Tests for cht.stream.manager — StreamManager.""" """Tests for cht.stream.manager — StreamManager."""
import json import json
import time
from unittest.mock import patch, MagicMock from unittest.mock import patch, MagicMock
import pytest import pytest
@@ -21,12 +20,15 @@ class TestInit:
def test_session_id_custom(self, manager): def test_session_id_custom(self, manager):
assert manager.session_id == "test_session" assert manager.session_id == "test_session"
def test_recording_path(self, manager): def test_recording_path_delegates_to_recorder(self, manager):
assert manager.recording_path.name == "recording_000.mp4" assert manager.recording_path.name == "recording_000.mp4"
def test_dirs_not_created_on_init(self, manager): def test_dirs_not_created_on_init(self, manager):
assert not manager.stream_dir.exists() assert not manager.stream_dir.exists()
def test_relay_url_from_recorder(self, manager):
assert "4445" in manager.relay_url
class TestSetupDirs: class TestSetupDirs:
def test_creates_all_subdirs(self, manager): def test_creates_all_subdirs(self, manager):
@@ -34,61 +36,74 @@ class TestSetupDirs:
assert manager.stream_dir.is_dir() assert manager.stream_dir.is_dir()
assert manager.frames_dir.is_dir() assert manager.frames_dir.is_dir()
assert manager.transcript_dir.is_dir() assert manager.transcript_dir.is_dir()
assert manager.audio_dir.is_dir()
assert manager.agent_dir.is_dir() assert manager.agent_dir.is_dir()
class TestStopAll: class TestStopAll:
@patch("cht.stream.manager.ff.stop_proc") def test_stop_all_calls_processor_and_recorder(self, manager):
def test_stops_all_procs(self, mock_stop, manager): manager.processor.stop = MagicMock()
proc = MagicMock() manager.recorder.stop = MagicMock()
manager._procs = {"recorder": proc}
manager.stop_all() manager.stop_all()
mock_stop.assert_called_with(proc) manager.processor.stop.assert_called_once()
assert len(manager._procs) == 0 manager.recorder.stop.assert_called_once()
def test_sets_stop_flag(self, manager):
manager.stop_all()
assert "stop" in manager._stop_flags
class TestFrameIndex:
def test_next_frame_number_empty(self, manager):
manager.setup_dirs()
assert manager._next_frame_number() == 1
def test_next_frame_number_with_existing(self, manager):
manager.setup_dirs()
index = [{"id": "F0001"}, {"id": "F0002"}]
(manager.frames_dir / "index.json").write_text(json.dumps(index))
assert manager._next_frame_number() == 3
def test_append_frame_index(self, manager):
manager.setup_dirs()
entry = {"id": "F0001", "timestamp": 5.0, "path": "/tmp/F0001.jpg", "sent_to_agent": False}
manager._append_frame_index(entry)
index = json.loads((manager.frames_dir / "index.json").read_text())
assert len(index) == 1
assert index[0]["id"] == "F0001"
def test_append_frame_index_accumulates(self, manager):
manager.setup_dirs()
for i in range(3):
entry = {"id": f"F{i+1:04d}", "timestamp": float(i), "path": f"/tmp/F{i+1:04d}.jpg", "sent_to_agent": False}
manager._append_frame_index(entry)
index = json.loads((manager.frames_dir / "index.json").read_text())
assert len(index) == 3
class TestSceneDetector: class TestSceneDetector:
def test_start_scene_detector_stores_callback(self, manager): def test_python_path_sets_processor_callback(self, manager):
"""Python path (recorder present): on_new_frames goes to processor, not recorder."""
cb = MagicMock() cb = MagicMock()
manager.start_scene_detector(on_new_frames=cb) manager.start_scene_detector(on_new_frames=cb)
assert manager._on_new_frames is cb assert manager.processor._on_new_frames is cb
def test_update_scene_threshold(self, manager): def test_python_path_does_not_start_processor_scene_detector(self, manager):
manager.setup_dirs() """Python path: recorder owns scene detection — processor.start_scene_detector not called."""
# Mock restart_recorder to avoid launching ffmpeg manager.processor.start_scene_detector = MagicMock()
manager.restart_recorder = MagicMock() manager.start_scene_detector(on_new_frames=MagicMock())
manager.processor.start_scene_detector.assert_not_called()
def test_rust_path_sets_callback_and_starts_detector(self, tmp_path):
"""Rust path (no recorder): processor owns scene detection."""
with patch("cht.stream.manager.SESSIONS_DIR", tmp_path):
mgr = StreamManager.__new__(StreamManager)
mgr.recorder = None
mgr.processor = MagicMock()
from cht.stream.manager import SCENE_THRESHOLD
cb = MagicMock()
mgr.start_scene_detector(on_new_frames=cb)
mgr.processor.set_on_new_frames.assert_called_once_with(cb)
mgr.processor.start_scene_detector.assert_called_once_with(threshold=SCENE_THRESHOLD)
def test_update_scene_threshold_restarts_recorder(self, manager):
manager.recorder.update_scene_threshold = MagicMock()
manager.update_scene_threshold(0.25) manager.update_scene_threshold(0.25)
assert manager.scene_threshold == 0.25 manager.recorder.update_scene_threshold.assert_called_once_with(0.25)
manager.restart_recorder.assert_called_once()
class TestFromRustSession:
    """StreamManager.from_rust_session attaches to an existing session directory."""

    @staticmethod
    def _attach(tmp_path):
        """Create a session dir with a stream/ subdir and attach a manager to it."""
        root = tmp_path / "20260410_120000"
        root.mkdir()
        (root / "stream").mkdir()
        return StreamManager.from_rust_session(root)

    def test_attaches_without_recorder(self, tmp_path):
        attached = self._attach(tmp_path)
        assert attached.recorder is None
        assert attached.session_id == "20260410_120000"

    def test_relay_url_fallback_without_recorder(self, tmp_path):
        attached = self._attach(tmp_path)
        assert "4445" in attached.relay_url

    def test_recorder_alive_returns_true_without_recorder(self, tmp_path):
        attached = self._attach(tmp_path)
        assert attached.recorder_alive() is True

231
tests/test_processor.py Normal file
View File

@@ -0,0 +1,231 @@
"""Tests for cht.stream.processor — SessionProcessor.
P0: regression tests for known bugs (flush dedup, index contract, audio callback crash)
P2: scene detection pipeline unit tests
"""
import json
import time
from pathlib import Path
from threading import Event
from unittest.mock import MagicMock, patch
import pytest
from cht.stream.processor import SessionProcessor
@pytest.fixture
def processor(tmp_path):
    """Yield a SessionProcessor on a temporary session dir; stop it on teardown."""
    root = tmp_path / "20260410_120000"
    root.mkdir()
    session_proc = SessionProcessor(root)
    # Tests write frames and audio chunks directly, so both dirs must exist up front.
    for directory in (session_proc.frames_dir, session_proc.audio_dir):
        directory.mkdir(parents=True, exist_ok=True)
    session_proc.attach(
        get_recording_path=lambda: None,
        get_current_global_offset=lambda: 0.0,
    )
    yield session_proc
    session_proc.stop()
# -- P2: on_raw_frame / index contract --
class TestOnRawFrame:
    """on_raw_frame: frame file output, index.json contract and callback."""

    # Small JPEG-shaped payload shared by every test in this class.
    _JPEG = b"\xff\xd8\xff\xe0" + b"\x00" * 100 + b"\xff\xd9"

    @staticmethod
    def _load_index(processor):
        """Parse frames_dir/index.json and return its entries."""
        return json.loads((processor.frames_dir / "index.json").read_text())

    def test_writes_jpeg_to_frames_dir(self, processor, tmp_path):
        processor.on_raw_frame(self._JPEG, global_ts=5.0)
        written = list(processor.frames_dir.glob("*.jpg"))
        assert len(written) == 1

    def test_index_entry_has_required_fields(self, processor):
        """P2: index.json must match {id, timestamp, path, sent_to_agent} contract."""
        processor.on_raw_frame(self._JPEG, global_ts=12.5)
        entries = self._load_index(processor)
        assert len(entries) == 1
        entry = entries[0]
        for key in ("id", "timestamp", "path", "sent_to_agent"):
            assert key in entry
        assert entry["sent_to_agent"] is False
        assert entry["timestamp"] == 12.5

    def test_id_format_is_F_zero_padded(self, processor):
        processor.on_raw_frame(self._JPEG, global_ts=1.0)
        assert self._load_index(processor)[0]["id"] == "F0001"

    def test_sequential_ids(self, processor):
        for ts in (0.0, 1.0, 2.0):
            processor.on_raw_frame(self._JPEG, global_ts=ts)
        ids = [entry["id"] for entry in self._load_index(processor)]
        assert ids == ["F0001", "F0002", "F0003"]

    def test_fires_on_new_frames_callback(self, processor):
        listener = MagicMock()
        processor.set_on_new_frames(listener)
        processor.on_raw_frame(self._JPEG, global_ts=3.0)
        listener.assert_called_once()
        # The callback receives the list of new frame entries as its first positional arg.
        delivered = listener.call_args[0][0]
        assert len(delivered) == 1
        assert delivered[0]["timestamp"] == 3.0

    def test_path_in_index_is_absolute(self, processor):
        processor.on_raw_frame(self._JPEG, global_ts=1.0)
        first = self._load_index(processor)[0]
        assert Path(first["path"]).is_absolute()
# -- P0: audio callback crash protection --
class TestAudioCallbackCrash:
    def test_crashing_callback_does_not_kill_audio_thread(self, processor, tmp_path):
        """P0 regression: a crash in on_new_audio must not block later chunks.

        The callback raises on its first invocation only. Each delivery goes
        through a guarded call that contains the exception; the test asserts
        that the first delivery really crashed and that the second one still
        reaches the callback.

        NOTE(review): the guard lives in this test. The test never invokes the
        processor's audio loop, so it does not prove the production loop
        catches callback exceptions.
        TODO: drive the real loop once it can be run from a test.
        """
        delivered = []

        def flaky_callback(wav_path, *args, **kwargs):
            delivered.append(wav_path)
            if len(delivered) == 1:
                raise RuntimeError("simulated callback crash")

        processor._on_new_audio = flaky_callback

        def deliver(wav_path, start):
            """Call the callback behind a guard; return False if it raised."""
            try:
                processor._on_new_audio(
                    wav_path, start, 3.0, segment_path=wav_path, local_start=start
                )
            except Exception:
                return False
            return True

        # Fake WAV chunks big enough to pass a size check.
        first = tmp_path / "chunk_0000.wav"
        first.write_bytes(b"\x00" * 200)
        second = tmp_path / "chunk_0001.wav"
        second.write_bytes(b"\x00" * 200)

        # First delivery crashes and is contained; the second goes through.
        assert deliver(first, 0.0) is False
        assert deliver(second, 3.0) is True
        assert delivered == [first, second]
# -- P0: wall-clock offset --
class TestWallClockOffset:
    """P0 checks for ``SessionProcessor._wall_clock_offset``.

    Each test creates a session directory under ``tmp_path`` with a chosen
    name, attaches a processor to it and inspects the offset it reports.
    """

    @staticmethod
    def _offset_for(tmp_path, dir_name):
        """Create ``tmp_path/dir_name``, attach a processor to it, return its wall-clock offset."""
        session_dir = tmp_path / dir_name
        session_dir.mkdir()
        proc = SessionProcessor(session_dir)
        proc.attach(get_recording_path=lambda: None, get_current_global_offset=lambda: 0.0)
        return proc._wall_clock_offset()

    def test_offset_from_session_dir_name(self, tmp_path):
        """P0: wall-clock offset from session dir name must be close to actual elapsed time."""
        from datetime import datetime

        # Session directory stamped with the current time.
        started = datetime.now()
        offset = self._offset_for(tmp_path, started.strftime("%Y%m%d_%H%M%S"))

        # Just created, so the offset has to sit within two seconds of zero.
        assert 0.0 <= offset < 2.0

    def test_offset_increases_with_time(self, tmp_path):
        """P0: offset must grow, not stay zero."""
        from datetime import datetime, timedelta

        # Session directory stamped as if it had started ten seconds ago.
        started = datetime.now() - timedelta(seconds=10)
        offset = self._offset_for(tmp_path, started.strftime("%Y%m%d_%H%M%S"))

        # At least 9s: one second of tolerance on the simulated ten.
        assert offset >= 9.0

    def test_offset_falls_back_gracefully_on_bad_name(self, tmp_path):
        """P0 fragility: bad session dir name must not crash."""
        # The call itself must not raise; the result only has to be non-negative.
        offset = self._offset_for(tmp_path, "not_a_timestamp")
        assert offset >= 0.0
# -- P0: flush frame deduplication --
class TestFlushFrameDeduplication:
    """P0 checks for dropping flush frames that closely follow a scene frame.

    The dedup rule lives in a local helper that simulates the recorder's
    ``_read_stdout`` logic: a frame is skipped when its pts is less than
    ``threshold`` seconds after the last accepted one.
    """

    @staticmethod
    def _make_emitter(processor, payload, threshold=0.1):
        """Return ``emit(pts_time)`` that forwards only frames passing the dedup rule."""
        last_pts = -1.0

        def emit(pts_time):
            nonlocal last_pts
            if pts_time - last_pts < threshold:
                return  # flush frame, skip
            last_pts = pts_time
            processor.on_raw_frame(payload, global_ts=pts_time)

        return emit

    def test_frames_within_100ms_are_skipped(self, processor):
        """P0 regression: flush frames within flush_window of scene frame must be dropped."""
        collected = []
        processor.set_on_new_frames(lambda frames: collected.extend(frames))
        # Minimal JPEG-shaped payload: starts with FF D8 FF E0, ends with FF D9.
        payload = b"\xff\xd8\xff\xe0" + bytes(100) + b"\xff\xd9"
        emit = self._make_emitter(processor, payload)

        emit(5.0)   # scene frame, accepted
        emit(5.03)  # flush frame 1, under 100ms, skipped
        emit(5.06)  # flush frame 2, under 100ms, skipped
        emit(8.0)   # next scene frame, accepted

        assert len(collected) == 2
        assert collected[0]["timestamp"] == 5.0
        assert collected[1]["timestamp"] == 8.0

    def test_frames_beyond_100ms_are_accepted(self, processor):
        """Frames separated by > 100ms are distinct scenes, not flush frames."""
        collected = []
        processor.set_on_new_frames(lambda frames: collected.extend(frames))
        # Minimal JPEG-shaped payload: starts with FF D8 FF E0, ends with FF D9.
        payload = b"\xff\xd8\xff\xe0" + bytes(100) + b"\xff\xd9"
        emit = self._make_emitter(processor, payload)

        emit(5.0)
        emit(5.15)  # more than 100ms later: separate scene, accepted
        emit(5.30)

        assert len(collected) == 3

View File

@@ -10,17 +10,19 @@ from cht.stream.tracker import RecordingTracker
class TestRecordingTracker: class TestRecordingTracker:
def test_initial_duration_is_zero(self, tmp_path): def test_initial_duration_is_zero(self):
tracker = RecordingTracker(tmp_path / "rec.ts") tracker = RecordingTracker(get_segments=lambda: [])
assert tracker.duration == 0.0 assert tracker.duration == 0.0
def test_callback_called_on_update(self, tmp_path): def test_callback_called_on_update(self, tmp_path):
rec = tmp_path / "rec.ts" seg = tmp_path / "rec.mp4"
rec.write_bytes(b"\x00" * 100_000) seg.write_bytes(b"\x00" * 100_000)
cb = MagicMock() cb = MagicMock()
tracker = RecordingTracker(rec, on_duration_update=cb) tracker = RecordingTracker(
get_segments=lambda: [seg],
on_duration_update=cb,
)
with patch.object(tracker, "_probe_duration", return_value=10.0): with patch.object(tracker, "_probe_duration", return_value=10.0):
tracker.start() tracker.start()
time.sleep(3) time.sleep(3)
@@ -29,10 +31,44 @@ class TestRecordingTracker:
cb.assert_called() cb.assert_called()
assert cb.call_args[0][0] > 0 assert cb.call_args[0][0] > 0
def test_no_callback_if_file_missing(self, tmp_path): def test_no_callback_if_no_segments(self):
cb = MagicMock() cb = MagicMock()
tracker = RecordingTracker(tmp_path / "nonexistent.ts", on_duration_update=cb) tracker = RecordingTracker(get_segments=lambda: [], on_duration_update=cb)
tracker.start() tracker.start()
time.sleep(3) time.sleep(3)
tracker.stop() tracker.stop()
cb.assert_not_called() cb.assert_not_called()
def test_no_callback_if_file_missing(self, tmp_path):
cb = MagicMock()
tracker = RecordingTracker(
get_segments=lambda: [tmp_path / "nonexistent.mp4"],
on_duration_update=cb,
)
tracker.start()
time.sleep(3)
tracker.stop()
cb.assert_not_called()
def test_duration_only_increases(self, tmp_path):
seg = tmp_path / "rec.mp4"
seg.write_bytes(b"\x00" * 100_000)
durations = []
def on_update(d):
durations.append(d)
probe_values = iter([5.0, 3.0, 7.0]) # 3.0 is a regression — should be ignored
tracker = RecordingTracker(
get_segments=lambda: [seg],
on_duration_update=on_update,
)
with patch.object(tracker, "_probe_duration", side_effect=probe_values):
tracker.start()
time.sleep(7)
tracker.stop()
# Duration should never go backwards
for i in range(1, len(durations)):
assert durations[i] >= durations[i - 1], "Duration regressed"

349
uv.lock generated
View File

@@ -2,6 +2,15 @@ version = 1
revision = 3 revision = 3
requires-python = ">=3.13" requires-python = ">=3.13"
[[package]]
name = "annotated-doc"
version = "0.0.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" },
]
[[package]] [[package]]
name = "annotated-types" name = "annotated-types"
version = "0.7.0" version = "0.7.0"
@@ -32,6 +41,30 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" }, { url = "https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" },
] ]
[[package]]
name = "av"
version = "17.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b2/eb/abca886df3a091bc406feb5ff71b4c4f426beaae6b71b9697264ce8c7211/av-17.0.0.tar.gz", hash = "sha256:c53685df73775a8763c375c7b2d62a6cb149d992a26a4b098204da42ade8c3df", size = 4410769, upload-time = "2026-03-14T14:38:45.868Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b1/fb/55e3b5b5d1fc61466292f26fbcbabafa2642f378dc48875f8f554591e1a4/av-17.0.0-cp311-abi3-macosx_11_0_x86_64.whl", hash = "sha256:ed4013fac77c309a4a68141dcf6148f1821bb1073a36d4289379762a6372f711", size = 23238424, upload-time = "2026-03-14T14:38:05.856Z" },
{ url = "https://files.pythonhosted.org/packages/52/03/9ace1acc08bc9ae38c14bf3a4b1360e995e4d999d1d33c2cbd7c9e77582a/av-17.0.0-cp311-abi3-macosx_14_0_arm64.whl", hash = "sha256:e44b6c83e9f3be9f79ee87d0b77a27cea9a9cd67bd630362c86b7e56a748dfbb", size = 18709043, upload-time = "2026-03-14T14:38:08.288Z" },
{ url = "https://files.pythonhosted.org/packages/00/c0/637721f3cd5bb8bd16105a1a08efd781fc12f449931bdb3a4d0cfd63fa55/av-17.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:b440da6ac47da0629d509316f24bcd858f33158dbdd0f1b7293d71e99beb26de", size = 34018780, upload-time = "2026-03-14T14:38:10.45Z" },
{ url = "https://files.pythonhosted.org/packages/d2/59/d19bc3257dd985d55337d7f0414c019414b97e16cd3690ebf9941a847543/av-17.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1060cba85f97f4a337311169d92c0b5e143452cfa5ca0e65fa499d7955e8592e", size = 36358757, upload-time = "2026-03-14T14:38:13.092Z" },
{ url = "https://files.pythonhosted.org/packages/52/6c/a1f4f2677bae6f2ade7a8a18e90ebdcf70690c9b1c4e40e118aa30fa313f/av-17.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:deda202e6021cfc7ba3e816897760ec5431309d59a4da1f75df3c0e9413d71e7", size = 35195281, upload-time = "2026-03-14T14:38:15.789Z" },
{ url = "https://files.pythonhosted.org/packages/90/ea/52b0fc6f69432c7bf3f5fbe6f707113650aa40a1a05b9096ffc2bba4f77d/av-17.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ffaf266a1a9c2148072de0a4b5ae98061465178d2cfaa69ee089761149342974", size = 37444817, upload-time = "2026-03-14T14:38:18.563Z" },
{ url = "https://files.pythonhosted.org/packages/34/ad/d2172966282cb8f146c13b6be7416efefde74186460c5e1708ddfc13dba6/av-17.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:45a35a40b2875bf2f98de7c952d74d960f92f319734e6d28e03b4c62a49e6f49", size = 28888553, upload-time = "2026-03-14T14:38:21.223Z" },
{ url = "https://files.pythonhosted.org/packages/b0/bb/c5a4c4172c514d631fb506e6366b503576b8c7f29809cf42aca73e28ff01/av-17.0.0-cp311-abi3-win_arm64.whl", hash = "sha256:3d32e9b5c5bbcb872a0b6917b352a1db8a42142237826c9b49a36d5dbd9e9c26", size = 21916910, upload-time = "2026-03-14T14:38:23.706Z" },
{ url = "https://files.pythonhosted.org/packages/7f/8e/c40ac08e63f79387c59f6ecc38f47d4c942b549130eee579ec1a91f6a291/av-17.0.0-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:d13250fb4b4522e9a6bec32da082556d5f257110ea223758151375748d9bbe25", size = 23483029, upload-time = "2026-03-14T14:38:25.758Z" },
{ url = "https://files.pythonhosted.org/packages/a9/fb/b4419494bfc249163ec393c613966d66db7e95c76da3345711cd115a79df/av-17.0.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:dbb56aa3b7ae72451d1bf6e9d37c7d83d39b97af712f73583ff419fbf08fc237", size = 18920446, upload-time = "2026-03-14T14:38:27.905Z" },
{ url = "https://files.pythonhosted.org/packages/30/62/c2306d91602ddad2c56106f21dcb334fd51d5ea2e952f7fa025bb8aa39fc/av-17.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a213ac9e83b7ab12c2e9f277a09cac8e9d85cf0883efdab7a87a60e2e4e48879", size = 37477266, upload-time = "2026-03-14T14:38:30.404Z" },
{ url = "https://files.pythonhosted.org/packages/28/cd/c8510a9607886785c0b3ca019d503e888c3757529be42a7287fe2bfa92d5/av-17.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:e15c88bb0921f9435bcc5a27a0863dba571a80ad5e1389c4fcf2073833bb4a74", size = 39572988, upload-time = "2026-03-14T14:38:32.984Z" },
{ url = "https://files.pythonhosted.org/packages/7d/2d/207d9361e25b5abec9be335bbab4df6b6b838e2214be4b374f4cfb285427/av-17.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:096cfd1e9fc896506726c7c42aaf9b370e78c2f257cde4d6ddb6c889bfcc49ec", size = 38399591, upload-time = "2026-03-14T14:38:35.465Z" },
{ url = "https://files.pythonhosted.org/packages/73/ca/307740c6aa2980966bf11383ffcb04bacc5b13f3d268ab4cfb274ad6f793/av-17.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3649ab3d2c7f58049ded1a36e100c0d8fd529cf258f41dd88678ba824034d8c9", size = 40590681, upload-time = "2026-03-14T14:38:38.269Z" },
{ url = "https://files.pythonhosted.org/packages/35/f2/6fdb26d0651adf409864cb2a0d60da107e467d3d1aabc94b234ead54324a/av-17.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:e5002271ab2135b551d980c2db8f3299d452e3b9d3633f24f6bb57fffe91cd10", size = 29216337, upload-time = "2026-03-14T14:38:40.83Z" },
{ url = "https://files.pythonhosted.org/packages/41/0a/0896b829a39b5669a2d811e1a79598de661693685cd62b31f11d0c18e65b/av-17.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dba98603fc4665b4f750de86fbaf6c0cfaece970671a9b529e0e3d1711e8367e", size = 22071058, upload-time = "2026-03-14T14:38:43.663Z" },
]
[[package]] [[package]]
name = "certifi" name = "certifi"
version = "2026.2.25" version = "2026.2.25"
@@ -92,6 +125,7 @@ version = "0.1.0"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "claude-agent-sdk" }, { name = "claude-agent-sdk" },
{ name = "faster-whisper" },
{ name = "ffmpeg-python" }, { name = "ffmpeg-python" },
{ name = "numpy" }, { name = "numpy" },
{ name = "openai" }, { name = "openai" },
@@ -108,6 +142,7 @@ dev = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "claude-agent-sdk" }, { name = "claude-agent-sdk" },
{ name = "faster-whisper" },
{ name = "ffmpeg-python" }, { name = "ffmpeg-python" },
{ name = "numpy" }, { name = "numpy" },
{ name = "openai" }, { name = "openai" },
@@ -209,6 +244,33 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/1b/82/ca4893968aeb2709aacfb57a30dec6fa2ab25b10fa9f064b8882ce33f599/cryptography-46.0.6-cp38-abi3-win_amd64.whl", hash = "sha256:79e865c642cfc5c0b3eb12af83c35c5aeff4fa5c672dc28c43721c2c9fdd2f0f", size = 3471160, upload-time = "2026-03-25T23:34:37.191Z" }, { url = "https://files.pythonhosted.org/packages/1b/82/ca4893968aeb2709aacfb57a30dec6fa2ab25b10fa9f064b8882ce33f599/cryptography-46.0.6-cp38-abi3-win_amd64.whl", hash = "sha256:79e865c642cfc5c0b3eb12af83c35c5aeff4fa5c672dc28c43721c2c9fdd2f0f", size = 3471160, upload-time = "2026-03-25T23:34:37.191Z" },
] ]
[[package]]
name = "ctranslate2"
version = "4.7.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy" },
{ name = "pyyaml" },
{ name = "setuptools" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/34/6d/eb49ba05db286b4ea9d5d3fcf5f5cd0a9a5e218d46349618d5041001e303/ctranslate2-4.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6b2abf2929756e3ec6246057b56df379995661560a2d776af05f9d97f63afcf5", size = 1256960, upload-time = "2026-02-04T06:11:47.487Z" },
{ url = "https://files.pythonhosted.org/packages/45/5a/b9cce7b00d89fc6fdeaf27587aa52d0597b465058563e93ff50910553bdd/ctranslate2-4.7.1-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:857ef3959d6b1c40dc227c715a36db33db2d097164996d6c75b6db8e30828f52", size = 11918645, upload-time = "2026-02-04T06:11:49.599Z" },
{ url = "https://files.pythonhosted.org/packages/ea/03/c0db0a5276599fb44ceafa2f2cb1afd5628808ec406fe036060a39693680/ctranslate2-4.7.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:393a9e7e989034660526a2c0e8bb65d1924f43d9a5c77d336494a353d16ba2a4", size = 16860452, upload-time = "2026-02-04T06:11:52.276Z" },
{ url = "https://files.pythonhosted.org/packages/0b/03/4e3728ce29d192ee75ed9a2d8589bf4f19edafe5bed3845187de51b179a3/ctranslate2-4.7.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a3d0682f2b9082e31c73d75b45f16cde77355ab76d7e8356a24c3cb2480a6d3", size = 38995174, upload-time = "2026-02-04T06:11:55.477Z" },
{ url = "https://files.pythonhosted.org/packages/9b/15/6e8e87c6a201d69803a79ac2e29623ce7c2cc9cd1df9db99810cca714373/ctranslate2-4.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:baa6d2b10f57933d8c11791e8522659217918722d07bbef2389a443801125fe7", size = 18844953, upload-time = "2026-02-04T06:11:58.519Z" },
{ url = "https://files.pythonhosted.org/packages/fd/73/8a6b7ba18cad0c8667ee221ddab8c361cb70926440e5b8dd0e81924c28ac/ctranslate2-4.7.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d5dfb076566551f4959dfd0706f94c923c1931def9b7bb249a2caa6ab23353a0", size = 1257560, upload-time = "2026-02-04T06:12:00.926Z" },
{ url = "https://files.pythonhosted.org/packages/70/c2/8817ca5d6c1b175b23a12f7c8b91484652f8718a76353317e5919b038733/ctranslate2-4.7.1-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:eecdb4ed934b384f16e8c01b185b082d6b5ffc7dcbb0b6a6eb48cd465282d957", size = 11918995, upload-time = "2026-02-04T06:12:02.875Z" },
{ url = "https://files.pythonhosted.org/packages/ac/33/b8eb3acc67bbca4d9872fc9ff94db78e6167a7ba5cd932f585d1560effc7/ctranslate2-4.7.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1aa6796edcc3c8d163c9e39c429d50076d266d68980fed9d1b2443f617c67e9e", size = 16844162, upload-time = "2026-02-04T06:12:05.099Z" },
{ url = "https://files.pythonhosted.org/packages/80/11/6474893b07121057035069a0a483fe1cd8c47878213f282afb4c0c6fc275/ctranslate2-4.7.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:24c0482c51726430fb83724451921c0e539d769c8618dcfd46b1645e7f75960d", size = 38966728, upload-time = "2026-02-04T06:12:07.923Z" },
{ url = "https://files.pythonhosted.org/packages/94/88/8fc7ff435c5e783e5fad9586d839d463e023988dbbbad949d442092d01f1/ctranslate2-4.7.1-cp314-cp314-win_amd64.whl", hash = "sha256:76db234c0446a23d20dd8eeaa7a789cc87d1d05283f48bf3152bae9fa0a69844", size = 19100788, upload-time = "2026-02-04T06:12:10.592Z" },
{ url = "https://files.pythonhosted.org/packages/d9/b3/f100013a76a98d64e67c721bd4559ea4eeb54be3e4ac45f4d801769899af/ctranslate2-4.7.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:058c9db2277dc8b19ecc86c7937628f69022f341844b9081d2ab642965d88fc6", size = 1280179, upload-time = "2026-02-04T06:12:12.596Z" },
{ url = "https://files.pythonhosted.org/packages/39/22/b77f748015667a5e2ca54a5ee080d7016fce34314f0e8cf904784549305a/ctranslate2-4.7.1-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:5abcf885062c7f28a3f9a46be8d185795e8706ac6230ad086cae0bc82917df31", size = 11940166, upload-time = "2026-02-04T06:12:14.054Z" },
{ url = "https://files.pythonhosted.org/packages/7d/78/6d7fd52f646c6ba3343f71277a9bbef33734632949d1651231948b0f0359/ctranslate2-4.7.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9950acb04a002d5c60ae90a1ddceead1a803af1f00cadd9b1a1dc76e1f017481", size = 16849483, upload-time = "2026-02-04T06:12:17.082Z" },
{ url = "https://files.pythonhosted.org/packages/40/27/58769ff15ac31b44205bd7a8aeca80cf7357c657ea5df1b94ce0f5c83771/ctranslate2-4.7.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1dcc734e92e3f1ceeaa0c42bbfd009352857be179ecd4a7ed6cccc086a202f58", size = 38949393, upload-time = "2026-02-04T06:12:21.302Z" },
{ url = "https://files.pythonhosted.org/packages/0e/5c/9fa0ad6462b62efd0fb5ac1100eee47bc96ecc198ff4e237c731e5473616/ctranslate2-4.7.1-cp314-cp314t-win_amd64.whl", hash = "sha256:dfb7657bdb7b8211c8f9ecb6f3b70bc0db0e0384d01a8b1808cb66fe7199df59", size = 19123451, upload-time = "2026-02-04T06:12:24.115Z" },
]
[[package]] [[package]]
name = "distro" name = "distro"
version = "1.9.0" version = "1.9.0"
@@ -218,6 +280,22 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
] ]
[[package]]
name = "faster-whisper"
version = "1.2.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "av" },
{ name = "ctranslate2" },
{ name = "huggingface-hub" },
{ name = "onnxruntime" },
{ name = "tokenizers" },
{ name = "tqdm" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/05/99/49ee85903dee060d9f08297b4a342e5e0bcfca2f027a07b4ee0a38ab13f9/faster_whisper-1.2.1-py3-none-any.whl", hash = "sha256:79a66ad50688c0b794dd501dc340a736992a6342f7f95e5811be60b5224a26a7", size = 1118909, upload-time = "2025-10-31T11:35:47.794Z" },
]
[[package]] [[package]]
name = "ffmpeg-python" name = "ffmpeg-python"
version = "0.2.0" version = "0.2.0"
@@ -230,6 +308,32 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d7/0c/56be52741f75bad4dc6555991fabd2e07b432d333da82c11ad701123888a/ffmpeg_python-0.2.0-py3-none-any.whl", hash = "sha256:ac441a0404e053f8b6a1113a77c0f452f1cfc62f6344a769475ffdc0f56c23c5", size = 25024, upload-time = "2019-07-06T00:19:07.215Z" }, { url = "https://files.pythonhosted.org/packages/d7/0c/56be52741f75bad4dc6555991fabd2e07b432d333da82c11ad701123888a/ffmpeg_python-0.2.0-py3-none-any.whl", hash = "sha256:ac441a0404e053f8b6a1113a77c0f452f1cfc62f6344a769475ffdc0f56c23c5", size = 25024, upload-time = "2019-07-06T00:19:07.215Z" },
] ]
[[package]]
name = "filelock"
version = "3.25.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/94/b8/00651a0f559862f3bb7d6f7477b192afe3f583cc5e26403b44e59a55ab34/filelock-3.25.2.tar.gz", hash = "sha256:b64ece2b38f4ca29dd3e810287aa8c48182bbecd1ae6e9ae126c9b35f1382694", size = 40480, upload-time = "2026-03-11T20:45:38.487Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a4/a5/842ae8f0c08b61d6484b52f99a03510a3a72d23141942d216ebe81fefbce/filelock-3.25.2-py3-none-any.whl", hash = "sha256:ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70", size = 26759, upload-time = "2026-03-11T20:45:37.437Z" },
]
[[package]]
name = "flatbuffers"
version = "25.12.19"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e8/2d/d2a548598be01649e2d46231d151a6c56d10b964d94043a335ae56ea2d92/flatbuffers-25.12.19-py2.py3-none-any.whl", hash = "sha256:7634f50c427838bb021c2d66a3d1168e9d199b0607e6329399f04846d42e20b4", size = 26661, upload-time = "2025-12-19T23:16:13.622Z" },
]
[[package]]
name = "fsspec"
version = "2026.3.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e1/cf/b50ddf667c15276a9ab15a70ef5f257564de271957933ffea49d2cdbcdfb/fsspec-2026.3.0.tar.gz", hash = "sha256:1ee6a0e28677557f8c2f994e3eea77db6392b4de9cd1f5d7a9e87a0ae9d01b41", size = 313547, upload-time = "2026-03-27T19:11:14.892Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d5/1f/5f4a3cd9e4440e9d9bc78ad0a91a1c8d46b4d429d5239ebe6793c9fe5c41/fsspec-2026.3.0-py3-none-any.whl", hash = "sha256:d2ceafaad1b3457968ed14efa28798162f1638dbb5d2a6868a2db002a5ee39a4", size = 202595, upload-time = "2026-03-27T19:11:13.595Z" },
]
[[package]] [[package]]
name = "future" name = "future"
version = "1.0.0" version = "1.0.0"
@@ -248,6 +352,38 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
] ]
[[package]]
name = "hf-xet"
version = "1.4.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/53/92/ec9ad04d0b5728dca387a45af7bc98fbb0d73b2118759f5f6038b61a57e8/hf_xet-1.4.3.tar.gz", hash = "sha256:8ddedb73c8c08928c793df2f3401ec26f95be7f7e516a7bee2fbb546f6676113", size = 670477, upload-time = "2026-03-31T22:40:07.874Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/72/43/724d307b34e353da0abd476e02f72f735cdd2bc86082dee1b32ea0bfee1d/hf_xet-1.4.3-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:7551659ba4f1e1074e9623996f28c3873682530aee0a846b7f2f066239228144", size = 3800935, upload-time = "2026-03-31T22:39:49.618Z" },
{ url = "https://files.pythonhosted.org/packages/2b/d2/8bee5996b699262edb87dbb54118d287c0e1b2fc78af7cdc41857ba5e3c4/hf_xet-1.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bee693ada985e7045997f05f081d0e12c4c08bd7626dc397f8a7c487e6c04f7f", size = 3558942, upload-time = "2026-03-31T22:39:47.938Z" },
{ url = "https://files.pythonhosted.org/packages/c3/a1/e993d09cbe251196fb60812b09a58901c468127b7259d2bf0f68bf6088eb/hf_xet-1.4.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21644b404bb0100fe3857892f752c4d09642586fd988e61501c95bbf44b393a3", size = 4207657, upload-time = "2026-03-31T22:39:39.69Z" },
{ url = "https://files.pythonhosted.org/packages/64/44/9eb6d21e5c34c63e5e399803a6932fa983cabdf47c0ecbcfe7ea97684b8c/hf_xet-1.4.3-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:987f09cfe418237812896a6736b81b1af02a3a6dcb4b4944425c4c4fca7a7cf8", size = 3986765, upload-time = "2026-03-31T22:39:37.936Z" },
{ url = "https://files.pythonhosted.org/packages/ea/7b/8ad6f16fdb82f5f7284a34b5ec48645bd575bdcd2f6f0d1644775909c486/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:60cf7fc43a99da0a853345cf86d23738c03983ee5249613a6305d3e57a5dca74", size = 4188162, upload-time = "2026-03-31T22:39:58.382Z" },
{ url = "https://files.pythonhosted.org/packages/1b/c4/39d6e136cbeea9ca5a23aad4b33024319222adbdc059ebcda5fc7d9d5ff4/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2815a49a7a59f3e2edf0cf113ae88e8cb2ca2a221bf353fb60c609584f4884d4", size = 4424525, upload-time = "2026-03-31T22:40:00.225Z" },
{ url = "https://files.pythonhosted.org/packages/46/f2/adc32dae6bdbc367853118b9878139ac869419a4ae7ba07185dc31251b76/hf_xet-1.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:42ee323265f1e6a81b0e11094564fb7f7e0ec75b5105ffd91ae63f403a11931b", size = 3671610, upload-time = "2026-03-31T22:40:10.42Z" },
{ url = "https://files.pythonhosted.org/packages/e2/19/25d897dcc3f81953e0c2cde9ec186c7a0fee413eb0c9a7a9130d87d94d3a/hf_xet-1.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:27c976ba60079fb8217f485b9c5c7fcd21c90b0367753805f87cb9f3cdc4418a", size = 3528529, upload-time = "2026-03-31T22:40:09.106Z" },
{ url = "https://files.pythonhosted.org/packages/ec/36/3e8f85ca9fe09b8de2b2e10c63b3b3353d7dda88a0b3d426dffbe7b8313b/hf_xet-1.4.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:5251d5ece3a81815bae9abab41cf7ddb7bcb8f56411bce0827f4a3071c92fdc6", size = 3801019, upload-time = "2026-03-31T22:39:56.651Z" },
{ url = "https://files.pythonhosted.org/packages/b5/9c/defb6cb1de28bccb7bd8d95f6e60f72a3d3fa4cb3d0329c26fb9a488bfe7/hf_xet-1.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1feb0f3abeacee143367c326a128a2e2b60868ec12a36c225afb1d6c5a05e6d2", size = 3558746, upload-time = "2026-03-31T22:39:54.766Z" },
{ url = "https://files.pythonhosted.org/packages/c1/bd/8d001191893178ff8e826e46ad5299446e62b93cd164e17b0ffea08832ec/hf_xet-1.4.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8b301fc150290ca90b4fccd079829b84bb4786747584ae08b94b4577d82fb791", size = 4207692, upload-time = "2026-03-31T22:39:46.246Z" },
{ url = "https://files.pythonhosted.org/packages/ce/48/6790b402803250e9936435613d3a78b9aaeee7973439f0918848dde58309/hf_xet-1.4.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:d972fbe95ddc0d3c0fc49b31a8a69f47db35c1e3699bf316421705741aab6653", size = 3986281, upload-time = "2026-03-31T22:39:44.648Z" },
{ url = "https://files.pythonhosted.org/packages/51/56/ea62552fe53db652a9099eda600b032d75554d0e86c12a73824bfedef88b/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c5b48db1ee344a805a1b9bd2cda9b6b65fe77ed3787bd6e87ad5521141d317cd", size = 4187414, upload-time = "2026-03-31T22:40:04.951Z" },
{ url = "https://files.pythonhosted.org/packages/7d/f5/bc1456d4638061bea997e6d2db60a1a613d7b200e0755965ec312dc1ef79/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:22bdc1f5fb8b15bf2831440b91d1c9bbceeb7e10c81a12e8d75889996a5c9da8", size = 4424368, upload-time = "2026-03-31T22:40:06.347Z" },
{ url = "https://files.pythonhosted.org/packages/e4/76/ab597bae87e1f06d18d3ecb8ed7f0d3c9a37037fc32ce76233d369273c64/hf_xet-1.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:0392c79b7cf48418cd61478c1a925246cf10639f4cd9d94368d8ca1e8df9ea07", size = 3672280, upload-time = "2026-03-31T22:40:16.401Z" },
{ url = "https://files.pythonhosted.org/packages/62/05/2e462d34e23a09a74d73785dbed71cc5dbad82a72eee2ad60a72a554155d/hf_xet-1.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:681c92a07796325778a79d76c67011764ecc9042a8c3579332b61b63ae512075", size = 3528945, upload-time = "2026-03-31T22:40:14.995Z" },
{ url = "https://files.pythonhosted.org/packages/ac/9f/9c23e4a447b8f83120798f9279d0297a4d1360bdbf59ef49ebec78fe2545/hf_xet-1.4.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d0da85329eaf196e03e90b84c2d0aca53bd4573d097a75f99609e80775f98025", size = 3805048, upload-time = "2026-03-31T22:39:53.105Z" },
{ url = "https://files.pythonhosted.org/packages/0b/f8/7aacb8e5f4a7899d39c787b5984e912e6c18b11be136ef13947d7a66d265/hf_xet-1.4.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e23717ce4186b265f69afa66e6f0069fe7efbf331546f5c313d00e123dc84583", size = 3562178, upload-time = "2026-03-31T22:39:51.295Z" },
{ url = "https://files.pythonhosted.org/packages/df/9a/a24b26dc8a65f0ecc0fe5be981a19e61e7ca963b85e062c083f3a9100529/hf_xet-1.4.3-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc360b70c815bf340ed56c7b8c63aacf11762a4b099b2fe2c9bd6d6068668c08", size = 4212320, upload-time = "2026-03-31T22:39:42.922Z" },
{ url = "https://files.pythonhosted.org/packages/53/60/46d493db155d2ee2801b71fb1b0fd67696359047fdd8caee2c914cc50c79/hf_xet-1.4.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:39f2d2e9654cd9b4319885733993807aab6de9dfbd34c42f0b78338d6617421f", size = 3991546, upload-time = "2026-03-31T22:39:41.335Z" },
{ url = "https://files.pythonhosted.org/packages/bc/f5/067363e1c96c6b17256910830d1b54099d06287e10f4ec6ec4e7e08371fc/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:49ad8a8cead2b56051aa84d7fce3e1335efe68df3cf6c058f22a65513885baac", size = 4193200, upload-time = "2026-03-31T22:40:01.936Z" },
{ url = "https://files.pythonhosted.org/packages/42/4b/53951592882d9c23080c7644542fda34a3813104e9e11fa1a7d82d419cb8/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7716d62015477a70ea272d2d68cd7cad140f61c52ee452e133e139abfe2c17ba", size = 4429392, upload-time = "2026-03-31T22:40:03.492Z" },
{ url = "https://files.pythonhosted.org/packages/8a/21/75a6c175b4e79662ad8e62f46a40ce341d8d6b206b06b4320d07d55b188c/hf_xet-1.4.3-cp37-abi3-win_amd64.whl", hash = "sha256:6b591fcad34e272a5b02607485e4f2a1334aebf1bc6d16ce8eb1eb8978ac2021", size = 3677359, upload-time = "2026-03-31T22:40:13.619Z" },
{ url = "https://files.pythonhosted.org/packages/8a/7c/44314ecd0e89f8b2b51c9d9e5e7a60a9c1c82024ac471d415860557d3cd8/hf_xet-1.4.3-cp37-abi3-win_arm64.whl", hash = "sha256:7c2c7e20bcfcc946dc67187c203463f5e932e395845d098cc2a93f5b67ca0b47", size = 3533664, upload-time = "2026-03-31T22:40:12.152Z" },
]
[[package]] [[package]]
name = "httpcore" name = "httpcore"
version = "1.0.9" version = "1.0.9"
@@ -285,6 +421,26 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" }, { url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" },
] ]
[[package]]
name = "huggingface-hub"
version = "1.10.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "filelock" },
{ name = "fsspec" },
{ name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
{ name = "httpx" },
{ name = "packaging" },
{ name = "pyyaml" },
{ name = "tqdm" },
{ name = "typer" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e4/28/baf5d745559503ce8d28cf5bc9551f5ac59158eafd7b6a6afff0bcdb0f50/huggingface_hub-1.10.1.tar.gz", hash = "sha256:696c53cf9c2ac9befbfb5dd41d05392a031c69fc6930d1ed9671debd405b6fff", size = 758094, upload-time = "2026-04-09T15:01:18.928Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/83/8c/c7a33f3efaa8d6a5bc40e012e5ecc2d72c2e6124550ca9085fe0ceed9993/huggingface_hub-1.10.1-py3-none-any.whl", hash = "sha256:6b981107a62fbe68c74374418983399c632e35786dcd14642a9f2972633c8b5a", size = 642630, upload-time = "2026-04-09T15:01:17.35Z" },
]
[[package]] [[package]]
name = "idna" name = "idna"
version = "3.11" version = "3.11"
@@ -381,6 +537,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" },
] ]
[[package]]
name = "markdown-it-py"
version = "4.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "mdurl" },
]
sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
]
[[package]] [[package]]
name = "mcp" name = "mcp"
version = "1.27.0" version = "1.27.0"
@@ -406,6 +574,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9c/46/f6b4ad632c67ef35209a66127e4bddc95759649dd595f71f13fba11bdf9a/mcp-1.27.0-py3-none-any.whl", hash = "sha256:5ce1fa81614958e267b21fb2aa34e0aea8e2c6ede60d52aba45fd47246b4d741", size = 215967, upload-time = "2026-04-02T14:48:07.24Z" }, { url = "https://files.pythonhosted.org/packages/9c/46/f6b4ad632c67ef35209a66127e4bddc95759649dd595f71f13fba11bdf9a/mcp-1.27.0-py3-none-any.whl", hash = "sha256:5ce1fa81614958e267b21fb2aa34e0aea8e2c6ede60d52aba45fd47246b4d741", size = 215967, upload-time = "2026-04-02T14:48:07.24Z" },
] ]
[[package]]
name = "mdurl"
version = "0.1.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
]
[[package]]
name = "mpmath"
version = "1.3.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" },
]
[[package]] [[package]]
name = "numpy" name = "numpy"
version = "2.4.4" version = "2.4.4"
@@ -456,6 +642,34 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/58/78/548fb8e07b1a341746bfbecb32f2c268470f45fa028aacdbd10d9bc73aab/numpy-2.4.4-cp314-cp314t-win_arm64.whl", hash = "sha256:ba203255017337d39f89bdd58417f03c4426f12beed0440cfd933cb15f8669c7", size = 10566643, upload-time = "2026-03-29T13:21:34.339Z" }, { url = "https://files.pythonhosted.org/packages/58/78/548fb8e07b1a341746bfbecb32f2c268470f45fa028aacdbd10d9bc73aab/numpy-2.4.4-cp314-cp314t-win_arm64.whl", hash = "sha256:ba203255017337d39f89bdd58417f03c4426f12beed0440cfd933cb15f8669c7", size = 10566643, upload-time = "2026-03-29T13:21:34.339Z" },
] ]
[[package]]
name = "onnxruntime"
version = "1.24.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "flatbuffers" },
{ name = "numpy" },
{ name = "packaging" },
{ name = "protobuf" },
{ name = "sympy" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/e9/f0/8a21ec0a97e40abb7d8da1e8b20fb9e1af509cc6d191f6faa75f73622fb2/onnxruntime-1.24.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e99a48078baaefa2b50fe5836c319499f71f13f76ed32d0211f39109147a49e0", size = 17341922, upload-time = "2026-03-17T22:03:56.364Z" },
{ url = "https://files.pythonhosted.org/packages/8b/25/d7908de8e08cee9abfa15b8aa82349b79733ae5865162a3609c11598805d/onnxruntime-1.24.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc4aaed1e5e1aaacf2343c838a30a7c3ade78f13eeb16817411f929d04040a13", size = 15172290, upload-time = "2026-03-17T22:03:37.124Z" },
{ url = "https://files.pythonhosted.org/packages/7f/72/105ec27a78c5aa0154a7c0cd8c41c19a97799c3b12fc30392928997e3be3/onnxruntime-1.24.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e30c972bc02e072911aabb6891453ec73795386c0af2b761b65444b8a4c4745f", size = 17244738, upload-time = "2026-03-17T22:04:40.625Z" },
{ url = "https://files.pythonhosted.org/packages/05/fb/a592736d968c2f58e12de4d52088dda8e0e724b26ad5c0487263adb45875/onnxruntime-1.24.4-cp313-cp313-win_amd64.whl", hash = "sha256:3b6ba8b0181a3aa88edab00eb01424ffc06f42e71095a91186c2249415fcff93", size = 12597435, upload-time = "2026-03-17T22:05:43.826Z" },
{ url = "https://files.pythonhosted.org/packages/ad/04/ae2479e9841b64bd2eb44f8a64756c62593f896514369a11243b1b86ca5c/onnxruntime-1.24.4-cp313-cp313-win_arm64.whl", hash = "sha256:71d6a5c1821d6e8586a024000ece458db8f2fc0ecd050435d45794827ce81e19", size = 12269852, upload-time = "2026-03-17T22:05:33.353Z" },
{ url = "https://files.pythonhosted.org/packages/b4/af/a479a536c4398ffaf49fbbe755f45d5b8726bdb4335ab31b537f3d7149b8/onnxruntime-1.24.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1700f559c8086d06b2a4d5de51e62cb4ff5e2631822f71a36db8c72383db71ee", size = 15176861, upload-time = "2026-03-17T22:03:40.143Z" },
{ url = "https://files.pythonhosted.org/packages/be/13/19f5da70c346a76037da2c2851ecbf1266e61d7f0dcdb887c667210d4608/onnxruntime-1.24.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c74e268dc808e61e63784d43f9ddcdaf50a776c2819e8bd1d1b11ef64bf7e36", size = 17247454, upload-time = "2026-03-17T22:04:46.643Z" },
{ url = "https://files.pythonhosted.org/packages/89/db/b30dbbd6037847b205ab75d962bc349bf1e46d02a65b30d7047a6893ffd6/onnxruntime-1.24.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:fbff2a248940e3398ae78374c5a839e49a2f39079b488bc64439fa0ec327a3e4", size = 17343300, upload-time = "2026-03-17T22:03:59.223Z" },
{ url = "https://files.pythonhosted.org/packages/61/88/1746c0e7959961475b84c776d35601a21d445f463c93b1433a409ec3e188/onnxruntime-1.24.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e2b7969e72d8cb53ffc88ab6d49dd5e75c1c663bda7be7eb0ece192f127343d1", size = 15175936, upload-time = "2026-03-17T22:03:43.671Z" },
{ url = "https://files.pythonhosted.org/packages/5f/ba/4699cde04a52cece66cbebc85bd8335a0d3b9ad485abc9a2e15946a1349d/onnxruntime-1.24.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14ed1f197fab812b695a5eaddb536c635e58a2fbbe50a517c78f082cc6ce9177", size = 17246432, upload-time = "2026-03-17T22:04:49.58Z" },
{ url = "https://files.pythonhosted.org/packages/ef/60/4590910841bb28bd3b4b388a9efbedf4e2d2cca99ddf0c863642b4e87814/onnxruntime-1.24.4-cp314-cp314-win_amd64.whl", hash = "sha256:311e309f573bf3c12aa5723e23823077f83d5e412a18499d4485c7eb41040858", size = 12903276, upload-time = "2026-03-17T22:05:46.349Z" },
{ url = "https://files.pythonhosted.org/packages/7f/6f/60e2c0acea1e1ac09b3e794b5a19c166eebf91c0b860b3e6db8e74983fda/onnxruntime-1.24.4-cp314-cp314-win_arm64.whl", hash = "sha256:3f0b910e86b759a4732663ec61fd57ac42ee1b0066f68299de164220b660546d", size = 12594365, upload-time = "2026-03-17T22:05:35.795Z" },
{ url = "https://files.pythonhosted.org/packages/cf/68/0c05d10f8f6c40fe0912ebec0d5a33884aaa2af2053507e864dab0883208/onnxruntime-1.24.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa12ddc54c9c4594073abcaa265cd9681e95fb89dae982a6f508a794ca42e661", size = 15176889, upload-time = "2026-03-17T22:03:48.021Z" },
{ url = "https://files.pythonhosted.org/packages/6c/1d/1666dc64e78d8587d168fec4e3b7922b92eb286a2ddeebcf6acb55c7dc82/onnxruntime-1.24.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1cc6a518255f012134bc791975a6294806be9a3b20c4a54cca25194c90cf731", size = 17247021, upload-time = "2026-03-17T22:04:52.377Z" },
]
[[package]] [[package]]
name = "openai" name = "openai"
version = "2.30.0" version = "2.30.0"
@@ -551,6 +765,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
] ]
[[package]]
name = "protobuf"
version = "7.34.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/6b/6b/a0e95cad1ad7cc3f2c6821fcab91671bd5b78bd42afb357bb4765f29bc41/protobuf-7.34.1.tar.gz", hash = "sha256:9ce42245e704cc5027be797c1db1eb93184d44d1cdd71811fb2d9b25ad541280", size = 454708, upload-time = "2026-03-20T17:34:47.036Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ec/11/3325d41e6ee15bf1125654301211247b042563bcc898784351252549a8ad/protobuf-7.34.1-cp310-abi3-macosx_10_9_universal2.whl", hash = "sha256:d8b2cc79c4d8f62b293ad9b11ec3aebce9af481fa73e64556969f7345ebf9fc7", size = 429247, upload-time = "2026-03-20T17:34:37.024Z" },
{ url = "https://files.pythonhosted.org/packages/eb/9d/aa69df2724ff63efa6f72307b483ce0827f4347cc6d6df24b59e26659fef/protobuf-7.34.1-cp310-abi3-manylinux2014_aarch64.whl", hash = "sha256:5185e0e948d07abe94bb76ec9b8416b604cfe5da6f871d67aad30cbf24c3110b", size = 325753, upload-time = "2026-03-20T17:34:38.751Z" },
{ url = "https://files.pythonhosted.org/packages/92/e8/d174c91fd48e50101943f042b09af9029064810b734e4160bbe282fa1caa/protobuf-7.34.1-cp310-abi3-manylinux2014_s390x.whl", hash = "sha256:403b093a6e28a960372b44e5eb081775c9b056e816a8029c61231743d63f881a", size = 340198, upload-time = "2026-03-20T17:34:39.871Z" },
{ url = "https://files.pythonhosted.org/packages/53/1b/3b431694a4dc6d37b9f653f0c64b0a0d9ec074ee810710c0c3da21d67ba7/protobuf-7.34.1-cp310-abi3-manylinux2014_x86_64.whl", hash = "sha256:8ff40ce8cd688f7265326b38d5a1bed9bfdf5e6723d49961432f83e21d5713e4", size = 324267, upload-time = "2026-03-20T17:34:41.1Z" },
{ url = "https://files.pythonhosted.org/packages/85/29/64de04a0ac142fb685fd09999bc3d337943fb386f3a0ec57f92fd8203f97/protobuf-7.34.1-cp310-abi3-win32.whl", hash = "sha256:34b84ce27680df7cca9f231043ada0daa55d0c44a2ddfaa58ec1d0d89d8bf60a", size = 426628, upload-time = "2026-03-20T17:34:42.536Z" },
{ url = "https://files.pythonhosted.org/packages/4d/87/cb5e585192a22b8bd457df5a2c16a75ea0db9674c3a0a39fc9347d84e075/protobuf-7.34.1-cp310-abi3-win_amd64.whl", hash = "sha256:e97b55646e6ce5cbb0954a8c28cd39a5869b59090dfaa7df4598a7fba869468c", size = 437901, upload-time = "2026-03-20T17:34:44.112Z" },
{ url = "https://files.pythonhosted.org/packages/88/95/608f665226bca68b736b79e457fded9a2a38c4f4379a4a7614303d9db3bc/protobuf-7.34.1-py3-none-any.whl", hash = "sha256:bb3812cd53aefea2b028ef42bd780f5b96407247f20c6ef7c679807e9d188f11", size = 170715, upload-time = "2026-03-20T17:34:45.384Z" },
]
[[package]] [[package]]
name = "pycparser" name = "pycparser"
version = "3.0" version = "3.0"
@@ -721,6 +950,42 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" },
] ]
[[package]]
name = "pyyaml"
version = "6.0.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" },
{ url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" },
{ url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" },
{ url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" },
{ url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" },
{ url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" },
{ url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" },
{ url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" },
{ url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" },
{ url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" },
{ url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" },
{ url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" },
{ url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" },
{ url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" },
{ url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" },
{ url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" },
{ url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" },
{ url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" },
{ url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" },
{ url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" },
{ url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" },
{ url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" },
{ url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" },
{ url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" },
{ url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" },
{ url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" },
{ url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" },
{ url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" },
]
[[package]] [[package]]
name = "referencing" name = "referencing"
version = "0.37.0" version = "0.37.0"
@@ -734,6 +999,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" },
] ]
[[package]]
name = "rich"
version = "14.3.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "markdown-it-py" },
{ name = "pygments" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b3/c6/f3b320c27991c46f43ee9d856302c70dc2d0fb2dba4842ff739d5f46b393/rich-14.3.3.tar.gz", hash = "sha256:b8daa0b9e4eef54dd8cf7c86c03713f53241884e814f4e2f5fb342fe520f639b", size = 230582, upload-time = "2026-02-19T17:23:12.474Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" },
]
[[package]] [[package]]
name = "rpds-py" name = "rpds-py"
version = "0.30.0" version = "0.30.0"
@@ -800,6 +1078,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532, upload-time = "2025-11-30T20:24:14.634Z" }, { url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532, upload-time = "2025-11-30T20:24:14.634Z" },
] ]
[[package]]
name = "setuptools"
version = "82.0.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4f/db/cfac1baf10650ab4d1c111714410d2fbb77ac5a616db26775db562c8fab2/setuptools-82.0.1.tar.gz", hash = "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9", size = 1152316, upload-time = "2026-03-09T12:47:17.221Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9d/76/f789f7a86709c6b087c5a2f52f911838cad707cc613162401badc665acfe/setuptools-82.0.1-py3-none-any.whl", hash = "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb", size = 1006223, upload-time = "2026-03-09T12:47:15.026Z" },
]
[[package]]
name = "shellingham"
version = "1.5.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
]
[[package]] [[package]]
name = "sniffio" name = "sniffio"
version = "1.3.1" version = "1.3.1"
@@ -834,6 +1130,44 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" }, { url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" },
] ]
[[package]]
name = "sympy"
version = "1.14.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "mpmath" },
]
sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" },
]
[[package]]
name = "tokenizers"
version = "0.22.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "huggingface-hub" },
]
sdist = { url = "https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload-time = "2026-01-05T10:45:15.988Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275, upload-time = "2026-01-05T10:41:02.158Z" },
{ url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472, upload-time = "2026-01-05T10:41:00.276Z" },
{ url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736, upload-time = "2026-01-05T10:40:32.165Z" },
{ url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835, upload-time = "2026-01-05T10:40:38.847Z" },
{ url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673, upload-time = "2026-01-05T10:40:56.614Z" },
{ url = "https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818, upload-time = "2026-01-05T10:40:44.507Z" },
{ url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195, upload-time = "2026-01-05T10:40:51.139Z" },
{ url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982, upload-time = "2026-01-05T10:40:58.331Z" },
{ url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245, upload-time = "2026-01-05T10:41:04.053Z" },
{ url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069, upload-time = "2026-01-05T10:45:10.673Z" },
{ url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263, upload-time = "2026-01-05T10:45:12.559Z" },
{ url = "https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429, upload-time = "2026-01-05T10:45:14.333Z" },
{ url = "https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash = "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363, upload-time = "2026-01-05T10:45:20.593Z" },
{ url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786, upload-time = "2026-01-05T10:45:18.411Z" },
{ url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" },
]
[[package]] [[package]]
name = "tqdm" name = "tqdm"
version = "4.67.3" version = "4.67.3"
@@ -846,6 +1180,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" },
] ]
[[package]]
name = "typer"
version = "0.24.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "annotated-doc" },
{ name = "click" },
{ name = "rich" },
{ name = "shellingham" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" },
]
[[package]] [[package]]
name = "typing-extensions" name = "typing-extensions"
version = "4.15.0" version = "4.15.0"