update docs

2026-05-06 11:51:43 -03:00
parent c8bb6c7581
commit 946234eb9e
16 changed files with 1723 additions and 502 deletions

docs/index.html Normal file

@@ -0,0 +1,581 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Mitus — Architecture</title>
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap');
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
background: #1e1e2e;
color: #cdd6f4;
font-family: 'Inter', sans-serif;
line-height: 1.6;
height: 100vh;
overflow: hidden;
display: flex;
flex-direction: column;
}
header {
padding: 16px 24px;
border-bottom: 1px solid #313244;
display: flex;
align-items: baseline;
gap: 16px;
flex-shrink: 0;
}
header h1 {
font-family: 'JetBrains Mono', monospace;
font-size: 22px;
font-weight: 600;
letter-spacing: 3px;
color: #89b4fa;
}
header .subtitle {
font-size: 13px;
color: #6c7086;
letter-spacing: 1px;
text-transform: uppercase;
}
.layout {
display: flex;
flex: 1;
min-height: 0;
}
nav {
display: flex;
flex-direction: column;
width: 220px;
flex-shrink: 0;
background: #181825;
border-right: 1px solid #313244;
padding: 8px 0;
overflow-y: auto;
}
nav a {
padding: 10px 20px;
font-family: 'JetBrains Mono', monospace;
font-size: 12px;
color: #a6adc8;
text-decoration: none;
border-left: 2px solid transparent;
cursor: pointer;
transition: all 0.15s;
}
nav a:hover { color: #cdd6f4; background: #313244; }
nav a.active { color: #89b4fa; border-left-color: #89b4fa; background: #1e2d3e; }
nav .group {
font-family: 'JetBrains Mono', monospace;
font-size: 10px;
color: #585b70;
letter-spacing: 1px;
text-transform: uppercase;
padding: 16px 20px 6px;
}
main {
flex: 1;
overflow: auto;
padding: 32px 48px;
}
.graph-section {
display: none;
animation: fadeIn 0.2s ease;
}
.graph-section.active { display: block; }
@keyframes fadeIn {
from { opacity: 0; }
to { opacity: 1; }
}
.graph-section h2 {
font-family: 'JetBrains Mono', monospace;
font-size: 15px;
font-weight: 500;
color: #a6adc8;
margin-bottom: 8px;
letter-spacing: 1px;
}
.graph-section p {
font-size: 13px;
color: #6c7086;
margin-bottom: 24px;
max-width: 800px;
}
.graph-container {
background: #11111b;
border: 1px solid #313244;
padding: 24px;
overflow: auto;
}
.graph-container a { display: block; }
.graph-container img { max-width: 100%; height: auto; }
.legend {
display: flex;
gap: 24px;
margin-top: 16px;
font-size: 11px;
font-family: 'JetBrains Mono', monospace;
color: #6c7086;
}
.legend span::before {
content: '';
display: inline-block;
width: 8px;
height: 8px;
margin-right: 6px;
border-radius: 50%;
}
.legend .python::before { background: #cba6f7; }
.legend .rust::before { background: #89b4fa; }
.legend .hw::before { background: #a6e3a1; }
.legend .fs::before { background: #585b70; }
/* Repo tree */
.tree-container {
background: #11111b;
border: 1px solid #313244;
padding: 24px;
overflow: auto;
}
.repo-tree {
font-family: 'JetBrains Mono', monospace;
font-size: 13px;
line-height: 1.7;
color: #a6adc8;
}
.t-root { color: #89b4fa; font-weight: 600; font-size: 15px; }
.t-dir { color: #cdd6f4; font-weight: 500; }
.t-rust { color: #89b4fa; font-weight: 500; }
.t-py { color: #cba6f7; font-weight: 500; }
.t-comment { color: #6c7086; }
/* Prose sections */
.graph-section h3 {
font-family: 'JetBrains Mono', monospace;
font-size: 13px;
font-weight: 500;
color: #cdd6f4;
letter-spacing: 1px;
margin: 32px 0 10px;
text-transform: uppercase;
}
.prose { max-width: 820px; }
.prose p {
font-size: 14px;
color: #a6adc8;
margin-bottom: 14px;
line-height: 1.7;
}
.prose p b { color: #cdd6f4; font-weight: 600; }
.prose code {
font-family: 'JetBrains Mono', monospace;
font-size: 12px;
color: #89b4fa;
background: #181825;
padding: 1px 5px;
border-radius: 3px;
}
.prose pre {
background: #11111b;
border: 1px solid #313244;
padding: 14px 16px;
margin: 8px 0 18px;
border-radius: 4px;
overflow-x: auto;
}
.prose pre code {
background: transparent;
padding: 0;
color: #cdd6f4;
font-size: 12px;
}
.prose ul {
margin: 8px 0 16px 20px;
font-size: 14px;
color: #a6adc8;
line-height: 1.7;
}
.prose ul li { margin-bottom: 6px; }
.prose .note {
border-left: 3px solid #f9e2af;
background: #2a2a3e;
padding: 10px 14px;
margin: 12px 0 18px;
font-size: 13px;
color: #cdd6f4;
}
.cmp-table {
width: 100%;
border-collapse: collapse;
font-size: 13px;
margin: 8px 0 20px;
border: 1px solid #313244;
}
.cmp-table th {
text-align: left;
background: #181825;
color: #a6adc8;
font-family: 'JetBrains Mono', monospace;
font-size: 11px;
letter-spacing: 1px;
padding: 10px 14px;
border-bottom: 1px solid #313244;
}
.cmp-table td {
padding: 10px 14px;
color: #a6adc8;
border-bottom: 1px solid #313244;
vertical-align: top;
}
.cmp-table tr:last-child td { border-bottom: none; }
/* Mobile */
.menu-toggle {
display: none;
background: transparent;
border: 1px solid #313244;
color: #cdd6f4;
padding: 6px 10px;
font-family: 'JetBrains Mono', monospace;
font-size: 14px;
cursor: pointer;
line-height: 1;
margin-left: auto;
}
.menu-toggle:hover { background: #313244; }
.nav-backdrop {
display: none;
position: absolute;
inset: 0;
background: rgba(0, 0, 0, 0.5);
z-index: 10;
}
.layout.nav-open .nav-backdrop { display: block; }
@media (max-width: 720px) {
header { padding: 10px 12px; gap: 8px; }
header h1 { font-size: 16px; letter-spacing: 1px; }
header .subtitle { display: none; }
.menu-toggle { display: inline-block; }
.layout { position: relative; }
nav {
position: absolute;
left: 0; top: 0; bottom: 0;
width: 220px;
z-index: 20;
transform: translateX(-100%);
transition: transform 0.2s ease;
box-shadow: 2px 0 8px rgba(0, 0, 0, 0.5);
}
.layout.nav-open nav { transform: translateX(0); }
main { padding: 16px; }
.graph-section h2 { font-size: 13px; }
.prose p, .prose ul { font-size: 13px; }
}
</style>
</head>
<body>
<header>
<h1>MITUS</h1>
<span class="subtitle">Stream viewer + agent — architecture</span>
<button class="menu-toggle" onclick="toggleNav()" aria-label="Toggle navigation">☰</button>
</header>
<div class="layout">
<div class="nav-backdrop" onclick="toggleNav()"></div>
<nav>
<div class="group">Overview</div>
<a class="active" onclick="show('overview')">Goal &amp; walkthrough</a>
<a onclick="show('usage')">Usage</a>
<a onclick="show('system')">System</a>
<div class="group">Transports</div>
<a onclick="show('python')">Python pipeline</a>
<a onclick="show('rust_client')">Rust client</a>
<a onclick="show('rust_server')">Rust server</a>
<a onclick="show('crates')">Rust crates</a>
<div class="group">Reference</div>
<a onclick="show('repo')">Repository</a>
<a onclick="show('notes')">Design notes</a>
</nav>
<main>
<section id="overview" class="graph-section active">
<h2>GOAL &amp; WALKTHROUGH</h2>
<p>Mitus records a remote desktop, transcribes its audio, extracts scene-change frames, and exposes both to an LLM agent for ad-hoc Q&amp;A.</p>
<div class="prose">
<h3>What it is</h3>
<p>A two-machine setup: the <b>sender</b> (a Wayland desktop) captures screen + audio and ships an encoded stream to the <b>receiver</b>. The receiver records to disk, runs scene detection on the live feed to extract per-event JPEG frames, transcribes the audio, and presents the result in a GTK4 GUI. The GUI doubles as an LLM client: select a frame or transcript span, hit Enter, and an agent (Claude SDK or any OpenAI-compatible endpoint) answers using the selected media as context.</p>
<h3>Why the split</h3>
<p>Capture wants Wayland + a VAAPI-friendly GPU; analysis wants CUDA for both faster-whisper and ffmpeg scene detection. Different machines, different drivers — the network stream is the seam. The receiver also runs the GUI because the recordings are stored locally and the agent reads large frames as local files rather than blobs shipped over the wire.</p>
<h3>Two transport modes</h3>
<p>Both modes produce the <b>same on-disk session layout</b> (<code>data/&lt;session_id&gt;/stream/</code>, <code>frames/</code>, <code>audio/</code>, <code>transcript.json</code>) so the GUI doesn't care which path the bytes took. The choice is a CLI flag.</p>
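<p>For orientation, a sketch of that shared layout (the directory names are from this page; the helper and its return shape are illustrative, not the real API):</p>
<pre><code># Sketch only: the four artifacts named above, resolved under data/.
from pathlib import Path

def session_paths(data_dir: Path, session_id: str) -> dict[str, Path]:
    root = data_dir / session_id
    return {
        "stream": root / "stream",               # fragmented MP4 recording
        "frames": root / "frames",               # per-scene JPEG frames
        "audio": root / "audio",                 # extracted audio
        "transcript": root / "transcript.json",  # transcription output
    }</code></pre>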
<ul>
<li><b>Python (default).</b> Sender is a bash watchdog wrapping the <code>ffmpeg</code> CLI. Receiver is <code>cht/stream/recorder.py</code>: an <code>ffmpeg</code> listener that writes fragmented MP4 + relays UDP to <code>mpv</code> + emits scene frames from a <code>showinfo</code> stdout pipe. Simple and single-process, but every restart costs a few seconds.</li>
<li><b>Rust (<code>--rust</code>).</b> A standalone Rust workspace under <code>media/</code>: <code>cht-client</code> on the sender, <code>cht-server</code> on the receiver. Wire protocol is a typed <code>WirePacket</code> framing instead of raw mpegts. Scene detection still runs in Python via a Unix-socket relay from the server. Connect time drops from ~20s to ~3s; session reload from disk is 12s.</li>
</ul>
<div class="note">The <code>media/</code> directory holds the Rust transport. While both modes coexist, that name is a misnomer — a future rename is planned. For now, "Rust transport" and "<code>media/</code>" mean the same thing.</div>
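<p>For readers unfamiliar with "typed framing": a purely illustrative sketch of the idea, in Python for brevity. The real <code>WirePacket</code> layout lives in <code>media/common/</code> and is not reproduced here; the type codes, field order, and widths below are guesses, not the actual format.</p>
<pre><code># Illustrative only: not the real WirePacket layout.
import struct

PACKET_TYPES = {0: "video", 1: "audio", 2: "control"}   # hypothetical codes

def encode_packet(ptype: int, pts_us: int, payload: bytes) -> bytes:
    # [u8 type][u64 pts][u32 len][payload]: a guess at the general shape
    return struct.pack("!BQI", ptype, pts_us, len(payload)) + payload

def decode_packet(buf: bytes) -> tuple[str, int, bytes]:
    ptype, pts_us, length = struct.unpack_from("!BQI", buf)
    header = struct.calcsize("!BQI")
    return PACKET_TYPES[ptype], pts_us, buf[header:header + length]</code></pre>
<p>The point of the framing is that the receiver can route Video/Audio/Control packets without parsing mpegts at all.</p>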
<h3>What the agent sees</h3>
<p>Two reference syntaxes resolve to media when sent: <code>@F0001</code>…<code>@F0042</code> for frames, <code>@T0001</code>…<code>@T0010</code> for transcript segments. Single-word verbs <code>describe</code> and <code>answer</code> are sent verbatim — no system prompt, no boilerplate. If you want detail, you type it. The agent runner injects only the referenced frame paths and transcript text alongside the user message.</p>
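<p>A minimal sketch of how such references could be parsed (the regex and function are illustrative, not the resolver the app actually uses):</p>
<pre><code># Illustrative only: the real resolver lives in the app, not shown here.
import re

REF = re.compile(r"@([FT])(\d+)(?:-(\d+))?")   # @F7, @T5, ranges like @F1-3

def parse_refs(text: str) -> list[tuple[str, int]]:
    refs = []
    for kind, start, end in REF.findall(text):
        last = int(end) if end else int(start)
        refs.extend((kind, i) for i in range(int(start), last + 1))
    return refs

parse_refs("answer @F1-3 @T5")   # [('F', 1), ('F', 2), ('F', 3), ('T', 5)]</code></pre>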
</div>
</section>
<section id="usage" class="graph-section">
<h2>USAGE</h2>
<p>How to start a session — sender side, receiver side, both transports.</p>
<div class="prose">
<p>Both <code>ctrl/client.sh</code> and <code>ctrl/app.sh</code> take a transport flag — <code>--python</code> (default) or <code>--rust</code>. The <code>ctrl/</code> wrappers are the entrypoints; <code>media/ctrl/*</code> and <code>sender/stream_av.sh</code> are implementation details they dispatch to.</p>
<h3>Receiver (mcrn) — GUI</h3>
<p><b>Python transport (default):</b></p>
<pre><code>./ctrl/app.sh --python</code></pre>
<p><b>Rust transport:</b></p>
<pre><code>./ctrl/server.sh # cht-server on TCP :4447 (Rust mode only)
./ctrl/app.sh --rust</code></pre>
<p>Python mode does its own TCP listening inside the GUI process — no separate server step.</p>
<h3>Sender</h3>
<p><b>Python transport:</b></p>
<pre><code>./ctrl/client.sh --python [RECEIVER_IP] [PORT] # default port 4444</code></pre>
<p>(Runs <code>sudo sender/stream_av.sh</code> under the hood — <code>sudo</code> is required for <code>kmsgrab</code>.)</p>
<p><b>Rust transport:</b></p>
<pre><code>./ctrl/client.sh --rust [server_addr] # default mcrndeb:4447</code></pre>
<h3>Sync</h3>
<p>Both machines share the same source tree; <code>ctrl/sync.sh</code> rsyncs from the dev host to <code>mcrndeb</code>. The receiver's filesystem is also bind-mounted at <code>~/mcrn</code> on the dev host for quick file access.</p>
<h3>Inside the GUI</h3>
<ul>
<li><b>Frames panel</b> — click to select; <code>←/→</code> navigate.</li>
<li><b>Transcript panel</b> — click to select; <code>↑/↓</code> navigate; <code>Shift</code> to extend.</li>
<li><b>Enter</b> — sends <code>answer</code> + selected refs to the agent.</li>
<li><b>Describe / Answer</b> buttons — same idea, single-word verb prepended.</li>
<li><b>Agent input</b> — type freely; <code>@F1-3</code> and <code>@T5</code> attach refs.</li>
<li><b>Esc</b> — clear selection. <b>Del</b> — clear agent output.</li>
<li><b>Ctrl+R</b> — manual segment cut.</li>
</ul>
<h3>Agent provider</h3>
<p>Resolution order in <code>cht/agent/runner.py</code>:</p>
<ul>
<li><code>GROQ_API_KEY</code> → OpenAI-compatible client against Groq.</li>
<li><code>OPENAI_API_KEY</code> → OpenAI / OpenAI-compatible.</li>
<li>(default) → Claude Code SDK using your local CC subscription.</li>
</ul>
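<p>A minimal sketch of that order, assuming plain environment-variable checks (the helper name is illustrative; the real logic lives in <code>cht/agent/runner.py</code>):</p>
<pre><code># Sketch only: same precedence as the list above.
import os

def pick_provider() -> str:
    if os.environ.get("GROQ_API_KEY"):
        return "groq"        # OpenAI-compatible client pointed at Groq
    if os.environ.get("OPENAI_API_KEY"):
        return "openai"      # OpenAI / OpenAI-compatible endpoint
    return "claude-sdk"      # default: Claude Code SDK, local subscription</code></pre>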
</div>
</section>
<section id="system" class="graph-section">
<h2>SYSTEM ARCHITECTURE</h2>
<p>End-to-end view: sender capture → network → receiver record + analyse → GUI + agent. Both transports converge on the same on-disk session layout.</p>
<div class="graph-container">
<a href="viewer.html?src=graphs/system.svg"><img src="graphs/system.svg" alt="System architecture"></a>
</div>
<div class="legend">
<span class="python">Python</span>
<span class="rust">Rust</span>
<span class="hw">Hardware / external</span>
<span class="fs">Filesystem</span>
</div>
</section>
<section id="python" class="graph-section">
<h2>PYTHON PIPELINE</h2>
<p>Default mode. Bash + ffmpeg CLI on the sender; <code>StreamRecorder</code> + <code>SessionProcessor</code> in <code>cht/stream/</code> on the receiver. Scene detection rides the recorder's <code>ffmpeg</code> stdout pipe — sub-second latency, no extra process.</p>
<div class="graph-container">
<a href="viewer.html?src=graphs/python_pipeline.svg"><img src="graphs/python_pipeline.svg" alt="Python pipeline"></a>
</div>
<div class="legend">
<span class="python">Python module</span>
<span class="rust">External binary (ffmpeg)</span>
<span class="hw">Hardware / OS source</span>
<span class="fs">Filesystem output</span>
</div>
</section>
<section id="rust_client" class="graph-section">
<h2>RUST CLIENT — sender</h2>
<p><code>media/client/</code> — replaces <code>sender/stream_av.sh</code> when running with <code>--rust</code>. Two backends: subprocess (default, wraps ffmpeg CLI) and an experimental direct VAAPI capture/encoder.</p>
<div class="graph-container">
<a href="viewer.html?src=graphs/rust_client.svg"><img src="graphs/rust_client.svg" alt="Rust client pipeline"></a>
</div>
</section>
<section id="rust_server" class="graph-section">
<h2>RUST SERVER — receiver</h2>
<p><code>media/server/</code> — replaces <code>StreamRecorder</code> when running with <code>--rust</code>. TCP listener with a typed <code>WirePacket</code> framing; routes Video/Audio/Control packets to ffmpeg recording, ADTS audio, and a Unix-socket scene relay.</p>
<div class="graph-container">
<a href="viewer.html?src=graphs/rust_server.svg"><img src="graphs/rust_server.svg" alt="Rust server pipeline"></a>
</div>
</section>
<section id="crates" class="graph-section">
<h2>RUST CRATES</h2>
<p>Cargo workspace under <code>media/</code>: three crates (<code>cht-common</code>, <code>cht-client</code>, <code>cht-server</code>) and their external deps. Designed to be reusable as a standalone tool — <code>mpr</code> is expected to depend on it too.</p>
<div class="graph-container">
<a href="viewer.html?src=graphs/crates.svg"><img src="graphs/crates.svg" alt="Rust crates"></a>
</div>
</section>
<section id="repo" class="graph-section">
<h2>REPOSITORY STRUCTURE</h2>
<p>Top-level layout. Python app under <code>cht/</code>; Rust transport under <code>media/</code>; sender bash under <code>sender/</code>; ops scripts under <code>ctrl/</code>.</p>
<div class="tree-container">
<pre class="repo-tree"><span class="t-root">cht/</span>
├── <span class="t-py">cht/</span> <span class="t-comment">Python app (GTK4 GUI, recording, transcribe, agent)</span>
│ ├── app.py · window.py <span class="t-comment">entrypoint + main window</span>
│ ├── config.py · session.py <span class="t-comment">app config, session manifest</span>
│ ├── stream/ <span class="t-comment">recorder · processor · tracker · lifecycle · ffmpeg helpers</span>
│ ├── audio/ <span class="t-comment">waveform engine</span>
│ ├── transcriber/ <span class="t-comment">faster-whisper engine</span>
│ ├── scrub/ <span class="t-comment">proxy manager (scrub-mode preview)</span>
│ ├── index/ <span class="t-comment">frame index helpers</span>
│ ├── agent/ <span class="t-comment">runner · base · tools · claude_sdk_connection · openai_connection</span>
│ └── ui/ <span class="t-comment">timeline · monitor · scrub_bar · frames_panel · transcript_panel</span>
<span class="t-comment">agent_input · agent_output · markdown · keyboard · mpv · waveform</span>
├── <span class="t-rust">media/</span> <span class="t-comment">Rust transport workspace (Cargo) — to be renamed once both modes coexist</span>
│ ├── common/ <span class="t-comment">cht-common — WirePacket, ControlMessage, logging</span>
│ ├── client/ <span class="t-comment">cht-client — sender (Wayland, VAAPI)</span>
│ ├── server/ <span class="t-comment">cht-server — receiver (TCP listener, ffmpeg fan-out)</span>
│ └── ctrl/ <span class="t-comment">build.sh · client.sh · server.sh</span>
├── <span class="t-dir">sender/</span> <span class="t-comment">Python-mode sender — stream_av.sh (bash watchdog around ffmpeg CLI)</span>
├── <span class="t-dir">ctrl/</span> <span class="t-comment">app.sh · server.sh · client.sh · sync.sh · bench.py · e2e_test.sh</span>
├── <span class="t-dir">tests/</span> <span class="t-comment">pytest suites — config · ffmpeg · manager · processor · timeline · tracker</span>
├── <span class="t-dir">data/</span> <span class="t-comment">runtime — sessions, active-session pointer (gitignored)</span>
├── <span class="t-dir">logs/</span> <span class="t-comment">runtime logs (gitignored)</span>
├── <span class="t-dir">docs/</span> <span class="t-comment">this site — index.html · viewer.html · graphs/ · render.sh</span>
└── pyproject.toml · uv.lock <span class="t-comment">Python deps via uv</span></pre>
</div>
</section>
<section id="notes" class="graph-section">
<h2>DESIGN NOTES</h2>
<p>Why some non-obvious choices look the way they do.</p>
<div class="prose">
<h3>Same on-disk layout from both transports</h3>
<p>The GUI, transcript, scene index, and agent never branch on transport mode — they only read files. The recording layout is the contract; the network protocol underneath is replaceable. This is what made the Rust port feasible without rewriting the analysis side.</p>
<h3>Scene detection lives in the recorder, not the processor</h3>
<p>In Python mode, scene-change frames come straight off the recorder's <code>ffmpeg</code> stdout pipe — sub-second, single process. Polling the fragmented MP4 from a separate process would add 3–5 s of disk-IPC latency. In Rust mode the same property is approximated by relaying raw H.264 over <code>scene.sock</code> to a separate ffmpeg, but that relay turns out to be the source of most current scene-detection pain (see <i>The scene detection saga</i> below).</p>
<h3>Why bother with the Rust port</h3>
<p>Two measured wins drove the work: connect time dropped from ~20 s (CLI ffmpeg startup + mpegts negotiation) to ~3 s (typed handshake), and session reload from disk dropped to 12 s. The Python recorder still works fine for development; the Rust path matters when you reconnect a lot.</p>
<h3>One-word verbs, no system prompt</h3>
<p>Pressing Enter sends <code>answer</code> + selected refs verbatim. There is no system prompt and no instruction template wrapping the message. If a question needs detail, the user types it — the model sees exactly what you'd see, not a contract you'd have to debug.</p>
<h3>Subprocess backend over a custom encoder</h3>
<p>The Rust client wraps the same <code>ffmpeg</code> CLI the Python sender uses, demuxes its NUT output in-process, and ships <code>EncodedPacket</code>s. Less code to own than a direct VAAPI encode path, and it inherits ffmpeg's robustness around odd Wayland/DRM transitions. The direct VAAPI backend exists but is experimental.</p>
<h3>Sender as a watchdog, not a daemon</h3>
<p>Python-mode <code>stream_av.sh</code> is a bash loop that restarts <code>ffmpeg</code> on stall (no progress for 10 s) and restarts immediately on the DRM-plane format change that fullscreen apps trigger. Cheaper and more reliable than building stall detection into a long-lived process.</p>
<h3>Struggles — the scene detection saga</h3>
<p>Scene detection is the part of the system that has fought back the hardest. The short version: <b>scene detection wants to live in the same ffmpeg process that does the decoding</b>, and every architecture change has had to relearn that.</p>
<h3>1. The "one behind" bug and the flush trick</h3>
<p>The original Python pipeline ran scene detection as a branch of the same <code>ffmpeg</code> that records: <code>select='gt(scene,T)'</code> → <code>showinfo</code> → MJPEG. The MJPEG encoder + muxer holds the selected frame in its internal buffer until <i>another</i> selected frame pushes it out — so the JPEG you receive at time <i>T</i> is actually the previous scene change, not the current one. Classic "one behind".</p>
<p>Workaround: a flush trick — select extra adjacent frames after each scene change so the real frame gets pushed through immediately (<code>SCENE_FLUSH_FRAMES</code>, see <code>cht/config.py</code>, used in <code>cht/stream/ffmpeg.py</code> :: <code>receive_record_relay_and_detect</code>). Worked reliably <b>only because everything was in one ffmpeg process</b>.</p>
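<p>For reference, the shape of that scene-detect branch as a filter string, with a placeholder threshold and without the flush logic (the real values and the flush expression live in <code>cht/config.py</code> and <code>cht/stream/ffmpeg.py</code>):</p>
<pre><code># Illustrative only: placeholder threshold, flush logic omitted.
SCENE_THRESHOLD = 0.4   # placeholder; the real value is configured in cht/config.py

scene_branch = [
    "-vf", f"select='gt(scene,{SCENE_THRESHOLD})',showinfo",  # pass scene changes, log frame info
    "-f", "image2pipe", "-c:v", "mjpeg", "pipe:1",            # emit each selected frame as a JPEG
]</code></pre>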
<h3>2. The Rust relay broke it</h3>
<p>When transport moved to Rust, the recorder split into two processes: Rust-side ffmpeg writes fMP4 + UDP, and a separate Python-side ffmpeg consumes raw H.264 from <code>scene.sock</code> for scene detection. Two new failure modes appeared:</p>
<ul>
<li><b>The flush trick stopped flushing.</b> The MJPEG encoder behaves differently in a standalone pipe-fed ffmpeg vs. as a branch of a multi-output process — adjacent extra frames no longer reliably push the previous selection through.</li>
<li><b>Decoder corruption from dropped packets.</b> The Rust relay uses <code>try_send</code> with a 100 ms socket write timeout (<code>media/server/src/session.rs</code>). On any backpressure the relay drops H.264 packets, which corrupts the downstream decoder until the next keyframe — and missed keyframes mean missed scene detections.</li>
</ul>
<h3>3. Three dead ends</h3>
<ul>
<li><b>fMP4-tip extraction.</b> Trigger on showinfo, then extract the frame from the just-written fragmented MP4. Fragments only finalize at keyframe boundaries (~2 s with GOP 30), so <code>ffprobe</code> reports stale duration and the extracted frame comes from the <i>previous</i> scene.</li>
<li><b>Single Rust ffmpeg with mixed outputs.</b> The clean fix would be one ffmpeg in Rust doing record (<code>-c:v copy</code>) + relay (<code>-c:v copy</code>) + scene detect (decode + filter). It doesn't work — ffmpeg won't mix <code>-c:v copy</code> outputs with <code>-filter_complex</code> on a pipe input under <code>-hwaccel cuda</code>.</li>
<li><b>Tighter retry intervals on the extractor.</b> Dropping retry from 1 s to 0.3 s made things <i>worse</i> — concurrent ffmpeg processes thrashing the GPU rather than completing.</li>
</ul>
<h3>4. Where it actually landed</h3>
<p>Current working approach (Rust mode): the relay-fed scene detector fires <code>showinfo</code> with a timestamp, then Python extracts the frame from the recording file at <i>that</i> timestamp, with a wall-clock offset computed from the session-dir name. Reliable frames; ~1 s latency per scene from fMP4 fragment lag plus the per-extract ffmpeg spawn (~0.5 s). It's the system limping along until the proper fix lands. See <code>def/10-scene-detect-to-rust.md</code> and <code>def/ISSUES.md</code> R1, R3 for the full record.</p>
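<p>A minimal sketch of that per-scene extraction, assuming the timestamp has already been mapped into the recording's timeline (the function name and flags are illustrative, not the actual implementation):</p>
<pre><code># Sketch only: pull one JPEG out of the recording at a given timestamp.
import subprocess

def extract_frame(recording: str, ts_seconds: float, out_jpg: str) -> None:
    subprocess.run(
        ["ffmpeg", "-loglevel", "error",
         "-ss", f"{ts_seconds:.3f}",     # seek before -i: fast keyframe seek
         "-i", recording,
         "-frames:v", "1", "-q:v", "2",  # one frame, high-quality JPEG
         out_jpg],
        check=True,
    )</code></pre>
<p>Each call spawns a fresh ffmpeg, which is where the ~0.5 s per-extract cost above comes from; the long-running-extractor item under Future work targets exactly that.</p>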
<div class="note"><b>Lesson.</b> The flush hack is a dead end in any pipe-fed context. Don't try to make it work over relay — move scene detection back into the same process that has the decoded frames. That's the only configuration that has ever been quiet.</div>
<h3>Future work</h3>
<h4 style="font-family:'JetBrains Mono',monospace;font-size:12px;color:#a6adc8;letter-spacing:1px;margin:20px 0 6px">Near term — scene detection as a 3rd output of the Rust server's ffmpeg</h4>
<p>Spec: <code>def/10-scene-detect-to-rust.md</code>. Add a third branch to the existing ffmpeg the Rust server already runs:</p>
<ul>
<li>Output 1: <code>-c:v copy</code> → fMP4 (unchanged)</li>
<li>Output 2: <code>-c:v copy</code> → UDP relay (unchanged)</li>
<li>Output 3: CUDA decode → <code>select='gt(scene,T)'</code> → <code>showinfo</code> → MJPEG out a second pipe / second Unix socket</li>
</ul>
<p>This restores the single-process invariant — scene detection sees the same decoded frames as the recording branch, the flush behavior matches, no relay packet drops. Removes <code>detect_scenes_from_pipe()</code> in <code>cht/stream/ffmpeg.py</code>, the stdin-feeder thread in <code>cht/stream/processor.py</code>, and <code>scene_relay_task</code> in <code>media/server/src/session.rs</code>.</p>
<p>Adjacent improvements once that lands:</p>
<ul>
<li><b>Long-running extractor.</b> Keep one ffmpeg open and pipe seek commands rather than spawning per frame — eliminates the ~0.5 s startup hit.</li>
<li><b>PTS on the wire.</b> Have the Rust server send recording PTS alongside scene events so Python doesn't have to guess a wall-clock offset from the session-dir name (which is also why the first scene frame currently lands 7–10 s late in Rust mode — <code>def/ISSUES.md</code> R1).</li>
</ul>
<h4 style="font-family:'JetBrains Mono',monospace;font-size:12px;color:#a6adc8;letter-spacing:1px;margin:20px 0 6px">End goal — in-process libav filter graph</h4>
<p>Spec: <code>def/09-media-transport.md</code>. Rust server decodes via NVDEC, runs the scene filter in-process via the libav API, and writes JPEGs directly. No ffmpeg subprocess, no pipe, no relay, no extraction — scene-to-frame latency drops to near zero. The 3rd-output step above is the bridge: same single-process discipline, easier to land, and a clean rewrite target once it works.</p>
<p>Other items deferred to that broader port:</p>
<ul>
<li><b>Frame buffer / fast scrub.</b> GPU ring buffer of the last N decoded frames exposed over shared memory to the Python scrub UI — replaces the mpv proxy MJPEG hack (see <code>def/07-scrub-perf-ceiling.md</code>).</li>
<li><b>Typed control protocol.</b> The current <code>WirePacket</code> framing covers session lifecycle but not parameter changes; spec 09 sketches a control-message channel for things like live <code>scene_threshold</code> updates and reconnect-with-PTS.</li>
<li><b>Audio in the live UDP relay.</b> Rust mode currently has no audio in the live monitor (<code>def/ISSUES.md</code> R2) because the server's ffmpeg only takes video on its stdin. Resolved naturally once the server's ffmpeg also receives the audio track.</li>
</ul>
</div>
</section>
</main>
</div>
<script>
function show(id) {
document.querySelectorAll('.graph-section').forEach(s => s.classList.remove('active'));
document.querySelectorAll('nav a').forEach(a => a.classList.remove('active'));
document.getElementById(id).classList.add('active');
var navLink = document.querySelector('nav a[onclick="show(\'' + id + '\')"]');
if (navLink) navLink.classList.add('active');
document.querySelector('.layout').classList.remove('nav-open');
}
function toggleNav() {
document.querySelector('.layout').classList.toggle('nav-open');
}
</script>
</body>
</html>