Compare commits

...

2 Commits

Author SHA1 Message Date
fdc34578a5 add readme 2026-05-07 13:08:50 -03:00
946234eb9e update docs 2026-05-06 11:51:43 -03:00
17 changed files with 1734 additions and 502 deletions

11
README.md Normal file
View File

@@ -0,0 +1,11 @@
# Mitus
Meeting stream viewer with an embedded AI agent. Captures screen and audio from a Wayland source machine, streams it over TCP to a receiver with GPU-accelerated decode, and runs a Claude Code agent that watches the feed autonomously — transcribing audio, extracting frames on scene changes, and acting on user-defined rules. The agent panel shows a live log of what it observes and the actions it takes, while a thumbnail grid gives a visual timeline of the session. Primary use case: staying present in meetings without manually feeding context to Claude. It also provides summarization after the fact.
## Docs
```
cd docs && python3 -m http.server 8000
```
Then open <http://localhost:8000>.

34
docs/README.md Normal file
View File

@@ -0,0 +1,34 @@
# Mitus — Documentation
## View
```
cd docs && python3 -m http.server 8000
```
Then open <http://localhost:8000> in a browser.
## Re-render diagrams
After editing any `graphs/*.dot` file, regenerate the SVGs (run from the `docs/` directory):
```
./render.sh
```
Requires `graphviz` (`sudo apt install graphviz`).
## Layout
```
docs/
├── index.html main page (overview, diagrams, walkthroughs)
├── viewer.html pan/zoom viewer for individual SVGs
├── render.sh regenerate all SVGs from .dot sources
└── graphs/
├── system.{dot,svg} top-level architecture
├── python_pipeline.{dot,svg} Python transport (default)
├── rust_client.{dot,svg} Rust client (sender)
├── rust_server.{dot,svg} Rust server (receiver)
└── crates.{dot,svg} Rust workspace crates
```

View File

@@ -1,4 +1,4 @@
// Cargo workspace crate dependency graph
// Mitus — Rust transport workspace (media/) crate dependency graph
digraph crates {
graph [fontname="monospace" bgcolor="#1e1e2e" pad="0.5"]
node [fontname="monospace" fontcolor="#cdd6f4" style=filled shape=box
@@ -21,7 +21,7 @@ digraph crates {
client [label="cht-client [sender, Wayland]\n─────────────────────────────\nbackends/subprocess.rs ffmpeg CLI + PulseAudio\n NUT demux → EncodedPacket\nbackends/mod.rs Backend enum\ncapture.rs KmsCapture (direct backend)\nencoder.rs VaapiEncoder + MediaType\npipeline.rs capture→encode thread\nmain.rs wait_for_server, transport,\n YYYYMMDD_HHMMSS session IDs"
fillcolor="#1e2d3e" color="#89b4fa"]
server [label="cht-server [receiver, mcrndeb]\n─────────────────────────────\nmain.rs TCP listener\n routes Video/Audio/Control\nsession.rs ffmpeg subprocess:\n fMP4 + UDP relay\n ADTS audio writer\n Scene relay (Unix socket)\n keyframe buffering"
server [label="cht-server [receiver, mcrn]\n─────────────────────────────\nmain.rs TCP listener\n routes Video/Audio/Control\nsession.rs ffmpeg subprocess:\n fMP4 + UDP relay\n ADTS audio writer\n Scene relay (Unix socket)\n keyframe buffering"
fillcolor="#1e2d3e" color="#89b4fa"]
// Deps

View File

@@ -170,7 +170,7 @@
<g id="node10" class="node">
<title>server</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="1119.22,-417.07 760.42,-417.07 760.42,-247.43 1119.22,-247.43 1119.22,-417.07"/>
<text xml:space="preserve" text-anchor="middle" x="939.82" y="-396.57" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht&#45;server &#160;[receiver, mcrndeb]</text>
<text xml:space="preserve" text-anchor="middle" x="939.82" y="-396.57" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht&#45;server &#160;[receiver, mcrn]</text>
<text xml:space="preserve" text-anchor="middle" x="939.82" y="-379.32" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────────────────────</text>
<text xml:space="preserve" text-anchor="middle" x="939.82" y="-362.07" font-family="monospace" font-size="14.00" fill="#cdd6f4">main.rs &#160;&#160;&#160;&#160;&#160;&#160;TCP listener</text>
<text xml:space="preserve" text-anchor="middle" x="939.82" y="-344.82" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;routes Video/Audio/Control</text>

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 18 KiB

View File

@@ -0,0 +1,86 @@
// Mitus — Python transport pipeline (default mode, --python or no flag)
// Sender bash script wraps ffmpeg CLI; receiver runs ffmpeg in-process via Python.
//
// Reading guide (derived from the node attributes below):
//   green-stroked cylinders    capture sources (drm, pulse)
//   blue-stroked nodes         ffmpeg processes and the TCP link
//   purple-stroked boxes       processing stages, transcriber and GUI
//   grey-stroked nodes         produced outputs (folders, UDP relay)
//   dashed edges               appear to mark control/polling relations
//                              (restart, poll, ffprobe) rather than streamed data
digraph python_pipeline {
// Graph-wide defaults: dark background, top-to-bottom ranks, polyline edges.
graph [fontname="monospace" bgcolor="#1e1e2e" rankdir=TB pad="0.6" splines=polyline]
// Default node style; individual nodes override fillcolor/color/shape as needed.
node [fontname="monospace" fontcolor="#cdd6f4" style=filled shape=box
fillcolor="#313244" color="#585b70" margin="0.25,0.12"]
// Default edge style; edge labels use the lighter fontcolor.
edge [color="#585b70" fontname="monospace" fontcolor="#a6adc8"]
// Hardware / OS
drm [label="/dev/dri/card0\n(KMS scanout)" shape=cylinder fillcolor="#1e3a2f" color="#a6e3a1"]
pulse [label="PulseAudio\n─────────────\nmonitor: default sink\nmic: webcam (C922)" shape=cylinder fillcolor="#1e3a2f" color="#a6e3a1"]
// The TCP link between sender and receiver, drawn as its own node.
net [label="TCP :4444\nmpegts" shape=parallelogram fillcolor="#1e2a3e" color="#89b4fa"]
// Sender side: the watchdog loop and the ffmpeg CLI it restarts.
subgraph cluster_sender {
label="Sender — sender/stream_av.sh" fontcolor="#a6adc8" color="#45475a" fontname="monospace"
watchdog [label="watchdog loop\n─────────────\nffmpeg restart on stall\n(total_size or frame stuck > 10s)\nimmediate restart on\nDRM plane format change"
fillcolor="#2d2038" color="#cba6f7"]
ffmpeg_send [label="ffmpeg CLI\n─────────────\nkmsgrab → hwmap=vaapi\nscale_vaapi 1920x1080 nv12\nh264_vaapi (qp=20, gop=30, no B-frames)\namix(monitor, mic) → aac 128k\nmpegts → TCP"
fillcolor="#1e2d3e" color="#89b4fa"]
}
// Receiver side: ffmpeg listener plus the parser that reads its stdout.
subgraph cluster_recorder {
label="StreamRecorder — cht/stream/recorder.py" fontcolor="#a6adc8" color="#45475a" fontname="monospace"
// NOTE(review): the label below says "2 outputs" but lists three items
// (fragmented MP4, UDP, stdout pipe) and this node has three outgoing
// edges — confirm the intended wording before re-rendering.
ffmpeg_recv [label="ffmpeg listener\n─────────────\nlisten=1 on TCP :4444\n→ 2 outputs:\n fragmented MP4 (recording_*.mp4)\n UDP :4445 (mpegts → mpv)\n stdout pipe (showinfo)"
fillcolor="#1e2d3e" color="#89b4fa"]
scene_pipe [label="scene-detect parser\n─────────────\nreads stdout pipe\nshowinfo → scene timestamps\nemits raw_frame(jpeg, ts)"
fillcolor="#2d2038" color="#cba6f7"]
}
// Post-processing stages that consume the recorder's frames and recording.
subgraph cluster_processor {
label="SessionProcessor — cht/stream/processor.py" fontcolor="#a6adc8" color="#45475a" fontname="monospace"
frame_writer [label="frame writer\n─────────────\nwrites JPEG to frames/\nappends to index.json\nfires on_new_frames(ts, path)"
fillcolor="#2d2038" color="#cba6f7"]
audio_extract [label="audio extractor\n─────────────\npolls fMP4 for new audio\nffmpeg → 16 kHz mono WAV\nchunks for transcription"
fillcolor="#2d2038" color="#cba6f7"]
tracker [label="RecordingTracker\n─────────────\nffprobe duration\nsums segments\nfeeds timeline UI"
fillcolor="#2d2038" color="#cba6f7"]
}
// Standalone nodes (declared outside any cluster).
transcriber [label="TranscriberEngine\n─────────────\ncht/transcriber/engine.py\nfaster-whisper (CUDA)\ngrouped segments → transcript.json"
fillcolor="#2d2038" color="#cba6f7"]
gui [label="Mitus GUI (GTK4)\n─────────────\nMonitor (mpv UDP)\nScrub bar · Frames · Transcript\nAgent input/output"
fillcolor="#2d2038" color="#cba6f7"]
// Outputs
fmp4 [label="stream/\nrecording_*.mp4" shape=folder fillcolor="#2a2a3e" color="#585b70"]
udp [label="UDP :4445\n→ mpv" shape=parallelogram fillcolor="#2a2a3e" color="#585b70"]
frames [label="frames/\nindex.json + *.jpg" shape=folder fillcolor="#2a2a3e" color="#585b70"]
audio [label="audio/\nchunk_*.wav" shape=folder fillcolor="#2a2a3e" color="#585b70"]
txt [label="transcript.json" shape=folder fillcolor="#2a2a3e" color="#585b70"]
// Flow — sender
drm -> ffmpeg_send [label="kmsgrab"]
pulse -> ffmpeg_send [label="-f pulse"]
watchdog -> ffmpeg_send [style=dashed label="restart"]
ffmpeg_send -> net
// Flow — recorder
net -> ffmpeg_recv [label="mpegts"]
ffmpeg_recv -> fmp4
ffmpeg_recv -> udp
ffmpeg_recv -> scene_pipe [label="stdout"]
udp -> gui [label="live\nmonitor"]
// Flow — processor
scene_pipe -> frame_writer [label="raw_frame"]
frame_writer -> frames
fmp4 -> audio_extract [label="poll" style=dashed]
audio_extract -> audio
audio -> transcriber [label="WAV"]
transcriber -> txt
fmp4 -> tracker [label="ffprobe" style=dashed]
tracker -> gui [label="duration"]
// Flow — GUI
frames -> gui
txt -> gui
}

View File

@@ -0,0 +1,308 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 14.1.2 (0)
-->
<!-- Title: python_pipeline Pages: 1 -->
<svg width="1067pt" height="1624pt"
viewBox="0.00 0.00 1067.00 1624.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(43.2 1580.56)">
<title>python_pipeline</title>
<polygon fill="#1e1e2e" stroke="none" points="-43.2,43.2 -43.2,-1580.56 1024.25,-1580.56 1024.25,43.2 -43.2,43.2"/>
<g id="clust1" class="cluster">
<title>cluster_sender</title>
<polygon fill="#1e1e2e" stroke="#45475a" points="159.75,-1176.05 159.75,-1529.36 533.75,-1529.36 533.75,-1176.05 159.75,-1176.05"/>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1512.06" font-family="monospace" font-size="14.00" fill="#a6adc8">Sender — sender/stream_av.sh</text>
</g>
<g id="clust2" class="cluster">
<title>cluster_recorder</title>
<polygon fill="#1e1e2e" stroke="#45475a" points="174.75,-664.16 174.75,-1000.24 512.75,-1000.24 512.75,-664.16 174.75,-664.16"/>
<text xml:space="preserve" text-anchor="middle" x="343.75" y="-982.94" font-family="monospace" font-size="14.00" fill="#a6adc8">StreamRecorder — cht/stream/recorder.py</text>
</g>
<g id="clust3" class="cluster">
<title>cluster_processor</title>
<polygon fill="#1e1e2e" stroke="#45475a" points="135.75,-484.12 135.75,-628.9 873.75,-628.9 873.75,-484.12 135.75,-484.12"/>
<text xml:space="preserve" text-anchor="middle" x="504.75" y="-611.6" font-family="monospace" font-size="14.00" fill="#a6adc8">SessionProcessor — cht/stream/processor.py</text>
</g>
<!-- drm -->
<g id="node1" class="node">
<title>drm</title>
<path fill="#1e3a2f" stroke="#a6e3a1" d="M151.5,-1464.85C151.5,-1468.42 117.55,-1471.32 75.75,-1471.32 33.95,-1471.32 0,-1468.42 0,-1464.85 0,-1464.85 0,-1406.59 0,-1406.59 0,-1403.02 33.95,-1400.12 75.75,-1400.12 117.55,-1400.12 151.5,-1403.02 151.5,-1406.59 151.5,-1406.59 151.5,-1464.85 151.5,-1464.85"/>
<path fill="none" stroke="#a6e3a1" d="M151.5,-1464.85C151.5,-1461.27 117.55,-1458.37 75.75,-1458.37 33.95,-1458.37 0,-1461.27 0,-1464.85"/>
<text xml:space="preserve" text-anchor="middle" x="75.75" y="-1439.67" font-family="monospace" font-size="14.00" fill="#cdd6f4">/dev/dri/card0</text>
<text xml:space="preserve" text-anchor="middle" x="75.75" y="-1422.42" font-family="monospace" font-size="14.00" fill="#cdd6f4">(KMS scanout)</text>
</g>
<!-- ffmpeg_send -->
<g id="node5" class="node">
<title>ffmpeg_send</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="525.62,-1322.08 167.88,-1322.08 167.88,-1184.05 525.62,-1184.05 525.62,-1322.08"/>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1300.14" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg CLI</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1282.89" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1265.64" font-family="monospace" font-size="14.00" fill="#cdd6f4">kmsgrab → hwmap=vaapi</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1248.39" font-family="monospace" font-size="14.00" fill="#cdd6f4">scale_vaapi 1920x1080 nv12</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1231.14" font-family="monospace" font-size="14.00" fill="#cdd6f4">h264_vaapi (qp=20, gop=30, no B&#45;frames)</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1213.89" font-family="monospace" font-size="14.00" fill="#cdd6f4">amix(monitor, mic) → aac 128k</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1196.64" font-family="monospace" font-size="14.00" fill="#cdd6f4">mpegts → TCP</text>
</g>
<!-- drm&#45;&gt;ffmpeg_send -->
<g id="edge1" class="edge">
<title>drm&#45;&gt;ffmpeg_send</title>
<path fill="none" stroke="#585b70" d="M122.29,-1400.17C139.89,-1387.11 155.75,-1375.33 155.75,-1375.33 155.75,-1375.33 189.94,-1353.62 229.39,-1328.57"/>
<polygon fill="#585b70" stroke="#585b70" points="231.13,-1331.62 237.69,-1323.31 227.37,-1325.71 231.13,-1331.62"/>
<text xml:space="preserve" text-anchor="middle" x="235.85" y="-1344.03" font-family="monospace" font-size="14.00" fill="#a6adc8">kmsgrab</text>
</g>
<!-- pulse -->
<g id="node2" class="node">
<title>pulse</title>
<path fill="#1e3a2f" stroke="#a6e3a1" d="M751.38,-1484.25C751.38,-1490.2 704.48,-1495.04 646.75,-1495.04 589.02,-1495.04 542.12,-1490.2 542.12,-1484.25 542.12,-1484.25 542.12,-1387.19 542.12,-1387.19 542.12,-1381.24 589.02,-1376.4 646.75,-1376.4 704.48,-1376.4 751.38,-1381.24 751.38,-1387.19 751.38,-1387.19 751.38,-1484.25 751.38,-1484.25"/>
<path fill="none" stroke="#a6e3a1" d="M751.38,-1484.25C751.38,-1478.3 704.48,-1473.47 646.75,-1473.47 589.02,-1473.47 542.12,-1478.3 542.12,-1484.25"/>
<text xml:space="preserve" text-anchor="middle" x="646.75" y="-1456.92" font-family="monospace" font-size="14.00" fill="#cdd6f4">PulseAudio</text>
<text xml:space="preserve" text-anchor="middle" x="646.75" y="-1439.67" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="646.75" y="-1422.42" font-family="monospace" font-size="14.00" fill="#cdd6f4">monitor: default sink</text>
<text xml:space="preserve" text-anchor="middle" x="646.75" y="-1405.17" font-family="monospace" font-size="14.00" fill="#cdd6f4">mic: webcam (C922)</text>
</g>
<!-- pulse&#45;&gt;ffmpeg_send -->
<g id="edge2" class="edge">
<title>pulse&#45;&gt;ffmpeg_send</title>
<path fill="none" stroke="#585b70" d="M555.11,-1379.54C528.25,-1363.36 498.43,-1345.41 469.95,-1328.25"/>
<polygon fill="#585b70" stroke="#585b70" points="472.01,-1325.41 461.64,-1323.25 468.4,-1331.41 472.01,-1325.41"/>
<text xml:space="preserve" text-anchor="middle" x="547.71" y="-1344.03" font-family="monospace" font-size="14.00" fill="#a6adc8">&#45;f pulse</text>
</g>
<!-- net -->
<g id="node3" class="node">
<title>net</title>
<polygon fill="#1e2a3e" stroke="#89b4fa" points="461.05,-1147.05 279.22,-1147.05 232.45,-1043.49 414.28,-1043.49 461.05,-1147.05"/>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1099.22" font-family="monospace" font-size="14.00" fill="#cdd6f4">TCP :4444</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1081.97" font-family="monospace" font-size="14.00" fill="#cdd6f4">mpegts</text>
</g>
<!-- ffmpeg_recv -->
<g id="node6" class="node">
<title>ffmpeg_recv</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="505,-966.99 188.5,-966.99 188.5,-828.96 505,-828.96 505,-966.99"/>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-945.05" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg listener</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-927.8" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-910.55" font-family="monospace" font-size="14.00" fill="#cdd6f4">listen=1 on TCP :4444</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-893.3" font-family="monospace" font-size="14.00" fill="#cdd6f4">→ 2 outputs:</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-876.05" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;fragmented MP4 (recording_*.mp4)</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-858.8" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;UDP :4445 (mpegts → mpv)</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-841.55" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;stdout pipe (showinfo)</text>
</g>
<!-- net&#45;&gt;ffmpeg_recv -->
<g id="edge5" class="edge">
<title>net&#45;&gt;ffmpeg_recv</title>
<path fill="none" stroke="#585b70" d="M346.75,-1043.21C346.75,-1023.48 346.75,-1000.44 346.75,-978.64"/>
<polygon fill="#585b70" stroke="#585b70" points="350.25,-978.88 346.75,-968.88 343.25,-978.88 350.25,-978.88"/>
<text xml:space="preserve" text-anchor="middle" x="371.5" y="-1012.19" font-family="monospace" font-size="14.00" fill="#a6adc8">mpegts</text>
</g>
<!-- watchdog -->
<g id="node4" class="node">
<title>watchdog</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="500.88,-1496.11 192.62,-1496.11 192.62,-1375.33 500.88,-1375.33 500.88,-1496.11"/>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1474.17" font-family="monospace" font-size="14.00" fill="#cdd6f4">watchdog loop</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1456.92" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1439.67" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg restart on stall</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1422.42" font-family="monospace" font-size="14.00" fill="#cdd6f4">(total_size or frame stuck &gt; 10s)</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1405.17" font-family="monospace" font-size="14.00" fill="#cdd6f4">immediate restart on</text>
<text xml:space="preserve" text-anchor="middle" x="346.75" y="-1387.92" font-family="monospace" font-size="14.00" fill="#cdd6f4">DRM plane format change</text>
</g>
<!-- watchdog&#45;&gt;ffmpeg_send -->
<g id="edge3" class="edge">
<title>watchdog&#45;&gt;ffmpeg_send</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M346.75,-1375.07C346.75,-1361.9 346.75,-1347.74 346.75,-1333.92"/>
<polygon fill="#585b70" stroke="#585b70" points="350.25,-1333.93 346.75,-1323.93 343.25,-1333.93 350.25,-1333.93"/>
<text xml:space="preserve" text-anchor="middle" x="375.62" y="-1344.03" font-family="monospace" font-size="14.00" fill="#a6adc8">restart</text>
</g>
<!-- ffmpeg_send&#45;&gt;net -->
<g id="edge4" class="edge">
<title>ffmpeg_send&#45;&gt;net</title>
<path fill="none" stroke="#585b70" d="M346.75,-1183.73C346.75,-1175.4 346.75,-1166.91 346.75,-1158.67"/>
<polygon fill="#585b70" stroke="#585b70" points="350.25,-1158.73 346.75,-1148.73 343.25,-1158.73 350.25,-1158.73"/>
</g>
<!-- scene_pipe -->
<g id="node7" class="node">
<title>scene_pipe</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="458.12,-775.69 199.38,-775.69 199.38,-672.16 458.12,-672.16 458.12,-775.69"/>
<text xml:space="preserve" text-anchor="middle" x="328.75" y="-753.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">scene&#45;detect parser</text>
<text xml:space="preserve" text-anchor="middle" x="328.75" y="-736.5" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="328.75" y="-719.25" font-family="monospace" font-size="14.00" fill="#cdd6f4">reads stdout pipe</text>
<text xml:space="preserve" text-anchor="middle" x="328.75" y="-702" font-family="monospace" font-size="14.00" fill="#cdd6f4">showinfo → scene timestamps</text>
<text xml:space="preserve" text-anchor="middle" x="328.75" y="-684.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">emits raw_frame(jpeg, ts)</text>
</g>
<!-- ffmpeg_recv&#45;&gt;scene_pipe -->
<g id="edge8" class="edge">
<title>ffmpeg_recv&#45;&gt;scene_pipe</title>
<path fill="none" stroke="#585b70" d="M339.58,-828.48C338.15,-814.79 336.65,-800.49 335.25,-787.03"/>
<polygon fill="#585b70" stroke="#585b70" points="338.77,-787.04 334.25,-777.46 331.81,-787.77 338.77,-787.04"/>
<text xml:space="preserve" text-anchor="middle" x="362.5" y="-797.66" font-family="monospace" font-size="14.00" fill="#a6adc8">stdout</text>
</g>
<!-- fmp4 -->
<g id="node13" class="node">
<title>fmp4</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="680.62,-749.82 677.62,-753.82 656.62,-753.82 653.62,-749.82 520.88,-749.82 520.88,-698.04 680.62,-698.04 680.62,-749.82"/>
<text xml:space="preserve" text-anchor="middle" x="600.75" y="-727.88" font-family="monospace" font-size="14.00" fill="#cdd6f4">stream/</text>
<text xml:space="preserve" text-anchor="middle" x="600.75" y="-710.63" font-family="monospace" font-size="14.00" fill="#cdd6f4">recording_*.mp4</text>
</g>
<!-- ffmpeg_recv&#45;&gt;fmp4 -->
<g id="edge6" class="edge">
<title>ffmpeg_recv&#45;&gt;fmp4</title>
<path fill="none" stroke="#585b70" d="M447.87,-828.48C484.57,-803.62 524.28,-776.73 554.03,-756.57"/>
<polygon fill="#585b70" stroke="#585b70" points="555.75,-759.63 562.07,-751.13 551.83,-753.84 555.75,-759.63"/>
</g>
<!-- udp -->
<g id="node14" class="node">
<title>udp</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="981.05,-775.71 799.22,-775.71 752.45,-672.15 934.28,-672.15 981.05,-775.71"/>
<text xml:space="preserve" text-anchor="middle" x="866.75" y="-727.88" font-family="monospace" font-size="14.00" fill="#cdd6f4">UDP :4445</text>
<text xml:space="preserve" text-anchor="middle" x="866.75" y="-710.63" font-family="monospace" font-size="14.00" fill="#cdd6f4">→ mpv</text>
</g>
<!-- ffmpeg_recv&#45;&gt;udp -->
<g id="edge7" class="edge">
<title>ffmpeg_recv&#45;&gt;udp</title>
<path fill="none" stroke="#585b70" d="M505.24,-844.54C594.36,-815.05 702.36,-779.32 776.98,-754.63"/>
<polygon fill="#585b70" stroke="#585b70" points="777.91,-758.01 786.31,-751.55 775.71,-751.36 777.91,-758.01"/>
</g>
<!-- frame_writer -->
<g id="node8" class="node">
<title>frame_writer</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="419.38,-595.65 144.12,-595.65 144.12,-492.12 419.38,-492.12 419.38,-595.65"/>
<text xml:space="preserve" text-anchor="middle" x="281.75" y="-573.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">frame writer</text>
<text xml:space="preserve" text-anchor="middle" x="281.75" y="-556.46" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="281.75" y="-539.21" font-family="monospace" font-size="14.00" fill="#cdd6f4">writes JPEG to frames/</text>
<text xml:space="preserve" text-anchor="middle" x="281.75" y="-521.96" font-family="monospace" font-size="14.00" fill="#cdd6f4">appends to index.json</text>
<text xml:space="preserve" text-anchor="middle" x="281.75" y="-504.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">fires on_new_frames(ts, path)</text>
</g>
<!-- scene_pipe&#45;&gt;frame_writer -->
<g id="edge10" class="edge">
<title>scene_pipe&#45;&gt;frame_writer</title>
<path fill="none" stroke="#585b70" d="M315.28,-671.91C309.92,-651.61 303.71,-628.06 298.09,-606.8"/>
<polygon fill="#585b70" stroke="#585b70" points="301.54,-606.15 295.61,-597.38 294.77,-607.94 301.54,-606.15"/>
<text xml:space="preserve" text-anchor="middle" x="346.58" y="-640.85" font-family="monospace" font-size="14.00" fill="#a6adc8">raw_frame</text>
</g>
<!-- frames -->
<g id="node15" class="node">
<title>frames</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="464,-192.31 461,-196.31 440,-196.31 437,-192.31 279.5,-192.31 279.5,-140.53 464,-140.53 464,-192.31"/>
<text xml:space="preserve" text-anchor="middle" x="371.75" y="-170.37" font-family="monospace" font-size="14.00" fill="#cdd6f4">frames/</text>
<text xml:space="preserve" text-anchor="middle" x="371.75" y="-153.12" font-family="monospace" font-size="14.00" fill="#cdd6f4">index.json + *.jpg</text>
</g>
<!-- frame_writer&#45;&gt;frames -->
<g id="edge11" class="edge">
<title>frame_writer&#45;&gt;frames</title>
<path fill="none" stroke="#585b70" d="M293.98,-491.86C312.47,-414.71 347.09,-270.31 363.15,-203.29"/>
<polygon fill="#585b70" stroke="#585b70" points="366.47,-204.47 365.39,-193.93 359.66,-202.84 366.47,-204.47"/>
</g>
<!-- audio_extract -->
<g id="node9" class="node">
<title>audio_extract</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="671.75,-595.65 437.75,-595.65 437.75,-492.12 671.75,-492.12 671.75,-595.65"/>
<text xml:space="preserve" text-anchor="middle" x="554.75" y="-573.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">audio extractor</text>
<text xml:space="preserve" text-anchor="middle" x="554.75" y="-556.46" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="554.75" y="-539.21" font-family="monospace" font-size="14.00" fill="#cdd6f4">polls fMP4 for new audio</text>
<text xml:space="preserve" text-anchor="middle" x="554.75" y="-521.96" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg → 16 kHz mono WAV</text>
<text xml:space="preserve" text-anchor="middle" x="554.75" y="-504.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">chunks for transcription</text>
</g>
<!-- audio -->
<g id="node16" class="node">
<title>audio</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="620.12,-455.12 617.12,-459.12 596.12,-459.12 593.12,-455.12 493.38,-455.12 493.38,-403.34 620.12,-403.34 620.12,-455.12"/>
<text xml:space="preserve" text-anchor="middle" x="556.75" y="-433.18" font-family="monospace" font-size="14.00" fill="#cdd6f4">audio/</text>
<text xml:space="preserve" text-anchor="middle" x="556.75" y="-415.93" font-family="monospace" font-size="14.00" fill="#cdd6f4">chunk_*.wav</text>
</g>
<!-- audio_extract&#45;&gt;audio -->
<g id="edge13" class="edge">
<title>audio_extract&#45;&gt;audio</title>
<path fill="none" stroke="#585b70" d="M555.66,-491.83C555.8,-483.48 555.96,-474.97 556.1,-467.05"/>
<polygon fill="#585b70" stroke="#585b70" points="559.59,-467.16 556.27,-457.1 552.6,-467.03 559.59,-467.16"/>
</g>
<!-- tracker -->
<g id="node10" class="node">
<title>tracker</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="865.88,-595.65 689.62,-595.65 689.62,-492.12 865.88,-492.12 865.88,-595.65"/>
<text xml:space="preserve" text-anchor="middle" x="777.75" y="-573.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">RecordingTracker</text>
<text xml:space="preserve" text-anchor="middle" x="777.75" y="-556.46" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="777.75" y="-539.21" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffprobe duration</text>
<text xml:space="preserve" text-anchor="middle" x="777.75" y="-521.96" font-family="monospace" font-size="14.00" fill="#cdd6f4">sums segments</text>
<text xml:space="preserve" text-anchor="middle" x="777.75" y="-504.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">feeds timeline UI</text>
</g>
<!-- gui -->
<g id="node12" class="node">
<title>gui</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="860.62,-103.53 568.88,-103.53 568.88,0 860.62,0 860.62,-103.53"/>
<text xml:space="preserve" text-anchor="middle" x="714.75" y="-81.59" font-family="monospace" font-size="14.00" fill="#cdd6f4">Mitus GUI (GTK4)</text>
<text xml:space="preserve" text-anchor="middle" x="714.75" y="-64.34" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="714.75" y="-47.09" font-family="monospace" font-size="14.00" fill="#cdd6f4">Monitor (mpv UDP)</text>
<text xml:space="preserve" text-anchor="middle" x="714.75" y="-29.84" font-family="monospace" font-size="14.00" fill="#cdd6f4">Scrub bar · Frames · Transcript</text>
<text xml:space="preserve" text-anchor="middle" x="714.75" y="-12.59" font-family="monospace" font-size="14.00" fill="#cdd6f4">Agent input/output</text>
</g>
<!-- tracker&#45;&gt;gui -->
<g id="edge17" class="edge">
<title>tracker&#45;&gt;gui</title>
<path fill="none" stroke="#585b70" d="M771.41,-491.83C767.7,-462.02 763.75,-430.23 763.75,-430.23 763.75,-430.23 763.75,-430.23 763.75,-165.42 763.75,-165.42 752.97,-140.64 741.53,-114.33"/>
<polygon fill="#585b70" stroke="#585b70" points="744.76,-112.98 737.56,-105.21 738.34,-115.77 744.76,-112.98"/>
<text xml:space="preserve" text-anchor="middle" x="796.75" y="-276.4" font-family="monospace" font-size="14.00" fill="#a6adc8">duration</text>
</g>
<!-- transcriber -->
<g id="node11" class="node">
<title>transcriber</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="726,-332.84 409.5,-332.84 409.5,-229.31 726,-229.31 726,-332.84"/>
<text xml:space="preserve" text-anchor="middle" x="567.75" y="-310.9" font-family="monospace" font-size="14.00" fill="#cdd6f4">TranscriberEngine</text>
<text xml:space="preserve" text-anchor="middle" x="567.75" y="-293.65" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="567.75" y="-276.4" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht/transcriber/engine.py</text>
<text xml:space="preserve" text-anchor="middle" x="567.75" y="-259.15" font-family="monospace" font-size="14.00" fill="#cdd6f4">faster&#45;whisper (CUDA)</text>
<text xml:space="preserve" text-anchor="middle" x="567.75" y="-241.9" font-family="monospace" font-size="14.00" fill="#cdd6f4">grouped segments → transcript.json</text>
</g>
<!-- txt -->
<g id="node17" class="node">
<title>txt</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="691.62,-184.42 688.62,-188.42 667.62,-188.42 664.62,-184.42 531.88,-184.42 531.88,-148.42 691.62,-148.42 691.62,-184.42"/>
<text xml:space="preserve" text-anchor="middle" x="611.75" y="-161.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">transcript.json</text>
</g>
<!-- transcriber&#45;&gt;txt -->
<g id="edge15" class="edge">
<title>transcriber&#45;&gt;txt</title>
<path fill="none" stroke="#585b70" d="M587.69,-229.02C592.18,-217.52 596.79,-205.72 600.76,-195.56"/>
<polygon fill="#585b70" stroke="#585b70" points="603.99,-196.9 604.37,-186.31 597.47,-194.35 603.99,-196.9"/>
</g>
<!-- fmp4&#45;&gt;audio_extract -->
<g id="edge12" class="edge">
<title>fmp4&#45;&gt;audio_extract</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M594.26,-697.8C588.16,-674.22 578.81,-638 570.73,-606.73"/>
<polygon fill="#585b70" stroke="#585b70" points="574.2,-606.17 568.31,-597.37 567.42,-607.92 574.2,-606.17"/>
<text xml:space="preserve" text-anchor="middle" x="598.37" y="-640.85" font-family="monospace" font-size="14.00" fill="#a6adc8">poll</text>
</g>
<!-- fmp4&#45;&gt;tracker -->
<g id="edge16" class="edge">
<title>fmp4&#45;&gt;tracker</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M625.73,-697.8C649.89,-673.5 687.4,-635.77 719.11,-603.87"/>
<polygon fill="#585b70" stroke="#585b70" points="721.4,-606.53 725.97,-596.97 716.43,-601.6 721.4,-606.53"/>
<text xml:space="preserve" text-anchor="middle" x="712.82" y="-640.85" font-family="monospace" font-size="14.00" fill="#a6adc8">ffprobe</text>
</g>
<!-- udp&#45;&gt;gui -->
<g id="edge9" class="edge">
<title>udp&#45;&gt;gui</title>
<path fill="none" stroke="#585b70" d="M874.49,-671.91C882.47,-619.3 893.75,-544.88 893.75,-544.88 893.75,-544.88 893.75,-544.88 893.75,-165.42 893.75,-165.42 850.06,-137.93 805.85,-110.1"/>
<polygon fill="#585b70" stroke="#585b70" points="807.74,-107.15 797.41,-104.79 804.01,-113.08 807.74,-107.15"/>
<text xml:space="preserve" text-anchor="middle" x="922.62" y="-372.04" font-family="monospace" font-size="14.00" fill="#a6adc8">live</text>
<text xml:space="preserve" text-anchor="middle" x="922.62" y="-354.79" font-family="monospace" font-size="14.00" fill="#a6adc8">monitor</text>
</g>
<!-- frames&#45;&gt;gui -->
<g id="edge18" class="edge">
<title>frames&#45;&gt;gui</title>
<path fill="none" stroke="#585b70" d="M448.73,-140.14C481.04,-129.52 519.87,-116.77 557.75,-104.33"/>
<polygon fill="#585b70" stroke="#585b70" points="558.56,-107.75 566.97,-101.3 556.37,-101.1 558.56,-107.75"/>
</g>
<!-- audio&#45;&gt;transcriber -->
<g id="edge14" class="edge">
<title>audio&#45;&gt;transcriber</title>
<path fill="none" stroke="#585b70" d="M558.64,-403.11C559.86,-386.92 561.5,-365.12 563.05,-344.48"/>
<polygon fill="#585b70" stroke="#585b70" points="566.53,-344.92 563.79,-334.69 559.55,-344.4 566.53,-344.92"/>
<text xml:space="preserve" text-anchor="middle" x="574.62" y="-363.41" font-family="monospace" font-size="14.00" fill="#a6adc8">WAV</text>
</g>
<!-- txt&#45;&gt;gui -->
<g id="edge19" class="edge">
<title>txt&#45;&gt;gui</title>
<path fill="none" stroke="#585b70" d="M627.39,-148.31C636.43,-138.42 648.4,-125.33 660.57,-112.02"/>
<polygon fill="#585b70" stroke="#585b70" points="662.96,-114.59 667.13,-104.85 657.8,-109.87 662.96,-114.59"/>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 24 KiB

View File

@@ -1,10 +1,10 @@
// Client pipeline data flow
// Mitus — Rust client (sender) pipeline — media/client/
// Sender machine (Wayland, VAAPI GPU)
digraph client_pipeline {
digraph rust_client {
graph [fontname="monospace" bgcolor="#1e1e2e" rankdir=TB pad="0.6" splines=polyline]
node [fontname="monospace" fontcolor="#cdd6f4" style=filled shape=box
fillcolor="#313244" color="#585b70" margin="0.25,0.12"]
edge [color="#585b70" fontname="monospace" fontcolor="#a6adc8" labelfontname="monospace"]
edge [color="#585b70" fontname="monospace" fontcolor="#a6adc8"]
// Hardware
drm [label="/dev/dri/card0\n(KMS scanout)" shape=cylinder fillcolor="#1e3a2f" color="#a6e3a1"]

View File

@@ -3,11 +3,11 @@
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 14.1.2 (0)
-->
<!-- Title: client_pipeline Pages: 1 -->
<!-- Title: rust_client Pages: 1 -->
<svg width="1291pt" height="1237pt"
viewBox="0.00 0.00 1291.00 1237.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(43.2 1194.19)">
<title>client_pipeline</title>
<title>rust_client</title>
<polygon fill="#1e1e2e" stroke="none" points="-43.2,43.2 -43.2,-1194.19 1248.2,-1194.19 1248.2,43.2 -43.2,43.2"/>
<g id="clust1" class="cluster">
<title>cluster_main</title>

Before

Width:  |  Height:  |  Size: 21 KiB

After

Width:  |  Height:  |  Size: 21 KiB

View File

@@ -1,6 +1,6 @@
// Server pipeline — current implementation
// Receiver machine (mcrndeb: X11, RTX 3080, NVDEC)
digraph server_pipeline {
// Mitus — Rust server (receiver) pipeline — media/server/
// Receiver machine (mcrn: X11, RTX 3080, NVDEC)
digraph rust_server {
graph [fontname="monospace" bgcolor="#1e1e2e" rankdir=TB pad="0.6" splines=polyline]
node [fontname="monospace" fontcolor="#cdd6f4" style=filled shape=box
fillcolor="#313244" color="#585b70" margin="0.25,0.12"]
@@ -68,9 +68,4 @@ digraph server_pipeline {
// Python reads files
aac_file -> audio_extract [label="reads" style=dashed]
active_session -> python [label="discovers\nsession dir" style=dashed]
// Known regression (2026-04-10)
regression [label="⚠ REGRESSED\n─────────────\nScene relay (separate pipe)\nbreaks 'one behind' flush.\ntry_send drops → decoder\ncorruption until keyframe.\n\nFix: move scene detection\ninto server ffmpeg as 3rd\noutput branch (10-scene-\ndetect-to-rust.md)"
shape=note fillcolor="#3d1e1e" color="#f38ba8" fontcolor="#f38ba8"]
scene_relay -> regression [style=dashed color="#f38ba8"]
}

263
docs/graphs/rust_server.svg Normal file
View File

@@ -0,0 +1,263 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 14.1.2 (0)
-->
<!-- Title: rust_server Pages: 1 -->
<svg width="1429pt" height="1141pt"
viewBox="0.00 0.00 1429.00 1141.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(43.2 1097.94)">
<title>rust_server</title>
<polygon fill="#1e1e2e" stroke="none" points="-43.2,43.2 -43.2,-1097.94 1385.33,-1097.94 1385.33,43.2 -43.2,43.2"/>
<g id="clust1" class="cluster">
<title>cluster_rust</title>
<polygon fill="#1e1e2e" stroke="#a6e3a1" points="175.12,-520.12 175.12,-907.93 1205.12,-907.93 1205.12,-520.12 175.12,-520.12"/>
<text xml:space="preserve" text-anchor="middle" x="690.12" y="-890.63" font-family="monospace" font-size="14.00" fill="#a6e3a1">cht&#45;server (Rust)</text>
</g>
<g id="clust2" class="cluster">
<title>cluster_python</title>
<polygon fill="#1e1e2e" stroke="#cba6f7" points="493.12,-114.28 493.12,-310.81 1321.12,-310.81 1321.12,-114.28 493.12,-114.28"/>
<text xml:space="preserve" text-anchor="middle" x="907.12" y="-293.51" font-family="monospace" font-size="14.00" fill="#cba6f7">Python (cht app)</text>
</g>
<!-- net -->
<g id="node1" class="node">
<title>net</title>
<polygon fill="#1e2a3e" stroke="#89b4fa" points="806.09,-1054.74 583.44,-1054.74 526.16,-951.18 748.81,-951.18 806.09,-1054.74"/>
<text xml:space="preserve" text-anchor="middle" x="666.12" y="-1006.91" font-family="monospace" font-size="14.00" fill="#cdd6f4">TCP :4447</text>
<text xml:space="preserve" text-anchor="middle" x="666.12" y="-989.66" font-family="monospace" font-size="14.00" fill="#cdd6f4">(WirePacket)</text>
</g>
<!-- listener -->
<g id="node3" class="node">
<title>listener</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="807.88,-874.68 524.38,-874.68 524.38,-719.4 807.88,-719.4 807.88,-874.68"/>
<text xml:space="preserve" text-anchor="middle" x="666.12" y="-852.74" font-family="monospace" font-size="14.00" fill="#cdd6f4">Listener</text>
<text xml:space="preserve" text-anchor="middle" x="666.12" y="-835.49" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="666.12" y="-818.24" font-family="monospace" font-size="14.00" fill="#cdd6f4">TCP accept</text>
<text xml:space="preserve" text-anchor="middle" x="666.12" y="-800.99" font-family="monospace" font-size="14.00" fill="#cdd6f4">reads WirePacket</text>
<text xml:space="preserve" text-anchor="middle" x="666.12" y="-783.74" font-family="monospace" font-size="14.00" fill="#cdd6f4">routes by type:</text>
<text xml:space="preserve" text-anchor="middle" x="666.12" y="-766.49" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;Video → ffmpeg + scene relay</text>
<text xml:space="preserve" text-anchor="middle" x="666.12" y="-749.24" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;Audio → ADTS file</text>
<text xml:space="preserve" text-anchor="middle" x="666.12" y="-731.99" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;Control → session lifecycle</text>
</g>
<!-- net&#45;&gt;listener -->
<g id="edge1" class="edge">
<title>net&#45;&gt;listener</title>
<path fill="none" stroke="#585b70" d="M666.12,-950.77C666.12,-931.28 666.12,-908.46 666.12,-886.5"/>
<polygon fill="#585b70" stroke="#585b70" points="669.63,-886.63 666.13,-876.63 662.63,-886.63 669.63,-886.63"/>
<text xml:space="preserve" text-anchor="middle" x="707.38" y="-919.88" font-family="monospace" font-size="14.00" fill="#a6adc8">WirePacket</text>
</g>
<!-- python -->
<g id="node2" class="node">
<title>python</title>
<polygon fill="#2a2a3e" stroke="#cba6f7" points="1155.98,-457.62 960.55,-457.62 910.27,-354.06 1105.7,-354.06 1155.98,-457.62"/>
<text xml:space="preserve" text-anchor="middle" x="1033.12" y="-409.79" font-family="monospace" font-size="14.00" fill="#cdd6f4">Python GUI</text>
<text xml:space="preserve" text-anchor="middle" x="1033.12" y="-392.54" font-family="monospace" font-size="14.00" fill="#cdd6f4">(cht app)</text>
</g>
<!-- ffmpeg_rec -->
<g id="node4" class="node">
<title>ffmpeg_rec</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="425.25,-640.27 183,-640.27 183,-536.75 425.25,-536.75 425.25,-640.27"/>
<text xml:space="preserve" text-anchor="middle" x="304.12" y="-618.34" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg subprocess</text>
<text xml:space="preserve" text-anchor="middle" x="304.12" y="-601.09" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="304.12" y="-583.84" font-family="monospace" font-size="14.00" fill="#cdd6f4">H.264 pipe:0 → 2 outputs:</text>
<text xml:space="preserve" text-anchor="middle" x="304.12" y="-566.59" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;1. fMP4 (frag_keyframe)</text>
<text xml:space="preserve" text-anchor="middle" x="304.12" y="-549.34" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;2. UDP :4445 (mpegts)</text>
</g>
<!-- listener&#45;&gt;ffmpeg_rec -->
<g id="edge2" class="edge">
<title>listener&#45;&gt;ffmpeg_rec</title>
<path fill="none" stroke="#585b70" d="M531.07,-718.99C488.84,-694.9 443.04,-668.76 403.71,-646.33"/>
<polygon fill="#585b70" stroke="#585b70" points="405.59,-643.37 395.17,-641.45 402.12,-649.45 405.59,-643.37"/>
<text xml:space="preserve" text-anchor="middle" x="541.35" y="-679.48" font-family="monospace" font-size="14.00" fill="#a6adc8">H.264 video</text>
</g>
<!-- scene_relay -->
<g id="node5" class="node">
<title>scene_relay</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="693.38,-648.9 442.88,-648.9 442.88,-528.12 693.38,-528.12 693.38,-648.9"/>
<text xml:space="preserve" text-anchor="middle" x="568.12" y="-626.96" font-family="monospace" font-size="14.00" fill="#cdd6f4">Scene Relay</text>
<text xml:space="preserve" text-anchor="middle" x="568.12" y="-609.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="568.12" y="-592.46" font-family="monospace" font-size="14.00" fill="#cdd6f4">Unix socket (scene.sock)</text>
<text xml:space="preserve" text-anchor="middle" x="568.12" y="-575.21" font-family="monospace" font-size="14.00" fill="#cdd6f4">buffers latest keyframe</text>
<text xml:space="preserve" text-anchor="middle" x="568.12" y="-557.96" font-family="monospace" font-size="14.00" fill="#cdd6f4">best&#45;effort: drops if slow</text>
<text xml:space="preserve" text-anchor="middle" x="568.12" y="-540.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">100ms write timeout</text>
</g>
<!-- listener&#45;&gt;scene_relay -->
<g id="edge3" class="edge">
<title>listener&#45;&gt;scene_relay</title>
<path fill="none" stroke="#585b70" d="M629.63,-719.14C620.36,-699.6 610.45,-678.71 601.35,-659.53"/>
<polygon fill="#585b70" stroke="#585b70" points="604.63,-658.27 597.18,-650.74 598.3,-661.28 604.63,-658.27"/>
<text xml:space="preserve" text-anchor="middle" x="681.94" y="-688.1" font-family="monospace" font-size="14.00" fill="#a6adc8">H.264 copy</text>
<text xml:space="preserve" text-anchor="middle" x="681.94" y="-670.85" font-family="monospace" font-size="14.00" fill="#a6adc8">+ keyframe flag</text>
</g>
<!-- audio_writer -->
<g id="node6" class="node">
<title>audio_writer</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="920.75,-631.65 711.5,-631.65 711.5,-545.37 920.75,-545.37 920.75,-631.65"/>
<text xml:space="preserve" text-anchor="middle" x="816.12" y="-609.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">Audio Writer</text>
<text xml:space="preserve" text-anchor="middle" x="816.12" y="-592.46" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="816.12" y="-575.21" font-family="monospace" font-size="14.00" fill="#cdd6f4">ADTS header + raw AAC</text>
<text xml:space="preserve" text-anchor="middle" x="816.12" y="-557.96" font-family="monospace" font-size="14.00" fill="#cdd6f4">→ stream/audio.aac</text>
</g>
<!-- listener&#45;&gt;audio_writer -->
<g id="edge4" class="edge">
<title>listener&#45;&gt;audio_writer</title>
<path fill="none" stroke="#585b70" d="M732.7,-719.2C741.82,-708.68 748.12,-701.4 748.12,-701.4 748.12,-701.4 766.59,-671.01 784.28,-641.91"/>
<polygon fill="#585b70" stroke="#585b70" points="787.23,-643.79 789.44,-633.42 781.25,-640.15 787.23,-643.79"/>
<text xml:space="preserve" text-anchor="middle" x="805.34" y="-679.48" font-family="monospace" font-size="14.00" fill="#a6adc8">AAC audio</text>
</g>
<!-- active_session -->
<g id="node7" class="node">
<title>active_session</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="1191.5,-640.27 938.75,-640.27 938.75,-536.75 1197.5,-536.75 1197.5,-634.27 1191.5,-640.27"/>
<polyline fill="none" stroke="#585b70" points="1191.5,-640.27 1191.5,-634.27"/>
<polyline fill="none" stroke="#585b70" points="1197.5,-634.27 1191.5,-634.27"/>
<text xml:space="preserve" text-anchor="middle" x="1068.12" y="-618.34" font-family="monospace" font-size="14.00" fill="#cdd6f4">active&#45;session</text>
<text xml:space="preserve" text-anchor="middle" x="1068.12" y="-601.09" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1068.12" y="-583.84" font-family="monospace" font-size="14.00" fill="#cdd6f4">file at data/active&#45;session</text>
<text xml:space="preserve" text-anchor="middle" x="1068.12" y="-566.59" font-family="monospace" font-size="14.00" fill="#cdd6f4">Python polls to discover</text>
<text xml:space="preserve" text-anchor="middle" x="1068.12" y="-549.34" font-family="monospace" font-size="14.00" fill="#cdd6f4">session dir</text>
</g>
<!-- listener&#45;&gt;active_session -->
<g id="edge5" class="edge">
<title>listener&#45;&gt;active_session</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M808.14,-723.08C857.5,-697.72 912.08,-669.68 958.43,-645.87"/>
<polygon fill="#585b70" stroke="#585b70" points="959.85,-649.07 967.14,-641.39 956.65,-642.85 959.85,-649.07"/>
<text xml:space="preserve" text-anchor="middle" x="976.83" y="-679.48" font-family="monospace" font-size="14.00" fill="#a6adc8">on SessionStart</text>
</g>
<!-- fmp4 -->
<g id="node11" class="node">
<title>fmp4</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="176.25,-440.36 173.25,-444.36 152.25,-444.36 149.25,-440.36 0,-440.36 0,-371.33 176.25,-371.33 176.25,-440.36"/>
<text xml:space="preserve" text-anchor="middle" x="88.12" y="-418.42" font-family="monospace" font-size="14.00" fill="#cdd6f4">stream/</text>
<text xml:space="preserve" text-anchor="middle" x="88.12" y="-401.17" font-family="monospace" font-size="14.00" fill="#cdd6f4">recording_000.mp4</text>
<text xml:space="preserve" text-anchor="middle" x="88.12" y="-383.92" font-family="monospace" font-size="14.00" fill="#cdd6f4">(fragmented MP4)</text>
</g>
<!-- ffmpeg_rec&#45;&gt;fmp4 -->
<g id="edge7" class="edge">
<title>ffmpeg_rec&#45;&gt;fmp4</title>
<path fill="none" stroke="#585b70" d="M243.09,-536.46C209.57,-508.42 168.54,-474.11 137.13,-447.83"/>
<polygon fill="#585b70" stroke="#585b70" points="139.61,-445.34 129.7,-441.61 135.12,-450.71 139.61,-445.34"/>
<text xml:space="preserve" text-anchor="middle" x="225.56" y="-488.19" font-family="monospace" font-size="14.00" fill="#a6adc8">copy</text>
</g>
<!-- udp_live -->
<g id="node12" class="node">
<title>udp_live</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="508.19,-457.62 258.33,-457.62 194.06,-354.06 443.92,-354.06 508.19,-457.62"/>
<text xml:space="preserve" text-anchor="middle" x="351.12" y="-409.79" font-family="monospace" font-size="14.00" fill="#cdd6f4">UDP :4445</text>
<text xml:space="preserve" text-anchor="middle" x="351.12" y="-392.54" font-family="monospace" font-size="14.00" fill="#cdd6f4">(mpegts → mpv)</text>
</g>
<!-- ffmpeg_rec&#45;&gt;udp_live -->
<g id="edge8" class="edge">
<title>ffmpeg_rec&#45;&gt;udp_live</title>
<path fill="none" stroke="#585b70" d="M317.34,-536.7C322.82,-515.63 329.24,-490.99 334.99,-468.86"/>
<polygon fill="#585b70" stroke="#585b70" points="338.33,-469.92 337.46,-459.36 331.56,-468.15 338.33,-469.92"/>
<text xml:space="preserve" text-anchor="middle" x="349.72" y="-488.19" font-family="monospace" font-size="14.00" fill="#a6adc8">copy</text>
</g>
<!-- scene_ffmpeg -->
<g id="node8" class="node">
<title>scene_ffmpeg</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="743.25,-277.56 501,-277.56 501,-122.28 743.25,-122.28 743.25,-277.56"/>
<text xml:space="preserve" text-anchor="middle" x="622.12" y="-255.62" font-family="monospace" font-size="14.00" fill="#cdd6f4">Scene Detector</text>
<text xml:space="preserve" text-anchor="middle" x="622.12" y="-238.37" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="622.12" y="-221.12" font-family="monospace" font-size="14.00" fill="#cdd6f4">connects to scene.sock</text>
<text xml:space="preserve" text-anchor="middle" x="622.12" y="-203.87" font-family="monospace" font-size="14.00" fill="#cdd6f4">pipes H.264 → ffmpeg:</text>
<text xml:space="preserve" text-anchor="middle" x="622.12" y="-186.62" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;CUDA decode</text>
<text xml:space="preserve" text-anchor="middle" x="622.12" y="-169.37" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;select=gt(scene,thresh)</text>
<text xml:space="preserve" text-anchor="middle" x="622.12" y="-152.12" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;showinfo → timestamps</text>
<text xml:space="preserve" text-anchor="middle" x="622.12" y="-134.87" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;MJPEG → JPEG frames</text>
</g>
<!-- scene_relay&#45;&gt;scene_ffmpeg -->
<g id="edge6" class="edge">
<title>scene_relay&#45;&gt;scene_ffmpeg</title>
<path fill="none" stroke="#a6e3a1" d="M576.44,-527.95C585.36,-464.09 599.62,-362.03 609.84,-288.9"/>
<polygon fill="#a6e3a1" stroke="#a6e3a1" points="613.25,-289.77 611.17,-279.38 606.32,-288.8 613.25,-289.77"/>
<text xml:space="preserve" text-anchor="middle" x="654.12" y="-409.79" font-family="monospace" font-size="14.00" fill="#a6adc8">raw H.264</text>
<text xml:space="preserve" text-anchor="middle" x="654.12" y="-392.54" font-family="monospace" font-size="14.00" fill="#a6adc8">(Unix socket)</text>
</g>
<!-- aac_file -->
<g id="node13" class="node">
<title>aac_file</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="891.88,-440.36 888.88,-444.36 867.88,-444.36 864.88,-440.36 740.38,-440.36 740.38,-371.33 891.88,-371.33 891.88,-440.36"/>
<text xml:space="preserve" text-anchor="middle" x="816.12" y="-418.42" font-family="monospace" font-size="14.00" fill="#cdd6f4">stream/</text>
<text xml:space="preserve" text-anchor="middle" x="816.12" y="-401.17" font-family="monospace" font-size="14.00" fill="#cdd6f4">audio.aac</text>
<text xml:space="preserve" text-anchor="middle" x="816.12" y="-383.92" font-family="monospace" font-size="14.00" fill="#cdd6f4">(ADTS&#45;wrapped)</text>
</g>
<!-- audio_writer&#45;&gt;aac_file -->
<g id="edge9" class="edge">
<title>audio_writer&#45;&gt;aac_file</title>
<path fill="none" stroke="#585b70" d="M816.12,-545.15C816.12,-517.19 816.12,-480.6 816.12,-452.07"/>
<polygon fill="#585b70" stroke="#585b70" points="819.63,-452.3 816.13,-442.3 812.63,-452.3 819.63,-452.3"/>
</g>
<!-- active_session&#45;&gt;python -->
<g id="edge14" class="edge">
<title>active_session&#45;&gt;python</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M1058.28,-536.7C1054.22,-515.73 1049.47,-491.21 1045.2,-469.16"/>
<polygon fill="#585b70" stroke="#585b70" points="1048.64,-468.53 1043.3,-459.38 1041.77,-469.86 1048.64,-468.53"/>
<text xml:space="preserve" text-anchor="middle" x="1098.1" y="-496.82" font-family="monospace" font-size="14.00" fill="#a6adc8">discovers</text>
<text xml:space="preserve" text-anchor="middle" x="1098.1" y="-479.57" font-family="monospace" font-size="14.00" fill="#a6adc8">session dir</text>
</g>
<!-- frames -->
<g id="node14" class="node">
<title>frames</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="714.38,-51.78 711.38,-55.78 690.38,-55.78 687.38,-51.78 529.88,-51.78 529.88,0 714.38,0 714.38,-51.78"/>
<text xml:space="preserve" text-anchor="middle" x="622.12" y="-29.84" font-family="monospace" font-size="14.00" fill="#cdd6f4">frames/</text>
<text xml:space="preserve" text-anchor="middle" x="622.12" y="-12.59" font-family="monospace" font-size="14.00" fill="#cdd6f4">index.json + *.jpg</text>
</g>
<!-- scene_ffmpeg&#45;&gt;frames -->
<g id="edge10" class="edge">
<title>scene_ffmpeg&#45;&gt;frames</title>
<path fill="none" stroke="#585b70" d="M622.12,-121.96C622.12,-101.46 622.12,-80.27 622.12,-63.11"/>
<polygon fill="#585b70" stroke="#585b70" points="625.63,-63.45 622.13,-53.45 618.63,-63.45 625.63,-63.45"/>
<text xml:space="preserve" text-anchor="middle" x="671.62" y="-90.98" font-family="monospace" font-size="14.00" fill="#a6adc8">JPEG on</text>
<text xml:space="preserve" text-anchor="middle" x="671.62" y="-73.73" font-family="monospace" font-size="14.00" fill="#a6adc8">scene change</text>
</g>
<!-- audio_extract -->
<g id="node9" class="node">
<title>audio_extract</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="995.12,-251.69 761.12,-251.69 761.12,-148.16 995.12,-148.16 995.12,-251.69"/>
<text xml:space="preserve" text-anchor="middle" x="878.12" y="-229.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">Audio Extractor</text>
<text xml:space="preserve" text-anchor="middle" x="878.12" y="-212.5" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="878.12" y="-195.25" font-family="monospace" font-size="14.00" fill="#cdd6f4">reads audio.aac</text>
<text xml:space="preserve" text-anchor="middle" x="878.12" y="-178" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg → 16kHz mono WAV</text>
<text xml:space="preserve" text-anchor="middle" x="878.12" y="-160.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">chunks + transcript WAVs</text>
</g>
<!-- audio_dir -->
<g id="node15" class="node">
<title>audio_dir</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="1342.12,-440.36 1339.12,-444.36 1318.12,-444.36 1315.12,-440.36 1174.12,-440.36 1174.12,-371.33 1342.12,-371.33 1342.12,-440.36"/>
<text xml:space="preserve" text-anchor="middle" x="1258.12" y="-418.42" font-family="monospace" font-size="14.00" fill="#cdd6f4">audio/</text>
<text xml:space="preserve" text-anchor="middle" x="1258.12" y="-401.17" font-family="monospace" font-size="14.00" fill="#cdd6f4">chunk_*.wav</text>
<text xml:space="preserve" text-anchor="middle" x="1258.12" y="-383.92" font-family="monospace" font-size="14.00" fill="#cdd6f4">transcript_*.wav</text>
</g>
<!-- audio_extract&#45;&gt;audio_dir -->
<g id="edge11" class="edge">
<title>audio_extract&#45;&gt;audio_dir</title>
<path fill="none" stroke="#585b70" d="M936.7,-252C969.6,-280.7 1004.12,-310.81 1004.12,-310.81 1004.12,-310.81 1165.12,-354.06 1165.12,-354.06 1165.12,-354.06 1173.87,-358.83 1185.99,-365.45"/>
<polygon fill="#585b70" stroke="#585b70" points="1184.05,-368.38 1194.51,-370.1 1187.41,-362.24 1184.05,-368.38"/>
</g>
<!-- transcriber -->
<g id="node10" class="node">
<title>transcriber</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="1313.12,-251.69 1013.12,-251.69 1013.12,-148.16 1313.12,-148.16 1313.12,-251.69"/>
<text xml:space="preserve" text-anchor="middle" x="1163.12" y="-229.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">Transcriber</text>
<text xml:space="preserve" text-anchor="middle" x="1163.12" y="-212.5" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1163.12" y="-195.25" font-family="monospace" font-size="14.00" fill="#cdd6f4">faster&#45;whisper (CUDA)</text>
<text xml:space="preserve" text-anchor="middle" x="1163.12" y="-178" font-family="monospace" font-size="14.00" fill="#cdd6f4">segment grouping</text>
<text xml:space="preserve" text-anchor="middle" x="1163.12" y="-160.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">slider: chunk size + lines/group</text>
</g>
<!-- aac_file&#45;&gt;audio_extract -->
<g id="edge13" class="edge">
<title>aac_file&#45;&gt;audio_extract</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M826.36,-371.17C835.31,-341.74 848.52,-298.28 859.36,-262.63"/>
<polygon fill="#585b70" stroke="#585b70" points="862.64,-263.88 862.2,-253.3 855.94,-261.85 862.64,-263.88"/>
<text xml:space="preserve" text-anchor="middle" x="862.2" y="-322.76" font-family="monospace" font-size="14.00" fill="#a6adc8">reads</text>
</g>
<!-- audio_dir&#45;&gt;transcriber -->
<g id="edge12" class="edge">
<title>audio_dir&#45;&gt;transcriber</title>
<path fill="none" stroke="#585b70" d="M1242.44,-371.17C1228.67,-341.62 1208.31,-297.91 1191.66,-262.17"/>
<polygon fill="#585b70" stroke="#585b70" points="1194.89,-260.81 1187.49,-253.22 1188.54,-263.76 1194.89,-260.81"/>
<text xml:space="preserve" text-anchor="middle" x="1265.93" y="-322.76" font-family="monospace" font-size="14.00" fill="#a6adc8">WAV chunks</text>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 21 KiB

77
docs/graphs/system.dot Normal file
View File

@@ -0,0 +1,77 @@
// Mitus — top-level architecture
// Sender (Wayland, VAAPI) → network → Receiver (X11, NVDEC/NVENC) → Mitus GUI app
// Two transport modes share the same recording layout and same GUI.
// Top-level architecture graph. Two capture/transport paths (Python and Rust)
// run sender -> network -> receiver and converge on one shared session store,
// processor, transcriber, GUI and agent.
digraph system {
// Graph-wide defaults: left-to-right rank direction, polyline edges,
// explicit node/rank spacing.
graph [fontname="monospace" bgcolor="#1e1e2e" rankdir=LR pad="0.6" splines=polyline nodesep=0.5 ranksep=0.8]
// Default node style. Individual nodes override fillcolor/color to mark what
// kind of component they are; the legend cluster at the bottom lists the kinds.
node [fontname="monospace" fontcolor="#cdd6f4" style=filled shape=box
fillcolor="#313244" color="#585b70" margin="0.25,0.14"]
// Default edge style; the per-transport flows below override the edge color.
edge [color="#585b70" fontname="monospace" fontcolor="#a6adc8"]
// Capture side: one node per transport (Python-path ffmpeg script, Rust cht-client).
subgraph cluster_sender {
label="Sender machine — Wayland, VAAPI GPU" fontcolor="#a6adc8" color="#45475a" fontname="monospace"
capture_py [label="kmsgrab + PulseAudio\n─────────────\nsender/stream_av.sh\nffmpeg CLI · h264_vaapi · AAC\nmpegts over TCP" fillcolor="#2d2038" color="#cba6f7"]
capture_rs [label="cht-client (Rust)\n─────────────\nmedia/client/\nffmpeg subprocess (subprocess backend)\nNUT demux → mpsc → WirePacket TCP" fillcolor="#1e2d3e" color="#89b4fa"]
}
// Wire level: each transport has its own TCP port and framing
// (:4444 mpegts for the Python path, :4447 WirePacket for the Rust path).
subgraph cluster_net {
label="Network" fontcolor="#a6adc8" color="#45475a" fontname="monospace"
net_py [label="TCP :4444\nmpegts" shape=parallelogram fillcolor="#1e2a3e" color="#89b4fa"]
net_rs [label="TCP :4447\nWirePacket framing" shape=parallelogram fillcolor="#1e2a3e" color="#89b4fa"]
}
// Receiver side: one recorder per transport, plus the components both
// transports share (processor, transcriber, GUI, agent, session store).
subgraph cluster_receiver {
label="Receiver (mcrn) — X11, NVENC/NVDEC GPU" fontcolor="#a6adc8" color="#45475a" fontname="monospace"
recorder_py [label="StreamRecorder (Python)\n─────────────\ncht/stream/recorder.py\nffmpeg listener · TCP receive\nfMP4 writer · UDP relay\nstdout-pipe scene detect"
fillcolor="#2d2038" color="#cba6f7"]
recorder_rs [label="cht-server (Rust)\n─────────────\nmedia/server/\nWirePacket router\nfMP4 + UDP relay (ffmpeg)\nADTS audio writer\nUnix-socket scene relay"
fillcolor="#1e2d3e" color="#89b4fa"]
processor [label="SessionProcessor (Python)\n─────────────\ncht/stream/processor.py\nfMP4 → audio.wav (ffmpeg)\nchunked WAVs for transcribe\n[Rust mode: scene detect via\nUnix socket → ffmpeg pipe]"
fillcolor="#2d2038" color="#cba6f7"]
transcriber [label="Transcriber\n─────────────\ncht/transcriber/engine.py\nfaster-whisper · CUDA\nsegment grouping"
fillcolor="#2d2038" color="#cba6f7"]
gui [label="Mitus GUI (GTK4 + libadwaita)\n─────────────\ncht/window.py · cht/ui/*\nMonitor (mpv UDP) · Scrub bar\nFrames panel · Transcript panel\nAgent input/output"
fillcolor="#2d2038" color="#cba6f7"]
agent [label="Agent runner\n─────────────\ncht/agent/*\nClaude SDK · OpenAI/Groq\n@F frame refs · @T transcript refs"
fillcolor="#2d2038" color="#cba6f7"]
// Per-session directory on disk. Both recorders, the transcriber and the
// agent have edges into it; the processor and the GUI have edges out of it.
store [label="data/<session_id>/\n─────────────\nstream/recording_*.mp4\nstream/audio.aac (Rust mode)\nframes/*.jpg + index.json\naudio/chunk_*.wav\ntranscript.json · thread.json"
shape=folder fillcolor="#2a2a3e" color="#585b70"]
}
// Python transport flow
// (edges use #cba6f7, the same color as the Python node borders)
capture_py -> net_py [color="#cba6f7"]
net_py -> recorder_py [color="#cba6f7"]
recorder_py -> store [color="#cba6f7"]
recorder_py -> processor [label="raw scene\nframes" color="#cba6f7"]
// Rust transport flow
// (edges use #89b4fa, the same color as the Rust node borders; the scene.sock
// edge to the processor is dashed and colored #a6e3a1 instead)
capture_rs -> net_rs [color="#89b4fa"]
net_rs -> recorder_rs [color="#89b4fa"]
recorder_rs -> store [color="#89b4fa"]
recorder_rs -> processor [label="scene.sock\n(H.264)" style=dashed color="#a6e3a1"]
// Shared downstream
// (these edges keep the default edge color and apply to both transports)
store -> processor [style=dashed]
processor -> transcriber [label="WAV chunks"]
transcriber -> store [label="transcript.json"]
store -> gui [label="files + watchers"]
gui -> agent [label="@-mentions"]
agent -> store [label="thread.json" style=dashed]
// Legend
// (sample nodes only, one per node style used above; they have no edges)
subgraph cluster_legend {
label="Legend" fontcolor="#a6adc8" color="#585b70" fontname="monospace"
l_py [label="Python" fillcolor="#2d2038" color="#cba6f7"]
l_rs [label="Rust" fillcolor="#1e2d3e" color="#89b4fa"]
l_io [label="I/O · network" shape=parallelogram fillcolor="#1e2a3e" color="#89b4fa"]
l_fs [label="filesystem" shape=folder fillcolor="#2a2a3e" color="#585b70"]
}
}

262
docs/graphs/system.svg Normal file
View File

@@ -0,0 +1,262 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 14.1.2 (0)
-->
<!-- Title: system Pages: 1 -->
<svg width="3430pt" height="767pt"
viewBox="0.00 0.00 3430.00 767.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(43.2 724.2)">
<title>system</title>
<polygon fill="#1e1e2e" stroke="none" points="-43.2,43.2 -43.2,-724.2 3386.51,-724.2 3386.51,43.2 -43.2,43.2"/>
<g id="clust1" class="cluster">
<title>cluster_sender</title>
<polygon fill="#1e1e2e" stroke="#45475a" points="8,-40 8,-329 373.5,-329 373.5,-40 8,-40"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-311.7" font-family="monospace" font-size="14.00" fill="#a6adc8">Sender machine — Wayland, VAAPI GPU</text>
</g>
<g id="clust2" class="cluster">
<title>cluster_net</title>
<polygon fill="#1e1e2e" stroke="#45475a" points="416.5,-37 416.5,-333 815.06,-333 815.06,-37 416.5,-37"/>
<text xml:space="preserve" text-anchor="middle" x="615.78" y="-315.7" font-family="monospace" font-size="14.00" fill="#a6adc8">Network</text>
</g>
<g id="clust3" class="cluster">
<title>cluster_receiver</title>
<polygon fill="#1e1e2e" stroke="#45475a" points="858.06,-8 858.06,-349 3335.31,-349 3335.31,-8 858.06,-8"/>
<text xml:space="preserve" text-anchor="middle" x="2096.69" y="-331.7" font-family="monospace" font-size="14.00" fill="#a6adc8">Receiver (mcrn) — X11, NVENC/NVDEC GPU</text>
</g>
<g id="clust4" class="cluster">
<title>cluster_legend</title>
<polygon fill="#1e1e2e" stroke="#585b70" points="34.24,-337 34.24,-673 347.26,-673 347.26,-337 34.24,-337"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-655.7" font-family="monospace" font-size="14.00" fill="#a6adc8">Legend</text>
</g>
<!-- capture_py -->
<g id="node1" class="node">
<title>capture_py</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="328.38,-296.2 53.12,-296.2 53.12,-189.8 328.38,-189.8 328.38,-296.2"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-272.82" font-family="monospace" font-size="14.00" fill="#cdd6f4">kmsgrab + PulseAudio</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-255.57" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-238.32" font-family="monospace" font-size="14.00" fill="#cdd6f4">sender/stream_av.sh</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-221.07" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg CLI · h264_vaapi · AAC</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-203.82" font-family="monospace" font-size="14.00" fill="#cdd6f4">mpegts over TCP</text>
</g>
<!-- net_py -->
<g id="node3" class="node">
<title>net_py</title>
<polygon fill="#1e2a3e" stroke="#89b4fa" points="730.08,-299.66 548.25,-299.66 501.48,-190.34 683.31,-190.34 730.08,-299.66"/>
<text xml:space="preserve" text-anchor="middle" x="615.78" y="-248.95" font-family="monospace" font-size="14.00" fill="#cdd6f4">TCP :4444</text>
<text xml:space="preserve" text-anchor="middle" x="615.78" y="-231.7" font-family="monospace" font-size="14.00" fill="#cdd6f4">mpegts</text>
</g>
<!-- capture_py&#45;&gt;net_py -->
<g id="edge1" class="edge">
<title>capture_py&#45;&gt;net_py</title>
<path fill="none" stroke="#cba6f7" d="M328.8,-243.65C388.75,-243.93 457.83,-244.26 513.09,-244.52"/>
<polygon fill="#cba6f7" stroke="#cba6f7" points="512.77,-248.02 522.79,-244.57 512.8,-241.02 512.77,-248.02"/>
</g>
<!-- capture_rs -->
<g id="node2" class="node">
<title>capture_rs</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="365.5,-154.2 16,-154.2 16,-47.8 365.5,-47.8 365.5,-154.2"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-130.82" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht&#45;client (Rust)</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-113.58" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-96.33" font-family="monospace" font-size="14.00" fill="#cdd6f4">media/client/</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-79.08" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg subprocess (subprocess backend)</text>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-61.83" font-family="monospace" font-size="14.00" fill="#cdd6f4">NUT demux → mpsc → WirePacket TCP</text>
</g>
<!-- net_rs -->
<g id="node4" class="node">
<title>net_rs</title>
<polygon fill="#1e2a3e" stroke="#89b4fa" points="807.06,-154.66 502.78,-154.66 424.5,-45.34 728.79,-45.34 807.06,-154.66"/>
<text xml:space="preserve" text-anchor="middle" x="615.78" y="-103.95" font-family="monospace" font-size="14.00" fill="#cdd6f4">TCP :4447</text>
<text xml:space="preserve" text-anchor="middle" x="615.78" y="-86.7" font-family="monospace" font-size="14.00" fill="#cdd6f4">WirePacket framing</text>
</g>
<!-- capture_rs&#45;&gt;net_rs -->
<g id="edge5" class="edge">
<title>capture_rs&#45;&gt;net_rs</title>
<path fill="none" stroke="#89b4fa" d="M365.95,-100.59C394.5,-100.52 423.99,-100.45 452.24,-100.38"/>
<polygon fill="#89b4fa" stroke="#89b4fa" points="451.91,-103.89 461.91,-100.36 451.9,-96.89 451.91,-103.89"/>
</g>
<!-- recorder_py -->
<g id="node5" class="node">
<title>recorder_py</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="1141.31,-315.83 866.06,-315.83 866.06,-192.17 1141.31,-192.17 1141.31,-315.83"/>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-292.45" font-family="monospace" font-size="14.00" fill="#cdd6f4">StreamRecorder (Python)</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-275.2" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-257.95" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht/stream/recorder.py</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-240.7" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg listener · TCP receive</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-223.45" font-family="monospace" font-size="14.00" fill="#cdd6f4">fMP4 writer · UDP relay</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-206.2" font-family="monospace" font-size="14.00" fill="#cdd6f4">stdout&#45;pipe scene detect</text>
</g>
<!-- net_py&#45;&gt;recorder_py -->
<g id="edge2" class="edge">
<title>net_py&#45;&gt;recorder_py</title>
<path fill="none" stroke="#cba6f7" d="M707.75,-247.12C751.47,-248.14 805,-249.39 854.39,-250.54"/>
<polygon fill="#cba6f7" stroke="#cba6f7" points="854.26,-254.04 864.34,-250.77 854.43,-247.04 854.26,-254.04"/>
</g>
<!-- recorder_rs -->
<g id="node6" class="node">
<title>recorder_rs</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="1124.81,-156.45 882.56,-156.45 882.56,-15.55 1124.81,-15.55 1124.81,-156.45"/>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-133.07" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht&#45;server (Rust)</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-115.83" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-98.58" font-family="monospace" font-size="14.00" fill="#cdd6f4">media/server/</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-81.33" font-family="monospace" font-size="14.00" fill="#cdd6f4">WirePacket router</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-64.08" font-family="monospace" font-size="14.00" fill="#cdd6f4">fMP4 + UDP relay (ffmpeg)</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-46.83" font-family="monospace" font-size="14.00" fill="#cdd6f4">ADTS audio writer</text>
<text xml:space="preserve" text-anchor="middle" x="1003.69" y="-29.57" font-family="monospace" font-size="14.00" fill="#cdd6f4">Unix&#45;socket scene relay</text>
</g>
<!-- net_rs&#45;&gt;recorder_rs -->
<g id="edge6" class="edge">
<title>net_rs&#45;&gt;recorder_rs</title>
<path fill="none" stroke="#89b4fa" d="M764.6,-94.64C799.57,-93.37 836.63,-92.02 870.83,-90.78"/>
<polygon fill="#89b4fa" stroke="#89b4fa" points="870.94,-94.28 880.8,-90.42 870.68,-87.29 870.94,-94.28"/>
</g>
<!-- processor -->
<g id="node7" class="node">
<title>processor</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="1548.81,-219.45 1281.81,-219.45 1281.81,-78.55 1548.81,-78.55 1548.81,-219.45"/>
<text xml:space="preserve" text-anchor="middle" x="1415.31" y="-196.07" font-family="monospace" font-size="14.00" fill="#cdd6f4">SessionProcessor (Python)</text>
<text xml:space="preserve" text-anchor="middle" x="1415.31" y="-178.82" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1415.31" y="-161.57" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht/stream/processor.py</text>
<text xml:space="preserve" text-anchor="middle" x="1415.31" y="-144.32" font-family="monospace" font-size="14.00" fill="#cdd6f4">fMP4 → audio.wav (ffmpeg)</text>
<text xml:space="preserve" text-anchor="middle" x="1415.31" y="-127.08" font-family="monospace" font-size="14.00" fill="#cdd6f4">chunked WAVs for transcribe</text>
<text xml:space="preserve" text-anchor="middle" x="1415.31" y="-109.83" font-family="monospace" font-size="14.00" fill="#cdd6f4">[Rust mode: scene detect via</text>
<text xml:space="preserve" text-anchor="middle" x="1415.31" y="-92.58" font-family="monospace" font-size="14.00" fill="#cdd6f4">Unix socket → ffmpeg pipe]</text>
</g>
<!-- recorder_py&#45;&gt;processor -->
<g id="edge4" class="edge">
<title>recorder_py&#45;&gt;processor</title>
<path fill="none" stroke="#cba6f7" d="M1141.66,-218.89C1183.07,-208.27 1228.67,-196.59 1270.52,-185.86"/>
<polygon fill="#cba6f7" stroke="#cba6f7" points="1271.36,-189.26 1280.18,-183.38 1269.62,-182.48 1271.36,-189.26"/>
<text xml:space="preserve" text-anchor="middle" x="1211.56" y="-232.1" font-family="monospace" font-size="14.00" fill="#a6adc8">raw scene</text>
<text xml:space="preserve" text-anchor="middle" x="1211.56" y="-214.85" font-family="monospace" font-size="14.00" fill="#a6adc8">frames</text>
</g>
<!-- store -->
<g id="node11" class="node">
<title>store</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="2388.56,-233.45 2385.56,-237.45 2364.56,-237.45 2361.56,-233.45 2113.31,-233.45 2113.31,-92.55 2388.56,-92.55 2388.56,-233.45"/>
<text xml:space="preserve" text-anchor="middle" x="2250.94" y="-210.07" font-family="monospace" font-size="14.00" fill="#cdd6f4">data/&lt;session_id&gt;/</text>
<text xml:space="preserve" text-anchor="middle" x="2250.94" y="-192.82" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="2250.94" y="-175.57" font-family="monospace" font-size="14.00" fill="#cdd6f4">stream/recording_*.mp4</text>
<text xml:space="preserve" text-anchor="middle" x="2250.94" y="-158.32" font-family="monospace" font-size="14.00" fill="#cdd6f4">stream/audio.aac (Rust mode)</text>
<text xml:space="preserve" text-anchor="middle" x="2250.94" y="-141.07" font-family="monospace" font-size="14.00" fill="#cdd6f4">frames/*.jpg + index.json</text>
<text xml:space="preserve" text-anchor="middle" x="2250.94" y="-123.83" font-family="monospace" font-size="14.00" fill="#cdd6f4">audio/chunk_*.wav</text>
<text xml:space="preserve" text-anchor="middle" x="2250.94" y="-106.58" font-family="monospace" font-size="14.00" fill="#cdd6f4">transcript.json · thread.json</text>
</g>
<!-- recorder_py&#45;&gt;store -->
<g id="edge3" class="edge">
<title>recorder_py&#45;&gt;store</title>
<path fill="none" stroke="#cba6f7" d="M1141.45,-253.45C1198.97,-253.22 1252.81,-253 1252.81,-253 1252.81,-253 1548.81,-237 1548.81,-237 1548.81,-237 1931.56,-211 1931.56,-211 1931.56,-211 2084.31,-200 2084.31,-200 2084.31,-200 2091.16,-198.47 2102.08,-196.03"/>
<polygon fill="#cba6f7" stroke="#cba6f7" points="2102.69,-199.48 2111.68,-193.89 2101.16,-192.65 2102.69,-199.48"/>
</g>
<!-- recorder_rs&#45;&gt;processor -->
<g id="edge8" class="edge">
<title>recorder_rs&#45;&gt;processor</title>
<path fill="none" stroke="#a6e3a1" stroke-dasharray="5,2" d="M1125.12,-98.86C1188.33,-105.61 1252.81,-112.5 1252.81,-112.5 1252.81,-112.5 1259.59,-114.03 1270.37,-116.47"/>
<polygon fill="#a6e3a1" stroke="#a6e3a1" points="1269.31,-119.82 1279.84,-118.61 1270.85,-112.99 1269.31,-119.82"/>
<text xml:space="preserve" text-anchor="middle" x="1211.56" y="-133.7" font-family="monospace" font-size="14.00" fill="#a6adc8">scene.sock</text>
<text xml:space="preserve" text-anchor="middle" x="1211.56" y="-116.45" font-family="monospace" font-size="14.00" fill="#a6adc8">(H.264)</text>
</g>
<!-- recorder_rs&#45;&gt;store -->
<g id="edge7" class="edge">
<title>recorder_rs&#45;&gt;store</title>
<path fill="none" stroke="#89b4fa" d="M1125.03,-75.14C1199.82,-68.4 1281.81,-61 1281.81,-61 1281.81,-61 1689.31,-59 1689.31,-59 1689.31,-59 1931.56,-59 1931.56,-59 1931.56,-59 2018.95,-87.55 2102.16,-114.73"/>
<polygon fill="#89b4fa" stroke="#89b4fa" points="2101.04,-118.04 2111.63,-117.82 2103.21,-111.39 2101.04,-118.04"/>
</g>
<!-- transcriber -->
<g id="node8" class="node">
<title>transcriber</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="1931.56,-183.2 1689.31,-183.2 1689.31,-76.8 1931.56,-76.8 1931.56,-183.2"/>
<text xml:space="preserve" text-anchor="middle" x="1810.44" y="-159.82" font-family="monospace" font-size="14.00" fill="#cdd6f4">Transcriber</text>
<text xml:space="preserve" text-anchor="middle" x="1810.44" y="-142.57" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1810.44" y="-125.33" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht/transcriber/engine.py</text>
<text xml:space="preserve" text-anchor="middle" x="1810.44" y="-108.08" font-family="monospace" font-size="14.00" fill="#cdd6f4">faster&#45;whisper · CUDA</text>
<text xml:space="preserve" text-anchor="middle" x="1810.44" y="-90.83" font-family="monospace" font-size="14.00" fill="#cdd6f4">segment grouping</text>
</g>
<!-- processor&#45;&gt;transcriber -->
<g id="edge10" class="edge">
<title>processor&#45;&gt;transcriber</title>
<path fill="none" stroke="#585b70" d="M1549.13,-142.58C1590.57,-140.58 1636.24,-138.37 1677.61,-136.37"/>
<polygon fill="#585b70" stroke="#585b70" points="1677.54,-139.88 1687.36,-135.9 1677.2,-132.89 1677.54,-139.88"/>
<text xml:space="preserve" text-anchor="middle" x="1619.06" y="-144.59" font-family="monospace" font-size="14.00" fill="#a6adc8">WAV chunks</text>
</g>
<!-- transcriber&#45;&gt;store -->
<g id="edge11" class="edge">
<title>transcriber&#45;&gt;store</title>
<path fill="none" stroke="#585b70" d="M1931.84,-139.06C1984.38,-143.02 2046.5,-147.69 2101.83,-151.85"/>
<polygon fill="#585b70" stroke="#585b70" points="2101.42,-155.33 2111.66,-152.59 2101.95,-148.35 2101.42,-155.33"/>
<text xml:space="preserve" text-anchor="middle" x="2022.44" y="-154.38" font-family="monospace" font-size="14.00" fill="#a6adc8">transcript.json</text>
</g>
<!-- gui -->
<g id="node9" class="node">
<title>gui</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="2870.31,-244.83 2578.56,-244.83 2578.56,-121.17 2870.31,-121.17 2870.31,-244.83"/>
<text xml:space="preserve" text-anchor="middle" x="2724.44" y="-221.45" font-family="monospace" font-size="14.00" fill="#cdd6f4">Mitus GUI (GTK4 + libadwaita)</text>
<text xml:space="preserve" text-anchor="middle" x="2724.44" y="-204.2" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="2724.44" y="-186.95" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht/window.py · cht/ui/*</text>
<text xml:space="preserve" text-anchor="middle" x="2724.44" y="-169.7" font-family="monospace" font-size="14.00" fill="#cdd6f4">Monitor (mpv UDP) · Scrub bar</text>
<text xml:space="preserve" text-anchor="middle" x="2724.44" y="-152.45" font-family="monospace" font-size="14.00" fill="#cdd6f4">Frames panel · Transcript panel</text>
<text xml:space="preserve" text-anchor="middle" x="2724.44" y="-135.2" font-family="monospace" font-size="14.00" fill="#cdd6f4">Agent input/output</text>
</g>
<!-- agent -->
<g id="node10" class="node">
<title>agent</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="3327.31,-161.2 3010.81,-161.2 3010.81,-54.8 3327.31,-54.8 3327.31,-161.2"/>
<text xml:space="preserve" text-anchor="middle" x="3169.06" y="-137.82" font-family="monospace" font-size="14.00" fill="#cdd6f4">Agent runner</text>
<text xml:space="preserve" text-anchor="middle" x="3169.06" y="-120.58" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="3169.06" y="-103.33" font-family="monospace" font-size="14.00" fill="#cdd6f4">cht/agent/*</text>
<text xml:space="preserve" text-anchor="middle" x="3169.06" y="-86.08" font-family="monospace" font-size="14.00" fill="#cdd6f4">Claude SDK · OpenAI/Groq</text>
<text xml:space="preserve" text-anchor="middle" x="3169.06" y="-68.83" font-family="monospace" font-size="14.00" fill="#cdd6f4">@F frame refs · @T transcript refs</text>
</g>
<!-- gui&#45;&gt;agent -->
<g id="edge13" class="edge">
<title>gui&#45;&gt;agent</title>
<path fill="none" stroke="#585b70" d="M2870.68,-158.39C2911.74,-151.43 2956.79,-143.8 2999.13,-136.63"/>
<polygon fill="#585b70" stroke="#585b70" points="2999.64,-140.09 3008.91,-134.97 2998.47,-133.19 2999.64,-140.09"/>
<text xml:space="preserve" text-anchor="middle" x="2940.56" y="-156.17" font-family="monospace" font-size="14.00" fill="#a6adc8">@&#45;mentions</text>
</g>
<!-- agent&#45;&gt;store -->
<g id="edge14" class="edge">
<title>agent&#45;&gt;store</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M3010.47,-96.22C2939.05,-90.89 2870.31,-85.75 2870.31,-85.75 2870.31,-85.75 2578.56,-85.75 2578.56,-85.75 2578.56,-85.75 2486.47,-107.53 2400.08,-127.96"/>
<polygon fill="#585b70" stroke="#585b70" points="2399.43,-124.52 2390.51,-130.23 2401.05,-131.33 2399.43,-124.52"/>
<text xml:space="preserve" text-anchor="middle" x="2724.44" y="-89.7" font-family="monospace" font-size="14.00" fill="#a6adc8">thread.json</text>
</g>
<!-- store&#45;&gt;processor -->
<g id="edge9" class="edge">
<title>store&#45;&gt;processor</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M2113.07,-179.34C2026.78,-189.64 1931.56,-201 1931.56,-201 1931.56,-201 1689.31,-201 1689.31,-201 1689.31,-201 1625.9,-188.92 1560.2,-176.41"/>
<polygon fill="#585b70" stroke="#585b70" points="1561.18,-173.03 1550.7,-174.6 1559.87,-179.91 1561.18,-173.03"/>
</g>
<!-- store&#45;&gt;gui -->
<g id="edge12" class="edge">
<title>store&#45;&gt;gui</title>
<path fill="none" stroke="#585b70" d="M2388.95,-168.81C2444.64,-171.17 2509.32,-173.92 2566.86,-176.36"/>
<polygon fill="#585b70" stroke="#585b70" points="2566.62,-179.85 2576.76,-176.78 2566.92,-172.86 2566.62,-179.85"/>
<text xml:space="preserve" text-anchor="middle" x="2483.56" y="-179.33" font-family="monospace" font-size="14.00" fill="#a6adc8">files + watchers</text>
</g>
<!-- l_py -->
<g id="node12" class="node">
<title>l_py</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="233.5,-382.7 148,-382.7 148,-345.3 233.5,-345.3 233.5,-382.7"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-359.32" font-family="monospace" font-size="14.00" fill="#cdd6f4">Python</text>
</g>
<!-- l_rs -->
<g id="node13" class="node">
<title>l_rs</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="225.25,-455.7 156.25,-455.7 156.25,-418.3 225.25,-418.3 225.25,-455.7"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-432.32" font-family="monospace" font-size="14.00" fill="#cdd6f4">Rust</text>
</g>
<!-- l_io -->
<g id="node14" class="node">
<title>l_io</title>
<polygon fill="#1e2a3e" stroke="#89b4fa" points="339.26,-566.41 103.01,-566.41 42.24,-491.59 278.49,-491.59 339.26,-566.41"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-524.33" font-family="monospace" font-size="14.00" fill="#cdd6f4">I/O · network</text>
</g>
<!-- l_fs -->
<g id="node15" class="node">
<title>l_fs</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="250,-639.71 247,-643.71 226,-643.71 223,-639.71 131.5,-639.71 131.5,-602.29 250,-602.29 250,-639.71"/>
<text xml:space="preserve" text-anchor="middle" x="190.75" y="-616.33" font-family="monospace" font-size="14.00" fill="#cdd6f4">filesystem</text>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 21 KiB

581
docs/index.html Normal file
View File

@@ -0,0 +1,581 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Mitus — Architecture</title>
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap');
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
/* App shell: the body is pinned to the viewport and never scrolls itself;
   the header keeps its natural height while nav and main scroll on their own. */
background: #1e1e2e;
color: #cdd6f4;
font-family: 'Inter', sans-serif;
line-height: 1.6;
/* Fallback for browsers without dynamic viewport units. */
height: 100vh;
/* On mobile browsers 100vh is taller than the visible area while the URL bar
   is shown; combined with overflow: hidden that clips the bottom of the page.
   100dvh tracks the currently visible viewport instead. */
height: 100dvh;
overflow: hidden;
display: flex;
flex-direction: column;
}
header {
padding: 16px 24px;
border-bottom: 1px solid #313244;
display: flex;
align-items: baseline;
gap: 16px;
flex-shrink: 0;
}
header h1 {
font-family: 'JetBrains Mono', monospace;
font-size: 22px;
font-weight: 600;
letter-spacing: 3px;
color: #89b4fa;
}
header .subtitle {
font-size: 13px;
color: #6c7086;
letter-spacing: 1px;
text-transform: uppercase;
}
/* Row below the header: sidebar nav + main content. */
.layout {
display: flex;
flex: 1;
/* Flex items default to min-height: auto, which would let this row grow to
   fit its content; 0 lets it shrink to the space left under the header so
   the overflow on nav/main produces scrollbars instead. */
min-height: 0;
}
nav {
display: flex;
flex-direction: column;
width: 220px;
flex-shrink: 0;
background: #181825;
border-right: 1px solid #313244;
padding: 8px 0;
overflow-y: auto;
}
nav a {
padding: 10px 20px;
font-family: 'JetBrains Mono', monospace;
font-size: 12px;
color: #a6adc8;
text-decoration: none;
border-left: 2px solid transparent;
cursor: pointer;
transition: all 0.15s;
}
nav a:hover { color: #cdd6f4; background: #313244; }
nav a.active { color: #89b4fa; border-left-color: #89b4fa; background: #1e2d3e; }
nav .group {
font-family: 'JetBrains Mono', monospace;
font-size: 10px;
color: #585b70;
letter-spacing: 1px;
text-transform: uppercase;
padding: 16px 20px 6px;
}
main {
flex: 1;
overflow: auto;
padding: 32px 48px;
}
/* Content sections: all hidden by default; only the one carrying .active is
   rendered. The animation is declared on the base rule, so it runs each time
   a section switches from display: none to display: block. */
.graph-section {
display: none;
animation: fadeIn 0.2s ease;
}
.graph-section.active { display: block; }
/* Simple opacity fade used when a section becomes visible. */
@keyframes fadeIn {
from { opacity: 0; }
to { opacity: 1; }
}
.graph-section h2 {
font-family: 'JetBrains Mono', monospace;
font-size: 15px;
font-weight: 500;
color: #a6adc8;
margin-bottom: 8px;
letter-spacing: 1px;
}
.graph-section p {
font-size: 13px;
color: #6c7086;
margin-bottom: 24px;
max-width: 800px;
}
.graph-container {
background: #11111b;
border: 1px solid #313244;
padding: 24px;
overflow: auto;
}
.graph-container a { display: block; }
.graph-container img { max-width: 100%; height: auto; }
.legend {
display: flex;
gap: 24px;
margin-top: 16px;
font-size: 11px;
font-family: 'JetBrains Mono', monospace;
color: #6c7086;
}
.legend span::before {
content: '';
display: inline-block;
width: 8px;
height: 8px;
margin-right: 6px;
border-radius: 50%;
}
.legend .python::before { background: #cba6f7; }
.legend .rust::before { background: #89b4fa; }
.legend .hw::before { background: #a6e3a1; }
.legend .fs::before { background: #585b70; }
/* Repo tree */
.tree-container {
background: #11111b;
border: 1px solid #313244;
padding: 24px;
overflow: auto;
}
.repo-tree {
font-family: 'JetBrains Mono', monospace;
font-size: 13px;
line-height: 1.7;
color: #a6adc8;
}
.t-root { color: #89b4fa; font-weight: 600; font-size: 15px; }
.t-dir { color: #cdd6f4; font-weight: 500; }
.t-rust { color: #89b4fa; font-weight: 500; }
.t-py { color: #cba6f7; font-weight: 500; }
.t-comment { color: #6c7086; }
/* Prose sections */
.graph-section h3 {
font-family: 'JetBrains Mono', monospace;
font-size: 13px;
font-weight: 500;
color: #cdd6f4;
letter-spacing: 1px;
margin: 32px 0 10px;
text-transform: uppercase;
}
.prose { max-width: 820px; }
.prose p {
font-size: 14px;
color: #a6adc8;
margin-bottom: 14px;
line-height: 1.7;
}
.prose p b { color: #cdd6f4; font-weight: 600; }
.prose code {
font-family: 'JetBrains Mono', monospace;
font-size: 12px;
color: #89b4fa;
background: #181825;
padding: 1px 5px;
border-radius: 3px;
}
.prose pre {
background: #11111b;
border: 1px solid #313244;
padding: 14px 16px;
margin: 8px 0 18px;
border-radius: 4px;
overflow-x: auto;
}
.prose pre code {
background: transparent;
padding: 0;
color: #cdd6f4;
font-size: 12px;
}
.prose ul {
margin: 8px 0 16px 20px;
font-size: 14px;
color: #a6adc8;
line-height: 1.7;
}
.prose ul li { margin-bottom: 6px; }
.prose .note {
border-left: 3px solid #f9e2af;
background: #2a2a3e;
padding: 10px 14px;
margin: 12px 0 18px;
font-size: 13px;
color: #cdd6f4;
}
.cmp-table {
width: 100%;
border-collapse: collapse;
font-size: 13px;
margin: 8px 0 20px;
border: 1px solid #313244;
}
.cmp-table th {
text-align: left;
background: #181825;
color: #a6adc8;
font-family: 'JetBrains Mono', monospace;
font-size: 11px;
letter-spacing: 1px;
padding: 10px 14px;
border-bottom: 1px solid #313244;
}
.cmp-table td {
padding: 10px 14px;
color: #a6adc8;
border-bottom: 1px solid #313244;
vertical-align: top;
}
.cmp-table tr:last-child td { border-bottom: none; }
/* Mobile */
.menu-toggle {
display: none;
background: transparent;
border: 1px solid #313244;
color: #cdd6f4;
padding: 6px 10px;
font-family: 'JetBrains Mono', monospace;
font-size: 14px;
cursor: pointer;
line-height: 1;
margin-left: auto;
}
.menu-toggle:hover { background: #313244; }
.nav-backdrop {
display: none;
position: absolute;
inset: 0;
background: rgba(0, 0, 0, 0.5);
z-index: 10;
}
/* Narrow screens: the sidebar becomes an off-canvas drawer toggled through the
   .nav-open class on .layout, with a dimming backdrop behind it. */
@media (max-width: 720px) {
header { padding: 10px 12px; gap: 8px; }
header h1 { font-size: 16px; letter-spacing: 1px; }
header .subtitle { display: none; }
.menu-toggle { display: inline-block; }
/* Positioning context for the absolutely positioned drawer and backdrop. */
.layout { position: relative; }
/* Only reveal the backdrop inside this breakpoint. Outside it .layout is not
   positioned, so a leftover .nav-open (menu opened, then window widened)
   would stretch the backdrop over the whole page, header included. */
.layout.nav-open .nav-backdrop { display: block; }
nav {
position: absolute;
left: 0; top: 0; bottom: 0;
width: 220px;
z-index: 20;
/* Parked off-screen to the left until .nav-open slides it in. */
transform: translateX(-100%);
transition: transform 0.2s ease;
box-shadow: 2px 0 8px rgba(0, 0, 0, 0.5);
}
.layout.nav-open nav { transform: translateX(0); }
main { padding: 16px; }
.graph-section h2 { font-size: 13px; }
.prose p, .prose ul { font-size: 13px; }
}
</style>
</head>
<body>
<header>
<h1>MITUS</h1>
<span class="subtitle">Stream viewer + agent — architecture</span>
<button class="menu-toggle" onclick="toggleNav()" aria-label="Toggle navigation">&#9776;</button>
</header>
<div class="layout">
<div class="nav-backdrop" onclick="toggleNav()"></div>
<nav>
<div class="group">Overview</div>
<a class="active" onclick="show('overview')">Goal &amp; walkthrough</a>
<a onclick="show('usage')">Usage</a>
<a onclick="show('system')">System</a>
<div class="group">Transports</div>
<a onclick="show('python')">Python pipeline</a>
<a onclick="show('rust_client')">Rust client</a>
<a onclick="show('rust_server')">Rust server</a>
<a onclick="show('crates')">Rust crates</a>
<div class="group">Reference</div>
<a onclick="show('repo')">Repository</a>
<a onclick="show('notes')">Design notes</a>
</nav>
<main>
<section id="overview" class="graph-section active">
<h2>GOAL &amp; WALKTHROUGH</h2>
<p>Mitus records a remote desktop, transcribes its audio, extracts scene-change frames, and exposes both to an LLM agent for ad-hoc Q&amp;A.</p>
<div class="prose">
<h3>What it is</h3>
<p>A two-machine setup: the <b>sender</b> (a Wayland desktop) captures screen + audio and ships an encoded stream to the <b>receiver</b>. The receiver records to disk, runs scene detection on the live feed to extract per-event JPEG frames, transcribes the audio, and presents the result in a GTK4 GUI. The GUI doubles as an LLM client: select a frame or transcript span, hit Enter, and an agent (Claude SDK or any OpenAI-compatible endpoint) answers using the selected media as context.</p>
<h3>Why the split</h3>
<p>Capture wants Wayland + a VAAPI-friendly GPU; analysis wants CUDA for both faster-whisper and ffmpeg scene detection. Different machines, different drivers — the network stream is the seam. The receiver also runs the GUI because the recordings are stored locally and the agent reads large frames as files, not blobs over a wire.</p>
<h3>Two transport modes</h3>
<p>Both modes produce the <b>same on-disk session layout</b> (<code>data/&lt;session_id&gt;/stream/</code>, <code>frames/</code>, <code>audio/</code>, <code>transcript.json</code>) so the GUI doesn't care which path the bytes took. The choice is a CLI flag.</p>
<ul>
<li><b>Python (default).</b> Sender is a bash watchdog wrapping the <code>ffmpeg</code> CLI. Receiver is <code>cht/stream/recorder.py</code>: an <code>ffmpeg</code> listener that writes fragmented MP4 + relays UDP to <code>mpv</code> + emits scene frames out of a <code>showinfo</code> stdout pipe. Simple, all in one process, but every restart costs a few seconds.</li>
<li><b>Rust (<code>--rust</code>).</b> A standalone Rust workspace under <code>media/</code>: <code>cht-client</code> on the sender, <code>cht-server</code> on the receiver. Wire protocol is a typed <code>WirePacket</code> framing instead of raw mpegts. Scene detection still runs in Python via a Unix-socket relay from the server. Connect time drops from ~20s to ~3s; session reload from disk is 1–2s.</li>
</ul>
<div class="note">The <code>media/</code> directory holds the Rust transport. While both modes coexist, that name is a misnomer — a future rename is planned. For now, "Rust transport" and "<code>media/</code>" mean the same thing.</div>
<h3>What the agent sees</h3>
<p>Two reference syntaxes resolve to media when sent: <code>@F0001</code>–<code>@F0042</code> for frames, <code>@T0001</code>–<code>@T0010</code> for transcript segments. Single-word verbs <code>describe</code> and <code>answer</code> are sent verbatim — no system prompt, no boilerplate. If you want detail, you type it. The agent runner injects only the referenced frame paths and transcript text alongside the user message.</p>
</div>
</section>
<section id="usage" class="graph-section">
<h2>USAGE</h2>
<p>How to start a session — sender side, receiver side, both transports.</p>
<div class="prose">
<p>Both <code>ctrl/client.sh</code> and <code>ctrl/app.sh</code> take a transport flag — <code>--python</code> (default) or <code>--rust</code>. The <code>ctrl/</code> wrappers are the entrypoints; <code>media/ctrl/*</code> and <code>sender/stream_av.py</code> are implementation details they dispatch to.</p>
<h3>Receiver (mcrn) — GUI</h3>
<p><b>Python transport (default):</b></p>
<pre><code>./ctrl/app.sh --python</code></pre>
<p><b>Rust transport:</b></p>
<pre><code>./ctrl/server.sh # cht-server on TCP :4447 (Rust mode only)
./ctrl/app.sh --rust</code></pre>
<p>Python mode does its own TCP listening inside the GUI process — no separate server step.</p>
<h3>Sender</h3>
<p><b>Python transport:</b></p>
<pre><code>./ctrl/client.sh --python [RECEIVER_IP] [PORT] # default port 4444</code></pre>
<p>(Runs <code>sudo python3 sender/stream_av.py</code> under the hood — <code>sudo</code> is required for <code>kmsgrab</code>.)</p>
<p><b>Rust transport:</b></p>
<pre><code>./ctrl/client.sh --rust [server_addr] # default mcrndeb:4447</code></pre>
<h3>Sync</h3>
<p>Both machines share the same source tree; <code>ctrl/sync.sh</code> rsyncs from the dev host to <code>mcrndeb</code>. The receiver's filesystem is also bind-mounted at <code>~/mcrn</code> on the dev host for quick file access.</p>
<h3>Inside the GUI</h3>
<ul>
<li><b>Frames panel</b> — click to select; <code>←/→</code> navigate.</li>
<li><b>Transcript panel</b> — click to select; <code>↑/↓</code> navigate; <code>Shift</code> to extend.</li>
<li><b>Enter</b> — sends <code>answer</code> + selected refs to the agent.</li>
<li><b>Describe / Answer</b> buttons — same idea, single-word verb prepended.</li>
<li><b>Agent input</b> — type freely; <code>@F1-3</code> and <code>@T5</code> attach refs.</li>
<li><b>Esc</b> — clear selection. <b>Del</b> — clear agent output.</li>
<li><b>Ctrl+R</b> — manual segment cut.</li>
</ul>
<h3>Agent provider</h3>
<p>Resolution order in <code>cht/agent/runner.py</code>:</p>
<ul>
<li><code>GROQ_API_KEY</code> → OpenAI-compatible client against Groq.</li>
<li><code>OPENAI_API_KEY</code> → OpenAI / OpenAI-compatible.</li>
<li>(default) → Claude Code SDK using your local CC subscription.</li>
</ul>
</div>
</section>
<section id="system" class="graph-section">
<h2>SYSTEM ARCHITECTURE</h2>
<p>End-to-end view: sender capture → network → receiver record + analyse → GUI + agent. Both transports converge on the same on-disk session layout.</p>
<div class="graph-container">
<a href="viewer.html?src=graphs/system.svg"><img src="graphs/system.svg" alt="System architecture"></a>
</div>
<div class="legend">
<span class="python">Python</span>
<span class="rust">Rust</span>
<span class="hw">Hardware / external</span>
<span class="fs">Filesystem</span>
</div>
</section>
<section id="python" class="graph-section">
<h2>PYTHON PIPELINE</h2>
<p>Default mode. Bash + ffmpeg CLI on the sender; <code>StreamRecorder</code> + <code>SessionProcessor</code> in <code>cht/stream/</code> on the receiver. Scene detection rides the recorder's <code>ffmpeg</code> stdout pipe — sub-second latency, no extra process.</p>
<div class="graph-container">
<a href="viewer.html?src=graphs/python_pipeline.svg"><img src="graphs/python_pipeline.svg" alt="Python pipeline"></a>
</div>
<div class="legend">
<span class="python">Python module</span>
<span class="rust">External binary (ffmpeg)</span>
<span class="hw">Hardware / OS source</span>
<span class="fs">Filesystem output</span>
</div>
</section>
<section id="rust_client" class="graph-section">
<h2>RUST CLIENT — sender</h2>
<p><code>media/client/</code> — replaces <code>sender/stream_av.sh</code> when running with <code>--rust</code>. Two backends: subprocess (default, wraps ffmpeg CLI) and an experimental direct VAAPI capture/encoder.</p>
<div class="graph-container">
<a href="viewer.html?src=graphs/rust_client.svg"><img src="graphs/rust_client.svg" alt="Rust client pipeline"></a>
</div>
</section>
<section id="rust_server" class="graph-section">
<h2>RUST SERVER — receiver</h2>
<p><code>media/server/</code> — replaces <code>StreamRecorder</code> when running with <code>--rust</code>. TCP listener with a typed <code>WirePacket</code> framing; routes Video/Audio/Control packets to ffmpeg recording, ADTS audio, and a Unix-socket scene relay.</p>
<div class="graph-container">
<a href="viewer.html?src=graphs/rust_server.svg"><img src="graphs/rust_server.svg" alt="Rust server pipeline"></a>
</div>
</section>
<section id="crates" class="graph-section">
<h2>RUST CRATES</h2>
<p>Cargo workspace under <code>media/</code>: three crates (<code>cht-common</code>, <code>cht-client</code>, <code>cht-server</code>) and their external deps. Designed to be reusable as a standalone tool — <code>mpr</code> is expected to depend on it too.</p>
<div class="graph-container">
<a href="viewer.html?src=graphs/crates.svg"><img src="graphs/crates.svg" alt="Rust crates"></a>
</div>
</section>
<section id="repo" class="graph-section">
<h2>REPOSITORY STRUCTURE</h2>
<p>Top-level layout. Python app under <code>cht/</code>; Rust transport under <code>media/</code>; sender bash under <code>sender/</code>; ops scripts under <code>ctrl/</code>.</p>
<div class="tree-container">
<pre class="repo-tree"><span class="t-root">cht/</span>
├── <span class="t-py">cht/</span> <span class="t-comment">Python app (GTK4 GUI, recording, transcribe, agent)</span>
│ ├── app.py · window.py <span class="t-comment">entrypoint + main window</span>
│ ├── config.py · session.py <span class="t-comment">app config, session manifest</span>
│ ├── stream/ <span class="t-comment">recorder · processor · tracker · lifecycle · ffmpeg helpers</span>
│ ├── audio/ <span class="t-comment">waveform engine</span>
│ ├── transcriber/ <span class="t-comment">faster-whisper engine</span>
│ ├── scrub/ <span class="t-comment">proxy manager (scrub-mode preview)</span>
│ ├── index/ <span class="t-comment">frame index helpers</span>
│ ├── agent/ <span class="t-comment">runner · base · tools · claude_sdk_connection · openai_connection</span>
│ └── ui/ <span class="t-comment">timeline · monitor · scrub_bar · frames_panel · transcript_panel</span>
<span class="t-comment">agent_input · agent_output · markdown · keyboard · mpv · waveform</span>
├── <span class="t-rust">media/</span> <span class="t-comment">Rust transport workspace (Cargo) — to be renamed once both modes coexist</span>
│ ├── common/ <span class="t-comment">cht-common — WirePacket, ControlMessage, logging</span>
│ ├── client/ <span class="t-comment">cht-client — sender (Wayland, VAAPI)</span>
│ ├── server/ <span class="t-comment">cht-server — receiver (TCP listener, ffmpeg fan-out)</span>
│ └── ctrl/ <span class="t-comment">build.sh · client.sh · server.sh</span>
├── <span class="t-dir">sender/</span> <span class="t-comment">Python-mode sender — stream_av.sh (bash watchdog around ffmpeg CLI)</span>
├── <span class="t-dir">ctrl/</span> <span class="t-comment">app.sh · server.sh · client.sh · sync.sh · bench.py · e2e_test.sh</span>
├── <span class="t-dir">tests/</span> <span class="t-comment">pytest suites — config · ffmpeg · manager · processor · timeline · tracker</span>
├── <span class="t-dir">data/</span> <span class="t-comment">runtime — sessions, active-session pointer (gitignored)</span>
├── <span class="t-dir">logs/</span> <span class="t-comment">runtime logs (gitignored)</span>
├── <span class="t-dir">docs/</span> <span class="t-comment">this site — index.html · viewer.html · graphs/ · render.sh</span>
└── pyproject.toml · uv.lock <span class="t-comment">Python deps via uv</span></pre>
</div>
</section>
<section id="notes" class="graph-section">
<h2>DESIGN NOTES</h2>
<p>Why some non-obvious choices look the way they do.</p>
<div class="prose">
<h3>Same on-disk layout from both transports</h3>
<p>The GUI, transcript, scene index, and agent never branch on transport mode — they only read files. The recording layout is the contract; the network protocol underneath is replaceable. This is what made the Rust port feasible without rewriting the analysis side.</p>
<h3>Scene detection lives in the recorder, not the processor</h3>
<p>In Python mode, scene-change frames come straight off the recorder's <code>ffmpeg</code> stdout pipe — sub-second, single process. Polling the fragmented MP4 from a separate process would add 3–5 s of disk-IPC latency. In Rust mode the same property is approximated by relaying raw H.264 over <code>scene.sock</code> to a separate ffmpeg, but that relay turns out to be the source of most current scene-detection pain (see <i>The scene detection saga</i> below).</p>
<h3>Why bother with the Rust port</h3>
<p>Two measured wins drove the work: connect time dropped from ~20 s (CLI ffmpeg startup + mpegts negotiation) to ~3 s (typed handshake), and session reload from disk dropped to 1–2 s. The Python recorder still works fine for development; the Rust path matters when you reconnect a lot.</p>
<h3>One-word verbs, no system prompt</h3>
<p>Pressing Enter sends <code>answer</code> + selected refs verbatim. There is no system prompt and no instruction template wrapping the message. If a question needs detail, the user types it — the model sees exactly what you'd see, not a contract you'd have to debug.</p>
<h3>Subprocess backend over a custom encoder</h3>
<p>The Rust client wraps the same <code>ffmpeg</code> CLI the Python sender uses, demuxes its NUT output in-process, and ships <code>EncodedPacket</code>s. Less code to own than a direct VAAPI encode path, and it inherits ffmpeg's robustness around odd Wayland/DRM transitions. The direct VAAPI backend exists but is experimental.</p>
<h3>Sender as a watchdog, not a daemon</h3>
<p>Python-mode <code>stream_av.sh</code> is a bash loop that restarts <code>ffmpeg</code> on stall (no progress for 10 s) and restarts immediately on the DRM-plane format change that fullscreen apps trigger. Cheaper and more reliable than building stall detection into a long-lived process.</p>
<h3>Struggles — the scene detection saga</h3>
<p>Scene detection is the part of the system that has fought back the hardest. The short version: <b>scene detection wants to live in the same ffmpeg process that does the decoding</b>, and every architecture change has had to relearn that.</p>
<h3>1. The "one behind" bug and the flush trick</h3>
<p>The original Python pipeline ran scene detection as a branch of the same <code>ffmpeg</code> that records: <code>select='gt(scene,T)'</code> → <code>showinfo</code> → MJPEG. The MJPEG encoder + muxer holds the selected frame in its internal buffer until <i>another</i> selected frame pushes it out — so the JPEG you receive at time <i>T</i> is actually the previous scene change, not the current one. Classic "one behind".</p>
<p>Workaround: a flush trick — select extra adjacent frames after each scene change so the real frame gets pushed through immediately (<code>SCENE_FLUSH_FRAMES</code>, see <code>cht/config.py</code>, used in <code>cht/stream/ffmpeg.py</code> :: <code>receive_record_relay_and_detect</code>). Worked reliably <b>only because everything was in one ffmpeg process</b>.</p>
<h3>2. The Rust relay broke it</h3>
<p>When transport moved to Rust, the recorder split into two processes: Rust-side ffmpeg writes fMP4 + UDP, and a separate Python-side ffmpeg consumes raw H.264 from <code>scene.sock</code> for scene detection. Two new failure modes appeared:</p>
<ul>
<li><b>The flush trick stopped flushing.</b> The MJPEG encoder behaves differently in a standalone pipe-fed ffmpeg vs. as a branch of a multi-output process — adjacent extra frames no longer reliably push the previous selection through.</li>
<li><b>Decoder corruption from dropped packets.</b> The Rust relay uses <code>try_send</code> with a 100 ms socket write timeout (<code>media/server/src/session.rs</code>). On any backpressure the relay drops H.264 packets, which corrupts the downstream decoder until the next keyframe — and missed keyframes mean missed scene detections.</li>
</ul>
<h3>3. Three dead ends</h3>
<ul>
<li><b>fMP4-tip extraction.</b> Trigger on showinfo, then extract the frame from the just-written fragmented MP4. Fragments only finalize at keyframe boundaries (~2 s with GOP 30), so <code>ffprobe</code> reports stale duration and the extracted frame comes from the <i>previous</i> scene.</li>
<li><b>Single Rust ffmpeg with mixed outputs.</b> The clean fix would be one ffmpeg in Rust doing record (<code>-c:v copy</code>) + relay (<code>-c:v copy</code>) + scene detect (decode + filter). It doesn't work — ffmpeg won't mix <code>-c:v copy</code> outputs with <code>-filter_complex</code> on a pipe input under <code>-hwaccel cuda</code>.</li>
<li><b>Tighter retry intervals on the extractor.</b> Dropping retry from 1 s to 0.3 s made things <i>worse</i> — concurrent ffmpeg processes thrashing the GPU rather than completing.</li>
</ul>
<h3>4. Where it actually landed</h3>
<p>Current working approach (Rust mode): the relay-fed scene detector fires <code>showinfo</code> with a timestamp, then Python extracts the frame from the recording file at <i>that</i> timestamp, with a wall-clock offset computed from the session-dir name. Reliable frames; ~1 s latency per scene from fMP4 fragment lag plus the per-extract ffmpeg spawn (~0.5 s). It's the system limping along until the proper fix lands. See <code>def/10-scene-detect-to-rust.md</code> and <code>def/ISSUES.md</code> R1, R3 for the full record.</p>
<div class="note"><b>Lesson.</b> The flush hack is a dead end in any pipe-fed context. Don't try to make it work over relay — move scene detection back into the same process that has the decoded frames. That's the only configuration that has ever been quiet.</div>
<h3>Future work</h3>
<h4 style="font-family:'JetBrains Mono',monospace;font-size:12px;color:#a6adc8;letter-spacing:1px;margin:20px 0 6px">Near term — scene detection as a 3rd output of the Rust server's ffmpeg</h4>
<p>Spec: <code>def/10-scene-detect-to-rust.md</code>. Add a third branch to the existing ffmpeg the Rust server already runs:</p>
<ul>
<li>Output 1: <code>-c:v copy</code> → fMP4 (unchanged)</li>
<li>Output 2: <code>-c:v copy</code> → UDP relay (unchanged)</li>
<li>Output 3: CUDA decode → <code>select='gt(scene,T)'</code> → <code>showinfo</code> → MJPEG out a second pipe / second Unix socket</li>
</ul>
<p>This restores the single-process invariant — scene detection sees the same decoded frames as the recording branch, the flush behavior matches, no relay packet drops. Removes <code>detect_scenes_from_pipe()</code> in <code>cht/stream/ffmpeg.py</code>, the stdin-feeder thread in <code>cht/stream/processor.py</code>, and <code>scene_relay_task</code> in <code>media/server/src/session.rs</code>.</p>
<p>Adjacent improvements once that lands:</p>
<ul>
<li><b>Long-running extractor.</b> Keep one ffmpeg open and pipe seek commands rather than spawning per frame — eliminates the ~0.5 s startup hit.</li>
<li><b>PTS on the wire.</b> Have the Rust server send recording PTS alongside scene events so Python doesn't have to guess a wall-clock offset from the session-dir name (which is also why the first scene frame currently lands 7–10 s late in Rust mode — <code>def/ISSUES.md</code> R1).</li>
</ul>
<h4 style="font-family:'JetBrains Mono',monospace;font-size:12px;color:#a6adc8;letter-spacing:1px;margin:20px 0 6px">End goal — in-process libav filter graph</h4>
<p>Spec: <code>def/09-media-transport.md</code>. Rust server decodes via NVDEC, runs the scene filter in-process via the libav API, and writes JPEGs directly. No ffmpeg subprocess, no pipe, no relay, no extraction — scene-to-frame latency drops to near zero. The 3rd-output step above is the bridge: same single-process discipline, easier to land, and a clean rewrite target once it works.</p>
<p>Other items deferred to that broader port:</p>
<ul>
<li><b>Frame buffer / fast scrub.</b> GPU ring buffer of the last N decoded frames exposed over shared memory to the Python scrub UI — replaces the mpv proxy MJPEG hack (see <code>def/07-scrub-perf-ceiling.md</code>).</li>
<li><b>Typed control protocol.</b> The current <code>WirePacket</code> framing covers session lifecycle but not parameter changes; spec 09 sketches a control-message channel for things like live <code>scene_threshold</code> updates and reconnect-with-PTS.</li>
<li><b>Audio in the live UDP relay.</b> Rust mode currently has no audio in the live monitor (<code>def/ISSUES.md</code> R2) because the server's ffmpeg only takes video on its stdin. Resolved naturally once the server's ffmpeg also receives the audio track.</li>
</ul>
</div>
</section>
</main>
</div>
<script>
/**
 * Switch the visible documentation section and sync the sidebar highlight.
 *
 * @param {string} id - The `id` attribute of the `.graph-section` element to reveal.
 *
 * Does nothing when no element with that id exists, so a mistyped id can
 * never blank the page by deactivating every section and then throwing.
 */
function show(id) {
  // Resolve the target first: clearing the `active` classes before the lookup
  // would leave the page empty (and raise a TypeError) on an unknown id.
  var target = document.getElementById(id);
  if (!target) return;

  document.querySelectorAll('.graph-section').forEach(function (section) {
    section.classList.remove('active');
  });
  document.querySelectorAll('nav a').forEach(function (link) {
    link.classList.remove('active');
  });
  target.classList.add('active');

  // Nav links have no id or href; they are matched by their inline onclick text.
  var navLink = document.querySelector('nav a[onclick="show(\'' + id + '\')"]');
  if (navLink) navLink.classList.add('active');

  // Collapse the slide-out nav (the `nav-open` state) once a section is chosen.
  document.querySelector('.layout').classList.remove('nav-open');
}
/**
 * Open or close the slide-out navigation by flipping the `nav-open`
 * class on the `.layout` wrapper.
 */
function toggleNav() {
  var layout = document.querySelector('.layout');
  layout.classList.toggle('nav-open');
}
</script>
</body>
</html>

View File

@@ -1,21 +1,18 @@
#!/bin/bash
# Re-render all Graphviz diagrams to SVG.
# Run this after each phase when .dot files are updated.
# Usage: ./docs.sh
# Run after editing any .dot file under docs/graphs/.
# Usage: ./render.sh
set -euo pipefail
DOCS_DIR="$(cd "$(dirname "$0")/../docs" && pwd)"
GRAPHS_DIR="$(cd "$(dirname "$0")/graphs" && pwd)"
if ! command -v dot &>/dev/null; then
echo "graphviz not found — install with: sudo apt install graphviz" >&2
exit 1
fi
for f in "$DOCS_DIR"/*.dot; do
for f in "$GRAPHS_DIR"/*.dot; do
svg="${f%.dot}.svg"
echo "==> $(basename "$f") → $(basename "$svg")"
dot -Tsvg "$f" -o "$svg"
done
echo "==> done. Serving at http://localhost:9099 (ctrl-c to stop)"
cd "$DOCS_DIR" && python3 -m http.server 9099

97
docs/viewer.html Normal file
View File

@@ -0,0 +1,97 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Graph Viewer</title>
<style>
* { margin: 0; padding: 0; }
body {
background: #1e1e2e;
overflow: hidden;
width: 100vw;
height: 100vh;
}
#container {
width: 100vw;
height: 100vh;
overflow: hidden;
cursor: grab;
}
#container.dragging { cursor: grabbing; }
img {
transform-origin: 0 0;
user-select: none;
-webkit-user-drag: none;
}
</style>
</head>
<body>
<div id="container">
<img id="img" />
</div>
<script>
// Image to display, taken from the `?src=` query parameter.
// URLSearchParams.get() returns null when the parameter is absent.
var src = new URLSearchParams(location.search).get('src');
var img = document.getElementById('img');
var container = document.getElementById('container');
// Only assign a real value: `img.src = null` would be stringified to "null"
// and trigger a pointless request for a file of that name.
if (src) img.src = src;
// Current view transform: zoom factor and pan offset in pixels.
var scale = 1;
var x = 0, y = 0;
// Drag state: whether a pan is in progress, plus the pointer position and
// pan offset captured when the drag started.
var dragging = false;
var startX, startY, startPanX, startPanY;
/**
 * Push the current pan offset (`x`, `y`) and zoom (`scale`) onto the image
 * as a CSS transform.
 */
function apply() {
  img.style.transform = `translate(${x}px,${y}px) scale(${scale})`;
}
// Fit-to-window: once the image has loaded, scale it to 95% of the largest
// size that fits the viewport and centre it.
img.onload = function() {
  var viewW = window.innerWidth;
  var viewH = window.innerHeight;
  var fitX = viewW / img.naturalWidth;
  var fitY = viewH / img.naturalHeight;
  scale = Math.min(fitX, fitY) * 0.95;
  // Split the leftover space evenly on both sides.
  x = (viewW - img.naturalWidth * scale) / 2;
  y = (viewH - img.naturalHeight * scale) / 2;
  apply();
};
// Wheel zoom, anchored at the cursor: the point under the pointer stays put
// while the image grows or shrinks around it.
container.addEventListener('wheel', function(e) {
  e.preventDefault();
  var zoom;
  if (e.deltaY < 0) {
    zoom = 1.12;
  } else {
    zoom = 0.89;
  }
  // Cursor position relative to the container's top-left corner.
  var bounds = container.getBoundingClientRect();
  var cursorX = e.clientX - bounds.left;
  var cursorY = e.clientY - bounds.top;
  x = cursorX - (cursorX - x) * zoom;
  y = cursorY - (cursorY - y) * zoom;
  scale *= zoom;
  apply();
}, { passive: false }); // non-passive so preventDefault() is honoured
// Drag-to-pan. A primary-button press on the container starts the drag and
// records where the pointer and the pan offset were at that moment.
container.addEventListener('mousedown', function(e) {
  if (e.button === 0) {
    dragging = true;
    startX = e.clientX;
    startY = e.clientY;
    startPanX = x;
    startPanY = y;
    container.classList.add('dragging');
    e.preventDefault();
  }
});
// Move and release are tracked on the window so the drag keeps working when
// the pointer leaves the container.
window.addEventListener('mousemove', function(e) {
  if (dragging) {
    var dx = e.clientX - startX;
    var dy = e.clientY - startY;
    x = startPanX + dx;
    y = startPanY + dy;
    apply();
  }
});
window.addEventListener('mouseup', function() {
  dragging = false;
  container.classList.remove('dragging');
});
// Double-click resets the view by re-running the image's load handler.
container.addEventListener('dblclick', () => {
  img.onload();
});
</script>
</body>
</html>

View File

@@ -1,193 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Media Transport — Architecture</title>
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
display: flex;
height: 100vh;
font-family: monospace;
background: #1e1e2e;
color: #cdd6f4;
}
nav {
width: 220px;
min-width: 220px;
background: #181825;
border-right: 1px solid #313244;
display: flex;
flex-direction: column;
padding: 1rem 0;
}
nav h1 {
font-size: 0.75rem;
text-transform: uppercase;
letter-spacing: 0.1em;
color: #6c7086;
padding: 0 1rem 0.75rem;
border-bottom: 1px solid #313244;
margin-bottom: 0.5rem;
}
nav a {
display: block;
padding: 0.5rem 1rem;
color: #cdd6f4;
text-decoration: none;
font-size: 0.85rem;
border-left: 3px solid transparent;
transition: background 0.1s, border-color 0.1s;
}
nav a:hover { background: #313244; }
nav a.active { border-left-color: #89b4fa; color: #89b4fa; background: #1e2d3e; }
nav .subtitle {
font-size: 0.7rem;
color: #6c7086;
padding: 0 1rem;
margin-top: 0.25rem;
}
nav .phase-badge {
font-size: 0.65rem;
color: #a6e3a1;
float: right;
}
nav .section {
font-size: 0.65rem;
text-transform: uppercase;
letter-spacing: 0.08em;
color: #6c7086;
padding: 1rem 1rem 0.25rem;
}
main {
flex: 1;
display: flex;
flex-direction: column;
overflow: hidden;
}
header {
padding: 0.75rem 1.25rem;
background: #181825;
border-bottom: 1px solid #313244;
display: flex;
align-items: baseline;
gap: 0.75rem;
}
header h2 { font-size: 0.95rem; }
header .desc { font-size: 0.75rem; color: #6c7086; }
.viewer {
flex: 1;
overflow: auto;
padding: 1.5rem;
display: flex;
align-items: flex-start;
justify-content: center;
background: #1e1e2e;
}
.viewer object,
.viewer img {
max-width: 100%;
border-radius: 6px;
box-shadow: 0 4px 24px rgba(0,0,0,0.5);
}
.placeholder {
color: #6c7086;
font-size: 0.85rem;
margin-top: 4rem;
}
</style>
</head>
<body>
<nav>
<h1>Media Transport</h1>
<div class="section">Workspace</div>
<a href="#" data-svg="crates.svg" data-title="Crate Dependency Graph" data-desc="Workspace members and external deps">
Crate graph
</a>
<div class="section">Client (sender)</div>
<a href="#" data-svg="client-pipeline.svg" data-title="Client Pipeline" data-desc="KMS capture + PulseAudio → VAAPI H.264 + AAC → TCP transport">
Pipeline
</a>
<div class="section">Server (receiver)</div>
<a href="#" data-svg="server-pipeline.svg" data-title="Server Pipeline" data-desc="fMP4 recording, UDP live relay, scene detection (UDS → Python), audio extraction">
Pipeline
</a>
<div class="section">Status</div>
<a href="#" data-svg="" data-title="Current State (2026-04-10)" data-desc="Architecture status and known regressions"
onclick="event.preventDefault(); document.querySelectorAll('nav a').forEach(l=>l.classList.remove('active')); this.classList.add('active'); document.getElementById('title').textContent=this.dataset.title; document.getElementById('desc').textContent=this.dataset.desc; document.getElementById('viewer').innerHTML=document.getElementById('status-content').innerHTML; return false;">
State &amp; regressions
</a>
</nav>
<template id="status-content">
<div style="max-width:720px; font-size:0.85rem; line-height:1.6; color:#cdd6f4">
<h3 style="color:#f38ba8; margin-bottom:0.5rem">Scene detection regressed</h3>
<p>In the Python-only pipeline, scene detection was a branch of the <b>same ffmpeg process</b> that records (fMP4 + UDP relay + CUDA decode + select filter). The flush trick worked because all outputs shared one decoder.</p>
<p style="margin-top:0.5rem">After Rust took over transport, scene detection became a <b>separate ffmpeg</b> fed via <code>scene.sock</code> Unix socket relay. Different buffering semantics broke the "one behind" flush fix, and <code>try_send</code> drops cause decoder corruption until the next keyframe.</p>
<h3 style="color:#a6e3a1; margin:1rem 0 0.5rem">Working fallback</h3>
<p>The Python-only path (<code>StreamRecorder</code> + <code>SessionProcessor</code>) still exists. <code>lifecycle.start(rust_transport=False)</code> bypasses Rust transport entirely. Plan: restore this as the default, keep Rust opt-in.</p>
<h3 style="color:#89b4fa; margin:1rem 0 0.5rem">What Rust transport got right</h3>
<ul style="padding-left:1.2rem">
<li>Connect time: 20s → 3s</li>
<li>Session reload: 1-2s</li>
<li>Custom framed protocol with reconnection support</li>
<li>Clean fMP4 recording + UDP live relay</li>
</ul>
<h3 style="color:#cba6f7; margin:1rem 0 0.5rem">Next: scene detection back into server ffmpeg</h3>
<p>Add scene detection as a third output of the Rust server's ffmpeg command (decode + select filter + MJPEG pipe) instead of relaying raw H.264 to a separate process. See <code>def/10-scene-detect-to-rust.md</code>.</p>
</div>
</template>
<main>
<header>
<h2 id="title">Select a diagram</h2>
<span class="desc" id="desc"></span>
</header>
<div class="viewer" id="viewer">
<p class="placeholder">← pick a diagram from the sidebar</p>
</div>
</main>
<script>
// Sidebar navigation: clicking a link loads its diagram into the viewer pane
// and updates the header title and description from the link's data-* attributes.
const viewer = document.getElementById('viewer');
const titleEl = document.getElementById('title');
const descEl = document.getElementById('desc');
document.querySelectorAll('nav a').forEach(link => {
  link.addEventListener('click', e => {
    e.preventDefault();
    // Links with an empty data-svg render their own content from an inline
    // onclick handler. That handler runs before this listener, so bail out
    // here rather than overwrite its output with an empty <object>.
    if (!link.dataset.svg) return;
    document.querySelectorAll('nav a').forEach(l => l.classList.remove('active'));
    link.classList.add('active');
    titleEl.textContent = link.dataset.title;
    descEl.textContent = link.dataset.desc;
    // Use <object> so SVG internal text/links work
    viewer.innerHTML = `<object type="image/svg+xml" data="${link.dataset.svg}"></object>`;
  });
});
// Auto-select first
document.querySelector('nav a').click();
</script>
</body>
</html>

View File

@@ -1,286 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 14.1.2 (0)
-->
<!-- Title: server_pipeline Pages: 1 -->
<svg width="1677pt" height="1243pt"
viewBox="0.00 0.00 1677.00 1243.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(43.2 1200.16)">
<title>server_pipeline</title>
<polygon fill="#1e1e2e" stroke="none" points="-43.2,43.2 -43.2,-1200.16 1633.33,-1200.16 1633.33,43.2 -43.2,43.2"/>
<g id="clust1" class="cluster">
<title>cluster_rust</title>
<polygon fill="#1e1e2e" stroke="#a6e3a1" points="284.12,-622.34 284.12,-1010.15 1314.12,-1010.15 1314.12,-622.34 284.12,-622.34"/>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-992.85" font-family="monospace" font-size="14.00" fill="#a6e3a1">cht&#45;server (Rust)</text>
</g>
<g id="clust2" class="cluster">
<title>cluster_python</title>
<polygon fill="#1e1e2e" stroke="#cba6f7" points="754.12,-114.28 754.12,-310.81 1582.12,-310.81 1582.12,-114.28 754.12,-114.28"/>
<text xml:space="preserve" text-anchor="middle" x="1168.12" y="-293.51" font-family="monospace" font-size="14.00" fill="#cba6f7">Python (cht app)</text>
</g>
<!-- net -->
<g id="node1" class="node">
<title>net</title>
<polygon fill="#1e2a3e" stroke="#89b4fa" points="915.09,-1156.96 692.44,-1156.96 635.16,-1053.4 857.81,-1053.4 915.09,-1156.96"/>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-1109.13" font-family="monospace" font-size="14.00" fill="#cdd6f4">TCP :4447</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-1091.88" font-family="monospace" font-size="14.00" fill="#cdd6f4">(WirePacket)</text>
</g>
<!-- listener -->
<g id="node3" class="node">
<title>listener</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="916.88,-976.9 633.38,-976.9 633.38,-821.62 916.88,-821.62 916.88,-976.9"/>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-954.96" font-family="monospace" font-size="14.00" fill="#cdd6f4">Listener</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-937.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-920.46" font-family="monospace" font-size="14.00" fill="#cdd6f4">TCP accept</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-903.21" font-family="monospace" font-size="14.00" fill="#cdd6f4">reads WirePacket</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-885.96" font-family="monospace" font-size="14.00" fill="#cdd6f4">routes by type:</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-868.71" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;Video → ffmpeg + scene relay</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-851.46" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;Audio → ADTS file</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-834.21" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;Control → session lifecycle</text>
</g>
<!-- net&#45;&gt;listener -->
<g id="edge1" class="edge">
<title>net&#45;&gt;listener</title>
<path fill="none" stroke="#585b70" d="M775.12,-1052.99C775.12,-1033.5 775.12,-1010.68 775.12,-988.72"/>
<polygon fill="#585b70" stroke="#585b70" points="778.63,-988.85 775.13,-978.85 771.63,-988.85 778.63,-988.85"/>
<text xml:space="preserve" text-anchor="middle" x="816.38" y="-1022.1" font-family="monospace" font-size="14.00" fill="#a6adc8">WirePacket</text>
</g>
<!-- python -->
<g id="node2" class="node">
<title>python</title>
<polygon fill="#2a2a3e" stroke="#cba6f7" points="1361.98,-508.73 1166.55,-508.73 1116.27,-405.17 1311.7,-405.17 1361.98,-508.73"/>
<text xml:space="preserve" text-anchor="middle" x="1239.12" y="-460.9" font-family="monospace" font-size="14.00" fill="#cdd6f4">Python GUI</text>
<text xml:space="preserve" text-anchor="middle" x="1239.12" y="-443.65" font-family="monospace" font-size="14.00" fill="#cdd6f4">(cht app)</text>
</g>
<!-- ffmpeg_rec -->
<g id="node4" class="node">
<title>ffmpeg_rec</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="534.25,-742.49 292,-742.49 292,-638.96 534.25,-638.96 534.25,-742.49"/>
<text xml:space="preserve" text-anchor="middle" x="413.12" y="-720.55" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg subprocess</text>
<text xml:space="preserve" text-anchor="middle" x="413.12" y="-703.3" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="413.12" y="-686.05" font-family="monospace" font-size="14.00" fill="#cdd6f4">H.264 pipe:0 → 2 outputs:</text>
<text xml:space="preserve" text-anchor="middle" x="413.12" y="-668.8" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;1. fMP4 (frag_keyframe)</text>
<text xml:space="preserve" text-anchor="middle" x="413.12" y="-651.55" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;2. UDP :4445 (mpegts)</text>
</g>
<!-- listener&#45;&gt;ffmpeg_rec -->
<g id="edge2" class="edge">
<title>listener&#45;&gt;ffmpeg_rec</title>
<path fill="none" stroke="#585b70" d="M640.07,-821.21C597.84,-797.12 552.04,-770.98 512.71,-748.55"/>
<polygon fill="#585b70" stroke="#585b70" points="514.59,-745.59 504.17,-743.67 511.12,-751.67 514.59,-745.59"/>
<text xml:space="preserve" text-anchor="middle" x="650.35" y="-781.69" font-family="monospace" font-size="14.00" fill="#a6adc8">H.264 video</text>
</g>
<!-- scene_relay -->
<g id="node5" class="node">
<title>scene_relay</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="802.38,-751.12 551.88,-751.12 551.88,-630.34 802.38,-630.34 802.38,-751.12"/>
<text xml:space="preserve" text-anchor="middle" x="677.12" y="-729.18" font-family="monospace" font-size="14.00" fill="#cdd6f4">Scene Relay</text>
<text xml:space="preserve" text-anchor="middle" x="677.12" y="-711.93" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="677.12" y="-694.68" font-family="monospace" font-size="14.00" fill="#cdd6f4">Unix socket (scene.sock)</text>
<text xml:space="preserve" text-anchor="middle" x="677.12" y="-677.43" font-family="monospace" font-size="14.00" fill="#cdd6f4">buffers latest keyframe</text>
<text xml:space="preserve" text-anchor="middle" x="677.12" y="-660.18" font-family="monospace" font-size="14.00" fill="#cdd6f4">best&#45;effort: drops if slow</text>
<text xml:space="preserve" text-anchor="middle" x="677.12" y="-642.93" font-family="monospace" font-size="14.00" fill="#cdd6f4">100ms write timeout</text>
</g>
<!-- listener&#45;&gt;scene_relay -->
<g id="edge3" class="edge">
<title>listener&#45;&gt;scene_relay</title>
<path fill="none" stroke="#585b70" d="M738.63,-821.36C729.36,-801.82 719.45,-780.93 710.35,-761.75"/>
<polygon fill="#585b70" stroke="#585b70" points="713.63,-760.49 706.18,-752.96 707.3,-763.5 713.63,-760.49"/>
<text xml:space="preserve" text-anchor="middle" x="790.94" y="-790.32" font-family="monospace" font-size="14.00" fill="#a6adc8">H.264 copy</text>
<text xml:space="preserve" text-anchor="middle" x="790.94" y="-773.07" font-family="monospace" font-size="14.00" fill="#a6adc8">+ keyframe flag</text>
</g>
<!-- audio_writer -->
<g id="node6" class="node">
<title>audio_writer</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="1029.75,-733.87 820.5,-733.87 820.5,-647.59 1029.75,-647.59 1029.75,-733.87"/>
<text xml:space="preserve" text-anchor="middle" x="925.12" y="-711.93" font-family="monospace" font-size="14.00" fill="#cdd6f4">Audio Writer</text>
<text xml:space="preserve" text-anchor="middle" x="925.12" y="-694.68" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="925.12" y="-677.43" font-family="monospace" font-size="14.00" fill="#cdd6f4">ADTS header + raw AAC</text>
<text xml:space="preserve" text-anchor="middle" x="925.12" y="-660.18" font-family="monospace" font-size="14.00" fill="#cdd6f4">→ stream/audio.aac</text>
</g>
<!-- listener&#45;&gt;audio_writer -->
<g id="edge4" class="edge">
<title>listener&#45;&gt;audio_writer</title>
<path fill="none" stroke="#585b70" d="M841.7,-821.42C850.82,-810.9 857.12,-803.62 857.12,-803.62 857.12,-803.62 875.59,-773.23 893.28,-744.13"/>
<polygon fill="#585b70" stroke="#585b70" points="896.23,-746.01 898.44,-735.64 890.25,-742.37 896.23,-746.01"/>
<text xml:space="preserve" text-anchor="middle" x="914.34" y="-781.69" font-family="monospace" font-size="14.00" fill="#a6adc8">AAC audio</text>
</g>
<!-- active_session -->
<g id="node7" class="node">
<title>active_session</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="1300.5,-742.49 1047.75,-742.49 1047.75,-638.96 1306.5,-638.96 1306.5,-736.49 1300.5,-742.49"/>
<polyline fill="none" stroke="#585b70" points="1300.5,-742.49 1300.5,-736.49"/>
<polyline fill="none" stroke="#585b70" points="1306.5,-736.49 1300.5,-736.49"/>
<text xml:space="preserve" text-anchor="middle" x="1177.12" y="-720.55" font-family="monospace" font-size="14.00" fill="#cdd6f4">active&#45;session</text>
<text xml:space="preserve" text-anchor="middle" x="1177.12" y="-703.3" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1177.12" y="-686.05" font-family="monospace" font-size="14.00" fill="#cdd6f4">file at data/active&#45;session</text>
<text xml:space="preserve" text-anchor="middle" x="1177.12" y="-668.8" font-family="monospace" font-size="14.00" fill="#cdd6f4">Python polls to discover</text>
<text xml:space="preserve" text-anchor="middle" x="1177.12" y="-651.55" font-family="monospace" font-size="14.00" fill="#cdd6f4">session dir</text>
</g>
<!-- listener&#45;&gt;active_session -->
<g id="edge5" class="edge">
<title>listener&#45;&gt;active_session</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M917.14,-825.3C966.5,-799.94 1021.08,-771.9 1067.43,-748.09"/>
<polygon fill="#585b70" stroke="#585b70" points="1068.85,-751.29 1076.14,-743.61 1065.65,-745.07 1068.85,-751.29"/>
<text xml:space="preserve" text-anchor="middle" x="1085.83" y="-781.69" font-family="monospace" font-size="14.00" fill="#a6adc8">on SessionStart</text>
</g>
<!-- fmp4 -->
<g id="node11" class="node">
<title>fmp4</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="176.25,-491.46 173.25,-495.46 152.25,-495.46 149.25,-491.46 0,-491.46 0,-422.44 176.25,-422.44 176.25,-491.46"/>
<text xml:space="preserve" text-anchor="middle" x="88.12" y="-469.52" font-family="monospace" font-size="14.00" fill="#cdd6f4">stream/</text>
<text xml:space="preserve" text-anchor="middle" x="88.12" y="-452.27" font-family="monospace" font-size="14.00" fill="#cdd6f4">recording_000.mp4</text>
<text xml:space="preserve" text-anchor="middle" x="88.12" y="-435.02" font-family="monospace" font-size="14.00" fill="#cdd6f4">(fragmented MP4)</text>
</g>
<!-- ffmpeg_rec&#45;&gt;fmp4 -->
<g id="edge7" class="edge">
<title>ffmpeg_rec&#45;&gt;fmp4</title>
<path fill="none" stroke="#585b70" d="M323.34,-638.58C259.54,-602.23 185.12,-559.84 185.12,-559.84 185.12,-559.84 154.9,-528.09 128.2,-500.04"/>
<polygon fill="#585b70" stroke="#585b70" points="130.9,-497.8 121.47,-492.97 125.83,-502.63 130.9,-497.8"/>
<text xml:space="preserve" text-anchor="middle" x="288.5" y="-590.41" font-family="monospace" font-size="14.00" fill="#a6adc8">copy</text>
</g>
<!-- udp_live -->
<g id="node12" class="node">
<title>udp_live</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="508.19,-508.73 258.33,-508.73 194.06,-405.17 443.92,-405.17 508.19,-508.73"/>
<text xml:space="preserve" text-anchor="middle" x="351.12" y="-460.9" font-family="monospace" font-size="14.00" fill="#cdd6f4">UDP :4445</text>
<text xml:space="preserve" text-anchor="middle" x="351.12" y="-443.65" font-family="monospace" font-size="14.00" fill="#cdd6f4">(mpegts → mpv)</text>
</g>
<!-- ffmpeg_rec&#45;&gt;udp_live -->
<g id="edge8" class="edge">
<title>ffmpeg_rec&#45;&gt;udp_live</title>
<path fill="none" stroke="#585b70" d="M399.44,-638.58C390.06,-603.51 377.57,-556.8 367.63,-519.67"/>
<polygon fill="#585b70" stroke="#585b70" points="371.13,-519.19 365.16,-510.43 364.37,-521 371.13,-519.19"/>
<text xml:space="preserve" text-anchor="middle" x="407.8" y="-590.41" font-family="monospace" font-size="14.00" fill="#a6adc8">copy</text>
</g>
<!-- scene_ffmpeg -->
<g id="node8" class="node">
<title>scene_ffmpeg</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="1004.25,-277.56 762,-277.56 762,-122.28 1004.25,-122.28 1004.25,-277.56"/>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-255.62" font-family="monospace" font-size="14.00" fill="#cdd6f4">Scene Detector</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-238.37" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-221.12" font-family="monospace" font-size="14.00" fill="#cdd6f4">connects to scene.sock</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-203.87" font-family="monospace" font-size="14.00" fill="#cdd6f4">pipes H.264 → ffmpeg:</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-186.62" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;CUDA decode</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-169.37" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;select=gt(scene,thresh)</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-152.12" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;showinfo → timestamps</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-134.87" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;MJPEG → JPEG frames</text>
</g>
<!-- scene_relay&#45;&gt;scene_ffmpeg -->
<g id="edge6" class="edge">
<title>scene_relay&#45;&gt;scene_ffmpeg</title>
<path fill="none" stroke="#a6e3a1" d="M620.55,-630.11C588.15,-595.96 553.88,-559.84 553.88,-559.84 553.88,-559.84 553.88,-354.06 553.88,-354.06 553.88,-354.06 659.95,-304.72 751.68,-262.06"/>
<polygon fill="#a6e3a1" stroke="#a6e3a1" points="752.96,-265.32 760.55,-257.93 750.01,-258.97 752.96,-265.32"/>
<text xml:space="preserve" text-anchor="middle" x="607.5" y="-460.9" font-family="monospace" font-size="14.00" fill="#a6adc8">raw H.264</text>
<text xml:space="preserve" text-anchor="middle" x="607.5" y="-443.65" font-family="monospace" font-size="14.00" fill="#a6adc8">(Unix socket)</text>
</g>
<!-- regression -->
<g id="node16" class="node">
<title>regression</title>
<polygon fill="#3d1e1e" stroke="#f38ba8" points="922.5,-559.84 669.75,-559.84 669.75,-354.06 928.5,-354.06 928.5,-553.84 922.5,-559.84"/>
<polyline fill="none" stroke="#f38ba8" points="922.5,-559.84 922.5,-553.84"/>
<polyline fill="none" stroke="#f38ba8" points="928.5,-553.84 922.5,-553.84"/>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-537.9" font-family="monospace" font-size="14.00" fill="#f38ba8">⚠ REGRESSED</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-520.65" font-family="monospace" font-size="14.00" fill="#f38ba8">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-503.4" font-family="monospace" font-size="14.00" fill="#f38ba8">Scene relay (separate pipe)</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-486.15" font-family="monospace" font-size="14.00" fill="#f38ba8">breaks &#39;one behind&#39; flush.</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-468.9" font-family="monospace" font-size="14.00" fill="#f38ba8">try_send drops → decoder</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-451.65" font-family="monospace" font-size="14.00" fill="#f38ba8">corruption until keyframe.</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-418.4" font-family="monospace" font-size="14.00" fill="#f38ba8">Fix: move scene detection</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-401.15" font-family="monospace" font-size="14.00" fill="#f38ba8">into server ffmpeg as 3rd</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-383.9" font-family="monospace" font-size="14.00" fill="#f38ba8">output branch (10&#45;scene&#45;</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-366.65" font-family="monospace" font-size="14.00" fill="#f38ba8">detect&#45;to&#45;rust.md)</text>
</g>
<!-- scene_relay&#45;&gt;regression -->
<g id="edge15" class="edge">
<title>scene_relay&#45;&gt;regression</title>
<path fill="none" stroke="#f38ba8" stroke-dasharray="5,2" d="M708.54,-630.04C718.19,-611.71 729.14,-590.91 740.01,-570.26"/>
<polygon fill="#f38ba8" stroke="#f38ba8" points="743.07,-571.97 744.63,-561.49 736.87,-568.71 743.07,-571.97"/>
</g>
<!-- aac_file -->
<g id="node13" class="node">
<title>aac_file</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="1097.88,-491.46 1094.88,-495.46 1073.88,-495.46 1070.88,-491.46 946.38,-491.46 946.38,-422.44 1097.88,-422.44 1097.88,-491.46"/>
<text xml:space="preserve" text-anchor="middle" x="1022.12" y="-469.52" font-family="monospace" font-size="14.00" fill="#cdd6f4">stream/</text>
<text xml:space="preserve" text-anchor="middle" x="1022.12" y="-452.27" font-family="monospace" font-size="14.00" fill="#cdd6f4">audio.aac</text>
<text xml:space="preserve" text-anchor="middle" x="1022.12" y="-435.02" font-family="monospace" font-size="14.00" fill="#cdd6f4">(ADTS&#45;wrapped)</text>
</g>
<!-- audio_writer&#45;&gt;aac_file -->
<g id="edge9" class="edge">
<title>audio_writer&#45;&gt;aac_file</title>
<path fill="none" stroke="#585b70" d="M942.92,-647.22C960.12,-606.1 986.09,-544.05 1003.56,-502.32"/>
<polygon fill="#585b70" stroke="#585b70" points="1006.7,-503.88 1007.33,-493.3 1000.24,-501.18 1006.7,-503.88"/>
</g>
<!-- active_session&#45;&gt;python -->
<g id="edge14" class="edge">
<title>active_session&#45;&gt;python</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M1190.81,-638.58C1200.19,-603.51 1212.68,-556.8 1222.62,-519.67"/>
<polygon fill="#585b70" stroke="#585b70" points="1225.88,-521 1225.09,-510.43 1219.12,-519.19 1225.88,-521"/>
<text xml:space="preserve" text-anchor="middle" x="1251.64" y="-599.04" font-family="monospace" font-size="14.00" fill="#a6adc8">discovers</text>
<text xml:space="preserve" text-anchor="middle" x="1251.64" y="-581.79" font-family="monospace" font-size="14.00" fill="#a6adc8">session dir</text>
</g>
<!-- frames -->
<g id="node14" class="node">
<title>frames</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="975.38,-51.78 972.38,-55.78 951.38,-55.78 948.38,-51.78 790.88,-51.78 790.88,0 975.38,0 975.38,-51.78"/>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-29.84" font-family="monospace" font-size="14.00" fill="#cdd6f4">frames/</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-12.59" font-family="monospace" font-size="14.00" fill="#cdd6f4">index.json + *.jpg</text>
</g>
<!-- scene_ffmpeg&#45;&gt;frames -->
<g id="edge10" class="edge">
<title>scene_ffmpeg&#45;&gt;frames</title>
<path fill="none" stroke="#585b70" d="M883.12,-121.96C883.12,-101.46 883.12,-80.27 883.12,-63.11"/>
<polygon fill="#585b70" stroke="#585b70" points="886.63,-63.45 883.13,-53.45 879.63,-63.45 886.63,-63.45"/>
<text xml:space="preserve" text-anchor="middle" x="932.62" y="-90.98" font-family="monospace" font-size="14.00" fill="#a6adc8">JPEG on</text>
<text xml:space="preserve" text-anchor="middle" x="932.62" y="-73.73" font-family="monospace" font-size="14.00" fill="#a6adc8">scene change</text>
</g>
<!-- audio_extract -->
<g id="node9" class="node">
<title>audio_extract</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="1256.12,-251.69 1022.12,-251.69 1022.12,-148.16 1256.12,-148.16 1256.12,-251.69"/>
<text xml:space="preserve" text-anchor="middle" x="1139.12" y="-229.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">Audio Extractor</text>
<text xml:space="preserve" text-anchor="middle" x="1139.12" y="-212.5" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1139.12" y="-195.25" font-family="monospace" font-size="14.00" fill="#cdd6f4">reads audio.aac</text>
<text xml:space="preserve" text-anchor="middle" x="1139.12" y="-178" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg → 16kHz mono WAV</text>
<text xml:space="preserve" text-anchor="middle" x="1139.12" y="-160.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">chunks + transcript WAVs</text>
</g>
<!-- audio_dir -->
<g id="node15" class="node">
<title>audio_dir</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="1548.12,-491.46 1545.12,-495.46 1524.12,-495.46 1521.12,-491.46 1380.12,-491.46 1380.12,-422.44 1548.12,-422.44 1548.12,-491.46"/>
<text xml:space="preserve" text-anchor="middle" x="1464.12" y="-469.52" font-family="monospace" font-size="14.00" fill="#cdd6f4">audio/</text>
<text xml:space="preserve" text-anchor="middle" x="1464.12" y="-452.27" font-family="monospace" font-size="14.00" fill="#cdd6f4">chunk_*.wav</text>
<text xml:space="preserve" text-anchor="middle" x="1464.12" y="-435.02" font-family="monospace" font-size="14.00" fill="#cdd6f4">transcript_*.wav</text>
</g>
<!-- audio_extract&#45;&gt;audio_dir -->
<g id="edge11" class="edge">
<title>audio_extract&#45;&gt;audio_dir</title>
<path fill="none" stroke="#585b70" d="M1197.7,-252C1230.6,-280.7 1265.12,-310.81 1265.12,-310.81 1265.12,-310.81 1371.12,-354.06 1371.12,-354.06 1371.12,-354.06 1400.1,-385.81 1425.7,-413.86"/>
<polygon fill="#585b70" stroke="#585b70" points="1422.81,-415.88 1432.14,-420.9 1427.98,-411.16 1422.81,-415.88"/>
</g>
<!-- transcriber -->
<g id="node10" class="node">
<title>transcriber</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="1574.12,-251.69 1274.12,-251.69 1274.12,-148.16 1574.12,-148.16 1574.12,-251.69"/>
<text xml:space="preserve" text-anchor="middle" x="1424.12" y="-229.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">Transcriber</text>
<text xml:space="preserve" text-anchor="middle" x="1424.12" y="-212.5" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1424.12" y="-195.25" font-family="monospace" font-size="14.00" fill="#cdd6f4">faster&#45;whisper (CUDA)</text>
<text xml:space="preserve" text-anchor="middle" x="1424.12" y="-178" font-family="monospace" font-size="14.00" fill="#cdd6f4">segment grouping</text>
<text xml:space="preserve" text-anchor="middle" x="1424.12" y="-160.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">slider: chunk size + lines/group</text>
</g>
<!-- aac_file&#45;&gt;audio_extract -->
<g id="edge13" class="edge">
<title>aac_file&#45;&gt;audio_extract</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M1037.66,-422.08C1056.29,-381.48 1087.87,-312.64 1110.86,-262.52"/>
<polygon fill="#585b70" stroke="#585b70" points="1114.01,-264.06 1115,-253.51 1107.65,-261.14 1114.01,-264.06"/>
<text xml:space="preserve" text-anchor="middle" x="1104.76" y="-322.76" font-family="monospace" font-size="14.00" fill="#a6adc8">reads</text>
</g>
<!-- audio_dir&#45;&gt;transcriber -->
<g id="edge12" class="edge">
<title>audio_dir&#45;&gt;transcriber</title>
<path fill="none" stroke="#585b70" d="M1458.81,-422.08C1452.48,-381.73 1441.78,-313.5 1433.94,-263.47"/>
<polygon fill="#585b70" stroke="#585b70" points="1437.4,-262.96 1432.39,-253.63 1430.48,-264.05 1437.4,-262.96"/>
<text xml:space="preserve" text-anchor="middle" x="1485.38" y="-322.76" font-family="monospace" font-size="14.00" fill="#a6adc8">WAV chunks</text>
</g>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 24 KiB