diff --git a/media/docs/client-pipeline.dot b/media/docs/client-pipeline.dot index d38f688..b93ea6d 100644 --- a/media/docs/client-pipeline.dot +++ b/media/docs/client-pipeline.dot @@ -1,4 +1,4 @@ -// Client pipeline data flow — Phase 2 +// Client pipeline data flow // Sender machine (Wayland, VAAPI GPU) digraph client_pipeline { graph [fontname="monospace" bgcolor="#1e1e2e" rankdir=TB pad="0.6" splines=polyline] @@ -9,43 +9,58 @@ digraph client_pipeline { // Hardware drm [label="/dev/dri/card0\n(KMS scanout)" shape=cylinder fillcolor="#1e3a2f" color="#a6e3a1"] vaapi [label="/dev/dri/renderD128\n(VAAPI)" shape=cylinder fillcolor="#1e3a2f" color="#a6e3a1"] - net [label="TCP :4444\nmcrndeb" shape=parallelogram fillcolor="#1e2a3e" color="#89b4fa"] + pulse [label="PulseAudio\n─────────────\nmonitor: default_sink.monitor\nmic: default-source" shape=cylinder fillcolor="#1e3a2f" color="#a6e3a1"] + net [label="TCP :4447\nmcrndeb" shape=parallelogram fillcolor="#1e2a3e" color="#89b4fa"] - // Thread boundary subgraph cluster_main { - label="main thread (tokio async)" fontcolor="#a6adc8" color="#45475a" fontname="monospace" + label="main thread (tokio async)" fontcolor="#a6adc8" color="#45475a" fontname="monospace" - session_start [label="session_start\ncontrol message" fillcolor="#2d2038" color="#cba6f7"] - mux [label="select!\npkt_rx | keepalive | ctrl-c" fillcolor="#2d2038" color="#cba6f7"] - keepalive [label="keepalive / 5s" fillcolor="#2d2038" color="#cba6f7"] + wait_server [label="wait_for_server\n─────────────\nretry connect / 2s\nCtrl-C to cancel" fillcolor="#2d2038" color="#cba6f7"] + session_start [label="session_start\n─────────────\nid: YYYYMMDD_HHMMSS\nvideo + audio params" fillcolor="#2d2038" color="#cba6f7"] + mux [label="select!\npkt_rx | keepalive | ctrl-c" fillcolor="#2d2038" color="#cba6f7"] write [label="BufWriter\nwrite_packet()" fillcolor="#1e2d3e" color="#89b4fa"] + shutdown [label="Shutdown\n─────────────\npipeline.stop() (5s timeout)\nSessionStop (2s timeout)\nsingle Ctrl-C" fillcolor="#2d2038" color="#cba6f7"] } - subgraph cluster_pipeline { - label="capture-pipeline thread (blocking)" fontcolor="#a6adc8" color="#45475a" fontname="monospace" + subgraph cluster_subprocess { + label="Subprocess backend (default)" fontcolor="#a6adc8" color="#45475a" fontname="monospace" - capture [label="KmsCapture\n─────────────────\nffmpeg kmsgrab device\ndecoder: passthrough\noutput: DRM_PRIME frames\n+ hw_frames_ctx (DRM device)" - fillcolor="#1e2d3e" color="#89b4fa"] + ffmpeg_cli [label="ffmpeg subprocess\n─────────────\nkmsgrab → VAAPI h264\n+ PulseAudio inputs:\n amix(monitor, mic)\noutput: NUT pipe" fillcolor="#1e2d3e" color="#89b4fa"] - encoder [label="VaapiEncoder\n─────────────────\n[lazy init on frame 1]\nbuffersrc ← hw_frames_ctx\nhwmap derive_device=vaapi\nscale_vaapi NV12 1920×1080\nh264_vaapi QP=20 GOP=30" - fillcolor="#1e2d3e" color="#89b4fa"] + demux [label="NUT Demuxer\n─────────────\nffmpeg-next in-process\nfinds video + audio streams\nsends EncodedPacket\n { data, pts, media_type }" fillcolor="#1e2d3e" color="#89b4fa"] - chan [label="mpsc::channel(64)\nEncodedPacket" shape=parallelogram fillcolor="#2d2038" color="#cba6f7"] + chan [label="mpsc::channel(64)\nEncodedPacket" shape=parallelogram fillcolor="#2d2038" color="#cba6f7"] } - // Flow - drm -> capture [label="DMA-BUF\n(zero copy)"] - vaapi -> encoder [label="hw device\n(derived)" style=dashed color="#a6e3a1"] - capture -> encoder [label="AVFrame\nDRM_PRIME"] - encoder -> chan [label="EncodedPacket\n{ data, pts, keyframe, … }"] - chan -> mux + subgraph cluster_direct { + label="VaapiDirect backend (experimental)" fontcolor="#6c7086" color="#45475a" fontname="monospace" style=dashed + + capture [label="KmsCapture\n─────────────\nDRM_PRIME frames" fillcolor="#2d1e1e" color="#f38ba8"] + encoder [label="VaapiEncoder\n─────────────\nhwmap → scale_vaapi\nh264_vaapi QP=20" fillcolor="#2d1e1e" color="#f38ba8"] + } + + // Flow — subprocess + drm -> ffmpeg_cli [label="kmsgrab"] + vaapi -> ffmpeg_cli [label="h264_vaapi"] + pulse -> ffmpeg_cli [label="-f pulse\nmonitor + mic"] + ffmpeg_cli -> demux [label="NUT pipe\n(stdout)"] + demux -> chan [label="EncodedPacket\n(Video or Audio)"] + + // Flow — direct (dashed, experimental) + drm -> capture [style=dashed] + vaapi -> encoder [style=dashed] + capture -> encoder [style=dashed label="DRM_PRIME"] + encoder -> chan [style=dashed] + + // Flow — main + chan -> mux + wait_server -> session_start session_start -> write - mux -> write [label="WirePacket"] - mux -> keepalive [style=dashed] - keepalive -> write - write -> net + mux -> write [label="WirePacket\nVideo | Audio"] + write -> net + mux -> shutdown [label="Ctrl-C or\nchannel closed"] // Types note - types [label="EncodedPacket\n─────────────\ndata: Vec\ (H.264 NALUs)\npts / dts: i64\nkeyframe: bool\ntime_base: num/den" + types [label="WirePacket types\n─────────────\nVideo: H.264 NALUs + keyframe flag\nAudio: AAC frames\nControl: SessionStart/Stop/Keepalive" shape=note fillcolor="#2a2a3e" color="#585b70"] } diff --git a/media/docs/client-pipeline.svg b/media/docs/client-pipeline.svg index a1f7014..067204d 100644 --- a/media/docs/client-pipeline.svg +++ b/media/docs/client-pipeline.svg @@ -4,182 +4,261 @@ - - + + client_pipeline - + cluster_main - -main thread  (tokio async) + +main thread (tokio async) -cluster_pipeline - -capture-pipeline thread  (blocking) +cluster_subprocess + +Subprocess backend (default) + + +cluster_direct + +VaapiDirect backend (experimental) drm - - -/dev/dri/card0 -(KMS scanout) + + +/dev/dri/card0 +(KMS scanout) + + + +ffmpeg_cli + +ffmpeg subprocess +───────────── +kmsgrab → VAAPI h264 ++ PulseAudio inputs: +  amix(monitor, mic) +output: NUT pipe + + + +drm->ffmpeg_cli + + +kmsgrab - + capture - -KmsCapture -───────────────── -ffmpeg kmsgrab device -decoder: passthrough -output: DRM_PRIME frames -+ hw_frames_ctx (DRM device) + +KmsCapture +───────────── +DRM_PRIME frames - + drm->capture - - -DMA-BUF -(zero copy) + + vaapi - - -/dev/dri/renderD128 -(VAAPI) + + +/dev/dri/renderD128 +(VAAPI) + + + +vaapi->ffmpeg_cli + + +h264_vaapi - + encoder - -VaapiEncoder -───────────────── -[lazy init on frame 1] -buffersrc ← hw_frames_ctx -hwmap derive_device=vaapi -scale_vaapi NV12 1920×1080 -h264_vaapi QP=20 GOP=30 + +VaapiEncoder +───────────── +hwmap → scale_vaapi +h264_vaapi QP=20 - + vaapi->encoder - - -hw device -(derived) + + + + + +pulse + + +PulseAudio +───────────── +monitor: default_sink.monitor +mic: default-source + + + +pulse->ffmpeg_cli + + +-f pulse +monitor + mic - + net - -TCP :4444 -mcrndeb + +TCP :4447 +mcrndeb + + + +wait_server + +wait_for_server +───────────── +retry connect / 2s +Ctrl-C to cancel - + session_start - -session_start -control message + +session_start +───────────── +id: YYYYMMDD_HHMMSS +video + audio params + + + +wait_server->session_start + + - + write - -BufWriter -write_packet() + +BufWriter +write_packet() - + session_start->write - - + + - + mux - -select! -pkt_rx  |  keepalive  |  ctrl-c - - - -keepalive - -keepalive / 5s - - - -mux->keepalive - - + +select! +pkt_rx | keepalive | ctrl-c - + mux->write - - -WirePacket + + +WirePacket +Video | Audio - - -keepalive->write - - + + +shutdown + +Shutdown +───────────── +pipeline.stop() (5s timeout) +SessionStop (2s timeout) +single Ctrl-C + + + +mux->shutdown + + +Ctrl-C or +channel closed - + write->net - - + + - - -capture->encoder - - -AVFrame -DRM_PRIME + + +demux + +NUT Demuxer +───────────── +ffmpeg-next in-process +finds video + audio streams +sends EncodedPacket +  { data, pts, media_type } + + + +ffmpeg_cli->demux + + +NUT pipe +(stdout) - + chan - -mpsc::channel(64) -EncodedPacket + +mpsc::channel(64) +EncodedPacket - - -encoder->chan - - -EncodedPacket -{ data, pts, keyframe, … } + + +demux->chan + + +EncodedPacket +(Video or Audio) - + chan->mux - - + + + + + +capture->encoder + + +DRM_PRIME + + + +encoder->chan + + - + types - - - -EncodedPacket -───────────── -data: Vec<u8>  (H.264 NALUs) -pts / dts: i64 -keyframe: bool -time_base: num/den + + + +WirePacket types +───────────── +Video:   H.264 NALUs + keyframe flag +Audio:   AAC frames +Control: SessionStart/Stop/Keepalive diff --git a/media/docs/crates.dot b/media/docs/crates.dot index cbb8e92..7ba3933 100644 --- a/media/docs/crates.dot +++ b/media/docs/crates.dot @@ -1,5 +1,4 @@ // Cargo workspace crate dependency graph -// Phase 2: client capture + encode implemented; server is a stub digraph crates { graph [fontname="monospace" bgcolor="#1e1e2e" pad="0.5"] node [fontname="monospace" fontcolor="#cdd6f4" style=filled shape=box @@ -7,21 +6,23 @@ digraph crates { edge [color="#585b70" fontname="monospace" fontcolor="#a6adc8"] // External - ffmpeg_next [label="ffmpeg-next 8\n(ffmpeg-sys-next)" shape=component fillcolor="#1e3a2f" color="#a6e3a1"] - tokio [label="tokio 1\n(async runtime)" shape=component fillcolor="#1e2a3e" color="#89b4fa"] + ffmpeg_next [label="ffmpeg-next 8\n(client: NUT demux)" shape=component fillcolor="#1e3a2f" color="#a6e3a1"] + tokio [label="tokio 1 (full)\n(async runtime)" shape=component fillcolor="#1e2a3e" color="#89b4fa"] serde [label="serde / serde_json" shape=component fillcolor="#2a2a3e" color="#cba6f7"] tracing [label="tracing\ntracing-subscriber" shape=component fillcolor="#2a2a3e" color="#cba6f7"] anyhow [label="anyhow" shape=component fillcolor="#2a2a3e" color="#cba6f7"] + libc_crate [label="libc 0.2" shape=component fillcolor="#2a2a3e" color="#cba6f7"] + nix_crate [label="nix 0.29\n(signal, process)" shape=component fillcolor="#2a2a3e" color="#cba6f7"] // Workspace crates - common [label="cht-common\n─────────────\nprotocol.rs (wire framing)\nframe.rs (Frame, AudioBuffer)\nlogging.rs" + common [label="cht-common\n─────────────\nprotocol.rs (WirePacket framing)\n PacketType: Video|Audio|Control\n ControlMessage: Start|Stop|...\nlogging.rs (tracing init)" fillcolor="#2d2038" color="#cba6f7"] - client [label="cht-client [sender, Wayland]\n─────────────────────────────\ncapture.rs KMS/DRM → DRM_PRIME frames\nencoder.rs VAAPI H.264 (lazy init)\npipeline.rs capture→encode thread\nmain.rs TCP transport + keepalive" + client [label="cht-client [sender, Wayland]\n─────────────────────────────\nbackends/subprocess.rs ffmpeg CLI + PulseAudio\n NUT demux → EncodedPacket\nbackends/mod.rs Backend enum\ncapture.rs KmsCapture (direct backend)\nencoder.rs VaapiEncoder + MediaType\npipeline.rs capture→encode thread\nmain.rs wait_for_server, transport,\n YYYYMMDD_HHMMSS session IDs" fillcolor="#1e2d3e" color="#89b4fa"] - server [label="cht-server [receiver, mcrn]\n─────────────────────────────\nmain.rs TCP listener (stub)\n counts packets, no decode yet" - fillcolor="#2d1e1e" color="#f38ba8"] + server [label="cht-server [receiver, mcrndeb]\n─────────────────────────────\nmain.rs TCP listener\n routes Video/Audio/Control\nsession.rs ffmpeg subprocess:\n fMP4 + UDP relay\n ADTS audio writer\n Scene relay (Unix socket)\n keyframe buffering" + fillcolor="#1e2d3e" color="#89b4fa"] // Deps client -> common @@ -36,6 +37,8 @@ digraph crates { client -> tokio client -> tracing client -> anyhow + client -> libc_crate + client -> nix_crate server -> tokio server -> tracing @@ -45,7 +48,6 @@ digraph crates { subgraph cluster_legend { label="Legend" fontcolor="#a6adc8" color="#585b70" fontname="monospace" l1 [label="implemented" fillcolor="#1e2d3e" color="#89b4fa" shape=box] - l2 [label="stub / planned" fillcolor="#2d1e1e" color="#f38ba8" shape=box] l3 [label="external crate" fillcolor="#1e3a2f" color="#a6e3a1" shape=component] } } diff --git a/media/docs/crates.svg b/media/docs/crates.svg index d2188af..e92494a 100644 --- a/media/docs/crates.svg +++ b/media/docs/crates.svg @@ -4,186 +4,219 @@ - - + + crates - + cluster_legend - -Legend + +Legend ffmpeg_next - - - -ffmpeg-next 8 -(ffmpeg-sys-next) + + + +ffmpeg-next 8 +(client: NUT demux) tokio - - - -tokio 1 -(async runtime) + + + +tokio 1  (full) +(async runtime) serde - - - -serde / serde_json + + + +serde / serde_json tracing - - - -tracing -tracing-subscriber + + + +tracing +tracing-subscriber anyhow - - - -anyhow + + + +anyhow + + + +libc_crate + + + +libc 0.2 + + + +nix_crate + + + +nix 0.29 +(signal, process) - + common - -cht-common -───────────── -protocol.rs  (wire framing) -frame.rs     (Frame, AudioBuffer) -logging.rs + +cht-common +───────────── +protocol.rs  (WirePacket framing) +             PacketType: Video|Audio|Control +             ControlMessage: Start|Stop|... +logging.rs   (tracing init) common->tokio - - + + common->serde - - + + common->tracing - - + + common->anyhow - - + + - + client - -cht-client  [sender, Wayland] -───────────────────────────── -capture.rs   KMS/DRM → DRM_PRIME frames -encoder.rs   VAAPI H.264 (lazy init) -pipeline.rs  capture→encode thread -main.rs      TCP transport + keepalive + +cht-client  [sender, Wayland] +───────────────────────────── +backends/subprocess.rs  ffmpeg CLI + PulseAudio +                        NUT demux → EncodedPacket +backends/mod.rs         Backend enum +capture.rs              KmsCapture (direct backend) +encoder.rs              VaapiEncoder + MediaType +pipeline.rs             capture→encode thread +main.rs                 wait_for_server, transport, +                        YYYYMMDD_HHMMSS session IDs client->ffmpeg_next - - + + client->tokio - - + + client->tracing - - + + client->anyhow - - + + + + + +client->libc_crate + + + + + +client->nix_crate + + client->common - - + + - + server - -cht-server  [receiver, mcrn] -───────────────────────────── -main.rs      TCP listener (stub) -             counts packets, no decode yet + +cht-server  [receiver, mcrndeb] +───────────────────────────── +main.rs       TCP listener +              routes Video/Audio/Control +session.rs    ffmpeg subprocess: +                fMP4 + UDP relay +              ADTS audio writer +              Scene relay (Unix socket) +                keyframe buffering - + server->tokio - - + + - + server->tracing - - + + - + server->anyhow - - + + server->common - - + + - + l1 - -implemented - - - -l2 - -stub / planned + +implemented - + l3 - - - -external crate + + + +external crate diff --git a/media/docs/index.html b/media/docs/index.html index 1750dfb..749fbb9 100644 --- a/media/docs/index.html +++ b/media/docs/index.html @@ -117,18 +117,18 @@

Media Transport

Workspace
- - Crate graph phase 2 + + Crate graph
Client (sender)
- - Pipeline phase 2 + + Pipeline
Server (receiver)
- - Pipeline phase 2 stub + + Pipeline diff --git a/media/docs/server-pipeline.dot b/media/docs/server-pipeline.dot index bbce3dd..4e1cd6a 100644 --- a/media/docs/server-pipeline.dot +++ b/media/docs/server-pipeline.dot @@ -1,54 +1,71 @@ -// Server pipeline — Phase 2 (stub) + planned architecture -// Receiver machine (X11, RTX 3080, NVDEC) +// Server pipeline — current implementation +// Receiver machine (mcrndeb: X11, RTX 3080, NVDEC) digraph server_pipeline { graph [fontname="monospace" bgcolor="#1e1e2e" rankdir=TB pad="0.6" splines=polyline] node [fontname="monospace" fontcolor="#cdd6f4" style=filled shape=box fillcolor="#313244" color="#585b70" margin="0.25,0.12"] edge [color="#585b70" fontname="monospace" fontcolor="#a6adc8"] - net [label="TCP :4444" shape=parallelogram fillcolor="#1e2a3e" color="#89b4fa"] - python [label="Python app\n(stream/manager.py)" shape=parallelogram fillcolor="#2a2a3e" color="#cba6f7"] + net [label="TCP :4447\n(WirePacket)" shape=parallelogram fillcolor="#1e2a3e" color="#89b4fa"] + python [label="Python GUI\n(cht app)" shape=parallelogram fillcolor="#2a2a3e" color="#cba6f7"] - subgraph cluster_implemented { - label="Implemented (Phase 2)" fontcolor="#a6e3a1" color="#a6e3a1" fontname="monospace" + subgraph cluster_rust { + label="cht-server (Rust)" fontcolor="#a6e3a1" color="#a6e3a1" fontname="monospace" - listener [label="Listener\n─────────────\nTCP accept loop\nspawns task per client\nreads WirePacket headers\ncounts video/audio pkts\nlogs keyframes + ts" + listener [label="Listener\n─────────────\nTCP accept\nreads WirePacket\nroutes by type:\n Video → ffmpeg + scene relay\n Audio → ADTS file\n Control → session lifecycle" fillcolor="#1e2d3e" color="#89b4fa"] + + ffmpeg_rec [label="ffmpeg subprocess\n─────────────\nH.264 pipe:0 → 2 outputs:\n 1. fMP4 (frag_keyframe)\n 2. UDP :4445 (mpegts)" + fillcolor="#1e2d3e" color="#89b4fa"] + + scene_relay [label="Scene Relay\n─────────────\nUnix socket (scene.sock)\nbuffers latest keyframe\nbest-effort: drops if slow\n100ms write timeout" + fillcolor="#1e2d3e" color="#89b4fa"] + + audio_writer [label="Audio Writer\n─────────────\nADTS header + raw AAC\n→ stream/audio.aac" + fillcolor="#1e2d3e" color="#89b4fa"] + + active_session [label="active-session\n─────────────\nfile at data/active-session\nPython polls to discover\nsession dir" shape=note + fillcolor="#2a2a3e" color="#585b70"] } - subgraph cluster_planned { - label="Planned" fontcolor="#f38ba8" color="#f38ba8" fontname="monospace" style=dashed + subgraph cluster_python { + label="Python (cht app)" fontcolor="#cba6f7" color="#cba6f7" fontname="monospace" - decoder [label="Decoder (Phase 3)\n─────────────\nNVDEC H.264 → NV12\nGPU frames" fillcolor="#2d1e1e" color="#f38ba8"] - scene [label="Scene Detector (Phase 3)\n─────────────\nffmpeg select filter\nin-process (no subprocess)\nJPEG → frames/\nframes/index.json" fillcolor="#2d1e1e" color="#f38ba8"] - audio [label="Audio Extractor (Phase 4)\n─────────────\nAAC decode\nWAV chunks → audio/" fillcolor="#2d1e1e" color="#f38ba8"] - writer [label="Segment Writer (Phase 3)\n─────────────\nfMP4 segments → stream/\nkeyframe boundaries" fillcolor="#2d1e1e" color="#f38ba8"] - framebuf [label="Frame Buffer (Phase 6)\n─────────────\nGPU ring buffer ~300 frames\nscrub: GPU→CPU on demand\n→ /dev/shm/cht_scrub_frame" fillcolor="#2d1e1e" color="#f38ba8"] - ipc [label="IPC Server (Phase 5)\n─────────────\nUnix socket JSON-lines\ncommands: start/stop/get_frame\nevents: frame_detected/audio_chunk/…" fillcolor="#2d1e1e" color="#f38ba8"] + scene_ffmpeg [label="Scene Detector\n─────────────\nconnects to scene.sock\npipes H.264 → ffmpeg:\n CUDA decode\n select=gt(scene,thresh)\n showinfo → timestamps\n MJPEG → JPEG frames" + fillcolor="#2d2038" color="#cba6f7"] + + audio_extract [label="Audio Extractor\n─────────────\nreads audio.aac\nffmpeg → 16kHz mono WAV\nchunks + transcript WAVs" + fillcolor="#2d2038" color="#cba6f7"] + + transcriber [label="Transcriber\n─────────────\nfaster-whisper (CUDA)\nsegment grouping\nslider: chunk size + lines/group" + fillcolor="#2d2038" color="#cba6f7"] } - // Flow — implemented + // Flow — Rust server net -> listener [label="WirePacket"] + listener -> ffmpeg_rec [label="H.264 video"] + listener -> scene_relay [label="H.264 copy\n+ keyframe flag"] + listener -> audio_writer [label="AAC audio"] + listener -> active_session [style=dashed label="on SessionStart"] - // Flow — planned - listener -> decoder [style=dashed label="H.264 payload"] - decoder -> scene [style=dashed label="NV12 frame"] - decoder -> writer [style=dashed label="encoded pkt"] - decoder -> framebuf [style=dashed label="GPU frame"] - decoder -> audio [style=dashed label="audio pkt"] - scene -> ipc [style=dashed label="frame_detected"] - audio -> ipc [style=dashed label="audio_chunk"] - writer -> ipc [style=dashed label="segment_completed"] - ipc -> python [style=dashed label="JSON-lines\n(Unix socket)"] + // Flow — Python scene detection + scene_relay -> scene_ffmpeg [label="raw H.264\n(Unix socket)" color="#a6e3a1"] // Outputs - frames_dir [label="frames/\nindex.json + *.jpg" shape=folder fillcolor="#2a2a3e" color="#585b70"] - audio_dir [label="audio/\n*.wav chunks" shape=folder fillcolor="#2a2a3e" color="#585b70"] - stream_dir [label="stream/\n*.mp4 segments" shape=folder fillcolor="#2a2a3e" color="#585b70"] - shm [label="/dev/shm/cht_scrub_frame\nraw RGBA pixels" shape=folder fillcolor="#2a2a3e" color="#585b70"] + fmp4 [label="stream/\nrecording_000.mp4\n(fragmented MP4)" shape=folder fillcolor="#2a2a3e" color="#585b70"] + udp_live [label="UDP :4445\n(mpegts → mpv)" shape=parallelogram fillcolor="#2a2a3e" color="#585b70"] + aac_file [label="stream/\naudio.aac\n(ADTS-wrapped)" shape=folder fillcolor="#2a2a3e" color="#585b70"] + frames [label="frames/\nindex.json + *.jpg" shape=folder fillcolor="#2a2a3e" color="#585b70"] + audio_dir [label="audio/\nchunk_*.wav\ntranscript_*.wav" shape=folder fillcolor="#2a2a3e" color="#585b70"] - scene -> frames_dir [style=dashed] - audio -> audio_dir [style=dashed] - writer -> stream_dir [style=dashed] - framebuf -> shm [style=dashed label="get_frame cmd"] + ffmpeg_rec -> fmp4 [label="copy"] + ffmpeg_rec -> udp_live [label="copy"] + audio_writer -> aac_file + scene_ffmpeg -> frames [label="JPEG on\nscene change"] + audio_extract -> audio_dir + audio_dir -> transcriber [label="WAV chunks"] + + // Python reads files + aac_file -> audio_extract [label="reads" style=dashed] + active_session -> python [label="discovers\nsession dir" style=dashed] } diff --git a/media/docs/server-pipeline.svg b/media/docs/server-pipeline.svg index 8e4c8a0..d157d03 100644 --- a/media/docs/server-pipeline.svg +++ b/media/docs/server-pipeline.svg @@ -4,227 +4,260 @@ - - + + server_pipeline - + -cluster_implemented - -Implemented (Phase 2) +cluster_rust + +cht-server (Rust) -cluster_planned - -Planned +cluster_python + +Python (cht app) net - -TCP :4444 + +TCP :4447 +(WirePacket) listener - -Listener -───────────── -TCP accept loop -spawns task per client -reads WirePacket headers -counts video/audio pkts -logs keyframes + ts + +Listener +───────────── +TCP accept +reads WirePacket +routes by type: +  Video → ffmpeg + scene relay +  Audio → ADTS file +  Control → session lifecycle net->listener - - -WirePacket + + +WirePacket python - -Python app -(stream/manager.py) + +Python GUI +(cht app) - + -decoder - -Decoder  (Phase 3) -───────────── -NVDEC H.264 → NV12 -GPU frames +ffmpeg_rec + +ffmpeg subprocess +───────────── +H.264 pipe:0 → 2 outputs: +  1. fMP4 (frag_keyframe) +  2. UDP :4445 (mpegts) - + -listener->decoder - - -H.264 payload +listener->ffmpeg_rec + + +H.264 video - + -scene - -Scene Detector  (Phase 3) -───────────── -ffmpeg select filter -in-process (no subprocess) -JPEG → frames/ -frames/index.json +scene_relay + +Scene Relay +───────────── +Unix socket (scene.sock) +buffers latest keyframe +best-effort: drops if slow +100ms write timeout - + -decoder->scene - - -NV12 frame +listener->scene_relay + + +H.264 copy ++ keyframe flag - + -audio - -Audio Extractor  (Phase 4) -───────────── -AAC decode -WAV chunks → audio/ +audio_writer + +Audio Writer +───────────── +ADTS header + raw AAC +→ stream/audio.aac - - -decoder->audio - - -audio pkt - - - -writer - -Segment Writer  (Phase 3) -───────────── -fMP4 segments → stream/ -keyframe boundaries - - + -decoder->writer - - -encoded pkt +listener->audio_writer + + +AAC audio - - -framebuf - -Frame Buffer  (Phase 6) -───────────── -GPU ring buffer ~300 frames -scrub: GPU→CPU on demand -→ /dev/shm/cht_scrub_frame + + +active_session + + + +active-session +───────────── +file at data/active-session +Python polls to discover +session dir - + -decoder->framebuf - - -GPU frame +listener->active_session + + +on SessionStart - - -ipc - -IPC Server  (Phase 5) -───────────── -Unix socket JSON-lines -commands: start/stop/get_frame -events: frame_detected/audio_chunk/… + + +fmp4 + +stream/ +recording_000.mp4 +(fragmented MP4) - + -scene->ipc - - -frame_detected +ffmpeg_rec->fmp4 + + +copy - - -frames_dir - -frames/ -index.json + *.jpg + + +udp_live + +UDP :4445 +(mpegts → mpv) - - -scene->frames_dir - - - - + -audio->ipc - - -audio_chunk +ffmpeg_rec->udp_live + + +copy + + + +scene_ffmpeg + +Scene Detector +───────────── +connects to scene.sock +pipes H.264 → ffmpeg: +  CUDA decode +  select=gt(scene,thresh) +  showinfo → timestamps +  MJPEG → JPEG frames + + + +scene_relay->scene_ffmpeg + + +raw H.264 +(Unix socket) + + + +aac_file + +stream/ +audio.aac +(ADTS-wrapped) + + + +audio_writer->aac_file + + + + + +active_session->python + + +discovers +session dir + + + +frames + +frames/ +index.json + *.jpg + + + +scene_ffmpeg->frames + + +JPEG on +scene change + + + +audio_extract + +Audio Extractor +───────────── +reads audio.aac +ffmpeg → 16kHz mono WAV +chunks + transcript WAVs - + audio_dir - -audio/ -*.wav chunks + +audio/ +chunk_*.wav +transcript_*.wav - - -audio->audio_dir - - + + +audio_extract->audio_dir + + - - -writer->ipc - - -segment_completed + + +transcriber + +Transcriber +───────────── +faster-whisper (CUDA) +segment grouping +slider: chunk size + lines/group - - -stream_dir - -stream/ -*.mp4 segments - - + -writer->stream_dir - - +aac_file->audio_extract + + +reads - - -shm - -/dev/shm/cht_scrub_frame -raw RGBA pixels - - - -framebuf->shm - - -get_frame cmd - - - -ipc->python - - -JSON-lines -(Unix socket) + + +audio_dir->transcriber + + +WAV chunks diff --git a/media/server/src/session.rs b/media/server/src/session.rs index 12b8098..69d9a03 100644 --- a/media/server/src/session.rs +++ b/media/server/src/session.rs @@ -112,6 +112,7 @@ impl Session { // Scene relay: Unix socket for Python scene detection. let socket_path = stream_dir.join(SCENE_SOCKET_NAME); let (scene_tx, scene_rx) = tokio::sync::mpsc::channel(32); + info!("Scene relay: spawning for {}", socket_path.display()); tokio::spawn(scene_relay_task(socket_path, scene_rx)); // Tell Python which session dir to watch. @@ -217,9 +218,22 @@ async fn scene_relay_task( last_keyframe = Some(pkt.data.clone()); } let stream = client.as_mut().unwrap(); - if stream.write_all(&pkt.data).await.is_err() { - info!("Scene relay: client disconnected"); - client = None; + // Use a short timeout so a slow reader doesn't stall us. + // A stalled relay would queue old frames — better to drop. + let write_result = tokio::time::timeout( + std::time::Duration::from_millis(100), + stream.write_all(&pkt.data), + ).await; + match write_result { + Ok(Ok(())) => {} + Ok(Err(_)) => { + info!("Scene relay: client disconnected"); + client = None; + } + Err(_) => { + // Timeout — reader too slow, drop this packet. + debug!("Scene relay: slow reader, dropping packet"); + } } } None => break, // Channel closed, session ending.