<?xml version="1.0" encoding="UTF-8"?>
<!-- Source listing: mitus/media/docs/server-pipeline.svg (287 lines, 24 KiB, XML) -->
<!-- No DOCTYPE on purpose: the SVG namespace is declared on the root element,
     and omitting the external DTD reference keeps parsers from fetching it. -->
<!-- Generated by graphviz version 14.1.2 (0)
-->
<!-- Title: server_pipeline Pages: 1 -->
<svg width="1677pt" height="1243pt"
viewBox="0.00 0.00 1677.00 1243.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(43.2 1200.16)">
<title>server_pipeline</title>
<!-- Background: unstroked #1e1e2e polygon; after the parent translate(43.2 1200.16)
     its corners span roughly the whole 1677 x 1243 viewBox. -->
<polygon fill="#1e1e2e" stroke="none" points="-43.2,43.2 -43.2,-1200.16 1633.33,-1200.16 1633.33,43.2 -43.2,43.2"/>
<!-- Cluster frame for the "cht-server (Rust)" group: green-outlined (#a6e3a1) rectangle
     plus a centered label. It comes before the nodes in document order, so they paint on top. -->
<g id="clust1" class="cluster">
<title>cluster_rust</title>
<polygon fill="#1e1e2e" stroke="#a6e3a1" points="284.12,-622.34 284.12,-1010.15 1314.12,-1010.15 1314.12,-622.34 284.12,-622.34"/>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-992.85" font-family="monospace" font-size="14.00" fill="#a6e3a1">cht&#45;server (Rust)</text>
</g>
<!-- Cluster frame for the "Python (cht app)" group: purple-outlined (#cba6f7) rectangle
     plus a centered label in the same colour. -->
<g id="clust2" class="cluster">
<title>cluster_python</title>
<polygon fill="#1e1e2e" stroke="#cba6f7" points="754.12,-114.28 754.12,-310.81 1582.12,-310.81 1582.12,-114.28 754.12,-114.28"/>
<text xml:space="preserve" text-anchor="middle" x="1168.12" y="-293.51" font-family="monospace" font-size="14.00" fill="#cba6f7">Python (cht app)</text>
</g>
<!-- net -->
<!-- Parallelogram node for the network input, labelled "TCP :4447 (WirePacket)". -->
<g id="node1" class="node">
<title>net</title>
<polygon fill="#1e2a3e" stroke="#89b4fa" points="915.09,-1156.96 692.44,-1156.96 635.16,-1053.4 857.81,-1053.4 915.09,-1156.96"/>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-1109.13" font-family="monospace" font-size="14.00" fill="#cdd6f4">TCP :4447</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-1091.88" font-family="monospace" font-size="14.00" fill="#cdd6f4">(WirePacket)</text>
</g>
<!-- listener -->
<!-- Rectangle node for the Listener. Its label lines state that it accepts TCP, reads
     WirePacket and routes by type: Video, Audio and Control. -->
<g id="node3" class="node">
<title>listener</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="916.88,-976.9 633.38,-976.9 633.38,-821.62 916.88,-821.62 916.88,-976.9"/>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-954.96" font-family="monospace" font-size="14.00" fill="#cdd6f4">Listener</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-937.71" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-920.46" font-family="monospace" font-size="14.00" fill="#cdd6f4">TCP accept</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-903.21" font-family="monospace" font-size="14.00" fill="#cdd6f4">reads WirePacket</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-885.96" font-family="monospace" font-size="14.00" fill="#cdd6f4">routes by type:</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-868.71" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;Video → ffmpeg + scene relay</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-851.46" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;Audio → ADTS file</text>
<text xml:space="preserve" text-anchor="middle" x="775.12" y="-834.21" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;Control → session lifecycle</text>
</g>
<!-- net&#45;&gt;listener -->
<!-- Solid edge from net to listener: curve, arrowhead polygon and the label "WirePacket". -->
<g id="edge1" class="edge">
<title>net&#45;&gt;listener</title>
<path fill="none" stroke="#585b70" d="M775.12,-1052.99C775.12,-1033.5 775.12,-1010.68 775.12,-988.72"/>
<polygon fill="#585b70" stroke="#585b70" points="778.63,-988.85 775.13,-978.85 771.63,-988.85 778.63,-988.85"/>
<text xml:space="preserve" text-anchor="middle" x="816.38" y="-1022.1" font-family="monospace" font-size="14.00" fill="#a6adc8">WirePacket</text>
</g>
<!-- python -->
<!-- Parallelogram node labelled "Python GUI (cht app)", outlined in the purple (#cba6f7)
     also used for the Python cluster frame. -->
<g id="node2" class="node">
<title>python</title>
<polygon fill="#2a2a3e" stroke="#cba6f7" points="1361.98,-508.73 1166.55,-508.73 1116.27,-405.17 1311.7,-405.17 1361.98,-508.73"/>
<text xml:space="preserve" text-anchor="middle" x="1239.12" y="-460.9" font-family="monospace" font-size="14.00" fill="#cdd6f4">Python GUI</text>
<text xml:space="preserve" text-anchor="middle" x="1239.12" y="-443.65" font-family="monospace" font-size="14.00" fill="#cdd6f4">(cht app)</text>
</g>
<!-- ffmpeg_rec -->
<!-- Rectangle node for the ffmpeg subprocess. Its label lists H.264 on pipe:0 going to
     two outputs: fMP4 (frag_keyframe) and UDP :4445 (mpegts). -->
<g id="node4" class="node">
<title>ffmpeg_rec</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="534.25,-742.49 292,-742.49 292,-638.96 534.25,-638.96 534.25,-742.49"/>
<text xml:space="preserve" text-anchor="middle" x="413.12" y="-720.55" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg subprocess</text>
<text xml:space="preserve" text-anchor="middle" x="413.12" y="-703.3" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="413.12" y="-686.05" font-family="monospace" font-size="14.00" fill="#cdd6f4">H.264 pipe:0 → 2 outputs:</text>
<text xml:space="preserve" text-anchor="middle" x="413.12" y="-668.8" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;1. fMP4 (frag_keyframe)</text>
<text xml:space="preserve" text-anchor="middle" x="413.12" y="-651.55" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;2. UDP :4445 (mpegts)</text>
</g>
<!-- listener&#45;&gt;ffmpeg_rec -->
<!-- Solid edge from listener to ffmpeg_rec, labelled "H.264 video". -->
<g id="edge2" class="edge">
<title>listener&#45;&gt;ffmpeg_rec</title>
<path fill="none" stroke="#585b70" d="M640.07,-821.21C597.84,-797.12 552.04,-770.98 512.71,-748.55"/>
<polygon fill="#585b70" stroke="#585b70" points="514.59,-745.59 504.17,-743.67 511.12,-751.67 514.59,-745.59"/>
<text xml:space="preserve" text-anchor="middle" x="650.35" y="-781.69" font-family="monospace" font-size="14.00" fill="#a6adc8">H.264 video</text>
</g>
<!-- scene_relay -->
<!-- Rectangle node for the Scene Relay. Its label notes the Unix socket (scene.sock),
     latest-keyframe buffering, best-effort drops and a 100ms write timeout. -->
<g id="node5" class="node">
<title>scene_relay</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="802.38,-751.12 551.88,-751.12 551.88,-630.34 802.38,-630.34 802.38,-751.12"/>
<text xml:space="preserve" text-anchor="middle" x="677.12" y="-729.18" font-family="monospace" font-size="14.00" fill="#cdd6f4">Scene Relay</text>
<text xml:space="preserve" text-anchor="middle" x="677.12" y="-711.93" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="677.12" y="-694.68" font-family="monospace" font-size="14.00" fill="#cdd6f4">Unix socket (scene.sock)</text>
<text xml:space="preserve" text-anchor="middle" x="677.12" y="-677.43" font-family="monospace" font-size="14.00" fill="#cdd6f4">buffers latest keyframe</text>
<text xml:space="preserve" text-anchor="middle" x="677.12" y="-660.18" font-family="monospace" font-size="14.00" fill="#cdd6f4">best&#45;effort: drops if slow</text>
<text xml:space="preserve" text-anchor="middle" x="677.12" y="-642.93" font-family="monospace" font-size="14.00" fill="#cdd6f4">100ms write timeout</text>
</g>
<!-- listener&#45;&gt;scene_relay -->
<!-- Solid edge from listener to scene_relay with a two-line label:
     "H.264 copy" / "+ keyframe flag". -->
<g id="edge3" class="edge">
<title>listener&#45;&gt;scene_relay</title>
<path fill="none" stroke="#585b70" d="M738.63,-821.36C729.36,-801.82 719.45,-780.93 710.35,-761.75"/>
<polygon fill="#585b70" stroke="#585b70" points="713.63,-760.49 706.18,-752.96 707.3,-763.5 713.63,-760.49"/>
<text xml:space="preserve" text-anchor="middle" x="790.94" y="-790.32" font-family="monospace" font-size="14.00" fill="#a6adc8">H.264 copy</text>
<text xml:space="preserve" text-anchor="middle" x="790.94" y="-773.07" font-family="monospace" font-size="14.00" fill="#a6adc8">+ keyframe flag</text>
</g>
<!-- audio_writer -->
<!-- Rectangle node for the Audio Writer. Its label says ADTS header + raw AAC,
     written to stream/audio.aac. -->
<g id="node6" class="node">
<title>audio_writer</title>
<polygon fill="#1e2d3e" stroke="#89b4fa" points="1029.75,-733.87 820.5,-733.87 820.5,-647.59 1029.75,-647.59 1029.75,-733.87"/>
<text xml:space="preserve" text-anchor="middle" x="925.12" y="-711.93" font-family="monospace" font-size="14.00" fill="#cdd6f4">Audio Writer</text>
<text xml:space="preserve" text-anchor="middle" x="925.12" y="-694.68" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="925.12" y="-677.43" font-family="monospace" font-size="14.00" fill="#cdd6f4">ADTS header + raw AAC</text>
<text xml:space="preserve" text-anchor="middle" x="925.12" y="-660.18" font-family="monospace" font-size="14.00" fill="#cdd6f4">→ stream/audio.aac</text>
</g>
<!-- listener&#45;&gt;audio_writer -->
<!-- Solid edge from listener to audio_writer, labelled "AAC audio". -->
<g id="edge4" class="edge">
<title>listener&#45;&gt;audio_writer</title>
<path fill="none" stroke="#585b70" d="M841.7,-821.42C850.82,-810.9 857.12,-803.62 857.12,-803.62 857.12,-803.62 875.59,-773.23 893.28,-744.13"/>
<polygon fill="#585b70" stroke="#585b70" points="896.23,-746.01 898.44,-735.64 890.25,-742.37 896.23,-746.01"/>
<text xml:space="preserve" text-anchor="middle" x="914.34" y="-781.69" font-family="monospace" font-size="14.00" fill="#a6adc8">AAC audio</text>
</g>
<!-- active_session -->
<!-- Note-shaped node: a rectangle with a clipped top-right corner, whose fold is drawn by
     the two polylines. Its label describes the file at data/active-session that Python
     polls to discover the session dir. -->
<g id="node7" class="node">
<title>active_session</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="1300.5,-742.49 1047.75,-742.49 1047.75,-638.96 1306.5,-638.96 1306.5,-736.49 1300.5,-742.49"/>
<polyline fill="none" stroke="#585b70" points="1300.5,-742.49 1300.5,-736.49"/>
<polyline fill="none" stroke="#585b70" points="1306.5,-736.49 1300.5,-736.49"/>
<text xml:space="preserve" text-anchor="middle" x="1177.12" y="-720.55" font-family="monospace" font-size="14.00" fill="#cdd6f4">active&#45;session</text>
<text xml:space="preserve" text-anchor="middle" x="1177.12" y="-703.3" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1177.12" y="-686.05" font-family="monospace" font-size="14.00" fill="#cdd6f4">file at data/active&#45;session</text>
<text xml:space="preserve" text-anchor="middle" x="1177.12" y="-668.8" font-family="monospace" font-size="14.00" fill="#cdd6f4">Python polls to discover</text>
<text xml:space="preserve" text-anchor="middle" x="1177.12" y="-651.55" font-family="monospace" font-size="14.00" fill="#cdd6f4">session dir</text>
</g>
<!-- listener&#45;&gt;active_session -->
<!-- Dashed edge (stroke-dasharray 5,2) from listener to active_session,
     labelled "on SessionStart". -->
<g id="edge5" class="edge">
<title>listener&#45;&gt;active_session</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M917.14,-825.3C966.5,-799.94 1021.08,-771.9 1067.43,-748.09"/>
<polygon fill="#585b70" stroke="#585b70" points="1068.85,-751.29 1076.14,-743.61 1065.65,-745.07 1068.85,-751.29"/>
<text xml:space="preserve" text-anchor="middle" x="1085.83" y="-781.69" font-family="monospace" font-size="14.00" fill="#a6adc8">on SessionStart</text>
</g>
<!-- fmp4 -->
<!-- Folder-shaped node (small tab raised on the top edge) for
     stream/recording_000.mp4, labelled as fragmented MP4. -->
<g id="node11" class="node">
<title>fmp4</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="176.25,-491.46 173.25,-495.46 152.25,-495.46 149.25,-491.46 0,-491.46 0,-422.44 176.25,-422.44 176.25,-491.46"/>
<text xml:space="preserve" text-anchor="middle" x="88.12" y="-469.52" font-family="monospace" font-size="14.00" fill="#cdd6f4">stream/</text>
<text xml:space="preserve" text-anchor="middle" x="88.12" y="-452.27" font-family="monospace" font-size="14.00" fill="#cdd6f4">recording_000.mp4</text>
<text xml:space="preserve" text-anchor="middle" x="88.12" y="-435.02" font-family="monospace" font-size="14.00" fill="#cdd6f4">(fragmented MP4)</text>
</g>
<!-- ffmpeg_rec&#45;&gt;fmp4 -->
<!-- Solid edge from ffmpeg_rec to fmp4, labelled "copy". -->
<g id="edge7" class="edge">
<title>ffmpeg_rec&#45;&gt;fmp4</title>
<path fill="none" stroke="#585b70" d="M323.34,-638.58C259.54,-602.23 185.12,-559.84 185.12,-559.84 185.12,-559.84 154.9,-528.09 128.2,-500.04"/>
<polygon fill="#585b70" stroke="#585b70" points="130.9,-497.8 121.47,-492.97 125.83,-502.63 130.9,-497.8"/>
<text xml:space="preserve" text-anchor="middle" x="288.5" y="-590.41" font-family="monospace" font-size="14.00" fill="#a6adc8">copy</text>
</g>
<!-- udp_live -->
<!-- Parallelogram node for the live output, labelled "UDP :4445" and "(mpegts → mpv)". -->
<g id="node12" class="node">
<title>udp_live</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="508.19,-508.73 258.33,-508.73 194.06,-405.17 443.92,-405.17 508.19,-508.73"/>
<text xml:space="preserve" text-anchor="middle" x="351.12" y="-460.9" font-family="monospace" font-size="14.00" fill="#cdd6f4">UDP :4445</text>
<text xml:space="preserve" text-anchor="middle" x="351.12" y="-443.65" font-family="monospace" font-size="14.00" fill="#cdd6f4">(mpegts → mpv)</text>
</g>
<!-- ffmpeg_rec&#45;&gt;udp_live -->
<!-- Solid edge from ffmpeg_rec to udp_live, labelled "copy". -->
<g id="edge8" class="edge">
<title>ffmpeg_rec&#45;&gt;udp_live</title>
<path fill="none" stroke="#585b70" d="M399.44,-638.58C390.06,-603.51 377.57,-556.8 367.63,-519.67"/>
<polygon fill="#585b70" stroke="#585b70" points="371.13,-519.19 365.16,-510.43 364.37,-521 371.13,-519.19"/>
<text xml:space="preserve" text-anchor="middle" x="407.8" y="-590.41" font-family="monospace" font-size="14.00" fill="#a6adc8">copy</text>
</g>
<!-- scene_ffmpeg -->
<!-- Rectangle node for the Scene Detector, outlined in the same purple (#cba6f7) as the
     Python cluster frame. Its label lists: connect to scene.sock, pipe H.264 into ffmpeg,
     CUDA decode, select=gt(scene,thresh), showinfo timestamps and MJPEG frames. -->
<g id="node8" class="node">
<title>scene_ffmpeg</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="1004.25,-277.56 762,-277.56 762,-122.28 1004.25,-122.28 1004.25,-277.56"/>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-255.62" font-family="monospace" font-size="14.00" fill="#cdd6f4">Scene Detector</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-238.37" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-221.12" font-family="monospace" font-size="14.00" fill="#cdd6f4">connects to scene.sock</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-203.87" font-family="monospace" font-size="14.00" fill="#cdd6f4">pipes H.264 → ffmpeg:</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-186.62" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;CUDA decode</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-169.37" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;select=gt(scene,thresh)</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-152.12" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;showinfo → timestamps</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-134.87" font-family="monospace" font-size="14.00" fill="#cdd6f4"> &#160;MJPEG → JPEG frames</text>
</g>
<!-- scene_relay&#45;&gt;scene_ffmpeg -->
<!-- Solid edge from scene_relay to scene_ffmpeg, stroked green (#a6e3a1) unlike the grey
     (#585b70) used by most edges. Two-line label: "raw H.264" / "(Unix socket)". -->
<g id="edge6" class="edge">
<title>scene_relay&#45;&gt;scene_ffmpeg</title>
<path fill="none" stroke="#a6e3a1" d="M620.55,-630.11C588.15,-595.96 553.88,-559.84 553.88,-559.84 553.88,-559.84 553.88,-354.06 553.88,-354.06 553.88,-354.06 659.95,-304.72 751.68,-262.06"/>
<polygon fill="#a6e3a1" stroke="#a6e3a1" points="752.96,-265.32 760.55,-257.93 750.01,-258.97 752.96,-265.32"/>
<text xml:space="preserve" text-anchor="middle" x="607.5" y="-460.9" font-family="monospace" font-size="14.00" fill="#a6adc8">raw H.264</text>
<text xml:space="preserve" text-anchor="middle" x="607.5" y="-443.65" font-family="monospace" font-size="14.00" fill="#a6adc8">(Unix socket)</text>
</g>
<!-- regression -->
<!-- Note-shaped warning node (folded top-right corner drawn by the two polylines), with
     #f38ba8 outline and text on a dark red fill. The label records the scene-relay
     regression and the proposed fix. The wider vertical gap before the "Fix:" line acts
     as a paragraph break between the two. -->
<g id="node16" class="node">
<title>regression</title>
<polygon fill="#3d1e1e" stroke="#f38ba8" points="922.5,-559.84 669.75,-559.84 669.75,-354.06 928.5,-354.06 928.5,-553.84 922.5,-559.84"/>
<polyline fill="none" stroke="#f38ba8" points="922.5,-559.84 922.5,-553.84"/>
<polyline fill="none" stroke="#f38ba8" points="928.5,-553.84 922.5,-553.84"/>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-537.9" font-family="monospace" font-size="14.00" fill="#f38ba8">⚠ REGRESSED</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-520.65" font-family="monospace" font-size="14.00" fill="#f38ba8">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-503.4" font-family="monospace" font-size="14.00" fill="#f38ba8">Scene relay (separate pipe)</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-486.15" font-family="monospace" font-size="14.00" fill="#f38ba8">breaks &#39;one behind&#39; flush.</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-468.9" font-family="monospace" font-size="14.00" fill="#f38ba8">try_send drops → decoder</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-451.65" font-family="monospace" font-size="14.00" fill="#f38ba8">corruption until keyframe.</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-418.4" font-family="monospace" font-size="14.00" fill="#f38ba8">Fix: move scene detection</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-401.15" font-family="monospace" font-size="14.00" fill="#f38ba8">into server ffmpeg as 3rd</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-383.9" font-family="monospace" font-size="14.00" fill="#f38ba8">output branch (10&#45;scene&#45;</text>
<text xml:space="preserve" text-anchor="middle" x="799.12" y="-366.65" font-family="monospace" font-size="14.00" fill="#f38ba8">detect&#45;to&#45;rust.md)</text>
</g>
<!-- scene_relay&#45;&gt;regression -->
<!-- Dashed edge from scene_relay to the regression note, stroked in the note's #f38ba8
     colour. It carries no text label. -->
<g id="edge15" class="edge">
<title>scene_relay&#45;&gt;regression</title>
<path fill="none" stroke="#f38ba8" stroke-dasharray="5,2" d="M708.54,-630.04C718.19,-611.71 729.14,-590.91 740.01,-570.26"/>
<polygon fill="#f38ba8" stroke="#f38ba8" points="743.07,-571.97 744.63,-561.49 736.87,-568.71 743.07,-571.97"/>
</g>
<!-- aac_file -->
<!-- Folder-shaped node (small tab raised on the top edge) for stream/audio.aac,
     labelled as ADTS-wrapped. -->
<g id="node13" class="node">
<title>aac_file</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="1097.88,-491.46 1094.88,-495.46 1073.88,-495.46 1070.88,-491.46 946.38,-491.46 946.38,-422.44 1097.88,-422.44 1097.88,-491.46"/>
<text xml:space="preserve" text-anchor="middle" x="1022.12" y="-469.52" font-family="monospace" font-size="14.00" fill="#cdd6f4">stream/</text>
<text xml:space="preserve" text-anchor="middle" x="1022.12" y="-452.27" font-family="monospace" font-size="14.00" fill="#cdd6f4">audio.aac</text>
<text xml:space="preserve" text-anchor="middle" x="1022.12" y="-435.02" font-family="monospace" font-size="14.00" fill="#cdd6f4">(ADTS&#45;wrapped)</text>
</g>
<!-- audio_writer&#45;&gt;aac_file -->
<!-- Solid edge from audio_writer to aac_file. It carries no text label. -->
<g id="edge9" class="edge">
<title>audio_writer&#45;&gt;aac_file</title>
<path fill="none" stroke="#585b70" d="M942.92,-647.22C960.12,-606.1 986.09,-544.05 1003.56,-502.32"/>
<polygon fill="#585b70" stroke="#585b70" points="1006.7,-503.88 1007.33,-493.3 1000.24,-501.18 1006.7,-503.88"/>
</g>
<!-- active_session&#45;&gt;python -->
<!-- Dashed edge from active_session to python with a two-line label:
     "discovers" / "session dir". -->
<g id="edge14" class="edge">
<title>active_session&#45;&gt;python</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M1190.81,-638.58C1200.19,-603.51 1212.68,-556.8 1222.62,-519.67"/>
<polygon fill="#585b70" stroke="#585b70" points="1225.88,-521 1225.09,-510.43 1219.12,-519.19 1225.88,-521"/>
<text xml:space="preserve" text-anchor="middle" x="1251.64" y="-599.04" font-family="monospace" font-size="14.00" fill="#a6adc8">discovers</text>
<text xml:space="preserve" text-anchor="middle" x="1251.64" y="-581.79" font-family="monospace" font-size="14.00" fill="#a6adc8">session dir</text>
</g>
<!-- frames -->
<!-- Folder-shaped node (small tab raised on the top edge) for the frames/ directory,
     labelled "index.json + *.jpg". It is the lowest node on the canvas (bottom edge at y=0). -->
<g id="node14" class="node">
<title>frames</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="975.38,-51.78 972.38,-55.78 951.38,-55.78 948.38,-51.78 790.88,-51.78 790.88,0 975.38,0 975.38,-51.78"/>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-29.84" font-family="monospace" font-size="14.00" fill="#cdd6f4">frames/</text>
<text xml:space="preserve" text-anchor="middle" x="883.12" y="-12.59" font-family="monospace" font-size="14.00" fill="#cdd6f4">index.json + *.jpg</text>
</g>
<!-- scene_ffmpeg&#45;&gt;frames -->
<!-- Solid edge from scene_ffmpeg to frames with a two-line label:
     "JPEG on" / "scene change". -->
<g id="edge10" class="edge">
<title>scene_ffmpeg&#45;&gt;frames</title>
<path fill="none" stroke="#585b70" d="M883.12,-121.96C883.12,-101.46 883.12,-80.27 883.12,-63.11"/>
<polygon fill="#585b70" stroke="#585b70" points="886.63,-63.45 883.13,-53.45 879.63,-63.45 886.63,-63.45"/>
<text xml:space="preserve" text-anchor="middle" x="932.62" y="-90.98" font-family="monospace" font-size="14.00" fill="#a6adc8">JPEG on</text>
<text xml:space="preserve" text-anchor="middle" x="932.62" y="-73.73" font-family="monospace" font-size="14.00" fill="#a6adc8">scene change</text>
</g>
<!-- audio_extract -->
<!-- Rectangle node for the Audio Extractor, outlined in purple (#cba6f7). Its label says:
     reads audio.aac, ffmpeg to 16kHz mono WAV, chunks + transcript WAVs. -->
<g id="node9" class="node">
<title>audio_extract</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="1256.12,-251.69 1022.12,-251.69 1022.12,-148.16 1256.12,-148.16 1256.12,-251.69"/>
<text xml:space="preserve" text-anchor="middle" x="1139.12" y="-229.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">Audio Extractor</text>
<text xml:space="preserve" text-anchor="middle" x="1139.12" y="-212.5" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1139.12" y="-195.25" font-family="monospace" font-size="14.00" fill="#cdd6f4">reads audio.aac</text>
<text xml:space="preserve" text-anchor="middle" x="1139.12" y="-178" font-family="monospace" font-size="14.00" fill="#cdd6f4">ffmpeg → 16kHz mono WAV</text>
<text xml:space="preserve" text-anchor="middle" x="1139.12" y="-160.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">chunks + transcript WAVs</text>
</g>
<!-- audio_dir -->
<!-- Folder-shaped node (small tab raised on the top edge) for the audio/ directory,
     labelled with chunk_*.wav and transcript_*.wav. -->
<g id="node15" class="node">
<title>audio_dir</title>
<polygon fill="#2a2a3e" stroke="#585b70" points="1548.12,-491.46 1545.12,-495.46 1524.12,-495.46 1521.12,-491.46 1380.12,-491.46 1380.12,-422.44 1548.12,-422.44 1548.12,-491.46"/>
<text xml:space="preserve" text-anchor="middle" x="1464.12" y="-469.52" font-family="monospace" font-size="14.00" fill="#cdd6f4">audio/</text>
<text xml:space="preserve" text-anchor="middle" x="1464.12" y="-452.27" font-family="monospace" font-size="14.00" fill="#cdd6f4">chunk_*.wav</text>
<text xml:space="preserve" text-anchor="middle" x="1464.12" y="-435.02" font-family="monospace" font-size="14.00" fill="#cdd6f4">transcript_*.wav</text>
</g>
<!-- audio_extract&#45;&gt;audio_dir -->
<!-- Solid edge from audio_extract to audio_dir. It carries no text label. The path runs
     upward on the canvas (y goes from about -252 to about -414), so the arrowhead points
     up at audio_dir. -->
<g id="edge11" class="edge">
<title>audio_extract&#45;&gt;audio_dir</title>
<path fill="none" stroke="#585b70" d="M1197.7,-252C1230.6,-280.7 1265.12,-310.81 1265.12,-310.81 1265.12,-310.81 1371.12,-354.06 1371.12,-354.06 1371.12,-354.06 1400.1,-385.81 1425.7,-413.86"/>
<polygon fill="#585b70" stroke="#585b70" points="1422.81,-415.88 1432.14,-420.9 1427.98,-411.16 1422.81,-415.88"/>
</g>
<!-- transcriber -->
<!-- Rectangle node for the Transcriber, outlined in purple (#cba6f7). Its label says:
     faster-whisper (CUDA), segment grouping, and a slider for chunk size + lines/group. -->
<g id="node10" class="node">
<title>transcriber</title>
<polygon fill="#2d2038" stroke="#cba6f7" points="1574.12,-251.69 1274.12,-251.69 1274.12,-148.16 1574.12,-148.16 1574.12,-251.69"/>
<text xml:space="preserve" text-anchor="middle" x="1424.12" y="-229.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">Transcriber</text>
<text xml:space="preserve" text-anchor="middle" x="1424.12" y="-212.5" font-family="monospace" font-size="14.00" fill="#cdd6f4">─────────────</text>
<text xml:space="preserve" text-anchor="middle" x="1424.12" y="-195.25" font-family="monospace" font-size="14.00" fill="#cdd6f4">faster&#45;whisper (CUDA)</text>
<text xml:space="preserve" text-anchor="middle" x="1424.12" y="-178" font-family="monospace" font-size="14.00" fill="#cdd6f4">segment grouping</text>
<text xml:space="preserve" text-anchor="middle" x="1424.12" y="-160.75" font-family="monospace" font-size="14.00" fill="#cdd6f4">slider: chunk size + lines/group</text>
</g>
<!-- aac_file&#45;&gt;audio_extract -->
<!-- Dashed edge (stroke-dasharray 5,2) from aac_file to audio_extract, labelled "reads". -->
<g id="edge13" class="edge">
<title>aac_file&#45;&gt;audio_extract</title>
<path fill="none" stroke="#585b70" stroke-dasharray="5,2" d="M1037.66,-422.08C1056.29,-381.48 1087.87,-312.64 1110.86,-262.52"/>
<polygon fill="#585b70" stroke="#585b70" points="1114.01,-264.06 1115,-253.51 1107.65,-261.14 1114.01,-264.06"/>
<text xml:space="preserve" text-anchor="middle" x="1104.76" y="-322.76" font-family="monospace" font-size="14.00" fill="#a6adc8">reads</text>
</g>
<!-- audio_dir&#45;&gt;transcriber -->
<!-- Solid edge from audio_dir to transcriber, labelled "WAV chunks". -->
<g id="edge12" class="edge">
<title>audio_dir&#45;&gt;transcriber</title>
<path fill="none" stroke="#585b70" d="M1458.81,-422.08C1452.48,-381.73 1441.78,-313.5 1433.94,-263.47"/>
<polygon fill="#585b70" stroke="#585b70" points="1437.4,-262.96 1432.39,-253.63 1430.48,-264.05 1437.4,-262.96"/>
<text xml:space="preserve" text-anchor="middle" x="1485.38" y="-322.76" font-family="monospace" font-size="14.00" fill="#a6adc8">WAV chunks</text>
</g>
</g>
</svg>