audio and transcript
This commit is contained in:
@@ -10,9 +10,14 @@ gi.require_version("Adw", "1")
|
||||
gi.require_version("GdkPixbuf", "2.0")
|
||||
from gi.repository import Gtk, Gdk, Adw, GLib, Pango, GdkPixbuf
|
||||
|
||||
from threading import Thread
|
||||
|
||||
from cht.config import APP_NAME, SCENE_THRESHOLD
|
||||
from cht.ui.timeline import Timeline, TimelineControls
|
||||
from cht.ui.monitor import MonitorWidget
|
||||
from cht.ui.waveform import WaveformWidget
|
||||
from cht.audio.waveform import WaveformEngine
|
||||
from cht.transcriber.engine import TranscriberEngine
|
||||
from cht.stream.manager import StreamManager, list_sessions
|
||||
from cht.stream.tracker import RecordingTracker
|
||||
from cht.agent.runner import AgentRunner, ACTIONS, check_claude_cli
|
||||
@@ -37,6 +42,8 @@ class ChtWindow(Adw.ApplicationWindow):
|
||||
# Timeline is the central state machine
|
||||
self._timeline = Timeline()
|
||||
self._agent = AgentRunner()
|
||||
self._waveform_engine = WaveformEngine()
|
||||
self._transcriber = TranscriberEngine()
|
||||
|
||||
# Main layout
|
||||
self._main_paned = Gtk.Paned(orientation=Gtk.Orientation.HORIZONTAL)
|
||||
@@ -165,6 +172,34 @@ class ChtWindow(Adw.ApplicationWindow):
|
||||
# Load existing frames into the strip
|
||||
self._load_existing_frames()
|
||||
|
||||
# Load existing transcript
|
||||
transcript_index = self._stream_mgr.transcript_dir / "index.json"
|
||||
if transcript_index.exists():
|
||||
self._transcriber.load_index(transcript_index)
|
||||
segs = self._transcriber.all_segments()
|
||||
if segs:
|
||||
self._append_transcript_segments(segs)
|
||||
self._append_agent_output(f" Loaded {len(segs)} transcript segments.\n")
|
||||
|
||||
# Compute waveform from existing recordings (background thread)
|
||||
if segments:
|
||||
from cht.stream import ffmpeg as ff
|
||||
|
||||
def _compute_waveform():
|
||||
audio_dir = self._stream_mgr.audio_dir
|
||||
audio_dir.mkdir(parents=True, exist_ok=True)
|
||||
full_wav = audio_dir / "full.wav"
|
||||
try:
|
||||
ff.extract_audio_chunk(segments[0], full_wav)
|
||||
self._waveform_engine.compute_full(full_wav)
|
||||
peaks = self._waveform_engine.peaks
|
||||
bucket_dur = self._waveform_engine.bucket_duration
|
||||
GLib.idle_add(self._waveform_widget.set_peaks, peaks.copy(), bucket_dur)
|
||||
except Exception as e:
|
||||
log.error("Waveform computation failed: %s", e)
|
||||
|
||||
Thread(target=_compute_waveform, daemon=True, name="waveform_load").start()
|
||||
|
||||
# Set up agent auth/model if not already done
|
||||
self._populate_model_dropdown()
|
||||
|
||||
@@ -197,6 +232,9 @@ class ChtWindow(Adw.ApplicationWindow):
|
||||
# Start scene detection
|
||||
self._stream_mgr.start_scene_detector(on_new_frames=self._on_new_scene_frames)
|
||||
|
||||
# Start audio extraction (waveform + transcription)
|
||||
self._stream_mgr.start_audio_extractor(on_new_audio=self._on_new_audio)
|
||||
|
||||
# Start polling for frame thumbnails
|
||||
GLib.timeout_add(1000, self._poll_frames)
|
||||
|
||||
@@ -237,6 +275,26 @@ class ChtWindow(Adw.ApplicationWindow):
|
||||
for f in frames:
|
||||
GLib.idle_add(self._timeline.add_scene_marker, f["timestamp"])
|
||||
|
||||
def _on_new_audio(self, wav_path, start_time, duration):
    """Called from audio extractor thread with new WAV chunk.

    Feeds the chunk to the waveform engine, schedules a waveform redraw on
    the GLib main loop, and starts a background thread that transcribes the
    chunk, persists the transcript index and schedules the new segments for
    display.

    Args:
        wav_path: Path of the WAV chunk to process.
        start_time: Offset of the chunk; forwarded to the waveform engine
            and used as ``time_offset`` for transcription.
        duration: Not used by this handler; accepted so the signature
            matches what the audio extractor passes to its callback.
    """
    # Compute waveform peaks (fast, ~1ms)
    self._waveform_engine.append_chunk(wav_path, start_time)
    peaks = self._waveform_engine.peaks
    bucket_dur = self._waveform_engine.bucket_duration
    # Hand the widget a copy so later engine updates on this thread cannot
    # mutate the data while the main loop is using it.
    GLib.idle_add(self._waveform_widget.set_peaks, peaks.copy(), bucket_dur)

    # Transcribe in separate thread (GPU-bound, ~1-2s per chunk)
    def _transcribe():
        # Log failures instead of letting the worker thread die silently,
        # mirroring how the waveform loader thread reports its errors.
        try:
            new_segs = self._transcriber.transcribe_chunk(wav_path, time_offset=start_time)
        except Exception as e:
            log.error("Transcription failed for %s: %s", wav_path, e)
            return

        # Read the attribute once so the check and the use see the same
        # object even if it is reassigned from another thread meanwhile.
        stream_mgr = self._stream_mgr
        if stream_mgr:
            try:
                self._transcriber.save_index(stream_mgr.transcript_dir / "index.json")
            except Exception as e:
                # A failed save must not prevent showing the new segments.
                log.error("Saving transcript index failed: %s", e)

        if new_segs:
            GLib.idle_add(self._append_transcript_segments, new_segs)

    Thread(target=_transcribe, daemon=True, name="transcriber").start()
|
||||
|
||||
def _check_recorder(self):
|
||||
"""Watchdog: restart recorder if it died (sender disconnect, etc)."""
|
||||
if not self._streaming or not self._stream_mgr:
|
||||
@@ -257,6 +315,10 @@ class ChtWindow(Adw.ApplicationWindow):
|
||||
log.info("Stopping stream...")
|
||||
self._timeline.reset()
|
||||
self._monitor.stop()
|
||||
self._waveform_engine.reset()
|
||||
self._waveform_widget.set_peaks(None, 0.05)
|
||||
self._transcriber.reset()
|
||||
self._transcript_view.get_buffer().set_text("")
|
||||
if self._tracker:
|
||||
self._tracker.stop()
|
||||
self._tracker = None
|
||||
@@ -298,8 +360,10 @@ class ChtWindow(Adw.ApplicationWindow):
|
||||
stream_frame.set_child(self._monitor)
|
||||
top_paned.set_start_child(stream_frame)
|
||||
|
||||
self._waveform_area = self._build_placeholder("Waveform", height=250, width=200)
|
||||
top_paned.set_end_child(self._waveform_area)
|
||||
self._waveform_widget = WaveformWidget(self._timeline)
|
||||
waveform_frame = Gtk.Frame()
|
||||
waveform_frame.set_child(self._waveform_widget)
|
||||
top_paned.set_end_child(waveform_frame)
|
||||
top_paned.set_position(650)
|
||||
right_box.append(top_paned)
|
||||
|
||||
@@ -819,6 +883,16 @@ class ChtWindow(Adw.ApplicationWindow):
|
||||
# Auto-scroll to bottom
|
||||
self._agent_output_view.scroll_to_iter(buf.get_end_iter(), 0, False, 0, 0)
|
||||
|
||||
def _append_transcript_segments(self, segments):
    """Append transcription segments to the transcript panel.

    Each segment becomes one line of the form
    ``[MM:SS-MM:SS] <id> <text>``, appended at the end of the transcript
    buffer; the view is then scrolled to the end. Minutes are not wrapped
    at 60, so long recordings show e.g. ``62:05``.

    Args:
        segments: Iterable of objects exposing ``start``, ``end``, ``id``
            and ``text``; ``start``/``end`` are truncated with ``int()``
            and treated as seconds.

    Returns:
        None. An empty ``segments`` leaves the buffer and scroll position
        untouched.
    """
    lines = []
    for seg in segments:
        m1, s1 = divmod(int(seg.start), 60)
        m2, s2 = divmod(int(seg.end), 60)
        lines.append(f"[{m1:02d}:{s1:02d}-{m2:02d}:{s2:02d}] {seg.id} {seg.text}\n")
    if not lines:
        return

    buf = self._transcript_view.get_buffer()
    # One insert for the whole batch instead of one buffer change per segment.
    buf.insert(buf.get_end_iter(), "".join(lines))

    # Auto-scroll to bottom. Scroll to a mark rather than an iter: line
    # heights for freshly inserted text are computed later, and GTK defers a
    # mark-based scroll until the layout has been validated.
    end_mark = buf.create_mark(None, buf.get_end_iter(), False)
    self._transcript_view.scroll_to_mark(end_mark, 0.0, False, 0.0, 0.0)
    buf.delete_mark(end_mark)
|
||||
|
||||
# -- Frame thumbnails --
|
||||
|
||||
def _load_existing_frames(self):
|
||||
|
||||
Reference in New Issue
Block a user