This commit is contained in:
2026-04-03 00:25:14 -03:00
parent d61e2a5492
commit cae9312db1
7 changed files with 341 additions and 91 deletions

View File

@@ -17,7 +17,7 @@ from cht.ui.timeline import Timeline, TimelineControls
from cht.ui.monitor import MonitorWidget
from cht.ui.waveform import WaveformWidget
from cht.audio.waveform import WaveformEngine
from cht.transcriber.engine import TranscriberEngine
from cht.transcriber.engine import TranscriberEngine, LANGUAGES
from cht.stream.manager import StreamManager, list_sessions
from cht.stream.tracker import RecordingTracker
from cht.agent.runner import AgentRunner, ACTIONS, check_claude_cli
@@ -38,6 +38,10 @@ class ChtWindow(Adw.ApplicationWindow):
self._selected_frame = None # currently selected frame ID
self._frame_widgets = {} # frame_id → outer Box widget
self._frame_order = [] # ordered list of frame IDs
self._transcript_order = [] # ordered list of transcript segment IDs
self._transcript_rows = {} # segment_id → ListBoxRow
self._transcript_texts = {} # segment_id → text (clean, no timestamps)
self._selected_transcripts = [] # ordered list of selected transcript IDs
# Timeline is the central state machine
self._timeline = Timeline()
@@ -85,14 +89,17 @@ class ChtWindow(Adw.ApplicationWindow):
log.info("Window initialized")
GLib.idle_add(self._start_stream)
GLib.idle_add(self._check_agent_auth)
def _on_connect_clicked(self, button):
if self._streaming:
self._stop_stream()
self._stop_stream(reload_session=True)
else:
self._start_stream()
# If a session is loaded, continue it; otherwise start fresh
session_id = self._stream_mgr.session_id if self._stream_mgr else None
if self._stream_mgr:
self._stop_stream() # clean teardown first
self._start_stream(session_id=session_id)
def _on_load_session_clicked(self, button):
sessions = list_sessions()
@@ -203,7 +210,7 @@ class ChtWindow(Adw.ApplicationWindow):
# Set up agent auth/model if not already done
self._populate_model_dropdown()
def _start_stream(self):
def _start_stream(self, session_id=None):
log.info("Starting stream...")
self._connect_btn.set_label("Disconnect")
self._connect_btn.remove_css_class("suggested-action")
@@ -211,8 +218,8 @@ class ChtWindow(Adw.ApplicationWindow):
self._streaming = True
self._gone_live = False
# Create session
self._stream_mgr = StreamManager()
# Continue existing session or create new one
self._stream_mgr = StreamManager(session_id=session_id)
self._stream_mgr.setup_dirs()
# Start ffmpeg recorder (listens for sender, relays to UDP)
@@ -244,6 +251,17 @@ class ChtWindow(Adw.ApplicationWindow):
# Watchdog: restart recorder on crash/disconnect
GLib.timeout_add(2000, self._check_recorder)
# If resuming a session, reload existing frames/transcript/waveform
if session_id:
self._load_existing_frames()
transcript_index = self._stream_mgr.transcript_dir / "index.json"
if transcript_index.exists():
self._transcriber.load_index(transcript_index)
segs = self._transcriber.all_segments()
if segs:
self._append_transcript_segments(segs)
self.set_title(f"{APP_NAME}{self._stream_mgr.session_id}")
log.info("Waiting for sender...")
def _go_live_once(self):
@@ -277,6 +295,8 @@ class ChtWindow(Adw.ApplicationWindow):
def _on_new_audio(self, wav_path, start_time, duration):
"""Called from audio extractor thread with new WAV chunk."""
if not self._stream_mgr:
return
# Compute waveform peaks (fast, ~1ms)
self._waveform_engine.append_chunk(wav_path, start_time)
peaks = self._waveform_engine.peaks
@@ -284,12 +304,12 @@ class ChtWindow(Adw.ApplicationWindow):
GLib.idle_add(self._waveform_widget.set_peaks, peaks.copy(), bucket_dur)
# Transcribe in separate thread (GPU-bound, ~1-2s per chunk)
mgr = self._stream_mgr # capture ref before thread starts
def _transcribe():
new_segs = self._transcriber.transcribe_chunk(wav_path, time_offset=start_time)
if self._stream_mgr:
self._transcriber.save_index(
self._stream_mgr.transcript_dir / "index.json"
)
if mgr:
self._transcriber.save_index(mgr.transcript_dir / "index.json")
if new_segs:
GLib.idle_add(self._append_transcript_segments, new_segs)
@@ -311,14 +331,12 @@ class ChtWindow(Adw.ApplicationWindow):
pos = self._monitor.get_live_position()
self._timeline.toggle_live(live_player_pos=pos)
def _stop_stream(self):
def _stop_stream(self, reload_session=False):
log.info("Stopping stream...")
self._timeline.reset()
self._monitor.stop()
self._waveform_engine.reset()
self._waveform_widget.set_peaks(None, 0.05)
self._transcriber.reset()
self._transcript_view.get_buffer().set_text("")
# Remember session for reload
last_session_id = self._stream_mgr.session_id if self._stream_mgr and not self._stream_mgr.readonly else None
# Stop background threads first (sets stop flags, kills procs)
if self._tracker:
self._tracker.stop()
self._tracker = None
@@ -326,6 +344,18 @@ class ChtWindow(Adw.ApplicationWindow):
if not self._stream_mgr.readonly:
self._stream_mgr.stop_all()
self._stream_mgr = None
# Then clean up UI
self._timeline.reset()
self._monitor.reset()
self._waveform_engine.reset()
self._waveform_widget.set_peaks(None, 0.05)
self._transcriber.reset()
self._transcript_order.clear()
self._transcript_rows.clear()
self._transcript_texts.clear()
self._selected_transcripts.clear()
while child := self._transcript_list.get_first_child():
self._transcript_list.remove(child)
self._known_frames = set()
self._selected_frame = None
self._frame_widgets = {}
@@ -341,8 +371,19 @@ class ChtWindow(Adw.ApplicationWindow):
self._streaming = False
self.set_title(APP_NAME)
# Reload last session in review mode
if reload_session and last_session_id:
GLib.idle_add(self._load_session, last_session_id)
def _on_close(self, *args):
self._stop_stream()
self.teardown()
def teardown(self):
"""Full cleanup for app exit — safe to call multiple times."""
if self._stream_mgr or self._streaming:
self._stop_stream()
# Terminate mpv players and GL contexts (only on app exit)
self._monitor.stop()
# -- Right panels --
@@ -467,18 +508,14 @@ class ChtWindow(Adw.ApplicationWindow):
label.set_margin_bottom(4)
box.append(label)
self._transcript_view = Gtk.TextView()
self._transcript_view.set_editable(False)
self._transcript_view.set_wrap_mode(Gtk.WrapMode.WORD_CHAR)
self._transcript_view.set_cursor_visible(False)
self._transcript_view.set_left_margin(8)
self._transcript_view.set_right_margin(8)
self._transcript_list = Gtk.ListBox()
self._transcript_list.set_selection_mode(Gtk.SelectionMode.NONE)
scroll = Gtk.ScrolledWindow()
scroll.set_vexpand(True)
scroll.set_min_content_height(150)
scroll.set_child(self._transcript_view)
box.append(scroll)
self._transcript_scroll = Gtk.ScrolledWindow()
self._transcript_scroll.set_vexpand(True)
self._transcript_scroll.set_min_content_height(150)
self._transcript_scroll.set_child(self._transcript_list)
box.append(self._transcript_scroll)
frame = Gtk.Frame()
frame.set_child(box)
@@ -553,6 +590,16 @@ class ChtWindow(Adw.ApplicationWindow):
self._model_dropdown.connect("notify::selected", self._on_model_changed)
actions_box.append(self._model_dropdown)
lang_label = Gtk.Label(label="Lang:")
lang_label.add_css_class("dim-label")
actions_box.append(lang_label)
lang_names = list(LANGUAGES.keys())
self._lang_dropdown = Gtk.DropDown.new_from_strings(lang_names)
self._lang_dropdown.set_selected(0)
self._lang_dropdown.connect("notify::selected", self._on_lang_changed)
actions_box.append(self._lang_dropdown)
outer.append(actions_box)
# Text entry + send
@@ -589,15 +636,41 @@ class ChtWindow(Adw.ApplicationWindow):
adj.set_value(x + w - page)
return False
def _clear_frame_selection(self):
if self._selected_frame and self._selected_frame in self._frame_widgets:
self._frame_widgets[self._selected_frame].remove_css_class("frame-selected")
self._selected_frame = None
def _clear_transcript_selection(self):
for old_id in self._selected_transcripts:
if old_id in self._transcript_rows:
self._transcript_rows[old_id].remove_css_class("frame-selected")
self._selected_transcripts.clear()
def _build_selection_message(self, verb: str) -> str | None:
"""Build a message from verb + selected frame ref + transcript texts."""
parts = [verb]
if self._selected_frame:
parts.append(f"@{self._selected_frame}")
if self._selected_transcripts:
texts = [self._transcript_texts[tid]
for tid in self._selected_transcripts
if tid in self._transcript_texts]
if texts:
parts.append(" ".join(texts))
return " ".join(parts) if len(parts) > 1 else None
def _send_action(self, verb: str):
"""Send a predefined action with the selected frame."""
if not self._selected_frame:
self._append_agent_output("Select a frame first.\n")
"""Send a predefined action with selected frame/transcript."""
msg = self._build_selection_message(verb)
if not msg:
self._append_agent_output("Select a frame or transcript first.\n")
return
self._send_message(f"{verb} @{self._selected_frame}")
self._send_message(msg)
def _select_frame(self, frame_id: str):
"""Select a frame thumbnail (or deselect if already selected)."""
self._clear_transcript_selection()
# Deselect previous
if self._selected_frame and self._selected_frame in self._frame_widgets:
self._frame_widgets[self._selected_frame].remove_css_class("frame-selected")
@@ -613,22 +686,93 @@ class ChtWindow(Adw.ApplicationWindow):
# Scroll after layout settles (idle may fire before allocation)
GLib.timeout_add(50, self._scroll_to_frame, widget)
def _select_transcript(self, seg_id, extend=False):
"""Select a transcript segment. If extend=True, add to selection."""
self._clear_frame_selection()
if not extend:
# Clear previous selection
for old_id in self._selected_transcripts:
if old_id in self._transcript_rows:
self._transcript_rows[old_id].remove_css_class("frame-selected")
self._selected_transcripts.clear()
if seg_id in self._selected_transcripts:
# Deselect if clicking same one (only in non-extend mode)
if not extend:
return
self._selected_transcripts.remove(seg_id)
if seg_id in self._transcript_rows:
self._transcript_rows[seg_id].remove_css_class("frame-selected")
return
self._selected_transcripts.append(seg_id)
if seg_id in self._transcript_rows:
row = self._transcript_rows[seg_id]
row.add_css_class("frame-selected")
# Scroll row into view
GLib.timeout_add(50, self._scroll_transcript_to_row, row)
def _scroll_transcript_to_row(self, row):
adj = self._transcript_scroll.get_vadjustment()
alloc = row.get_allocation()
y = alloc.y
h = alloc.height
if h <= 0:
return False
page = adj.get_page_size()
val = adj.get_value()
if y < val:
adj.set_value(y)
elif y + h > val + page:
adj.set_value(y + h - page)
return False
def _select_adjacent_transcript(self, delta, extend=False):
"""Select next/prev transcript segment. Shift extends selection."""
if not self._transcript_order:
return
if not self._selected_transcripts:
idx = 0 if delta > 0 else len(self._transcript_order) - 1
else:
last = self._selected_transcripts[-1]
try:
cur = self._transcript_order.index(last)
except ValueError:
cur = 0
idx = cur + delta
if idx < 0 or idx >= len(self._transcript_order):
return
self._select_transcript(self._transcript_order[idx], extend=extend)
def _on_key_pressed(self, controller, keyval, keycode, state):
"""Handle Left/Right arrow for frame selection, Enter for answer."""
"""Keyboard shortcuts: Left/Right=frames, Up/Down=transcript, Enter=answer."""
# Don't intercept when text entry is focused
focus = self.get_focus()
if isinstance(focus, (Gtk.Entry, Gtk.TextView)):
return False
shift = bool(state & Gdk.ModifierType.SHIFT_MASK)
if keyval == Gdk.KEY_Left:
self._clear_transcript_selection()
self._select_adjacent_frame(-1)
return True
elif keyval == Gdk.KEY_Right:
self._clear_transcript_selection()
self._select_adjacent_frame(1)
return True
elif keyval == Gdk.KEY_Up:
self._clear_frame_selection()
self._select_adjacent_transcript(-1, extend=shift)
return True
elif keyval == Gdk.KEY_Down:
self._clear_frame_selection()
self._select_adjacent_transcript(1, extend=shift)
return True
elif keyval in (Gdk.KEY_Return, Gdk.KEY_KP_Enter):
if self._selected_frame:
self._send_action("answer")
msg = self._build_selection_message("answer")
if msg:
self._send_message(msg)
return True
elif keyval == Gdk.KEY_Delete:
self._agent_output_view.get_buffer().set_text("")
@@ -845,6 +989,14 @@ class ChtWindow(Adw.ApplicationWindow):
buf.apply_tag(tag, buf.get_iter_at_mark(mark), it)
buf.delete_mark(mark)
def _on_lang_changed(self, dropdown, _pspec):
    """Apply the language chosen in the dropdown to the transcriber.

    The selected position is mapped to a name through the key order of
    ``LANGUAGES``; that name's value is stored in
    ``self._transcriber.language`` and logged ("auto" is logged when the
    value is falsy). Positions past the end of the list are ignored.
    """
    position = dropdown.get_selected()
    names = list(LANGUAGES.keys())
    if position >= len(names):
        # Out-of-range position — presumably "nothing selected"; leave the
        # transcriber language untouched.
        return
    name = names[position]
    code = LANGUAGES[name]
    self._transcriber.language = code
    log.info("Transcript language: %s (%s)", name, code or "auto")
def _on_model_changed(self, dropdown, _pspec):
idx = dropdown.get_selected()
model = self._agent.available_models[idx] if idx < len(self._agent.available_models) else None
@@ -884,14 +1036,35 @@ class ChtWindow(Adw.ApplicationWindow):
self._agent_output_view.scroll_to_iter(buf.get_end_iter(), 0, False, 0, 0)
def _append_transcript_segments(self, segments):
"""Append transcription segments to the transcript panel."""
buf = self._transcript_view.get_buffer()
"""Append transcription segments to the transcript ListBox."""
for seg in segments:
m1, s1 = divmod(int(seg.start), 60)
m2, s2 = divmod(int(seg.end), 60)
line = f"[{m1:02d}:{s1:02d}-{m2:02d}:{s2:02d}] {seg.id} {seg.text}\n"
buf.insert(buf.get_end_iter(), line)
self._transcript_view.scroll_to_iter(buf.get_end_iter(), 0, False, 0, 0)
text = f"{seg.id} [{m1:02d}:{s1:02d}-{m2:02d}:{s2:02d}] {seg.text}"
row_label = Gtk.Label(label=text)
row_label.set_xalign(0)
row_label.set_wrap(True)
row_label.set_margin_start(8)
row_label.set_margin_end(8)
row_label.set_margin_top(2)
row_label.set_margin_bottom(2)
row = Gtk.ListBoxRow()
row.set_child(row_label)
gesture = Gtk.GestureClick()
gesture.connect("released", lambda g, n, x, y, sid=seg.id: self._select_transcript(sid))
row.add_controller(gesture)
self._transcript_list.append(row)
self._transcript_rows[seg.id] = row
self._transcript_texts[seg.id] = seg.text
self._transcript_order.append(seg.id)
# Auto-scroll to bottom
adj = self._transcript_scroll.get_vadjustment()
GLib.idle_add(lambda: adj.set_value(adj.get_upper()) or False)
# -- Frame thumbnails --