diff --git a/cht/agent/runner.py b/cht/agent/runner.py index 7c44ed8..e644f7e 100644 --- a/cht/agent/runner.py +++ b/cht/agent/runner.py @@ -51,21 +51,37 @@ def _resolve_provider() -> AgentProvider: return ClaudeSDKProvider() +def _expand_ref_nums(spec: str) -> list[int]: + """Expand a ref spec like '2-6' or '2,4,6' or '2-4,6,8-10' into sorted ints.""" + nums = set() + for part in spec.split(","): + part = part.strip() + if "-" in part: + a, b = part.split("-", 1) + try: + nums.update(range(int(a), int(b) + 1)) + except ValueError: + pass + elif part: + try: + nums.add(int(part)) + except ValueError: + pass + return sorted(nums) + + def _parse_mentions(message: str, frames: list[FrameRef]) -> list[FrameRef]: - """Extract @-references from message. Accepts: - @F0001 @f1 @1 @001 — all match frame F0001 - """ + """Extract @F references. Accepts @F1, @F2-6, @F2,4,6, @F2-4,6,8-10.""" mentioned = [] seen = set() - for match in re.finditer(r"@([Ff]?\d+)", message): - raw = match.group(1).lstrip("Ff") - num = int(raw) - fid = f"F{num:04d}" - if fid not in seen: - frame = next((f for f in frames if f.id == fid), None) - if frame: - mentioned.append(frame) - seen.add(fid) + for match in re.finditer(r"@[Ff]([\d,\-]+)", message): + for num in _expand_ref_nums(match.group(1)): + fid = f"F{num:04d}" + if fid not in seen: + frame = next((f for f in frames if f.id == fid), None) + if frame: + mentioned.append(frame) + seen.add(fid) return mentioned @@ -111,17 +127,17 @@ def _load_transcript(transcript_dir: Path) -> list[TranscriptRef]: def _parse_transcript_mentions(message: str, segments: list[TranscriptRef]) -> list[TranscriptRef]: - """Extract @T references from message. Accepts @T0001, @t1, @T1.""" + """Extract @T references. Accepts @T1, @T2-6, @T2,4,6, @T1-3,5,7-10.""" mentioned = [] seen = set() - for match in re.finditer(r"@[Tt](\d+)", message): - num = int(match.group(1)) - tid = f"T{num:04d}" - if tid not in seen: - seg = next((s for s in segments if s.id == tid), None) - if seg: - mentioned.append(seg) - seen.add(tid) + for match in re.finditer(r"@[Tt]([\d,\-]+)", message): + for num in _expand_ref_nums(match.group(1)): + tid = f"T{num:04d}" + if tid not in seen: + seg = next((s for s in segments if s.id == tid), None) + if seg: + mentioned.append(seg) + seen.add(tid) return mentioned diff --git a/cht/app.py b/cht/app.py index ccc53c8..1e0592c 100644 --- a/cht/app.py +++ b/cht/app.py @@ -1,5 +1,6 @@ import logging import os +import signal import sys import threading import gi @@ -19,13 +20,30 @@ class ChtApp(Adw.Application): application_id=APP_ID, flags=Gio.ApplicationFlags.DEFAULT_FLAGS, ) + # Let GLib handle SIGINT/SIGTERM so Ctrl+C triggers graceful shutdown + GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGINT, self._on_signal) + GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGTERM, self._on_signal) + + def _on_signal(self): + log = logging.getLogger("cht") + log.info("Signal received — shutting down gracefully") + self.quit() + return GLib.SOURCE_REMOVE + + def do_shutdown(self): + # Ensure all windows tear down before the process exits + for win in self.get_windows(): + if hasattr(win, "teardown"): + win.teardown() + Adw.Application.do_shutdown(self) def do_activate(self): win = self.props.active_window if not win: css = Gtk.CssProvider() css.load_from_string( - ".frame-selected { border: 3px solid @accent_color; border-radius: 6px; }" + ".frame-selected { border: 3px solid @accent_color; border-radius: 6px; }\n" + "row.frame-selected { background: alpha(@accent_color, 0.25); border: none; border-radius: 0; }" ) Gtk.StyleContext.add_provider_for_display( Gdk.Display.get_default(), diff --git a/cht/stream/manager.py b/cht/stream/manager.py index dfafe9c..b2b9a15 100644 --- a/cht/stream/manager.py +++ b/cht/stream/manager.py @@ -131,7 +131,9 @@ class StreamManager: def start_recorder(self): """Start ffmpeg to receive TCP stream, write to fMP4, and relay to UDP.""" - self._segment = 0 + # Start after existing segments (for resumed sessions) + existing = self.recording_segments + self._segment = len(existing) self._launch_recorder() def restart_recorder(self): diff --git a/cht/transcriber/engine.py b/cht/transcriber/engine.py index ba37534..15fe35c 100644 --- a/cht/transcriber/engine.py +++ b/cht/transcriber/engine.py @@ -7,11 +7,18 @@ and persists to transcript/index.json in the session directory. import json import logging +import threading from dataclasses import dataclass, asdict from pathlib import Path log = logging.getLogger(__name__) +LANGUAGES = { + "Auto": None, + "English": "en", + "Spanish": "es", +} + @dataclass class TranscriptSegment: @@ -30,6 +37,9 @@ class TranscriberEngine: self._device = device self._segments: list[TranscriptSegment] = [] self._next_id = 1 + self._lock = threading.Lock() + self._stopped = False + self.language = None # None = auto-detect, "en", "es", etc. def _ensure_model(self): if self._model is not None: @@ -45,32 +55,36 @@ class TranscriberEngine: def transcribe_chunk(self, wav_path, time_offset=0.0) -> list[TranscriptSegment]: """Transcribe a WAV chunk. Returns new segments with absolute timestamps.""" + if self._stopped: + return [] self._ensure_model() try: - segments_iter, _info = self._model.transcribe( - str(wav_path), - beam_size=5, - vad_filter=True, - ) + kwargs = {"beam_size": 5, "vad_filter": True} + if self.language: + kwargs["language"] = self.language + segments_iter, info = self._model.transcribe(str(wav_path), **kwargs) except Exception as e: log.error("Whisper transcription failed: %s", e) return [] new_segments = [] - for seg in segments_iter: - text = seg.text.strip() - if not text: - continue - tid = f"T{self._next_id:04d}" - self._next_id += 1 - entry = TranscriptSegment( - id=tid, - start=time_offset + seg.start, - end=time_offset + seg.end, - text=text, - ) - self._segments.append(entry) - new_segments.append(entry) + with self._lock: + if self._stopped: + return [] + for seg in segments_iter: + text = seg.text.strip() + if not text: + continue + tid = f"T{self._next_id:04d}" + self._next_id += 1 + entry = TranscriptSegment( + id=tid, + start=time_offset + seg.start, + end=time_offset + seg.end, + text=text, + ) + self._segments.append(entry) + new_segments.append(entry) return new_segments @@ -78,7 +92,10 @@ class TranscriberEngine: return list(self._segments) def save_index(self, path: Path): - data = [asdict(s) for s in self._segments] + with self._lock: + if self._stopped: + return + data = [asdict(s) for s in self._segments] path.write_text(json.dumps(data, indent=2)) def load_index(self, path: Path): @@ -87,12 +104,16 @@ class TranscriberEngine: except Exception as e: log.warning("Failed to load transcript index: %s", e) return - self._segments = [TranscriptSegment(**e) for e in data] - if self._segments: - last_num = max(int(s.id.lstrip("T")) for s in self._segments) - self._next_id = last_num + 1 + with self._lock: + self._segments = [TranscriptSegment(**e) for e in data] + if self._segments: + last_num = max(int(s.id.lstrip("T")) for s in self._segments) + self._next_id = last_num + 1 + self._stopped = False log.info("Loaded %d transcript segments", len(self._segments)) def reset(self): - self._segments.clear() - self._next_id = 1 + with self._lock: + self._stopped = True + self._segments.clear() + self._next_id = 1 diff --git a/cht/ui/monitor.py b/cht/ui/monitor.py index ea7b007..b3f004b 100644 --- a/cht/ui/monitor.py +++ b/cht/ui/monitor.py @@ -99,7 +99,20 @@ class MonitorWidget(Gtk.Box): elif self._review_player: self._review_player.screenshot(path) + def reset(self): + """Reset for session transition — keep players alive, just unload content.""" + log.info("Resetting monitor") + self._live_source_url = None + self._recording_path = None + self._live_loaded = False + if self._live_player: + self._live_player.command("stop") + if self._review_player: + self._review_player.command("stop") + self._stack.set_visible_child_name("live") + def stop(self): + """Full teardown — terminates mpv players. Only call on app exit.""" log.info("Stopping monitor") if self._live_player: self._live_player.terminate() diff --git a/cht/ui/mpv.py b/cht/ui/mpv.py index 005a904..1f75513 100644 --- a/cht/ui/mpv.py +++ b/cht/ui/mpv.py @@ -106,6 +106,13 @@ class Player: log.info("mpv load_live: %s", url) self._player.loadfile(str(url), mode="replace") + def command(self, *args): + """Send a command to mpv.""" + try: + self._player.command(*args) + except Exception: + pass + def play(self): """Resume/start playback.""" self._player.pause = False diff --git a/cht/window.py b/cht/window.py index de1e3be..16c9c0b 100644 --- a/cht/window.py +++ b/cht/window.py @@ -17,7 +17,7 @@ from cht.ui.timeline import Timeline, TimelineControls from cht.ui.monitor import MonitorWidget from cht.ui.waveform import WaveformWidget from cht.audio.waveform import WaveformEngine -from cht.transcriber.engine import TranscriberEngine +from cht.transcriber.engine import TranscriberEngine, LANGUAGES from cht.stream.manager import StreamManager, list_sessions from cht.stream.tracker import RecordingTracker from cht.agent.runner import AgentRunner, ACTIONS, check_claude_cli @@ -38,6 +38,10 @@ class ChtWindow(Adw.ApplicationWindow): self._selected_frame = None # currently selected frame ID self._frame_widgets = {} # frame_id → outer Box widget self._frame_order = [] # ordered list of frame IDs + self._transcript_order = [] # ordered list of transcript segment IDs + self._transcript_rows = {} # segment_id → ListBoxRow + self._transcript_texts = {} # segment_id → text (clean, no timestamps) + self._selected_transcripts = [] # ordered list of selected transcript IDs # Timeline is the central state machine self._timeline = Timeline() @@ -85,14 +89,17 @@ class ChtWindow(Adw.ApplicationWindow): log.info("Window initialized") - GLib.idle_add(self._start_stream) GLib.idle_add(self._check_agent_auth) def _on_connect_clicked(self, button): if self._streaming: - self._stop_stream() + self._stop_stream(reload_session=True) else: - self._start_stream() + # If a session is loaded, continue it; otherwise start fresh + session_id = self._stream_mgr.session_id if self._stream_mgr else None + if self._stream_mgr: + self._stop_stream() # clean teardown first + self._start_stream(session_id=session_id) def _on_load_session_clicked(self, button): sessions = list_sessions() @@ -203,7 +210,7 @@ class ChtWindow(Adw.ApplicationWindow): # Set up agent auth/model if not already done self._populate_model_dropdown() - def _start_stream(self): + def _start_stream(self, session_id=None): log.info("Starting stream...") self._connect_btn.set_label("Disconnect") self._connect_btn.remove_css_class("suggested-action") @@ -211,8 +218,8 @@ class ChtWindow(Adw.ApplicationWindow): self._streaming = True self._gone_live = False - # Create session - self._stream_mgr = StreamManager() + # Continue existing session or create new one + self._stream_mgr = StreamManager(session_id=session_id) self._stream_mgr.setup_dirs() # Start ffmpeg recorder (listens for sender, relays to UDP) @@ -244,6 +251,17 @@ class ChtWindow(Adw.ApplicationWindow): # Watchdog: restart recorder on crash/disconnect GLib.timeout_add(2000, self._check_recorder) + # If resuming a session, reload existing frames/transcript/waveform + if session_id: + self._load_existing_frames() + transcript_index = self._stream_mgr.transcript_dir / "index.json" + if transcript_index.exists(): + self._transcriber.load_index(transcript_index) + segs = self._transcriber.all_segments() + if segs: + self._append_transcript_segments(segs) + + self.set_title(f"{APP_NAME} — {self._stream_mgr.session_id}") log.info("Waiting for sender...") def _go_live_once(self): @@ -277,6 +295,8 @@ class ChtWindow(Adw.ApplicationWindow): def _on_new_audio(self, wav_path, start_time, duration): """Called from audio extractor thread with new WAV chunk.""" + if not self._stream_mgr: + return # Compute waveform peaks (fast, ~1ms) self._waveform_engine.append_chunk(wav_path, start_time) peaks = self._waveform_engine.peaks @@ -284,12 +304,12 @@ class ChtWindow(Adw.ApplicationWindow): GLib.idle_add(self._waveform_widget.set_peaks, peaks.copy(), bucket_dur) # Transcribe in separate thread (GPU-bound, ~1-2s per chunk) + mgr = self._stream_mgr # capture ref before thread starts + def _transcribe(): new_segs = self._transcriber.transcribe_chunk(wav_path, time_offset=start_time) - if self._stream_mgr: - self._transcriber.save_index( - self._stream_mgr.transcript_dir / "index.json" - ) + if mgr: + self._transcriber.save_index(mgr.transcript_dir / "index.json") if new_segs: GLib.idle_add(self._append_transcript_segments, new_segs) @@ -311,14 +331,12 @@ class ChtWindow(Adw.ApplicationWindow): pos = self._monitor.get_live_position() self._timeline.toggle_live(live_player_pos=pos) - def _stop_stream(self): + def _stop_stream(self, reload_session=False): log.info("Stopping stream...") - self._timeline.reset() - self._monitor.stop() - self._waveform_engine.reset() - self._waveform_widget.set_peaks(None, 0.05) - self._transcriber.reset() - self._transcript_view.get_buffer().set_text("") + # Remember session for reload + last_session_id = self._stream_mgr.session_id if self._stream_mgr and not self._stream_mgr.readonly else None + + # Stop background threads first (sets stop flags, kills procs) if self._tracker: self._tracker.stop() self._tracker = None @@ -326,6 +344,18 @@ class ChtWindow(Adw.ApplicationWindow): if not self._stream_mgr.readonly: self._stream_mgr.stop_all() self._stream_mgr = None + # Then clean up UI + self._timeline.reset() + self._monitor.reset() + self._waveform_engine.reset() + self._waveform_widget.set_peaks(None, 0.05) + self._transcriber.reset() + self._transcript_order.clear() + self._transcript_rows.clear() + self._transcript_texts.clear() + self._selected_transcripts.clear() + while child := self._transcript_list.get_first_child(): + self._transcript_list.remove(child) self._known_frames = set() self._selected_frame = None self._frame_widgets = {} @@ -341,8 +371,19 @@ class ChtWindow(Adw.ApplicationWindow): self._streaming = False self.set_title(APP_NAME) + # Reload last session in review mode + if reload_session and last_session_id: + GLib.idle_add(self._load_session, last_session_id) + def _on_close(self, *args): - self._stop_stream() + self.teardown() + + def teardown(self): + """Full cleanup for app exit — safe to call multiple times.""" + if self._stream_mgr or self._streaming: + self._stop_stream() + # Terminate mpv players and GL contexts (only on app exit) + self._monitor.stop() # -- Right panels -- @@ -467,18 +508,14 @@ class ChtWindow(Adw.ApplicationWindow): label.set_margin_bottom(4) box.append(label) - self._transcript_view = Gtk.TextView() - self._transcript_view.set_editable(False) - self._transcript_view.set_wrap_mode(Gtk.WrapMode.WORD_CHAR) - self._transcript_view.set_cursor_visible(False) - self._transcript_view.set_left_margin(8) - self._transcript_view.set_right_margin(8) + self._transcript_list = Gtk.ListBox() + self._transcript_list.set_selection_mode(Gtk.SelectionMode.NONE) - scroll = Gtk.ScrolledWindow() - scroll.set_vexpand(True) - scroll.set_min_content_height(150) - scroll.set_child(self._transcript_view) - box.append(scroll) + self._transcript_scroll = Gtk.ScrolledWindow() + self._transcript_scroll.set_vexpand(True) + self._transcript_scroll.set_min_content_height(150) + self._transcript_scroll.set_child(self._transcript_list) + box.append(self._transcript_scroll) frame = Gtk.Frame() frame.set_child(box) @@ -553,6 +590,16 @@ class ChtWindow(Adw.ApplicationWindow): self._model_dropdown.connect("notify::selected", self._on_model_changed) actions_box.append(self._model_dropdown) + lang_label = Gtk.Label(label="Lang:") + lang_label.add_css_class("dim-label") + actions_box.append(lang_label) + + lang_names = list(LANGUAGES.keys()) + self._lang_dropdown = Gtk.DropDown.new_from_strings(lang_names) + self._lang_dropdown.set_selected(0) + self._lang_dropdown.connect("notify::selected", self._on_lang_changed) + actions_box.append(self._lang_dropdown) + outer.append(actions_box) # Text entry + send @@ -589,15 +636,41 @@ class ChtWindow(Adw.ApplicationWindow): adj.set_value(x + w - page) return False + def _clear_frame_selection(self): + if self._selected_frame and self._selected_frame in self._frame_widgets: + self._frame_widgets[self._selected_frame].remove_css_class("frame-selected") + self._selected_frame = None + + def _clear_transcript_selection(self): + for old_id in self._selected_transcripts: + if old_id in self._transcript_rows: + self._transcript_rows[old_id].remove_css_class("frame-selected") + self._selected_transcripts.clear() + + def _build_selection_message(self, verb: str) -> str | None: + """Build a message from verb + selected frame ref + transcript texts.""" + parts = [verb] + if self._selected_frame: + parts.append(f"@{self._selected_frame}") + if self._selected_transcripts: + texts = [self._transcript_texts[tid] + for tid in self._selected_transcripts + if tid in self._transcript_texts] + if texts: + parts.append(" ".join(texts)) + return " ".join(parts) if len(parts) > 1 else None + def _send_action(self, verb: str): - """Send a predefined action with the selected frame.""" - if not self._selected_frame: - self._append_agent_output("Select a frame first.\n") + """Send a predefined action with selected frame/transcript.""" + msg = self._build_selection_message(verb) + if not msg: + self._append_agent_output("Select a frame or transcript first.\n") return - self._send_message(f"{verb} @{self._selected_frame}") + self._send_message(msg) def _select_frame(self, frame_id: str): """Select a frame thumbnail (or deselect if already selected).""" + self._clear_transcript_selection() # Deselect previous if self._selected_frame and self._selected_frame in self._frame_widgets: self._frame_widgets[self._selected_frame].remove_css_class("frame-selected") @@ -613,22 +686,93 @@ class ChtWindow(Adw.ApplicationWindow): # Scroll after layout settles (idle may fire before allocation) GLib.timeout_add(50, self._scroll_to_frame, widget) + def _select_transcript(self, seg_id, extend=False): + """Select a transcript segment. If extend=True, add to selection.""" + self._clear_frame_selection() + if not extend: + # Clear previous selection + for old_id in self._selected_transcripts: + if old_id in self._transcript_rows: + self._transcript_rows[old_id].remove_css_class("frame-selected") + self._selected_transcripts.clear() + + if seg_id in self._selected_transcripts: + # Deselect if clicking same one (only in non-extend mode) + if not extend: + return + self._selected_transcripts.remove(seg_id) + if seg_id in self._transcript_rows: + self._transcript_rows[seg_id].remove_css_class("frame-selected") + return + + self._selected_transcripts.append(seg_id) + if seg_id in self._transcript_rows: + row = self._transcript_rows[seg_id] + row.add_css_class("frame-selected") + # Scroll row into view + GLib.timeout_add(50, self._scroll_transcript_to_row, row) + + def _scroll_transcript_to_row(self, row): + adj = self._transcript_scroll.get_vadjustment() + alloc = row.get_allocation() + y = alloc.y + h = alloc.height + if h <= 0: + return False + page = adj.get_page_size() + val = adj.get_value() + if y < val: + adj.set_value(y) + elif y + h > val + page: + adj.set_value(y + h - page) + return False + + def _select_adjacent_transcript(self, delta, extend=False): + """Select next/prev transcript segment. Shift extends selection.""" + if not self._transcript_order: + return + if not self._selected_transcripts: + idx = 0 if delta > 0 else len(self._transcript_order) - 1 + else: + last = self._selected_transcripts[-1] + try: + cur = self._transcript_order.index(last) + except ValueError: + cur = 0 + idx = cur + delta + if idx < 0 or idx >= len(self._transcript_order): + return + self._select_transcript(self._transcript_order[idx], extend=extend) + def _on_key_pressed(self, controller, keyval, keycode, state): - """Handle Left/Right arrow for frame selection, Enter for answer.""" + """Keyboard shortcuts: Left/Right=frames, Up/Down=transcript, Enter=answer.""" # Don't intercept when text entry is focused focus = self.get_focus() if isinstance(focus, (Gtk.Entry, Gtk.TextView)): return False + shift = bool(state & Gdk.ModifierType.SHIFT_MASK) + if keyval == Gdk.KEY_Left: + self._clear_transcript_selection() self._select_adjacent_frame(-1) return True elif keyval == Gdk.KEY_Right: + self._clear_transcript_selection() self._select_adjacent_frame(1) return True + elif keyval == Gdk.KEY_Up: + self._clear_frame_selection() + self._select_adjacent_transcript(-1, extend=shift) + return True + elif keyval == Gdk.KEY_Down: + self._clear_frame_selection() + self._select_adjacent_transcript(1, extend=shift) + return True elif keyval in (Gdk.KEY_Return, Gdk.KEY_KP_Enter): - if self._selected_frame: - self._send_action("answer") + msg = self._build_selection_message("answer") + if msg: + self._send_message(msg) return True elif keyval == Gdk.KEY_Delete: self._agent_output_view.get_buffer().set_text("") @@ -845,6 +989,14 @@ class ChtWindow(Adw.ApplicationWindow): buf.apply_tag(tag, buf.get_iter_at_mark(mark), it) buf.delete_mark(mark) + def _on_lang_changed(self, dropdown, _pspec): + idx = dropdown.get_selected() + lang_names = list(LANGUAGES.keys()) + if idx < len(lang_names): + lang_code = LANGUAGES[lang_names[idx]] + self._transcriber.language = lang_code + log.info("Transcript language: %s (%s)", lang_names[idx], lang_code or "auto") + def _on_model_changed(self, dropdown, _pspec): idx = dropdown.get_selected() model = self._agent.available_models[idx] if idx < len(self._agent.available_models) else None @@ -884,14 +1036,35 @@ class ChtWindow(Adw.ApplicationWindow): self._agent_output_view.scroll_to_iter(buf.get_end_iter(), 0, False, 0, 0) def _append_transcript_segments(self, segments): - """Append transcription segments to the transcript panel.""" - buf = self._transcript_view.get_buffer() + """Append transcription segments to the transcript ListBox.""" for seg in segments: m1, s1 = divmod(int(seg.start), 60) m2, s2 = divmod(int(seg.end), 60) - line = f"[{m1:02d}:{s1:02d}-{m2:02d}:{s2:02d}] {seg.id} {seg.text}\n" - buf.insert(buf.get_end_iter(), line) - self._transcript_view.scroll_to_iter(buf.get_end_iter(), 0, False, 0, 0) + text = f"{seg.id} [{m1:02d}:{s1:02d}-{m2:02d}:{s2:02d}] {seg.text}" + + row_label = Gtk.Label(label=text) + row_label.set_xalign(0) + row_label.set_wrap(True) + row_label.set_margin_start(8) + row_label.set_margin_end(8) + row_label.set_margin_top(2) + row_label.set_margin_bottom(2) + + row = Gtk.ListBoxRow() + row.set_child(row_label) + + gesture = Gtk.GestureClick() + gesture.connect("released", lambda g, n, x, y, sid=seg.id: self._select_transcript(sid)) + row.add_controller(gesture) + + self._transcript_list.append(row) + self._transcript_rows[seg.id] = row + self._transcript_texts[seg.id] = seg.text + self._transcript_order.append(seg.id) + + # Auto-scroll to bottom + adj = self._transcript_scroll.get_vadjustment() + GLib.idle_add(lambda: adj.set_value(adj.get_upper()) or False) # -- Frame thumbnails --