This commit is contained in:
2026-04-03 00:25:14 -03:00
parent d61e2a5492
commit cae9312db1
7 changed files with 341 additions and 91 deletions

View File

@@ -51,15 +51,31 @@ def _resolve_provider() -> AgentProvider:
return ClaudeSDKProvider()
def _expand_ref_nums(spec: str, max_span: int = 10_000) -> list[int]:
    """Expand a ref spec like '2-6' or '2,4,6' or '2-4,6,8-10' into sorted ints.

    Args:
        spec: Comma-separated list of numbers and inclusive ``a-b`` ranges.
            A reversed range such as ``6-2`` is treated as ``2-6``.
        max_span: Largest number of ids a single range may expand to. Wider
            ranges are ignored so that a typo such as ``1-99999999999``
            cannot allocate billions of integers.

    Returns:
        The unique ids in ascending order. Empty or malformed parts
        (``""``, ``"x"``, ``"5-"``, ``"2-4-6"``) are skipped.
    """
    nums: set[int] = set()
    for part in spec.split(","):
        part = part.strip()
        if not part:
            continue
        # Split on the first dash only; anything after it must be one integer.
        low_text, dash, high_text = part.partition("-")
        try:
            low = int(low_text)
            high = int(high_text) if dash else low
        except ValueError:
            continue
        if low > high:
            low, high = high, low
        if high - low < max_span:
            nums.update(range(low, high + 1))
    return sorted(nums)
def _parse_mentions(message: str, frames: list[FrameRef]) -> list[FrameRef]:
"""Extract @-references from message. Accepts:
@F0001 @f1 @1 @001 — all match frame F0001
"""
"""Extract @F references. Accepts @F1, @F2-6, @F2,4,6, @F2-4,6,8-10."""
mentioned = []
seen = set()
for match in re.finditer(r"@([Ff]?\d+)", message):
raw = match.group(1).lstrip("Ff")
num = int(raw)
for match in re.finditer(r"@[Ff]([\d,\-]+)", message):
for num in _expand_ref_nums(match.group(1)):
fid = f"F{num:04d}"
if fid not in seen:
frame = next((f for f in frames if f.id == fid), None)
@@ -111,11 +127,11 @@ def _load_transcript(transcript_dir: Path) -> list[TranscriptRef]:
def _parse_transcript_mentions(message: str, segments: list[TranscriptRef]) -> list[TranscriptRef]:
"""Extract @T references from message. Accepts @T0001, @t1, @T1."""
"""Extract @T references. Accepts @T1, @T2-6, @T2,4,6, @T1-3,5,7-10."""
mentioned = []
seen = set()
for match in re.finditer(r"@[Tt](\d+)", message):
num = int(match.group(1))
for match in re.finditer(r"@[Tt]([\d,\-]+)", message):
for num in _expand_ref_nums(match.group(1)):
tid = f"T{num:04d}"
if tid not in seen:
seg = next((s for s in segments if s.id == tid), None)

View File

@@ -1,5 +1,6 @@
import logging
import os
import signal
import sys
import threading
import gi
@@ -19,13 +20,30 @@ class ChtApp(Adw.Application):
application_id=APP_ID,
flags=Gio.ApplicationFlags.DEFAULT_FLAGS,
)
# Let GLib handle SIGINT/SIGTERM so Ctrl+C triggers graceful shutdown
GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGINT, self._on_signal)
GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGTERM, self._on_signal)
def _on_signal(self):
    """Handle a signal registered via GLib: log it and quit the application.

    Returns:
        ``GLib.SOURCE_REMOVE``, so the source is removed after this call.
    """
    logging.getLogger("cht").info("Signal received — shutting down gracefully")
    self.quit()
    return GLib.SOURCE_REMOVE
def do_shutdown(self):
    """Tear down every window that supports it, then chain up to Adw.Application."""
    for window in self.get_windows():
        # Windows without a teardown() method are left untouched.
        if not hasattr(window, "teardown"):
            continue
        window.teardown()
    Adw.Application.do_shutdown(self)
def do_activate(self):
win = self.props.active_window
if not win:
css = Gtk.CssProvider()
css.load_from_string(
".frame-selected { border: 3px solid @accent_color; border-radius: 6px; }"
".frame-selected { border: 3px solid @accent_color; border-radius: 6px; }\n"
"row.frame-selected { background: alpha(@accent_color, 0.25); border: none; border-radius: 0; }"
)
Gtk.StyleContext.add_provider_for_display(
Gdk.Display.get_default(),

View File

@@ -131,7 +131,9 @@ class StreamManager:
def start_recorder(self):
"""Start ffmpeg to receive TCP stream, write to fMP4, and relay to UDP."""
self._segment = 0
# Start after existing segments (for resumed sessions)
existing = self.recording_segments
self._segment = len(existing)
self._launch_recorder()
def restart_recorder(self):

View File

@@ -7,11 +7,18 @@ and persists to transcript/index.json in the session directory.
import json
import logging
import threading
from dataclasses import dataclass, asdict
from pathlib import Path
log = logging.getLogger(__name__)
# Display name -> language code offered in the transcript language dropdown.
# A value of None means no explicit language is passed to the model (auto-detect).
LANGUAGES = {
"Auto": None,
"English": "en",
"Spanish": "es",
}
@dataclass
class TranscriptSegment:
@@ -30,6 +37,9 @@ class TranscriberEngine:
self._device = device
self._segments: list[TranscriptSegment] = []
self._next_id = 1
self._lock = threading.Lock()
self._stopped = False
self.language = None # None = auto-detect, "en", "es", etc.
def _ensure_model(self):
if self._model is not None:
@@ -45,18 +55,22 @@ class TranscriberEngine:
def transcribe_chunk(self, wav_path, time_offset=0.0) -> list[TranscriptSegment]:
"""Transcribe a WAV chunk. Returns new segments with absolute timestamps."""
if self._stopped:
return []
self._ensure_model()
try:
segments_iter, _info = self._model.transcribe(
str(wav_path),
beam_size=5,
vad_filter=True,
)
kwargs = {"beam_size": 5, "vad_filter": True}
if self.language:
kwargs["language"] = self.language
segments_iter, info = self._model.transcribe(str(wav_path), **kwargs)
except Exception as e:
log.error("Whisper transcription failed: %s", e)
return []
new_segments = []
with self._lock:
if self._stopped:
return []
for seg in segments_iter:
text = seg.text.strip()
if not text:
@@ -78,6 +92,9 @@ class TranscriberEngine:
return list(self._segments)
def save_index(self, path: Path):
# Persist every current segment to `path` as indented JSON.
with self._lock:
# Once reset() has set _stopped, nothing is written.
if self._stopped:
return
# asdict() converts each TranscriptSegment dataclass into a plain dict.
data = [asdict(s) for s in self._segments]
path.write_text(json.dumps(data, indent=2))
@@ -87,12 +104,16 @@ class TranscriberEngine:
except Exception as e:
log.warning("Failed to load transcript index: %s", e)
return
with self._lock:
self._segments = [TranscriptSegment(**e) for e in data]
if self._segments:
last_num = max(int(s.id.lstrip("T")) for s in self._segments)
self._next_id = last_num + 1
self._stopped = False
log.info("Loaded %d transcript segments", len(self._segments))
def reset(self):
with self._lock:
self._stopped = True
self._segments.clear()
self._next_id = 1

View File

@@ -99,7 +99,20 @@ class MonitorWidget(Gtk.Box):
elif self._review_player:
self._review_player.screenshot(path)
def reset(self):
    """Reset for a session transition: unload content but keep the players alive."""
    log.info("Resetting monitor")
    self._live_source_url = self._recording_path = None
    self._live_loaded = False
    # Stop playback on whichever players exist; neither is terminated here.
    for player in (self._live_player, self._review_player):
        if player:
            player.command("stop")
    self._stack.set_visible_child_name("live")
def stop(self):
"""Full teardown — terminates mpv players. Only call on app exit."""
log.info("Stopping monitor")
if self._live_player:
self._live_player.terminate()

View File

@@ -106,6 +106,13 @@ class Player:
log.info("mpv load_live: %s", url)
self._player.loadfile(str(url), mode="replace")
def command(self, *args):
    """Send a command to mpv on a best-effort basis.

    Args:
        *args: Command name followed by its arguments, forwarded unchanged
            to the underlying player's ``command``.

    Any exception raised by the player is logged at debug level and
    suppressed, so callers never see an error from this method.
    """
    try:
        self._player.command(*args)
    except Exception as exc:
        # Still best-effort, but leave a trace instead of failing silently.
        log.debug("mpv command %r failed: %s", args, exc)
def play(self):
"""Resume/start playback."""
self._player.pause = False

View File

@@ -17,7 +17,7 @@ from cht.ui.timeline import Timeline, TimelineControls
from cht.ui.monitor import MonitorWidget
from cht.ui.waveform import WaveformWidget
from cht.audio.waveform import WaveformEngine
from cht.transcriber.engine import TranscriberEngine
from cht.transcriber.engine import TranscriberEngine, LANGUAGES
from cht.stream.manager import StreamManager, list_sessions
from cht.stream.tracker import RecordingTracker
from cht.agent.runner import AgentRunner, ACTIONS, check_claude_cli
@@ -38,6 +38,10 @@ class ChtWindow(Adw.ApplicationWindow):
self._selected_frame = None # currently selected frame ID
self._frame_widgets = {} # frame_id → outer Box widget
self._frame_order = [] # ordered list of frame IDs
self._transcript_order = [] # ordered list of transcript segment IDs
self._transcript_rows = {} # segment_id → ListBoxRow
self._transcript_texts = {} # segment_id → text (clean, no timestamps)
self._selected_transcripts = [] # ordered list of selected transcript IDs
# Timeline is the central state machine
self._timeline = Timeline()
@@ -85,14 +89,17 @@ class ChtWindow(Adw.ApplicationWindow):
log.info("Window initialized")
GLib.idle_add(self._start_stream)
GLib.idle_add(self._check_agent_auth)
def _on_connect_clicked(self, button):
if self._streaming:
self._stop_stream()
self._stop_stream(reload_session=True)
else:
self._start_stream()
# If a session is loaded, continue it; otherwise start fresh
session_id = self._stream_mgr.session_id if self._stream_mgr else None
if self._stream_mgr:
self._stop_stream() # clean teardown first
self._start_stream(session_id=session_id)
def _on_load_session_clicked(self, button):
sessions = list_sessions()
@@ -203,7 +210,7 @@ class ChtWindow(Adw.ApplicationWindow):
# Set up agent auth/model if not already done
self._populate_model_dropdown()
def _start_stream(self):
def _start_stream(self, session_id=None):
log.info("Starting stream...")
self._connect_btn.set_label("Disconnect")
self._connect_btn.remove_css_class("suggested-action")
@@ -211,8 +218,8 @@ class ChtWindow(Adw.ApplicationWindow):
self._streaming = True
self._gone_live = False
# Create session
self._stream_mgr = StreamManager()
# Continue existing session or create new one
self._stream_mgr = StreamManager(session_id=session_id)
self._stream_mgr.setup_dirs()
# Start ffmpeg recorder (listens for sender, relays to UDP)
@@ -244,6 +251,17 @@ class ChtWindow(Adw.ApplicationWindow):
# Watchdog: restart recorder on crash/disconnect
GLib.timeout_add(2000, self._check_recorder)
# If resuming a session, reload existing frames/transcript/waveform
if session_id:
self._load_existing_frames()
transcript_index = self._stream_mgr.transcript_dir / "index.json"
if transcript_index.exists():
self._transcriber.load_index(transcript_index)
segs = self._transcriber.all_segments()
if segs:
self._append_transcript_segments(segs)
self.set_title(f"{APP_NAME}{self._stream_mgr.session_id}")
log.info("Waiting for sender...")
def _go_live_once(self):
@@ -277,6 +295,8 @@ class ChtWindow(Adw.ApplicationWindow):
def _on_new_audio(self, wav_path, start_time, duration):
"""Called from audio extractor thread with new WAV chunk."""
if not self._stream_mgr:
return
# Compute waveform peaks (fast, ~1ms)
self._waveform_engine.append_chunk(wav_path, start_time)
peaks = self._waveform_engine.peaks
@@ -284,12 +304,12 @@ class ChtWindow(Adw.ApplicationWindow):
GLib.idle_add(self._waveform_widget.set_peaks, peaks.copy(), bucket_dur)
# Transcribe in separate thread (GPU-bound, ~1-2s per chunk)
mgr = self._stream_mgr # capture ref before thread starts
def _transcribe():
new_segs = self._transcriber.transcribe_chunk(wav_path, time_offset=start_time)
if self._stream_mgr:
self._transcriber.save_index(
self._stream_mgr.transcript_dir / "index.json"
)
if mgr:
self._transcriber.save_index(mgr.transcript_dir / "index.json")
if new_segs:
GLib.idle_add(self._append_transcript_segments, new_segs)
@@ -311,14 +331,12 @@ class ChtWindow(Adw.ApplicationWindow):
pos = self._monitor.get_live_position()
self._timeline.toggle_live(live_player_pos=pos)
def _stop_stream(self):
def _stop_stream(self, reload_session=False):
log.info("Stopping stream...")
self._timeline.reset()
self._monitor.stop()
self._waveform_engine.reset()
self._waveform_widget.set_peaks(None, 0.05)
self._transcriber.reset()
self._transcript_view.get_buffer().set_text("")
# Remember session for reload
last_session_id = self._stream_mgr.session_id if self._stream_mgr and not self._stream_mgr.readonly else None
# Stop background threads first (sets stop flags, kills procs)
if self._tracker:
self._tracker.stop()
self._tracker = None
@@ -326,6 +344,18 @@ class ChtWindow(Adw.ApplicationWindow):
if not self._stream_mgr.readonly:
self._stream_mgr.stop_all()
self._stream_mgr = None
# Then clean up UI
self._timeline.reset()
self._monitor.reset()
self._waveform_engine.reset()
self._waveform_widget.set_peaks(None, 0.05)
self._transcriber.reset()
self._transcript_order.clear()
self._transcript_rows.clear()
self._transcript_texts.clear()
self._selected_transcripts.clear()
while child := self._transcript_list.get_first_child():
self._transcript_list.remove(child)
self._known_frames = set()
self._selected_frame = None
self._frame_widgets = {}
@@ -341,8 +371,19 @@ class ChtWindow(Adw.ApplicationWindow):
self._streaming = False
self.set_title(APP_NAME)
# Reload last session in review mode
if reload_session and last_session_id:
GLib.idle_add(self._load_session, last_session_id)
def _on_close(self, *args):
    """Run the full window teardown; any arguments passed in are ignored."""
    self.teardown()
def teardown(self):
"""Full cleanup for app exit — safe to call multiple times."""
if self._stream_mgr or self._streaming:
self._stop_stream()
# Terminate mpv players and GL contexts (only on app exit)
self._monitor.stop()
# -- Right panels --
@@ -467,18 +508,14 @@ class ChtWindow(Adw.ApplicationWindow):
label.set_margin_bottom(4)
box.append(label)
self._transcript_view = Gtk.TextView()
self._transcript_view.set_editable(False)
self._transcript_view.set_wrap_mode(Gtk.WrapMode.WORD_CHAR)
self._transcript_view.set_cursor_visible(False)
self._transcript_view.set_left_margin(8)
self._transcript_view.set_right_margin(8)
self._transcript_list = Gtk.ListBox()
self._transcript_list.set_selection_mode(Gtk.SelectionMode.NONE)
scroll = Gtk.ScrolledWindow()
scroll.set_vexpand(True)
scroll.set_min_content_height(150)
scroll.set_child(self._transcript_view)
box.append(scroll)
self._transcript_scroll = Gtk.ScrolledWindow()
self._transcript_scroll.set_vexpand(True)
self._transcript_scroll.set_min_content_height(150)
self._transcript_scroll.set_child(self._transcript_list)
box.append(self._transcript_scroll)
frame = Gtk.Frame()
frame.set_child(box)
@@ -553,6 +590,16 @@ class ChtWindow(Adw.ApplicationWindow):
self._model_dropdown.connect("notify::selected", self._on_model_changed)
actions_box.append(self._model_dropdown)
lang_label = Gtk.Label(label="Lang:")
lang_label.add_css_class("dim-label")
actions_box.append(lang_label)
lang_names = list(LANGUAGES.keys())
self._lang_dropdown = Gtk.DropDown.new_from_strings(lang_names)
self._lang_dropdown.set_selected(0)
self._lang_dropdown.connect("notify::selected", self._on_lang_changed)
actions_box.append(self._lang_dropdown)
outer.append(actions_box)
# Text entry + send
@@ -589,15 +636,41 @@ class ChtWindow(Adw.ApplicationWindow):
adj.set_value(x + w - page)
return False
def _clear_frame_selection(self):
    """Remove the highlight from the selected frame widget, if any, and forget it."""
    frame_id = self._selected_frame
    if frame_id and frame_id in self._frame_widgets:
        self._frame_widgets[frame_id].remove_css_class("frame-selected")
    self._selected_frame = None
def _clear_transcript_selection(self):
    """Un-highlight every selected transcript row and empty the selection list."""
    rows = self._transcript_rows
    selected = self._selected_transcripts
    # IDs without a known row are skipped rather than treated as errors.
    for row in (rows[seg_id] for seg_id in selected if seg_id in rows):
        row.remove_css_class("frame-selected")
    selected.clear()
def _build_selection_message(self, verb: str) -> str | None:
"""Build a message from verb + selected frame ref + transcript texts."""
parts = [verb]
if self._selected_frame:
parts.append(f"@{self._selected_frame}")
if self._selected_transcripts:
texts = [self._transcript_texts[tid]
for tid in self._selected_transcripts
if tid in self._transcript_texts]
if texts:
parts.append(" ".join(texts))
return " ".join(parts) if len(parts) > 1 else None
def _send_action(self, verb: str):
"""Send a predefined action with the selected frame."""
if not self._selected_frame:
self._append_agent_output("Select a frame first.\n")
"""Send a predefined action with selected frame/transcript."""
msg = self._build_selection_message(verb)
if not msg:
self._append_agent_output("Select a frame or transcript first.\n")
return
self._send_message(f"{verb} @{self._selected_frame}")
self._send_message(msg)
def _select_frame(self, frame_id: str):
"""Select a frame thumbnail (or deselect if already selected)."""
self._clear_transcript_selection()
# Deselect previous
if self._selected_frame and self._selected_frame in self._frame_widgets:
self._frame_widgets[self._selected_frame].remove_css_class("frame-selected")
@@ -613,22 +686,93 @@ class ChtWindow(Adw.ApplicationWindow):
# Scroll after layout settles (idle may fire before allocation)
GLib.timeout_add(50, self._scroll_to_frame, widget)
def _select_transcript(self, seg_id, extend=False):
    """Select a transcript segment, clearing any frame selection first.

    Args:
        seg_id: ID of the transcript segment to act on.
        extend: When False, ``seg_id`` replaces the current selection.
            When True, ``seg_id`` is toggled: appended if absent, removed
            if it is already selected.
    """
    self._clear_frame_selection()

    if not extend:
        # Replace mode: drop the previous selection and its highlights.
        self._clear_transcript_selection()
    elif seg_id in self._selected_transcripts:
        # Extend mode on an already-selected segment toggles it off.
        self._selected_transcripts.remove(seg_id)
        if seg_id in self._transcript_rows:
            self._transcript_rows[seg_id].remove_css_class("frame-selected")
        return

    self._selected_transcripts.append(seg_id)
    if seg_id in self._transcript_rows:
        row = self._transcript_rows[seg_id]
        row.add_css_class("frame-selected")
        # Scroll row into view (deferred by 50 ms)
        GLib.timeout_add(50, self._scroll_transcript_to_row, row)
def _scroll_transcript_to_row(self, row):
    """Scroll the transcript panel just enough to bring ``row`` into view.

    Returns:
        Always False, so a GLib timeout running this does not repeat.
    """
    vadj = self._transcript_scroll.get_vadjustment()
    rect = row.get_allocation()
    top, height = rect.y, rect.height
    if height <= 0:
        # Zero or negative height: nothing to scroll to.
        return False

    page = vadj.get_page_size()
    current = vadj.get_value()
    bottom = top + height
    if top < current:
        # Row starts above the viewport: align its top edge.
        vadj.set_value(top)
    elif bottom > current + page:
        # Row ends below the viewport: align its bottom edge.
        vadj.set_value(bottom - page)
    return False
def _select_adjacent_transcript(self, delta, extend=False):
    """Select next/prev transcript segment. Shift extends selection.

    Args:
        delta: Offset in transcript order relative to the most recently
            selected segment (negative = earlier, positive = later).
        extend: When True the target is added to the selection; when the
            target is already selected, the most recently selected segment
            is dropped instead, so the selection shrinks from its tail.

    With nothing selected, a positive ``delta`` starts at the first segment
    and a non-positive one at the last. Moves past either end are ignored.
    """
    order = self._transcript_order
    if not order:
        return

    selected = self._selected_transcripts
    if not selected:
        idx = 0 if delta > 0 else len(order) - 1
    else:
        try:
            cur = order.index(selected[-1])
        except ValueError:
            # The tail of the selection is no longer listed; restart from the top.
            cur = 0
        idx = cur + delta
    if idx < 0 or idx >= len(order):
        return

    target = order[idx]
    if extend and target in selected:
        # Stepping back over an extended selection: toggling the target
        # would punch a hole in it, so drop the tail instead.
        self._select_transcript(selected[-1], extend=True)
        return
    self._select_transcript(target, extend=extend)
def _on_key_pressed(self, controller, keyval, keycode, state):
"""Handle Left/Right arrow for frame selection, Enter for answer."""
"""Keyboard shortcuts: Left/Right=frames, Up/Down=transcript, Enter=answer."""
# Don't intercept when text entry is focused
focus = self.get_focus()
if isinstance(focus, (Gtk.Entry, Gtk.TextView)):
return False
shift = bool(state & Gdk.ModifierType.SHIFT_MASK)
if keyval == Gdk.KEY_Left:
self._clear_transcript_selection()
self._select_adjacent_frame(-1)
return True
elif keyval == Gdk.KEY_Right:
self._clear_transcript_selection()
self._select_adjacent_frame(1)
return True
elif keyval == Gdk.KEY_Up:
self._clear_frame_selection()
self._select_adjacent_transcript(-1, extend=shift)
return True
elif keyval == Gdk.KEY_Down:
self._clear_frame_selection()
self._select_adjacent_transcript(1, extend=shift)
return True
elif keyval in (Gdk.KEY_Return, Gdk.KEY_KP_Enter):
if self._selected_frame:
self._send_action("answer")
msg = self._build_selection_message("answer")
if msg:
self._send_message(msg)
return True
elif keyval == Gdk.KEY_Delete:
self._agent_output_view.get_buffer().set_text("")
@@ -845,6 +989,14 @@ class ChtWindow(Adw.ApplicationWindow):
buf.apply_tag(tag, buf.get_iter_at_mark(mark), it)
buf.delete_mark(mark)
def _on_lang_changed(self, dropdown, _pspec):
    """Apply the language chosen in ``dropdown`` to the transcriber.

    An index outside the LANGUAGES table is ignored.
    """
    names = list(LANGUAGES.keys())
    selected = dropdown.get_selected()
    if selected >= len(names):
        return
    name = names[selected]
    code = LANGUAGES[name]
    self._transcriber.language = code
    log.info("Transcript language: %s (%s)", name, code or "auto")
def _on_model_changed(self, dropdown, _pspec):
idx = dropdown.get_selected()
model = self._agent.available_models[idx] if idx < len(self._agent.available_models) else None
@@ -884,14 +1036,35 @@ class ChtWindow(Adw.ApplicationWindow):
self._agent_output_view.scroll_to_iter(buf.get_end_iter(), 0, False, 0, 0)
def _append_transcript_segments(self, segments):
"""Append transcription segments to the transcript panel."""
buf = self._transcript_view.get_buffer()
"""Append transcription segments to the transcript ListBox."""
for seg in segments:
m1, s1 = divmod(int(seg.start), 60)
m2, s2 = divmod(int(seg.end), 60)
line = f"[{m1:02d}:{s1:02d}-{m2:02d}:{s2:02d}] {seg.id} {seg.text}\n"
buf.insert(buf.get_end_iter(), line)
self._transcript_view.scroll_to_iter(buf.get_end_iter(), 0, False, 0, 0)
text = f"{seg.id} [{m1:02d}:{s1:02d}-{m2:02d}:{s2:02d}] {seg.text}"
row_label = Gtk.Label(label=text)
row_label.set_xalign(0)
row_label.set_wrap(True)
row_label.set_margin_start(8)
row_label.set_margin_end(8)
row_label.set_margin_top(2)
row_label.set_margin_bottom(2)
row = Gtk.ListBoxRow()
row.set_child(row_label)
gesture = Gtk.GestureClick()
gesture.connect("released", lambda g, n, x, y, sid=seg.id: self._select_transcript(sid))
row.add_controller(gesture)
self._transcript_list.append(row)
self._transcript_rows[seg.id] = row
self._transcript_texts[seg.id] = seg.text
self._transcript_order.append(seg.id)
# Auto-scroll to bottom
adj = self._transcript_scroll.get_vadjustment()
GLib.idle_add(lambda: adj.set_value(adj.get_upper()) or False)
# -- Frame thumbnails --