diff --git a/cht/agent/openai_compat_provider.py b/cht/agent/openai_compat_provider.py index a2b559e..6b34e7c 100644 --- a/cht/agent/openai_compat_provider.py +++ b/cht/agent/openai_compat_provider.py @@ -74,8 +74,7 @@ class OpenAICompatProvider(AgentProvider): f"Total frames: {len(context.frames)}\n" ) - # Include mentioned frames as images, fall back to last 3 frames - frames_to_send = context.mentioned_frames or context.frames[-3:] + frames_to_send = context.mentioned_frames content: list[dict] = [{"type": "text", "text": ctx_text + message}] for frame in frames_to_send: diff --git a/cht/stream/ffmpeg.py b/cht/stream/ffmpeg.py index 7a14a34..fe523a7 100644 --- a/cht/stream/ffmpeg.py +++ b/cht/stream/ffmpeg.py @@ -111,9 +111,14 @@ def extract_scene_frames(input_path, output_dir, scene_threshold=0.10, except ffmpeg.Error as e: # ffmpeg may exit non-zero on growing files (corrupt tail) but still # produce valid frames. Return the stderr for parsing anyway. - log.debug("ffmpeg exited with error (may still have valid frames)") - stdout = e.stdout or b"" stderr = e.stderr or b"" + err_text = stderr.decode("utf-8", errors="replace") + # Log the last meaningful line so we can see the real cause + for line in reversed(err_text.splitlines()): + if line.strip() and not line.startswith(" "): + log.debug("ffmpeg scene error: %s", line.strip()) + break + stdout = e.stdout or b"" return stdout.decode("utf-8", errors="replace"), stderr.decode("utf-8", errors="replace") diff --git a/cht/window.py b/cht/window.py index 05f18bd..255b115 100644 --- a/cht/window.py +++ b/cht/window.py @@ -331,6 +331,7 @@ class ChtWindow(Adw.ApplicationWindow): self._agent_output_view.set_cursor_visible(False) self._agent_output_view.set_left_margin(8) self._agent_output_view.set_right_margin(8) + self._setup_md_tags(self._agent_output_view.get_buffer()) scroll = Gtk.ScrolledWindow() scroll.set_vexpand(True) @@ -392,25 +393,183 @@ class ChtWindow(Adw.ApplicationWindow): stream_mgr=self._stream_mgr, tracker=self._tracker, on_chunk=lambda chunk: GLib.idle_add(self._replace_thinking, chunk), - on_done=lambda err: GLib.idle_add( - self._append_agent_output, - f"[Error: {err}]\n" if err else "\n" - ), + on_done=lambda err: GLib.idle_add(self._on_agent_done, err), ) self._thinking_replaced = False + self._response_start_mark = None + self._response_accum = [] def _replace_thinking(self, chunk: str): - """Replace the '…' placeholder with the first chunk, then append normally.""" + """Replace the '…' placeholder with the first real chunk.""" + buf = self._agent_output_view.get_buffer() if not self._thinking_replaced: self._thinking_replaced = True - buf = self._agent_output_view.get_buffer() - # Remove the trailing '…\n' (3 chars) end = buf.get_end_iter() start = end.copy() - start.backward_chars(2) + start.backward_chars(2) # remove '…\n' buf.delete(start, end) + # Mark where the response starts for later MD formatting + self._response_start_mark = buf.create_mark( + None, buf.get_end_iter(), left_gravity=True + ) + self._response_accum.append(chunk) self._append_agent_output(chunk) + def _on_agent_done(self, err: str | None): + if err: + self._append_agent_output(f"[Error: {err}]\n") + return + # Re-render accumulated response with markdown formatting + if self._response_start_mark and self._response_accum: + buf = self._agent_output_view.get_buffer() + start = buf.get_iter_at_mark(self._response_start_mark) + end = buf.get_end_iter() + buf.delete(start, end) + self._render_md(buf, start, "".join(self._response_accum)) + buf.delete_mark(self._response_start_mark) + self._append_agent_output("\n") + + def _setup_md_tags(self, buf): + buf.create_tag("h1", weight=700, scale=1.4) + buf.create_tag("h2", weight=700, scale=1.2) + buf.create_tag("h3", weight=700, scale=1.05) + buf.create_tag("bold", weight=700) + buf.create_tag("italic", style=2) # Pango.Style.ITALIC = 2 + buf.create_tag("code", family="monospace", background="#2a2a2a", foreground="#e8e8e8") + buf.create_tag("codeblock", family="monospace", background="#1e1e1e", + foreground="#e8e8e8", left_margin=16, right_margin=16, + pixels_above_lines=4, pixels_below_lines=4) + buf.create_tag("bullet", left_margin=16) + + def _render_md(self, buf, insert_iter, text: str): + """Insert markdown-formatted text into buf at insert_iter using text tags.""" + import re + lines = text.split("\n") + i = 0 + while i < len(lines): + line = lines[i] + + # Fenced code block + if line.startswith("```"): + lang = line[3:].strip() + block_lines = [] + i += 1 + while i < len(lines) and not lines[i].startswith("```"): + block_lines.append(lines[i]) + i += 1 + code = "\n".join(block_lines) + "\n" + block_mark = buf.create_mark(None, insert_iter, True) + self._insert_highlighted_code(buf, insert_iter, code, lang) + buf.apply_tag_by_name("codeblock", buf.get_iter_at_mark(block_mark), insert_iter) + buf.delete_mark(block_mark) + buf.insert(insert_iter, "\n") + i += 1 + continue + + # Headers + header_match = re.match(r"^(#{1,3})\s+(.*)", line) + if header_match: + level = len(header_match.group(1)) + content = header_match.group(2) + tag = f"h{level}" + mark = buf.create_mark(None, insert_iter, True) + buf.insert(insert_iter, content + "\n") + buf.apply_tag_by_name(tag, buf.get_iter_at_mark(mark), insert_iter) + i += 1 + continue + + # Bullet points + bullet_match = re.match(r"^[\-\*]\s+(.*)", line) + if bullet_match: + self._insert_inline(buf, insert_iter, "• " + bullet_match.group(1), "bullet") + buf.insert(insert_iter, "\n") + i += 1 + continue + + # Inline formatting pass on normal lines + self._insert_inline_line(buf, insert_iter, line) + buf.insert(insert_iter, "\n") + i += 1 + + def _insert_inline(self, buf, it, text: str, outer_tag: str | None = None): + mark = buf.create_mark(None, it, True) + self._insert_inline_line(buf, it, text) + if outer_tag: + buf.apply_tag_by_name(outer_tag, buf.get_iter_at_mark(mark), it) + + def _insert_inline_line(self, buf, it, text: str): + """Insert text with inline bold/italic/code formatting.""" + import re + pattern = re.compile(r"(\*\*(.+?)\*\*|__(.+?)__|" + r"\*(.+?)\*|_(.+?)_|" + r"`([^`]+?)`)") + pos = 0 + for m in pattern.finditer(text): + # Plain text before match + if m.start() > pos: + buf.insert(it, text[pos:m.start()]) + full = m.group(0) + if full.startswith("**") or full.startswith("__"): + inner = m.group(2) or m.group(3) + mark = buf.create_mark(None, it, True) + buf.insert(it, inner) + buf.apply_tag_by_name("bold", buf.get_iter_at_mark(mark), it) + elif full.startswith("*") or full.startswith("_"): + inner = m.group(4) or m.group(5) + mark = buf.create_mark(None, it, True) + buf.insert(it, inner) + buf.apply_tag_by_name("italic", buf.get_iter_at_mark(mark), it) + elif full.startswith("`"): + inner = m.group(6) + mark = buf.create_mark(None, it, True) + buf.insert(it, inner) + buf.apply_tag_by_name("code", buf.get_iter_at_mark(mark), it) + pos = m.end() + if pos < len(text): + buf.insert(it, text[pos:]) + + def _insert_highlighted_code(self, buf, it, code: str, lang: str): + """Insert syntax-highlighted code using Pygments token tags.""" + try: + from pygments.lexers import get_lexer_by_name, guess_lexer + from pygments.styles import get_style_by_name + except ImportError: + buf.insert(it, code) + return + + try: + lexer = get_lexer_by_name(lang, stripall=False) if lang else guess_lexer(code) + except Exception: + try: + lexer = guess_lexer(code) + except Exception: + buf.insert(it, code) + return + + try: + style = get_style_by_name("monokai") + except Exception: + buf.insert(it, code) + return + + tag_table = buf.get_tag_table() + for ttype, value in lexer.get_tokens(code): + tag_name = f"pyg_{ttype}" + tag = tag_table.lookup(tag_name) + if tag is None: + sf = style.style_for_token(ttype) + tag = buf.create_tag(tag_name, family="monospace") + if sf.get("color"): + tag.set_property("foreground", f"#{sf['color']}") + if sf.get("bold"): + tag.set_property("weight", 700) + if sf.get("italic"): + tag.set_property("style", 2) + mark = buf.create_mark(None, it, True) + buf.insert(it, value) + buf.apply_tag(tag, buf.get_iter_at_mark(mark), it) + buf.delete_mark(mark) + def _check_agent_auth(self): import os if os.environ.get("GROQ_API_KEY") or os.environ.get("OPENAI_API_KEY"): @@ -480,7 +639,7 @@ class ChtWindow(Adw.ApplicationWindow): # Click to highlight — does NOT switch mode or seek # (future: jump to timestamp in scrub bar without leaving live) gesture = Gtk.GestureClick() - gesture.connect("released", lambda g, n, x, y: log.debug("Frame clicked: %s at %.1fs", frame_id, timestamp)) + gesture.connect("released", lambda g, n, x, y, fid=frame_id: self._send_message(f"solve this @{fid}")) box.add_controller(gesture) self._frames_strip.append(box) diff --git a/pyproject.toml b/pyproject.toml index 7d309f5..b9d4d96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ dependencies = [ "numpy", "claude-agent-sdk", "openai", + "pygments", ] [tool.setuptools.packages.find] diff --git a/sender/stream_av.sh b/sender/stream_av.sh index 33bf6ae..f8e6b6e 100755 --- a/sender/stream_av.sh +++ b/sender/stream_av.sh @@ -6,15 +6,15 @@ # Requires: sudo for kmsgrab, PulseAudio for audio capture # Audio is non-blocking (monitor source = passive tap) # -# Auto-restarts on stall: a watchdog checks ffmpeg's frame counter -# and kills/restarts if video freezes (DRM/VAAPI contention from -# other apps using the GPU, e.g. video calls). +# Auto-restarts on stall: watchdog checks output bytes + frame counter. +# Also restarts immediately on fatal kmsgrab errors (DRM plane format +# change from fullscreen / direct-scanout). set -uo pipefail RECEIVER_IP="${1:-mcrndeb}" PORT="${2:-4444}" -STALL_TIMEOUT=10 # seconds with no frame progress before restart +STALL_TIMEOUT=10 # seconds with no progress before restart # Let root access the user's PulseAudio session REAL_UID="${SUDO_UID:-$(id -u)}" @@ -33,7 +33,14 @@ echo "Streaming to: ${RECEIVER_IP}:${PORT}" ulimit -n 65536 PROGRESS_FILE=$(mktemp) -trap 'rm -f "$PROGRESS_FILE"' EXIT +FFLOG=$(mktemp) +FFPID="" + +cleanup() { + [ -n "$FFPID" ] && kill "$FFPID" 2>/dev/null && wait "$FFPID" 2>/dev/null + rm -f "$PROGRESS_FILE" "$FFLOG" +} +trap cleanup EXIT INT TERM start_ffmpeg() { local args=( @@ -60,38 +67,57 @@ start_ffmpeg() { -hide_banner -progress "$PROGRESS_FILE" ) - "${args[@]}" & + > "$FFLOG" + "${args[@]}" 2>"$FFLOG" & echo $! } -get_frame_count() { - # -progress file writes key=value pairs; frame= is the video frame counter - grep -oP '^frame=\K[0-9]+' "$PROGRESS_FILE" 2>/dev/null | tail -1 +get_progress_val() { + grep -oP "^${1}=\K[0-9]+" "$PROGRESS_FILE" 2>/dev/null | tail -1 +} + +fatal_kmsgrab_error() { + grep -q "framebuffer format changed\|Error during demuxing" "$FFLOG" 2>/dev/null } while true; do echo "--- Starting sender $(date) ---" - > "$PROGRESS_FILE" # reset + > "$PROGRESS_FILE" FFPID=$(start_ffmpeg) echo "ffmpeg started: pid=$FFPID" + last_bytes=0 last_frame=0 stall_since=$SECONDS while kill -0 "$FFPID" 2>/dev/null; do - sleep 2 + sleep 1 - cur_frame=$(get_frame_count) + # Immediate restart on fatal kmsgrab errors (DRM plane format change) + if fatal_kmsgrab_error; then + echo "Fatal kmsgrab error — restarting immediately" + kill "$FFPID" 2>/dev/null + wait "$FFPID" 2>/dev/null + break + fi + + cur_bytes=$(get_progress_val total_size) + cur_bytes=${cur_bytes:-0} + cur_frame=$(get_progress_val frame) cur_frame=${cur_frame:-0} - if (( cur_frame > last_frame )); then + # Either metric advancing counts as healthy: + # total_size: catches TCP output stalls (muxer blocked) + # frame: catches kmsgrab/encoder stalls (audio keeps total_size ticking) + if (( cur_bytes > last_bytes || cur_frame > last_frame )); then + last_bytes=$cur_bytes last_frame=$cur_frame stall_since=$SECONDS fi if (( SECONDS - stall_since > STALL_TIMEOUT )); then - echo "Video stalled at frame $last_frame for ${STALL_TIMEOUT}s — killing ffmpeg" + echo "Stream stalled at frame ${last_frame} / ${last_bytes}B for ${STALL_TIMEOUT}s — killing ffmpeg" kill "$FFPID" 2>/dev/null wait "$FFPID" 2>/dev/null break