From e69fec5aea0e514eebf71dd2f9e54af9043cc719 Mon Sep 17 00:00:00 2001 From: buenosairesam Date: Thu, 9 Apr 2026 14:58:15 -0300 Subject: [PATCH] agent improvement --- cht/agent/claude_sdk_connection.py | 23 +++--- cht/agent/runner.py | 14 ++++ cht/ui/agent_input.py | 25 ++++++ cht/ui/agent_output.py | 128 +++++++++++++++++++---------- cht/window.py | 7 ++ 5 files changed, 141 insertions(+), 56 deletions(-) diff --git a/cht/agent/claude_sdk_connection.py b/cht/agent/claude_sdk_connection.py index dacaa81..d9e9125 100644 --- a/cht/agent/claude_sdk_connection.py +++ b/cht/agent/claude_sdk_connection.py @@ -37,9 +37,11 @@ You help the user understand what happened during their recording session. You have access to frame screenshots extracted from the recording. When frames are mentioned, use the Read tool to view them. Frame timestamps are in seconds from the start of the recording. -You also have tools to search transcripts, get session info, and capture new frames. +You can use any available tools including WebFetch and WebSearch when the user asks you to +look something up. Use them freely — all tools are pre-authorized. -Be concise and specific. Focus on what's visible in the frames.""" +Your primary role is description and analysis, not code generation. Be concise and specific. +Focus on what's visible in the frames and what's in the transcript.""" MODELS = [ "claude-sonnet-4-6", @@ -78,21 +80,16 @@ def _messages_to_prompt(messages: list[Message]) -> str: return "\n".join(lines) -def _tool_schemas(tools: list[Tool]) -> list[str]: - """Extract tool names for the SDK's allowed_tools parameter.""" - # The Claude SDK uses allowed_tools as a list of tool name strings. - # Our custom tools are executed by the runner, not by the SDK, - # so we only pass "Read" to the SDK (for frame viewing). - return ["Read"] - - class ClaudeSDKConnection: """AgentConnection using claude_agent_sdk — requires Claude Code CLI.""" - def __init__(self, cwd: str | None = None, max_turns: int = 5, model: str = MODELS[0]): + def __init__(self, cwd: str | None = None, max_turns: int | None = None, + model: str = MODELS[0], permission_mode: str | None = None): + from cht.config import AGENT_PERMISSION_MODE, AGENT_MAX_TURNS self._cwd = cwd - self._max_turns = max_turns + self._max_turns = max_turns or AGENT_MAX_TURNS self._model = model + self._permission_mode = permission_mode or AGENT_PERMISSION_MODE self._cancelled = False @property @@ -147,9 +144,9 @@ class ClaudeSDKConnection: options=ClaudeAgentOptions( model=self._model, cwd=cwd or ".", - allowed_tools=_tool_schemas(tools), system_prompt=SYSTEM_PROMPT, max_turns=self._max_turns, + permission_mode=self._permission_mode, ), ): if self._cancelled: diff --git a/cht/agent/runner.py b/cht/agent/runner.py index ece3785..d6742bb 100644 --- a/cht/agent/runner.py +++ b/cht/agent/runner.py @@ -177,6 +177,20 @@ class AgentRunner: def model(self, value: str): self._get_connection().set_model(value) + @property + def permission_mode(self) -> str: + conn = self._get_connection() + return getattr(conn, "_permission_mode", "default") + + @permission_mode.setter + def permission_mode(self, value: str): + conn = self._get_connection() + if hasattr(conn, "_permission_mode"): + conn._permission_mode = value + import cht.config + cht.config.AGENT_PERMISSION_MODE = value + log.info("Permission mode set to %s", value) + def clear_history(self): self._thread = AgentThread() diff --git a/cht/ui/agent_input.py b/cht/ui/agent_input.py index b2a7fe0..4b2e9b0 100644 --- a/cht/ui/agent_input.py +++ b/cht/ui/agent_input.py @@ -21,6 +21,7 @@ class AgentInputPanel(Gtk.Frame): "model-changed": (GObject.SignalFlags.RUN_FIRST, None, (str,)), "lang-changed": (GObject.SignalFlags.RUN_FIRST, None, (str,)), "history-toggled": (GObject.SignalFlags.RUN_FIRST, None, (bool,)), + "permission-changed": (GObject.SignalFlags.RUN_FIRST, None, (str,)), } def __init__(self, **kwargs): @@ -68,6 +69,18 @@ class AgentInputPanel(Gtk.Frame): history_toggle.connect("toggled", lambda b: self.emit("history-toggled", b.get_active())) actions_box.append(history_toggle) + perm_label = Gtk.Label(label="Perms:") + perm_label.add_css_class("dim-label") + actions_box.append(perm_label) + + self._perm_modes = ["default", "acceptEdits", "bypassPermissions", "dontAsk", "plan"] + self._perm_labels = ["Default", "Accept Edits", "Bypass All", "Don't Ask", "Plan Only"] + self._perm_dropdown = Gtk.DropDown.new_from_strings(self._perm_labels) + self._perm_dropdown.set_size_request(140, -1) + self._perm_dropdown.set_tooltip_text("Claude SDK permission mode") + self._perm_dropdown.connect("notify::selected", self._on_perm_changed) + actions_box.append(self._perm_dropdown) + outer.append(actions_box) input_row = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=4) @@ -123,3 +136,15 @@ class AgentInputPanel(Gtk.Frame): if idx < len(self._lang_names): lang_code = LANGUAGES[self._lang_names[idx]] self.emit("lang-changed", lang_code or "") + + def _on_perm_changed(self, dropdown, _pspec): + idx = dropdown.get_selected() + if idx < len(self._perm_modes): + self.emit("permission-changed", self._perm_modes[idx]) + + def set_permission_mode(self, mode: str) -> None: + try: + idx = self._perm_modes.index(mode) + self._perm_dropdown.set_selected(idx) + except ValueError: + pass diff --git a/cht/ui/agent_output.py b/cht/ui/agent_output.py index 0834633..c87c912 100644 --- a/cht/ui/agent_output.py +++ b/cht/ui/agent_output.py @@ -1,4 +1,4 @@ -"""Agent output panel — single TextView, fully selectable/copy-pastable.""" +"""Agent output panel — single TextView, colored message regions, copy-pastable.""" import logging @@ -41,6 +41,11 @@ class AgentOutputPanel(Gtk.Frame): label.set_halign(Gtk.Align.START) header.append(label) + # Spinner for "thinking" feedback + self._spinner = Gtk.Spinner() + self._spinner.set_visible(False) + header.append(self._spinner) + clear_btn = Gtk.Button(label="Clear") clear_btn.add_css_class("flat") clear_btn.connect("clicked", lambda _: self.clear()) @@ -66,52 +71,72 @@ class AgentOutputPanel(Gtk.Frame): self.set_child(box) - # Streaming state: track where the current assistant response starts + # Streaming state self._response_marks: dict[str, Gtk.TextMark] = {} self._response_accum: dict[str, list[str]] = {} def _setup_tags(self): buf = self._view.get_buffer() markdown.setup_tags(buf) + # Message region backgrounds — user indented right, assistant left + buf.create_tag("user-bg", + paragraph_background="#1a2a3a", + left_margin=120, right_margin=12, + pixels_above_lines=6, pixels_below_lines=6) + buf.create_tag("assistant-bg", + paragraph_background="#1a2a1a", + left_margin=12, right_margin=120, + pixels_above_lines=4, pixels_below_lines=4) + # Inline styles buf.create_tag("user-prefix", weight=700, foreground="#7aafff") + buf.create_tag("user-text", foreground="#c8ddf0") buf.create_tag("assistant-prefix", weight=700, foreground="#8bc78b") buf.create_tag("tool-prefix", weight=700, foreground="#d4a053") - buf.create_tag("tool-output", foreground="#aaaaaa", left_margin=16) + buf.create_tag("tool-output", foreground="#aaaaaa", left_margin=24) buf.create_tag("error", foreground="#ff6b6b") - buf.create_tag("ref-chip", foreground="#7aafff", style=Pango.Style.ITALIC) + buf.create_tag("ref-chip", foreground="#5a9fd4", style=Pango.Style.ITALIC) buf.create_tag("status", foreground="#888888") + buf.create_tag("thinking", foreground="#666666", style=Pango.Style.ITALIC) + + # -- helpers -- + + def _insert_tagged(self, text: str, *tags: str): + """Insert text at end and apply tags.""" + buf = self._view.get_buffer() + mark = buf.create_mark(None, buf.get_end_iter(), True) + buf.insert(buf.get_end_iter(), text) + start = buf.get_iter_at_mark(mark) + end = buf.get_end_iter() + for tag in tags: + buf.apply_tag_by_name(tag, start, end) + buf.delete_mark(mark) # -- Public API -- def append(self, text: str) -> None: """Append a status/info line.""" - buf = self._view.get_buffer() - end = buf.get_end_iter() - mark = buf.create_mark(None, end, True) - buf.insert(end, text) - start = buf.get_iter_at_mark(mark) - buf.apply_tag_by_name("status", start, buf.get_end_iter()) - buf.delete_mark(mark) + self._insert_tagged(text, "status") self._scroll_to_bottom() def clear(self) -> None: self._view.get_buffer().set_text("") self._response_marks.clear() self._response_accum.clear() + self._spinner.set_visible(False) + self._spinner.stop() def add_user_message(self, text: str, frames: list | None = None, transcripts: list | None = None) -> None: buf = self._view.get_buffer() - end = buf.get_end_iter() + + # Region start mark + region_mark = buf.create_mark(None, buf.get_end_iter(), True) # Prefix - mark = buf.create_mark(None, end, True) - buf.insert(end, "\n> ") - buf.apply_tag_by_name("user-prefix", buf.get_iter_at_mark(mark), buf.get_end_iter()) - buf.delete_mark(mark) + self._insert_tagged("\n> ", "user-prefix") # Text - buf.insert(buf.get_end_iter(), text) + self._insert_tagged(text, "user-text") # Reference chips refs = [] @@ -124,13 +149,15 @@ class AgentOutputPanel(Gtk.Frame): tid = t.transcript_id if hasattr(t, "transcript_id") else (t.id if hasattr(t, "id") else str(t)) refs.append(tid) if refs: - end = buf.get_end_iter() - mark = buf.create_mark(None, end, True) - buf.insert(end, " [" + ", ".join(refs) + "]") - buf.apply_tag_by_name("ref-chip", buf.get_iter_at_mark(mark), buf.get_end_iter()) - buf.delete_mark(mark) + self._insert_tagged(" [" + ", ".join(refs) + "]", "ref-chip") buf.insert(buf.get_end_iter(), "\n") + + # Apply background to entire user region + buf.apply_tag_by_name("user-bg", + buf.get_iter_at_mark(region_mark), + buf.get_end_iter()) + buf.delete_mark(region_mark) self._scroll_to_bottom() def begin_assistant_message(self, msg_id: str) -> None: @@ -141,63 +168,73 @@ class AgentOutputPanel(Gtk.Frame): self._response_marks[msg_id] = buf.create_mark(f"resp_{msg_id}", end, True) self._response_accum[msg_id] = [] + # Show inline thinking indicator + header spinner + self._insert_tagged("thinking...\n", "thinking") + self._spinner.set_visible(True) + self._spinner.start() + self._scroll_to_bottom() + def append_to_assistant(self, msg_id: str, text: str) -> None: if msg_id not in self._response_accum: return - self._response_accum[msg_id].append(text) buf = self._view.get_buffer() + # On first chunk, clear the "thinking..." placeholder + if not self._response_accum[msg_id]: + mark = self._response_marks.get(msg_id) + if mark: + start = buf.get_iter_at_mark(mark) + buf.delete(start, buf.get_end_iter()) + self._response_accum[msg_id].append(text) buf.insert(buf.get_end_iter(), text) self._scroll_to_bottom() def finish_assistant(self, msg_id: str, full_text: str) -> None: + # Stop spinner + self._spinner.set_visible(False) + self._spinner.stop() + mark = self._response_marks.pop(msg_id, None) self._response_accum.pop(msg_id, None) if not mark: return + buf = self._view.get_buffer() start = buf.get_iter_at_mark(mark) end = buf.get_end_iter() buf.delete(start, end) + + # Re-render with markdown + region_start = buf.create_mark(None, buf.get_iter_at_mark(mark), True) it = buf.get_iter_at_mark(mark) markdown.render(buf, it, full_text) buf.insert(buf.get_end_iter(), "\n") + + # Apply assistant background to rendered region + buf.apply_tag_by_name("assistant-bg", + buf.get_iter_at_mark(region_start), + buf.get_end_iter()) + buf.delete_mark(region_start) buf.delete_mark(mark) def add_tool_call(self, tool_use: ToolUse) -> None: - buf = self._view.get_buffer() - end = buf.get_end_iter() - - mark = buf.create_mark(None, end, True) - buf.insert(end, f" ▶ {tool_use.tool_name}") - buf.apply_tag_by_name("tool-prefix", buf.get_iter_at_mark(mark), buf.get_end_iter()) - buf.delete_mark(mark) + self._insert_tagged(f" ▶ {tool_use.tool_name}", "tool-prefix") if tool_use.input: inp = str(tool_use.input) if len(inp) > 80: inp = inp[:77] + "..." - end = buf.get_end_iter() - mark = buf.create_mark(None, end, True) - buf.insert(end, f" {inp}") - buf.apply_tag_by_name("tool-output", buf.get_iter_at_mark(mark), buf.get_end_iter()) - buf.delete_mark(mark) + self._insert_tagged(f" {inp}", "tool-output") - buf.insert(buf.get_end_iter(), "\n") + self._view.get_buffer().insert(self._view.get_buffer().get_end_iter(), "\n") self._scroll_to_bottom() def update_tool_result(self, tool_use_id: str, result: ToolResult) -> None: - buf = self._view.get_buffer() text = result.error or result.output or "" if not text: return - end = buf.get_end_iter() - mark = buf.create_mark(None, end, True) - # Indent tool output indented = "\n".join(f" {line}" for line in text.split("\n")) tag = "error" if result.error else "tool-output" - buf.insert(end, indented + "\n") - buf.apply_tag_by_name(tag, buf.get_iter_at_mark(mark), buf.get_end_iter()) - buf.delete_mark(mark) + self._insert_tagged(indented + "\n", tag) self._scroll_to_bottom() def load_thread(self, thread: Thread) -> None: @@ -219,9 +256,14 @@ class AgentOutputPanel(Gtk.Frame): elif isinstance(msg, AssistantMessage): text = " ".join(b.text for b in msg.content if isinstance(b, TextBlock)) buf = self._view.get_buffer() + region_mark = buf.create_mark(None, buf.get_end_iter(), True) it = buf.get_end_iter() markdown.render(buf, it, text) buf.insert(buf.get_end_iter(), "\n") + buf.apply_tag_by_name("assistant-bg", + buf.get_iter_at_mark(region_mark), + buf.get_end_iter()) + buf.delete_mark(region_mark) elif isinstance(msg, ToolUse): self.add_tool_call(msg) elif isinstance(msg, ToolResult): diff --git a/cht/window.py b/cht/window.py index f6e8823..4bf5508 100644 --- a/cht/window.py +++ b/cht/window.py @@ -555,6 +555,7 @@ class ChtWindow(Adw.ApplicationWindow): self._agent_input.connect("model-changed", self._on_model_changed) self._agent_input.connect("lang-changed", self._on_lang_changed) self._agent_input.connect("history-toggled", lambda p, v: setattr(self._agent, "include_history", v)) + self._agent_input.connect("permission-changed", self._on_permission_changed) right_box.append(self._agent_input) return right_box @@ -678,10 +679,16 @@ class ChtWindow(Adw.ApplicationWindow): self._agent.model = model log.info("Model switched to %s", model) + def _on_permission_changed(self, _panel, mode): + self._agent.permission_mode = mode + log.info("Permission mode switched to %s", mode) + def _populate_model_dropdown(self): self._agent_input.populate_models( self._agent.available_models, self._agent.model ) + from cht.config import AGENT_PERMISSION_MODE + self._agent_input.set_permission_mode(AGENT_PERMISSION_MODE) def _check_agent_auth(self): import os