agent improvement

This commit is contained in:
2026-04-09 14:58:15 -03:00
parent 64ecdca71e
commit e69fec5aea
5 changed files with 141 additions and 56 deletions

View File

@@ -37,9 +37,11 @@ You help the user understand what happened during their recording session.
You have access to frame screenshots extracted from the recording. When frames are mentioned, You have access to frame screenshots extracted from the recording. When frames are mentioned,
use the Read tool to view them. Frame timestamps are in seconds from the start of the recording. use the Read tool to view them. Frame timestamps are in seconds from the start of the recording.
You also have tools to search transcripts, get session info, and capture new frames. You can use any available tools including WebFetch and WebSearch when the user asks you to
look something up. Use them freely — all tools are pre-authorized.
Be concise and specific. Focus on what's visible in the frames.""" Your primary role is description and analysis, not code generation. Be concise and specific.
Focus on what's visible in the frames and what's in the transcript."""
MODELS = [ MODELS = [
"claude-sonnet-4-6", "claude-sonnet-4-6",
@@ -78,21 +80,16 @@ def _messages_to_prompt(messages: list[Message]) -> str:
return "\n".join(lines) return "\n".join(lines)
def _tool_schemas(tools: list[Tool]) -> list[str]:
"""Extract tool names for the SDK's allowed_tools parameter."""
# The Claude SDK uses allowed_tools as a list of tool name strings.
# Our custom tools are executed by the runner, not by the SDK,
# so we only pass "Read" to the SDK (for frame viewing).
return ["Read"]
class ClaudeSDKConnection: class ClaudeSDKConnection:
"""AgentConnection using claude_agent_sdk — requires Claude Code CLI.""" """AgentConnection using claude_agent_sdk — requires Claude Code CLI."""
def __init__(self, cwd: str | None = None, max_turns: int = 5, model: str = MODELS[0]): def __init__(self, cwd: str | None = None, max_turns: int | None = None,
model: str = MODELS[0], permission_mode: str | None = None):
from cht.config import AGENT_PERMISSION_MODE, AGENT_MAX_TURNS
self._cwd = cwd self._cwd = cwd
self._max_turns = max_turns self._max_turns = max_turns or AGENT_MAX_TURNS
self._model = model self._model = model
self._permission_mode = permission_mode or AGENT_PERMISSION_MODE
self._cancelled = False self._cancelled = False
@property @property
@@ -147,9 +144,9 @@ class ClaudeSDKConnection:
options=ClaudeAgentOptions( options=ClaudeAgentOptions(
model=self._model, model=self._model,
cwd=cwd or ".", cwd=cwd or ".",
allowed_tools=_tool_schemas(tools),
system_prompt=SYSTEM_PROMPT, system_prompt=SYSTEM_PROMPT,
max_turns=self._max_turns, max_turns=self._max_turns,
permission_mode=self._permission_mode,
), ),
): ):
if self._cancelled: if self._cancelled:

View File

@@ -177,6 +177,20 @@ class AgentRunner:
def model(self, value: str): def model(self, value: str):
self._get_connection().set_model(value) self._get_connection().set_model(value)
@property
def permission_mode(self) -> str:
    """Permission mode stored on the current connection.

    Yields "default" when the connection object carries no
    ``_permission_mode`` attribute.
    """
    return getattr(self._get_connection(), "_permission_mode", "default")

@permission_mode.setter
def permission_mode(self, value: str):
    """Store ``value`` as the active permission mode.

    The connection's ``_permission_mode`` is overwritten only when the
    connection already has that attribute; ``cht.config.AGENT_PERMISSION_MODE``
    is always overwritten, and the change is logged.
    """
    connection = self._get_connection()
    if hasattr(connection, "_permission_mode"):
        setattr(connection, "_permission_mode", value)
    # Imported here, at call time, exactly as the original does; the module
    # attribute is then rebound to the new value.
    import cht.config
    cht.config.AGENT_PERMISSION_MODE = value
    log.info("Permission mode set to %s", value)
def clear_history(self): def clear_history(self):
self._thread = AgentThread() self._thread = AgentThread()

View File

@@ -21,6 +21,7 @@ class AgentInputPanel(Gtk.Frame):
"model-changed": (GObject.SignalFlags.RUN_FIRST, None, (str,)), "model-changed": (GObject.SignalFlags.RUN_FIRST, None, (str,)),
"lang-changed": (GObject.SignalFlags.RUN_FIRST, None, (str,)), "lang-changed": (GObject.SignalFlags.RUN_FIRST, None, (str,)),
"history-toggled": (GObject.SignalFlags.RUN_FIRST, None, (bool,)), "history-toggled": (GObject.SignalFlags.RUN_FIRST, None, (bool,)),
"permission-changed": (GObject.SignalFlags.RUN_FIRST, None, (str,)),
} }
def __init__(self, **kwargs): def __init__(self, **kwargs):
@@ -68,6 +69,18 @@ class AgentInputPanel(Gtk.Frame):
history_toggle.connect("toggled", lambda b: self.emit("history-toggled", b.get_active())) history_toggle.connect("toggled", lambda b: self.emit("history-toggled", b.get_active()))
actions_box.append(history_toggle) actions_box.append(history_toggle)
perm_label = Gtk.Label(label="Perms:")
perm_label.add_css_class("dim-label")
actions_box.append(perm_label)
self._perm_modes = ["default", "acceptEdits", "bypassPermissions", "dontAsk", "plan"]
self._perm_labels = ["Default", "Accept Edits", "Bypass All", "Don't Ask", "Plan Only"]
self._perm_dropdown = Gtk.DropDown.new_from_strings(self._perm_labels)
self._perm_dropdown.set_size_request(140, -1)
self._perm_dropdown.set_tooltip_text("Claude SDK permission mode")
self._perm_dropdown.connect("notify::selected", self._on_perm_changed)
actions_box.append(self._perm_dropdown)
outer.append(actions_box) outer.append(actions_box)
input_row = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=4) input_row = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=4)
@@ -123,3 +136,15 @@ class AgentInputPanel(Gtk.Frame):
if idx < len(self._lang_names): if idx < len(self._lang_names):
lang_code = LANGUAGES[self._lang_names[idx]] lang_code = LANGUAGES[self._lang_names[idx]]
self.emit("lang-changed", lang_code or "") self.emit("lang-changed", lang_code or "")
def _on_perm_changed(self, dropdown, _pspec):
idx = dropdown.get_selected()
if idx < len(self._perm_modes):
self.emit("permission-changed", self._perm_modes[idx])
def set_permission_mode(self, mode: str) -> None:
    """Select the dropdown row that corresponds to ``mode``.

    An unknown ``mode`` leaves the current selection untouched. It is
    reported with a warning instead of being dropped silently, so a
    mistyped mode name does not leave the dropdown out of sync unnoticed.
    """
    import logging

    # Only the lookup is guarded: a ValueError raised by anything else
    # should not be swallowed here.
    try:
        idx = self._perm_modes.index(mode)
    except ValueError:
        # Best effort: keep the current selection rather than raising.
        logging.getLogger(__name__).warning(
            "Unknown permission mode %r; dropdown selection unchanged", mode
        )
        return
    self._perm_dropdown.set_selected(idx)

View File

@@ -1,4 +1,4 @@
"""Agent output panel — single TextView, fully selectable/copy-pastable.""" """Agent output panel — single TextView, colored message regions, copy-pastable."""
import logging import logging
@@ -41,6 +41,11 @@ class AgentOutputPanel(Gtk.Frame):
label.set_halign(Gtk.Align.START) label.set_halign(Gtk.Align.START)
header.append(label) header.append(label)
# Spinner for "thinking" feedback
self._spinner = Gtk.Spinner()
self._spinner.set_visible(False)
header.append(self._spinner)
clear_btn = Gtk.Button(label="Clear") clear_btn = Gtk.Button(label="Clear")
clear_btn.add_css_class("flat") clear_btn.add_css_class("flat")
clear_btn.connect("clicked", lambda _: self.clear()) clear_btn.connect("clicked", lambda _: self.clear())
@@ -66,52 +71,72 @@ class AgentOutputPanel(Gtk.Frame):
self.set_child(box) self.set_child(box)
# Streaming state: track where the current assistant response starts # Streaming state
self._response_marks: dict[str, Gtk.TextMark] = {} self._response_marks: dict[str, Gtk.TextMark] = {}
self._response_accum: dict[str, list[str]] = {} self._response_accum: dict[str, list[str]] = {}
def _setup_tags(self): def _setup_tags(self):
buf = self._view.get_buffer() buf = self._view.get_buffer()
markdown.setup_tags(buf) markdown.setup_tags(buf)
# Message region backgrounds — user indented right, assistant left
buf.create_tag("user-bg",
paragraph_background="#1a2a3a",
left_margin=120, right_margin=12,
pixels_above_lines=6, pixels_below_lines=6)
buf.create_tag("assistant-bg",
paragraph_background="#1a2a1a",
left_margin=12, right_margin=120,
pixels_above_lines=4, pixels_below_lines=4)
# Inline styles
buf.create_tag("user-prefix", weight=700, foreground="#7aafff") buf.create_tag("user-prefix", weight=700, foreground="#7aafff")
buf.create_tag("user-text", foreground="#c8ddf0")
buf.create_tag("assistant-prefix", weight=700, foreground="#8bc78b") buf.create_tag("assistant-prefix", weight=700, foreground="#8bc78b")
buf.create_tag("tool-prefix", weight=700, foreground="#d4a053") buf.create_tag("tool-prefix", weight=700, foreground="#d4a053")
buf.create_tag("tool-output", foreground="#aaaaaa", left_margin=16) buf.create_tag("tool-output", foreground="#aaaaaa", left_margin=24)
buf.create_tag("error", foreground="#ff6b6b") buf.create_tag("error", foreground="#ff6b6b")
buf.create_tag("ref-chip", foreground="#7aafff", style=Pango.Style.ITALIC) buf.create_tag("ref-chip", foreground="#5a9fd4", style=Pango.Style.ITALIC)
buf.create_tag("status", foreground="#888888") buf.create_tag("status", foreground="#888888")
buf.create_tag("thinking", foreground="#666666", style=Pango.Style.ITALIC)
# -- helpers --
def _insert_tagged(self, text: str, *tags: str):
"""Insert text at end and apply tags."""
buf = self._view.get_buffer()
mark = buf.create_mark(None, buf.get_end_iter(), True)
buf.insert(buf.get_end_iter(), text)
start = buf.get_iter_at_mark(mark)
end = buf.get_end_iter()
for tag in tags:
buf.apply_tag_by_name(tag, start, end)
buf.delete_mark(mark)
# -- Public API -- # -- Public API --
def append(self, text: str) -> None: def append(self, text: str) -> None:
"""Append a status/info line.""" """Append a status/info line."""
buf = self._view.get_buffer() self._insert_tagged(text, "status")
end = buf.get_end_iter()
mark = buf.create_mark(None, end, True)
buf.insert(end, text)
start = buf.get_iter_at_mark(mark)
buf.apply_tag_by_name("status", start, buf.get_end_iter())
buf.delete_mark(mark)
self._scroll_to_bottom() self._scroll_to_bottom()
def clear(self) -> None: def clear(self) -> None:
self._view.get_buffer().set_text("") self._view.get_buffer().set_text("")
self._response_marks.clear() self._response_marks.clear()
self._response_accum.clear() self._response_accum.clear()
self._spinner.set_visible(False)
self._spinner.stop()
def add_user_message(self, text: str, frames: list | None = None, def add_user_message(self, text: str, frames: list | None = None,
transcripts: list | None = None) -> None: transcripts: list | None = None) -> None:
buf = self._view.get_buffer() buf = self._view.get_buffer()
end = buf.get_end_iter()
# Region start mark
region_mark = buf.create_mark(None, buf.get_end_iter(), True)
# Prefix # Prefix
mark = buf.create_mark(None, end, True) self._insert_tagged("\n> ", "user-prefix")
buf.insert(end, "\n> ")
buf.apply_tag_by_name("user-prefix", buf.get_iter_at_mark(mark), buf.get_end_iter())
buf.delete_mark(mark)
# Text # Text
buf.insert(buf.get_end_iter(), text) self._insert_tagged(text, "user-text")
# Reference chips # Reference chips
refs = [] refs = []
@@ -124,13 +149,15 @@ class AgentOutputPanel(Gtk.Frame):
tid = t.transcript_id if hasattr(t, "transcript_id") else (t.id if hasattr(t, "id") else str(t)) tid = t.transcript_id if hasattr(t, "transcript_id") else (t.id if hasattr(t, "id") else str(t))
refs.append(tid) refs.append(tid)
if refs: if refs:
end = buf.get_end_iter() self._insert_tagged(" [" + ", ".join(refs) + "]", "ref-chip")
mark = buf.create_mark(None, end, True)
buf.insert(end, " [" + ", ".join(refs) + "]")
buf.apply_tag_by_name("ref-chip", buf.get_iter_at_mark(mark), buf.get_end_iter())
buf.delete_mark(mark)
buf.insert(buf.get_end_iter(), "\n") buf.insert(buf.get_end_iter(), "\n")
# Apply background to entire user region
buf.apply_tag_by_name("user-bg",
buf.get_iter_at_mark(region_mark),
buf.get_end_iter())
buf.delete_mark(region_mark)
self._scroll_to_bottom() self._scroll_to_bottom()
def begin_assistant_message(self, msg_id: str) -> None: def begin_assistant_message(self, msg_id: str) -> None:
@@ -141,63 +168,73 @@ class AgentOutputPanel(Gtk.Frame):
self._response_marks[msg_id] = buf.create_mark(f"resp_{msg_id}", end, True) self._response_marks[msg_id] = buf.create_mark(f"resp_{msg_id}", end, True)
self._response_accum[msg_id] = [] self._response_accum[msg_id] = []
# Show inline thinking indicator + header spinner
self._insert_tagged("thinking...\n", "thinking")
self._spinner.set_visible(True)
self._spinner.start()
self._scroll_to_bottom()
def append_to_assistant(self, msg_id: str, text: str) -> None: def append_to_assistant(self, msg_id: str, text: str) -> None:
if msg_id not in self._response_accum: if msg_id not in self._response_accum:
return return
self._response_accum[msg_id].append(text)
buf = self._view.get_buffer() buf = self._view.get_buffer()
# On first chunk, clear the "thinking..." placeholder
if not self._response_accum[msg_id]:
mark = self._response_marks.get(msg_id)
if mark:
start = buf.get_iter_at_mark(mark)
buf.delete(start, buf.get_end_iter())
self._response_accum[msg_id].append(text)
buf.insert(buf.get_end_iter(), text) buf.insert(buf.get_end_iter(), text)
self._scroll_to_bottom() self._scroll_to_bottom()
def finish_assistant(self, msg_id: str, full_text: str) -> None: def finish_assistant(self, msg_id: str, full_text: str) -> None:
# Stop spinner
self._spinner.set_visible(False)
self._spinner.stop()
mark = self._response_marks.pop(msg_id, None) mark = self._response_marks.pop(msg_id, None)
self._response_accum.pop(msg_id, None) self._response_accum.pop(msg_id, None)
if not mark: if not mark:
return return
buf = self._view.get_buffer() buf = self._view.get_buffer()
start = buf.get_iter_at_mark(mark) start = buf.get_iter_at_mark(mark)
end = buf.get_end_iter() end = buf.get_end_iter()
buf.delete(start, end) buf.delete(start, end)
# Re-render with markdown
region_start = buf.create_mark(None, buf.get_iter_at_mark(mark), True)
it = buf.get_iter_at_mark(mark) it = buf.get_iter_at_mark(mark)
markdown.render(buf, it, full_text) markdown.render(buf, it, full_text)
buf.insert(buf.get_end_iter(), "\n") buf.insert(buf.get_end_iter(), "\n")
# Apply assistant background to rendered region
buf.apply_tag_by_name("assistant-bg",
buf.get_iter_at_mark(region_start),
buf.get_end_iter())
buf.delete_mark(region_start)
buf.delete_mark(mark) buf.delete_mark(mark)
def add_tool_call(self, tool_use: ToolUse) -> None: def add_tool_call(self, tool_use: ToolUse) -> None:
buf = self._view.get_buffer() self._insert_tagged(f"{tool_use.tool_name}", "tool-prefix")
end = buf.get_end_iter()
mark = buf.create_mark(None, end, True)
buf.insert(end, f"{tool_use.tool_name}")
buf.apply_tag_by_name("tool-prefix", buf.get_iter_at_mark(mark), buf.get_end_iter())
buf.delete_mark(mark)
if tool_use.input: if tool_use.input:
inp = str(tool_use.input) inp = str(tool_use.input)
if len(inp) > 80: if len(inp) > 80:
inp = inp[:77] + "..." inp = inp[:77] + "..."
end = buf.get_end_iter() self._insert_tagged(f" {inp}", "tool-output")
mark = buf.create_mark(None, end, True)
buf.insert(end, f" {inp}")
buf.apply_tag_by_name("tool-output", buf.get_iter_at_mark(mark), buf.get_end_iter())
buf.delete_mark(mark)
buf.insert(buf.get_end_iter(), "\n") self._view.get_buffer().insert(self._view.get_buffer().get_end_iter(), "\n")
self._scroll_to_bottom() self._scroll_to_bottom()
def update_tool_result(self, tool_use_id: str, result: ToolResult) -> None: def update_tool_result(self, tool_use_id: str, result: ToolResult) -> None:
buf = self._view.get_buffer()
text = result.error or result.output or "" text = result.error or result.output or ""
if not text: if not text:
return return
end = buf.get_end_iter()
mark = buf.create_mark(None, end, True)
# Indent tool output
indented = "\n".join(f" {line}" for line in text.split("\n")) indented = "\n".join(f" {line}" for line in text.split("\n"))
tag = "error" if result.error else "tool-output" tag = "error" if result.error else "tool-output"
buf.insert(end, indented + "\n") self._insert_tagged(indented + "\n", tag)
buf.apply_tag_by_name(tag, buf.get_iter_at_mark(mark), buf.get_end_iter())
buf.delete_mark(mark)
self._scroll_to_bottom() self._scroll_to_bottom()
def load_thread(self, thread: Thread) -> None: def load_thread(self, thread: Thread) -> None:
@@ -219,9 +256,14 @@ class AgentOutputPanel(Gtk.Frame):
elif isinstance(msg, AssistantMessage): elif isinstance(msg, AssistantMessage):
text = " ".join(b.text for b in msg.content if isinstance(b, TextBlock)) text = " ".join(b.text for b in msg.content if isinstance(b, TextBlock))
buf = self._view.get_buffer() buf = self._view.get_buffer()
region_mark = buf.create_mark(None, buf.get_end_iter(), True)
it = buf.get_end_iter() it = buf.get_end_iter()
markdown.render(buf, it, text) markdown.render(buf, it, text)
buf.insert(buf.get_end_iter(), "\n") buf.insert(buf.get_end_iter(), "\n")
buf.apply_tag_by_name("assistant-bg",
buf.get_iter_at_mark(region_mark),
buf.get_end_iter())
buf.delete_mark(region_mark)
elif isinstance(msg, ToolUse): elif isinstance(msg, ToolUse):
self.add_tool_call(msg) self.add_tool_call(msg)
elif isinstance(msg, ToolResult): elif isinstance(msg, ToolResult):

View File

@@ -555,6 +555,7 @@ class ChtWindow(Adw.ApplicationWindow):
self._agent_input.connect("model-changed", self._on_model_changed) self._agent_input.connect("model-changed", self._on_model_changed)
self._agent_input.connect("lang-changed", self._on_lang_changed) self._agent_input.connect("lang-changed", self._on_lang_changed)
self._agent_input.connect("history-toggled", lambda p, v: setattr(self._agent, "include_history", v)) self._agent_input.connect("history-toggled", lambda p, v: setattr(self._agent, "include_history", v))
self._agent_input.connect("permission-changed", self._on_permission_changed)
right_box.append(self._agent_input) right_box.append(self._agent_input)
return right_box return right_box
@@ -678,10 +679,16 @@ class ChtWindow(Adw.ApplicationWindow):
self._agent.model = model self._agent.model = model
log.info("Model switched to %s", model) log.info("Model switched to %s", model)
def _on_permission_changed(self, _panel, mode):
    """Handle "permission-changed": hand the chosen mode to the agent and log it."""
    agent = self._agent
    agent.permission_mode = mode
    log.info("Permission mode switched to %s", mode)
def _populate_model_dropdown(self): def _populate_model_dropdown(self):
self._agent_input.populate_models( self._agent_input.populate_models(
self._agent.available_models, self._agent.model self._agent.available_models, self._agent.model
) )
from cht.config import AGENT_PERMISSION_MODE
self._agent_input.set_permission_mode(AGENT_PERMISSION_MODE)
def _check_agent_auth(self): def _check_agent_auth(self):
import os import os