audio and transcript

This commit is contained in:
2026-04-02 22:57:21 -03:00
parent 0b5575f3b3
commit d61e2a5492
13 changed files with 556 additions and 11 deletions

View File

@@ -15,7 +15,7 @@ from pathlib import Path
from threading import Thread
from typing import Callable
from cht.agent.base import AgentProvider, FrameRef, SessionContext
from cht.agent.base import AgentProvider, FrameRef, TranscriptRef, SessionContext
log = logging.getLogger(__name__)
@@ -98,6 +98,33 @@ def _load_frames(frames_dir: Path) -> list[FrameRef]:
return []
def _load_transcript(transcript_dir: Path) -> list[TranscriptRef]:
    """Load the transcript segment index stored in *transcript_dir*.

    Reads ``index.json`` from the directory and builds one ``TranscriptRef``
    per entry, passing each entry's items as keyword arguments.

    Args:
        transcript_dir: Directory expected to contain ``index.json``.

    Returns:
        The loaded segments, or an empty list when the index file does not
        exist or cannot be read, decoded, or converted. The failure case is
        logged as a warning instead of being raised.
    """
    index_path = transcript_dir / "index.json"
    if not index_path.exists():
        return []
    try:
        # Hand json.loads the raw bytes: it auto-detects UTF-8/16/32 (and a
        # UTF-8 BOM), so decoding never depends on the platform's locale
        # default the way a bare read_text() does.
        entries = json.loads(index_path.read_bytes())
        return [TranscriptRef(**entry) for entry in entries]
    except Exception as e:
        # Deliberately broad: a broken index must not abort the caller.
        log.warning("Could not load transcript index: %s", e)
        return []
def _parse_transcript_mentions(message: str, segments: list[TranscriptRef]) -> list[TranscriptRef]:
    """Extract @T references from message. Accepts @T0001, @t1, @T1.

    Each ``@T<digits>`` / ``@t<digits>`` occurrence is normalised to an id of
    the form ``T`` followed by the number zero-padded to at least four digits
    and matched against the ``id`` attribute of the given segments.

    Args:
        message: Text to scan for transcript mentions.
        segments: Candidate segments; each must expose an ``id`` attribute.

    Returns:
        The mentioned segments in order of first appearance in *message*,
        without duplicates. Mentions that match no segment are ignored.
    """
    # Build the id lookup once instead of rescanning `segments` per mention.
    # setdefault keeps the first segment when ids repeat, matching a
    # front-to-back linear search.
    by_id = {}
    for segment in segments:
        by_id.setdefault(segment.id, segment)

    mentioned = []
    seen = set()
    for match in re.finditer(r"@[Tt](\d+)", message):
        tid = f"T{int(match.group(1)):04d}"
        if tid in seen:
            continue
        seg = by_id.get(tid)
        if seg is not None:
            mentioned.append(seg)
            seen.add(tid)
    return mentioned
class AgentRunner:
"""Runs agent queries in a background thread, streams chunks to a callback."""
@@ -152,12 +179,16 @@ class AgentRunner:
try:
provider = self._get_provider()
frames = _load_frames(stream_mgr.frames_dir)
mentioned = _parse_mentions(message, frames)
mentioned_frames = _parse_mentions(message, frames)
transcript = _load_transcript(stream_mgr.transcript_dir)
mentioned_transcripts = _parse_transcript_mentions(message, transcript)
context = SessionContext(
session_dir=stream_mgr.session_dir,
frames=frames,
duration=tracker.duration if tracker else 0.0,
mentioned_frames=mentioned,
mentioned_frames=mentioned_frames,
transcript_segments=transcript,
mentioned_transcripts=mentioned_transcripts,
)
for chunk in provider.stream(message, context):
on_chunk(chunk)