audio and transcript
This commit is contained in:
@@ -18,12 +18,22 @@ class FrameRef:
|
||||
timestamp: float # seconds into recording
|
||||
|
||||
|
||||
@dataclass
class TranscriptRef:
    """A single transcript segment of a recording, addressable by its id."""

    id: str  # segment identifier, e.g. "T0001"
    start: float  # segment start, seconds into recording
    end: float  # segment end, seconds into recording
    text: str  # text of the segment
|
||||
|
||||
|
||||
@dataclass
class SessionContext:
    """Snapshot of a recording session passed to an agent provider with a message.

    The three list fields with defaults are optional; each instance gets its
    own fresh empty list when they are omitted.
    """

    session_dir: Path
    frames: list[FrameRef]  # all captured frames so far
    duration: float  # current recording duration (seconds)
    # Frames @-referenced in the message.
    mentioned_frames: list[FrameRef] = field(default_factory=list)
    # Transcript segments loaded for the session.
    transcript_segments: list[TranscriptRef] = field(default_factory=list)
    # Transcript segments @T-referenced in the message.
    mentioned_transcripts: list[TranscriptRef] = field(default_factory=list)
|
||||
|
||||
|
||||
class AgentProvider(ABC):
|
||||
|
||||
@@ -47,6 +47,21 @@ def _build_prompt(message: str, context: SessionContext) -> str:
|
||||
fm, fs = divmod(int(f.timestamp), 60)
|
||||
lines.append(f" {f.id} at {fm:02d}:{fs:02d} — {f.path}")
|
||||
|
||||
# Transcript
|
||||
if context.transcript_segments:
|
||||
lines.append(f"\nTranscript ({len(context.transcript_segments)} segments):")
|
||||
for t in context.transcript_segments:
|
||||
tm1, ts1 = divmod(int(t.start), 60)
|
||||
tm2, ts2 = divmod(int(t.end), 60)
|
||||
lines.append(f" {t.id} [{tm1:02d}:{ts1:02d}-{tm2:02d}:{ts2:02d}] {t.text}")
|
||||
|
||||
if context.mentioned_transcripts:
|
||||
lines.append("\nTranscript segments referenced in this message:")
|
||||
for t in context.mentioned_transcripts:
|
||||
tm1, ts1 = divmod(int(t.start), 60)
|
||||
tm2, ts2 = divmod(int(t.end), 60)
|
||||
lines.append(f" {t.id} [{tm1:02d}:{ts1:02d}-{tm2:02d}:{ts2:02d}] {t.text}")
|
||||
|
||||
lines.append(f"\nUser message: {message}")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
@@ -95,10 +95,17 @@ class OpenAICompatProvider(AgentProvider):
|
||||
|
||||
# Build context header
|
||||
m, s = divmod(int(context.duration), 60)
|
||||
ctx_text = (
|
||||
f"Recording duration: {m:02d}:{s:02d}\n"
|
||||
f"Total frames: {len(context.frames)}\n"
|
||||
)
|
||||
ctx_lines = [
|
||||
f"Recording duration: {m:02d}:{s:02d}",
|
||||
f"Total frames: {len(context.frames)}",
|
||||
]
|
||||
if context.transcript_segments:
|
||||
ctx_lines.append(f"\nTranscript ({len(context.transcript_segments)} segments):")
|
||||
for t in context.transcript_segments:
|
||||
tm1, ts1 = divmod(int(t.start), 60)
|
||||
tm2, ts2 = divmod(int(t.end), 60)
|
||||
ctx_lines.append(f" {t.id} [{tm1:02d}:{ts1:02d}-{tm2:02d}:{ts2:02d}] {t.text}")
|
||||
ctx_text = "\n".join(ctx_lines) + "\n"
|
||||
|
||||
frames_to_send = context.mentioned_frames
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ from pathlib import Path
|
||||
from threading import Thread
|
||||
from typing import Callable
|
||||
|
||||
from cht.agent.base import AgentProvider, FrameRef, SessionContext
|
||||
from cht.agent.base import AgentProvider, FrameRef, TranscriptRef, SessionContext
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@@ -98,6 +98,33 @@ def _load_frames(frames_dir: Path) -> list[FrameRef]:
|
||||
return []
|
||||
|
||||
|
||||
def _load_transcript(transcript_dir: Path) -> list[TranscriptRef]:
|
||||
index_path = transcript_dir / "index.json"
|
||||
if not index_path.exists():
|
||||
return []
|
||||
try:
|
||||
entries = json.loads(index_path.read_text())
|
||||
return [TranscriptRef(**e) for e in entries]
|
||||
except Exception as e:
|
||||
log.warning("Could not load transcript index: %s", e)
|
||||
return []
|
||||
|
||||
|
||||
def _parse_transcript_mentions(message: str, segments: list[TranscriptRef]) -> list[TranscriptRef]:
|
||||
"""Extract @T references from message. Accepts @T0001, @t1, @T1."""
|
||||
mentioned = []
|
||||
seen = set()
|
||||
for match in re.finditer(r"@[Tt](\d+)", message):
|
||||
num = int(match.group(1))
|
||||
tid = f"T{num:04d}"
|
||||
if tid not in seen:
|
||||
seg = next((s for s in segments if s.id == tid), None)
|
||||
if seg:
|
||||
mentioned.append(seg)
|
||||
seen.add(tid)
|
||||
return mentioned
|
||||
|
||||
|
||||
class AgentRunner:
|
||||
"""Runs agent queries in a background thread, streams chunks to a callback."""
|
||||
|
||||
@@ -152,12 +179,16 @@ class AgentRunner:
|
||||
try:
|
||||
provider = self._get_provider()
|
||||
frames = _load_frames(stream_mgr.frames_dir)
|
||||
mentioned = _parse_mentions(message, frames)
|
||||
mentioned_frames = _parse_mentions(message, frames)
|
||||
transcript = _load_transcript(stream_mgr.transcript_dir)
|
||||
mentioned_transcripts = _parse_transcript_mentions(message, transcript)
|
||||
context = SessionContext(
|
||||
session_dir=stream_mgr.session_dir,
|
||||
frames=frames,
|
||||
duration=tracker.duration if tracker else 0.0,
|
||||
mentioned_frames=mentioned,
|
||||
mentioned_frames=mentioned_frames,
|
||||
transcript_segments=transcript,
|
||||
mentioned_transcripts=mentioned_transcripts,
|
||||
)
|
||||
for chunk in provider.stream(message, context):
|
||||
on_chunk(chunk)
|
||||
|
||||
Reference in New Issue
Block a user