transcript updates

This commit is contained in:
2026-04-03 03:18:09 -03:00
parent db3b94a6a1
commit d14390a649
3 changed files with 64 additions and 9 deletions

View File

@@ -59,32 +59,75 @@ class TranscriberEngine:
return []
self._ensure_model()
try:
kwargs = {"beam_size": 5, "vad_filter": True}
kwargs = {
"beam_size": 5,
"vad_filter": True,
"condition_on_previous_text": True,
}
if self.language:
kwargs["language"] = self.language
# Feed last transcript text as context for better continuity
if self._segments:
kwargs["initial_prompt"] = self._segments[-1].text
segments_iter, info = self._model.transcribe(str(wav_path), **kwargs)
except Exception as e:
log.error("Whisper transcription failed: %s", e)
return []
# Group whisper segments: new T-ID every N lines or on silence gap (>1s)
from cht.config import TRANSCRIBE_LINES_PER_GROUP
lines_per_group = TRANSCRIBE_LINES_PER_GROUP
SILENCE_GAP_S = 1.0
raw_segs = []
for seg in segments_iter:
text = seg.text.strip()
if text:
raw_segs.append((time_offset + seg.start, time_offset + seg.end, text))
new_segments = []
with self._lock:
if self._stopped:
return []
for seg in segments_iter:
text = seg.text.strip()
if not text:
continue
group_start = None
group_end = None
group_lines = []
prev_end = None
def _flush():
nonlocal group_start, group_end, group_lines
if not group_lines:
return
tid = f"T{self._next_id:04d}"
self._next_id += 1
entry = TranscriptSegment(
id=tid,
start=time_offset + seg.start,
end=time_offset + seg.end,
text=text,
start=group_start,
end=group_end,
text=" ".join(group_lines),
)
self._segments.append(entry)
new_segments.append(entry)
group_lines = []
group_start = None
group_end = None
for start, end, text in raw_segs:
# Silence gap → flush current group
if prev_end is not None and start - prev_end > SILENCE_GAP_S:
_flush()
if group_start is None:
group_start = start
group_end = end
group_lines.append(text)
prev_end = end
if len(group_lines) >= lines_per_group:
_flush()
_flush()
return new_segments