transcript updates
This commit is contained in:
@@ -26,5 +26,7 @@ AUDIO_SAFETY_MARGIN = 2 # seconds safety margin (matches scene detector)
|
|||||||
WAVEFORM_BUCKET_MS = 50 # milliseconds per waveform peak bucket
|
WAVEFORM_BUCKET_MS = 50 # milliseconds per waveform peak bucket
|
||||||
|
|
||||||
# Transcription
|
# Transcription
|
||||||
WHISPER_MODEL = "small" # "small" for speed, "medium" for accuracy
|
WHISPER_MODEL = "medium" # "small" for speed, "medium" for accuracy
|
||||||
WHISPER_DEVICE = "cuda" # "cuda" or "cpu"
|
WHISPER_DEVICE = "cuda" # "cuda" or "cpu"
|
||||||
|
TRANSCRIBE_MIN_CHUNK_S = 5 # minimum seconds of audio before transcribing
|
||||||
|
TRANSCRIBE_LINES_PER_GROUP = 3 # whisper segments grouped per transcript ID (1-5)
|
||||||
|
|||||||
@@ -39,6 +39,16 @@ def list_sessions():
|
|||||||
return sessions
|
return sessions
|
||||||
|
|
||||||
|
|
||||||
|
def delete_sessions(session_ids):
    """Delete session directories by ID.

    Each ID is joined onto ``SESSIONS_DIR`` and the resulting directory tree
    is removed. Only directories that sit directly inside ``SESSIONS_DIR``
    are eligible: an empty ID or ``"."`` would otherwise resolve to
    ``SESSIONS_DIR`` itself (wiping every session), and IDs containing
    ``".."`` or an absolute path would escape the sessions root. Such IDs
    are skipped with a warning. IDs that do not name an existing directory
    are silently ignored.

    Args:
        session_ids: Iterable of session ID strings to delete.
    """
    import shutil

    # Resolve once so the containment check compares canonical paths.
    sessions_root = SESSIONS_DIR.resolve()
    for sid in session_ids:
        path = (SESSIONS_DIR / sid).resolve()
        # A valid session lives exactly one level below the sessions root.
        if path.parent != sessions_root:
            log.warning("Refusing to delete invalid session id: %r", sid)
            continue
        # is_dir() is already False for missing paths; no exists() needed.
        if path.is_dir():
            shutil.rmtree(path)
            log.info("Deleted session: %s", sid)
|
||||||
|
|
||||||
|
|
||||||
class StreamManager:
|
class StreamManager:
|
||||||
def __init__(self, session_id=None):
|
def __init__(self, session_id=None):
|
||||||
if session_id is None:
|
if session_id is None:
|
||||||
|
|||||||
@@ -59,32 +59,75 @@ class TranscriberEngine:
|
|||||||
return []
|
return []
|
||||||
self._ensure_model()
|
self._ensure_model()
|
||||||
try:
|
try:
|
||||||
kwargs = {"beam_size": 5, "vad_filter": True}
|
kwargs = {
|
||||||
|
"beam_size": 5,
|
||||||
|
"vad_filter": True,
|
||||||
|
"condition_on_previous_text": True,
|
||||||
|
}
|
||||||
if self.language:
|
if self.language:
|
||||||
kwargs["language"] = self.language
|
kwargs["language"] = self.language
|
||||||
|
# Feed last transcript text as context for better continuity
|
||||||
|
if self._segments:
|
||||||
|
kwargs["initial_prompt"] = self._segments[-1].text
|
||||||
segments_iter, info = self._model.transcribe(str(wav_path), **kwargs)
|
segments_iter, info = self._model.transcribe(str(wav_path), **kwargs)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error("Whisper transcription failed: %s", e)
|
log.error("Whisper transcription failed: %s", e)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
# Group whisper segments: new T-ID every N lines or on silence gap (>1s)
|
||||||
|
from cht.config import TRANSCRIBE_LINES_PER_GROUP
|
||||||
|
lines_per_group = TRANSCRIBE_LINES_PER_GROUP
|
||||||
|
SILENCE_GAP_S = 1.0
|
||||||
|
|
||||||
|
raw_segs = []
|
||||||
|
for seg in segments_iter:
|
||||||
|
text = seg.text.strip()
|
||||||
|
if text:
|
||||||
|
raw_segs.append((time_offset + seg.start, time_offset + seg.end, text))
|
||||||
|
|
||||||
new_segments = []
|
new_segments = []
|
||||||
with self._lock:
|
with self._lock:
|
||||||
if self._stopped:
|
if self._stopped:
|
||||||
return []
|
return []
|
||||||
for seg in segments_iter:
|
|
||||||
text = seg.text.strip()
|
group_start = None
|
||||||
if not text:
|
group_end = None
|
||||||
continue
|
group_lines = []
|
||||||
|
prev_end = None
|
||||||
|
|
||||||
|
def _flush():
|
||||||
|
nonlocal group_start, group_end, group_lines
|
||||||
|
if not group_lines:
|
||||||
|
return
|
||||||
tid = f"T{self._next_id:04d}"
|
tid = f"T{self._next_id:04d}"
|
||||||
self._next_id += 1
|
self._next_id += 1
|
||||||
entry = TranscriptSegment(
|
entry = TranscriptSegment(
|
||||||
id=tid,
|
id=tid,
|
||||||
start=time_offset + seg.start,
|
start=group_start,
|
||||||
end=time_offset + seg.end,
|
end=group_end,
|
||||||
text=text,
|
text=" ".join(group_lines),
|
||||||
)
|
)
|
||||||
self._segments.append(entry)
|
self._segments.append(entry)
|
||||||
new_segments.append(entry)
|
new_segments.append(entry)
|
||||||
|
group_lines = []
|
||||||
|
group_start = None
|
||||||
|
group_end = None
|
||||||
|
|
||||||
|
for start, end, text in raw_segs:
|
||||||
|
# Silence gap → flush current group
|
||||||
|
if prev_end is not None and start - prev_end > SILENCE_GAP_S:
|
||||||
|
_flush()
|
||||||
|
|
||||||
|
if group_start is None:
|
||||||
|
group_start = start
|
||||||
|
group_end = end
|
||||||
|
group_lines.append(text)
|
||||||
|
prev_end = end
|
||||||
|
|
||||||
|
if len(group_lines) >= lines_per_group:
|
||||||
|
_flush()
|
||||||
|
|
||||||
|
_flush()
|
||||||
|
|
||||||
return new_segments
|
return new_segments
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user