add root readme
This commit is contained in:
88
cht/summary/merger.py
Normal file
88
cht/summary/merger.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""Interleave diarized audio segments with selected screen frames by timestamp.
|
||||
|
||||
Direct port of mts/meetus/transcript_merger.py:merge_transcripts (line 162).
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
# Module-level logger named after this module; none of the functions visible in this section call it.
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def merge(audio_segments: list[dict], frame_segments: list[dict],
|
||||
name_map: dict[str, str] | None = None) -> list[dict]:
|
||||
"""Combine and group by speaker; screen frames break speaker groups.
|
||||
|
||||
`audio_segments`: each {timestamp, text, speaker?}.
|
||||
`frame_segments`: each {timestamp, frame_path}.
|
||||
`name_map`: optional SPEAKER_xx → real name remap, applied to outputs.
|
||||
Returns merged list sorted by timestamp.
|
||||
"""
|
||||
name_map = name_map or {}
|
||||
|
||||
audio = [{**s, "type": "audio"} for s in audio_segments]
|
||||
screen = [{**s, "type": "screen"} for s in frame_segments]
|
||||
all_segs = sorted(audio + screen, key=lambda x: x["timestamp"])
|
||||
|
||||
grouped: list[dict] = []
|
||||
current = None
|
||||
|
||||
def _label(speaker):
|
||||
if not speaker:
|
||||
return None
|
||||
return name_map.get(speaker, speaker)
|
||||
|
||||
for seg in all_segs:
|
||||
if seg["type"] == "screen":
|
||||
if current is not None:
|
||||
grouped.append(current)
|
||||
current = None
|
||||
grouped.append(seg)
|
||||
continue
|
||||
|
||||
speaker = _label(seg.get("speaker"))
|
||||
if current is None:
|
||||
current = {
|
||||
"timestamp": seg["timestamp"],
|
||||
"text": seg["text"],
|
||||
"speaker": speaker,
|
||||
"type": "audio",
|
||||
}
|
||||
elif speaker == current.get("speaker"):
|
||||
current["text"] += " " + seg["text"]
|
||||
else:
|
||||
grouped.append(current)
|
||||
current = {
|
||||
"timestamp": seg["timestamp"],
|
||||
"text": seg["text"],
|
||||
"speaker": speaker,
|
||||
"type": "audio",
|
||||
}
|
||||
|
||||
if current is not None:
|
||||
grouped.append(current)
|
||||
return grouped
|
||||
|
||||
|
||||
def whisperx_to_audio_segments(diarized: dict) -> list[dict]:
    """Convert whisperx JSON segments to the merger's audio format.

    `diarized`: mapping with a `segments` list; each segment may carry
    `start`, `text` and `speaker`.

    Segments whose text is missing, null or whitespace-only are dropped.
    A missing or null `start` becomes timestamp 0.0, and a missing or null
    `segments` entry yields an empty list.

    Returns a list of {timestamp, text, speaker} dicts in input order, with
    `text` stripped and `speaker` set to None when the segment has no label.
    """
    out = []
    # `or` fallbacks treat an explicit null exactly like an absent key, so a
    # JSON `"segments": null` or `"start": null` does not raise.
    for seg in diarized.get("segments") or []:
        text = (seg.get("text") or "").strip()
        if not text:
            continue
        out.append({
            "timestamp": float(seg.get("start") or 0.0),
            "text": text,
            "speaker": seg.get("speaker"),
        })
    return out
|
||||
|
||||
|
||||
def collect_speakers(diarized: dict) -> list[str]:
    """Return the distinct speaker labels present in the diarization, sorted.

    Reads the `speaker` field of every entry under `diarized["segments"]`
    (treated as empty when the key is absent); segments with a missing or
    empty label are ignored.
    """
    segments = diarized.get("segments", [])
    return sorted({label for seg in segments if (label := seg.get("speaker"))})
|
||||
Reference in New Issue
Block a user