Files
mediaproc/core/schema/serializers/pipeline.py
2026-03-30 09:53:10 -03:00

93 lines
2.9 KiB
Python

"""
Serializers for detection pipeline runtime models.
Special handling:
- Frame.image (np.ndarray, ephemeral — only metadata serialized)
- TextCandidate.frame (object ref → frame_sequence integer)
Everything else round-trips as a plain dataclass: dataclasses.asdict() to serialize, safe_construct to deserialize.
"""
from __future__ import annotations
import dataclasses
from core.detect.models import (
BoundingBox,
BrandDetection,
BrandStats,
DetectionReport,
Frame,
PipelineStats,
TextCandidate,
)
from ._common import safe_construct, serialize_dataclass, serialize_dataclass_list
# ---------------------------------------------------------------------------
# Frame — metadata only (image is ephemeral, re-extracted from chunks)
# ---------------------------------------------------------------------------
def serialize_frame_meta(frame: Frame) -> dict:
    """Serialize Frame metadata only (no image).

    Args:
        frame: Dataclass instance that has an ``image`` field.

    Returns:
        The ``dataclasses.asdict()`` form of *frame* with the ``"image"``
        key removed. The caller's frame is left untouched.

    Raises:
        KeyError: If the dataclass has no ``image`` field.
    """
    import copy

    # dataclasses.asdict() deep-copies every non-dataclass field value, so
    # calling it on the frame directly would duplicate the entire image
    # buffer only to discard it. Blank the image on a shallow copy first so
    # all remaining fields still go through the regular asdict() conversion.
    stub = copy.copy(frame)
    # object.__setattr__ also works when the dataclass is frozen.
    object.__setattr__(stub, "image", None)
    result = dataclasses.asdict(stub)
    del result["image"]
    return result
def serialize_frames_meta(frames: list[Frame]) -> list[dict]:
    """Return one metadata dict per frame, in input order."""
    metas: list[dict] = []
    for frame in frames:
        metas.append(serialize_frame_meta(frame))
    return metas
# ---------------------------------------------------------------------------
# TextCandidate — frame ref is an object, stored as sequence int
# ---------------------------------------------------------------------------
def serialize_text_candidate(tc: TextCandidate) -> dict:
    """Convert a TextCandidate into a plain dict.

    The frame object reference is replaced by its ``sequence`` value under
    the ``"frame_sequence"`` key; the bounding box is expanded with
    ``dataclasses.asdict()``.
    """
    box = dataclasses.asdict(tc.bbox)
    payload: dict = {"frame_sequence": tc.frame.sequence}
    payload["bbox"] = box
    payload["text"] = tc.text
    payload["ocr_confidence"] = tc.ocr_confidence
    return payload
def serialize_text_candidates(candidates: list[TextCandidate]) -> list[dict]:
    """Serialize each text candidate to a dict, preserving input order."""
    serialized: list[dict] = []
    for candidate in candidates:
        serialized.append(serialize_text_candidate(candidate))
    return serialized
def deserialize_text_candidate(data: dict, frame_map: dict[int, Frame]) -> TextCandidate:
    """Rebuild a TextCandidate from its serialized dict.

    Args:
        data: Dict with ``"frame_sequence"``, ``"bbox"``, ``"text"`` and
            ``"ocr_confidence"`` keys.
        frame_map: Lookup from frame sequence number to the Frame object the
            candidate should reference.

    Raises:
        KeyError: If *data* lacks a required key or *frame_map* has no entry
            for the stored frame sequence.
    """
    # Resolve the frame first, then the bounding box, then the scalar fields.
    fields = {"frame": frame_map[data["frame_sequence"]]}
    fields["bbox"] = safe_construct(BoundingBox, data["bbox"])
    fields["text"] = data["text"]
    fields["ocr_confidence"] = data["ocr_confidence"]
    return TextCandidate(**fields)
def deserialize_text_candidates(data: list[dict], frame_map: dict[int, Frame]) -> list[TextCandidate]:
    """Rebuild every serialized text candidate against the same frame_map, in input order."""
    candidates: list[TextCandidate] = []
    for entry in data:
        candidates.append(deserialize_text_candidate(entry, frame_map))
    return candidates
# ---------------------------------------------------------------------------
# BoundingBox, BrandDetection, PipelineStats, etc — standard dataclasses
# ---------------------------------------------------------------------------
def deserialize_bounding_box(data: dict) -> BoundingBox:
    """Rebuild a BoundingBox from its serialized dict via safe_construct."""
    bounding_box = safe_construct(BoundingBox, data)
    return bounding_box
def deserialize_brand_detection(data: dict) -> BrandDetection:
    """Rebuild a BrandDetection from its serialized dict via safe_construct."""
    detection = safe_construct(BrandDetection, data)
    return detection
def deserialize_pipeline_stats(data: dict) -> PipelineStats:
    """Rebuild a PipelineStats from its serialized dict via safe_construct."""
    stats = safe_construct(PipelineStats, data)
    return stats
def deserialize_detection_report(data: dict) -> DetectionReport:
    """Rebuild a DetectionReport from its serialized dict via safe_construct."""
    report = safe_construct(DetectionReport, data)
    return report