105 lines
3.3 KiB
Python
105 lines
3.3 KiB
Python
"""
|
|
Serializers for detection pipeline runtime models.
|
|
|
|
Special handling:
|
|
- Frame.image (np.ndarray → S3, excluded from JSON)
|
|
- TextCandidate.frame (object ref → frame_sequence integer)
|
|
Everything else uses dataclasses.asdict() via safe_construct.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import dataclasses
|
|
|
|
from core.schema.models.pipeline import (
|
|
BoundingBox,
|
|
BrandDetection,
|
|
BrandStats,
|
|
DetectionReport,
|
|
Frame,
|
|
PipelineStats,
|
|
TextCandidate,
|
|
)
|
|
from ._common import safe_construct, serialize_dataclass, serialize_dataclass_list
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Frame — image goes to S3 separately
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def serialize_frame_meta(frame: Frame) -> dict:
    """Serialize Frame metadata only (no image).

    The result equals ``dataclasses.asdict(frame)`` with the ``"image"`` key
    removed. ``asdict`` deep-copies every non-dataclass field value, so
    calling it on the frame directly would duplicate the whole image buffer
    just to discard the copy. The conversion therefore runs on a shallow
    copy whose ``image`` has been replaced by ``None``; every other field is
    converted exactly as ``asdict`` would convert it on the original.

    Args:
        frame: Dataclass instance with an ``image`` field. It is not mutated.

    Returns:
        A new dict of the frame's fields, without ``"image"``.
    """
    import copy

    # Shallow copy: shares the field values and does not re-run __init__.
    stripped = copy.copy(frame)
    # object.__setattr__ so the swap also works on a frozen dataclass.
    object.__setattr__(stripped, "image", None)

    result = dataclasses.asdict(stripped)
    del result["image"]
    return result
|
|
|
|
|
|
def serialize_frames_with_upload(frames: list[Frame], job_id: str) -> tuple[list[dict], dict[int, str]]:
    """Upload frame images to S3 and serialize the remaining metadata.

    Args:
        frames: Frames whose images are handed to ``save_frames``.
        job_id: Passed through to ``save_frames`` as its first argument.

    Returns:
        ``(meta, manifest)``: ``meta`` holds one ``serialize_frame_meta``
        dict per frame, in input order; ``manifest`` is the value returned
        by ``save_frames(job_id, frames)``.
    """
    # Function-scope import, kept local to this helper.
    from detect.checkpoint.frames import save_frames

    # Upload first, then build the image-free metadata list.
    uploaded = save_frames(job_id, frames)
    return list(map(serialize_frame_meta, frames)), uploaded
|
|
|
|
|
|
def deserialize_frames_with_download(meta: list[dict], manifest: dict, job_id: str) -> list[Frame]:
    """Load frames from S3 using the stored metadata and manifest.

    Args:
        meta: Frame metadata dicts, forwarded unchanged to ``load_frames``.
        manifest: Mapping whose keys are converted with ``int()`` before use
            (presumably because they come back from JSON as strings —
            confirm against the caller).
        job_id: NOTE(review): accepted but not used in this function —
            confirm whether ``load_frames`` should receive it.

    Returns:
        Whatever ``load_frames(int_keyed_manifest, meta)`` returns.

    Raises:
        ValueError: If a manifest key cannot be converted by ``int()``.
    """
    # Function-scope import, kept local to this helper.
    from detect.checkpoint.frames import load_frames

    keyed_by_int = {}
    for raw_key, location in manifest.items():
        keyed_by_int[int(raw_key)] = location
    return load_frames(keyed_by_int, meta)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# TextCandidate — frame ref is an object, stored as sequence int
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def serialize_text_candidate(tc: TextCandidate) -> dict:
    """Convert a TextCandidate into a plain dict.

    The frame object itself is not stored; only ``tc.frame.sequence`` is
    written, under the ``"frame_sequence"`` key. The bounding box is expanded
    with ``dataclasses.asdict``.

    Returns:
        Dict with keys ``frame_sequence``, ``bbox``, ``text`` and
        ``ocr_confidence``, in that order.
    """
    # Convert the bounding box first, before any other attribute is read.
    box = dataclasses.asdict(tc.bbox)
    return dict(
        frame_sequence=tc.frame.sequence,
        bbox=box,
        text=tc.text,
        ocr_confidence=tc.ocr_confidence,
    )
|
|
|
|
|
|
def serialize_text_candidates(candidates: list[TextCandidate]) -> list[dict]:
    """Serialize each candidate with ``serialize_text_candidate``, preserving order."""
    return list(map(serialize_text_candidate, candidates))
|
|
|
|
|
|
def deserialize_text_candidate(data: dict, frame_map: dict[int, Frame]) -> TextCandidate:
    """Rebuild a TextCandidate from its serialized dict.

    Args:
        data: Dict with ``frame_sequence``, ``bbox``, ``text`` and
            ``ocr_confidence`` keys.
        frame_map: Lookup from frame sequence number to Frame, used to turn
            the stored ``frame_sequence`` back into an object reference.

    Returns:
        A TextCandidate whose ``bbox`` is built with
        ``safe_construct(BoundingBox, data["bbox"])``.

    Raises:
        KeyError: If a required key is missing from ``data`` or the frame
            sequence is not present in ``frame_map``.
    """
    # Values are evaluated top to bottom: frame lookup first, then the bbox.
    fields = {
        "frame": frame_map[data["frame_sequence"]],
        "bbox": safe_construct(BoundingBox, data["bbox"]),
        "text": data["text"],
        "ocr_confidence": data["ocr_confidence"],
    }
    return TextCandidate(**fields)
|
|
|
|
|
|
def deserialize_text_candidates(data: list[dict], frame_map: dict[int, Frame]) -> list[TextCandidate]:
    """Rebuild every serialized candidate against the same ``frame_map``, in order."""
    restored = []
    for item in data:
        restored.append(deserialize_text_candidate(item, frame_map))
    return restored
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# BoundingBox, BrandDetection, PipelineStats, etc — standard dataclasses
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def deserialize_bounding_box(data: dict) -> BoundingBox:
    """Build a BoundingBox from ``data`` by delegating to ``safe_construct``."""
    bounding_box = safe_construct(BoundingBox, data)
    return bounding_box
|
|
|
|
|
|
def deserialize_brand_detection(data: dict) -> BrandDetection:
    """Build a BrandDetection from ``data`` by delegating to ``safe_construct``."""
    detection = safe_construct(BrandDetection, data)
    return detection
|
|
|
|
|
|
def deserialize_pipeline_stats(data: dict) -> PipelineStats:
    """Build a PipelineStats from ``data`` by delegating to ``safe_construct``."""
    stats = safe_construct(PipelineStats, data)
    return stats
|
|
|
|
|
|
def deserialize_detection_report(data: dict) -> DetectionReport:
    """Build a DetectionReport from ``data`` by delegating to ``safe_construct``.

    No conversion of nested values happens here; how nested entries are
    handled depends entirely on ``safe_construct``.
    """
    report = safe_construct(DetectionReport, data)
    return report
|