""" Serializers for detection pipeline runtime models. Mirrors core/schema/models/detect_pipeline.py. Special handling: - Frame.image (np.ndarray → S3, excluded from JSON) - TextCandidate.frame (object ref → frame_sequence integer) Everything else uses dataclasses.asdict() via safe_construct. """ from __future__ import annotations import dataclasses from core.schema.models.detect_pipeline import ( BoundingBox, BrandDetection, BrandStats, DetectionReport, Frame, PipelineStats, TextCandidate, ) from ._common import safe_construct, serialize_dataclass, serialize_dataclass_list # --------------------------------------------------------------------------- # Frame — image goes to S3 separately # --------------------------------------------------------------------------- def serialize_frame_meta(frame: Frame) -> dict: """Serialize Frame metadata only (no image).""" result = dataclasses.asdict(frame) del result["image"] return result def serialize_frames_with_upload(frames: list[Frame], job_id: str) -> tuple[list[dict], dict[int, str]]: """Upload frame images to S3, return metadata + manifest.""" from detect.checkpoint.frames import save_frames manifest = save_frames(job_id, frames) meta = [serialize_frame_meta(f) for f in frames] return meta, manifest def deserialize_frames_with_download(meta: list[dict], manifest: dict, job_id: str) -> list[Frame]: """Load frames from S3 + metadata.""" from detect.checkpoint.frames import load_frames int_manifest = {int(k): v for k, v in manifest.items()} return load_frames(int_manifest, meta) # --------------------------------------------------------------------------- # TextCandidate — frame ref is an object, stored as sequence int # --------------------------------------------------------------------------- def serialize_text_candidate(tc: TextCandidate) -> dict: bbox_dict = dataclasses.asdict(tc.bbox) result = { "frame_sequence": tc.frame.sequence, "bbox": bbox_dict, "text": tc.text, "ocr_confidence": tc.ocr_confidence, } return result def serialize_text_candidates(candidates: list[TextCandidate]) -> list[dict]: return [serialize_text_candidate(tc) for tc in candidates] def deserialize_text_candidate(data: dict, frame_map: dict[int, Frame]) -> TextCandidate: frame = frame_map[data["frame_sequence"]] bbox = safe_construct(BoundingBox, data["bbox"]) candidate = TextCandidate( frame=frame, bbox=bbox, text=data["text"], ocr_confidence=data["ocr_confidence"], ) return candidate def deserialize_text_candidates(data: list[dict], frame_map: dict[int, Frame]) -> list[TextCandidate]: return [deserialize_text_candidate(d, frame_map) for d in data] # --------------------------------------------------------------------------- # BoundingBox, BrandDetection, PipelineStats, etc — standard dataclasses # --------------------------------------------------------------------------- def deserialize_bounding_box(data: dict) -> BoundingBox: return safe_construct(BoundingBox, data) def deserialize_brand_detection(data: dict) -> BrandDetection: return safe_construct(BrandDetection, data) def deserialize_pipeline_stats(data: dict) -> PipelineStats: return safe_construct(PipelineStats, data) def deserialize_detection_report(data: dict) -> DetectionReport: return safe_construct(DetectionReport, data)