major refactor
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
API endpoints for checkpoint inspection, replay, retry, and GPU proxy.
|
||||
|
||||
GET /detect/checkpoints/{job_id} — list available checkpoints
|
||||
GET /detect/checkpoints/{timeline_id} — list available checkpoints
|
||||
POST /detect/replay — replay from a stage with config overrides
|
||||
POST /detect/retry — queue async retry with different provider
|
||||
POST /detect/replay-stage — replay single stage (fast path)
|
||||
@@ -31,7 +31,7 @@ class CheckpointInfo(BaseModel):
|
||||
|
||||
|
||||
class ScenarioInfo(BaseModel):
|
||||
job_id: str
|
||||
timeline_id: str
|
||||
stage: str
|
||||
scenario_label: str
|
||||
profile_name: str
|
||||
@@ -41,21 +41,21 @@ class ScenarioInfo(BaseModel):
|
||||
|
||||
|
||||
class ReplayRequest(BaseModel):
|
||||
job_id: str
|
||||
timeline_id: str
|
||||
start_stage: str
|
||||
config_overrides: dict | None = None
|
||||
|
||||
|
||||
class ReplayResponse(BaseModel):
|
||||
status: str
|
||||
job_id: str
|
||||
timeline_id: str
|
||||
start_stage: str
|
||||
detections: int = 0
|
||||
brands_found: int = 0
|
||||
|
||||
|
||||
class RetryRequest(BaseModel):
|
||||
job_id: str
|
||||
timeline_id: str
|
||||
config_overrides: dict | None = None
|
||||
start_stage: str = "escalate_vlm"
|
||||
schedule_seconds: float | None = None # delay before execution (off-peak)
|
||||
@@ -64,11 +64,11 @@ class RetryRequest(BaseModel):
|
||||
class RetryResponse(BaseModel):
|
||||
status: str
|
||||
task_id: str
|
||||
job_id: str
|
||||
timeline_id: str
|
||||
|
||||
|
||||
class ReplaySingleStageRequest(BaseModel):
|
||||
job_id: str
|
||||
timeline_id: str
|
||||
stage: str
|
||||
frame_refs: list[int] | None = None
|
||||
config_overrides: dict | None = None
|
||||
@@ -102,15 +102,15 @@ class ReplaySingleStageResponse(BaseModel):
|
||||
|
||||
# --- Endpoints ---
|
||||
|
||||
@router.get("/checkpoints/{job_id}")
|
||||
def list_checkpoints(job_id: str) -> list[CheckpointInfo]:
|
||||
@router.get("/checkpoints/{timeline_id}")
|
||||
def list_checkpoints(timeline_id: str) -> list[CheckpointInfo]:
|
||||
"""List available checkpoint stages for a job."""
|
||||
from detect.checkpoint import list_checkpoints as _list
|
||||
|
||||
try:
|
||||
stages = _list(job_id)
|
||||
stages = _list(timeline_id)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=404, detail=f"No checkpoints for job {job_id}: {e}")
|
||||
raise HTTPException(status_code=404, detail=f"No checkpoints for job {timeline_id}: {e}")
|
||||
|
||||
result = [CheckpointInfo(stage=s) for s in stages]
|
||||
return result
|
||||
@@ -123,7 +123,7 @@ class CheckpointFrameInfo(BaseModel):
|
||||
|
||||
|
||||
class CheckpointData(BaseModel):
|
||||
job_id: str
|
||||
timeline_id: str
|
||||
stage: str
|
||||
profile_name: str
|
||||
video_path: str
|
||||
@@ -135,26 +135,32 @@ class CheckpointData(BaseModel):
|
||||
stage_output_key: str = ""
|
||||
|
||||
|
||||
@router.get("/checkpoints/{job_id}/{stage}", response_model=CheckpointData)
|
||||
def get_checkpoint_data(job_id: str, stage: str):
|
||||
@router.get("/checkpoints/{timeline_id}/{stage}", response_model=CheckpointData)
|
||||
def get_checkpoint_data(timeline_id: str, stage: str):
|
||||
"""Load checkpoint frames + metadata for the editor UI."""
|
||||
from core.db.detect import get_stage_checkpoint
|
||||
from uuid import UUID
|
||||
from core.db.tables import Timeline, Checkpoint
|
||||
from core.db.connection import get_session
|
||||
from core.db.checkpoint import list_checkpoints
|
||||
from detect.checkpoint.frames import load_frames_b64
|
||||
|
||||
checkpoint = get_stage_checkpoint(job_id, stage)
|
||||
if not checkpoint:
|
||||
raise HTTPException(status_code=404, detail=f"No checkpoint for {job_id}/{stage}")
|
||||
with get_session() as session:
|
||||
timeline = session.get(Timeline, UUID(timeline_id))
|
||||
if not timeline:
|
||||
raise HTTPException(status_code=404, detail=f"Timeline not found: {timeline_id}")
|
||||
|
||||
raw_manifest = checkpoint.frames_manifest or {}
|
||||
manifest = {int(k): v for k, v in raw_manifest.items()}
|
||||
frame_metadata = checkpoint.frames_meta or []
|
||||
checkpoints = list_checkpoints(session, UUID(timeline_id))
|
||||
if not checkpoints:
|
||||
raise HTTPException(status_code=404, detail=f"No checkpoints for timeline {timeline_id}")
|
||||
# Prefer a checkpoint that has this stage's output; fall back to latest
|
||||
checkpoint = next(
|
||||
(c for c in reversed(checkpoints) if stage in (c.stage_outputs or {})),
|
||||
checkpoints[-1],
|
||||
)
|
||||
|
||||
# Only load filtered frames if available, otherwise all
|
||||
filtered = set(checkpoint.filtered_frame_sequences or [])
|
||||
if filtered:
|
||||
manifest = {k: v for k, v in manifest.items() if k in filtered}
|
||||
|
||||
frames_b64 = load_frames_b64(manifest, frame_metadata)
|
||||
raw_manifest = timeline.frames_manifest or {}
|
||||
manifest = {int(k): v for k, v in raw_manifest.items()}
|
||||
frames_b64 = load_frames_b64(manifest, timeline.frames_meta or [])
|
||||
|
||||
frame_list = [
|
||||
CheckpointFrameInfo(seq=f["seq"], timestamp=f["timestamp"], jpeg_b64=f["jpeg_b64"])
|
||||
@@ -162,38 +168,44 @@ def get_checkpoint_data(job_id: str, stage: str):
|
||||
]
|
||||
|
||||
return CheckpointData(
|
||||
job_id=str(checkpoint.job_id),
|
||||
stage=checkpoint.stage,
|
||||
profile_name=checkpoint.profile_name,
|
||||
video_path=checkpoint.video_path,
|
||||
timeline_id=timeline_id,
|
||||
stage=stage,
|
||||
profile_name=timeline.profile_name,
|
||||
video_path=timeline.source_video,
|
||||
is_scenario=checkpoint.is_scenario,
|
||||
scenario_label=checkpoint.scenario_label,
|
||||
frames=frame_list,
|
||||
stats=checkpoint.stats or {},
|
||||
config_snapshot=checkpoint.config_snapshot or {},
|
||||
stage_output_key=checkpoint.stage_output_key or "",
|
||||
config_snapshot=checkpoint.config_overrides or {},
|
||||
stage_output_key=stage,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/scenarios", response_model=list[ScenarioInfo])
|
||||
def list_scenarios_endpoint():
|
||||
"""List all available scenarios (bookmarked checkpoints)."""
|
||||
from core.db.detect import list_scenarios
|
||||
from core.db.tables import Timeline
|
||||
from core.db.connection import get_session
|
||||
from core.db.checkpoint import list_scenarios
|
||||
|
||||
scenarios = list_scenarios()
|
||||
result = []
|
||||
for s in scenarios:
|
||||
manifest = s.frames_manifest or {}
|
||||
info = ScenarioInfo(
|
||||
job_id=str(s.job_id),
|
||||
stage=s.stage,
|
||||
scenario_label=s.scenario_label,
|
||||
profile_name=s.profile_name,
|
||||
video_path=s.video_path,
|
||||
frame_count=len(manifest),
|
||||
created_at=str(s.created_at) if s.created_at else "",
|
||||
)
|
||||
result.append(info)
|
||||
with get_session() as session:
|
||||
scenarios = list_scenarios(session)
|
||||
result = []
|
||||
for s in scenarios:
|
||||
timeline = session.get(Timeline, s.timeline_id)
|
||||
if not timeline:
|
||||
continue
|
||||
last_stage = next(reversed(s.stage_outputs), "") if s.stage_outputs else ""
|
||||
info = ScenarioInfo(
|
||||
timeline_id=str(s.timeline_id),
|
||||
stage=last_stage,
|
||||
scenario_label=s.scenario_label,
|
||||
profile_name=timeline.profile_name,
|
||||
video_path=timeline.source_video,
|
||||
frame_count=len(timeline.frames_manifest or {}),
|
||||
created_at=str(s.created_at) if s.created_at else "",
|
||||
)
|
||||
result.append(info)
|
||||
return result
|
||||
|
||||
|
||||
@@ -204,7 +216,7 @@ def replay(req: ReplayRequest):
|
||||
|
||||
try:
|
||||
result = replay_from(
|
||||
job_id=req.job_id,
|
||||
timeline_id=req.timeline_id,
|
||||
start_stage=req.start_stage,
|
||||
config_overrides=req.config_overrides,
|
||||
)
|
||||
@@ -219,7 +231,7 @@ def replay(req: ReplayRequest):
|
||||
|
||||
response = ReplayResponse(
|
||||
status="completed",
|
||||
job_id=req.job_id,
|
||||
timeline_id=req.timeline_id,
|
||||
start_stage=req.start_stage,
|
||||
detections=len(detections),
|
||||
brands_found=brands_found,
|
||||
@@ -233,7 +245,7 @@ def retry(req: RetryRequest):
|
||||
from detect.checkpoint.tasks import retry_candidates
|
||||
|
||||
kwargs = {
|
||||
"job_id": req.job_id,
|
||||
"timeline_id": req.timeline_id,
|
||||
"config_overrides": req.config_overrides,
|
||||
"start_stage": req.start_stage,
|
||||
}
|
||||
@@ -246,7 +258,7 @@ def retry(req: RetryRequest):
|
||||
response = RetryResponse(
|
||||
status="queued",
|
||||
task_id=task.id,
|
||||
job_id=req.job_id,
|
||||
timeline_id=req.timeline_id,
|
||||
)
|
||||
return response
|
||||
|
||||
@@ -258,7 +270,7 @@ def replay_single_stage(req: ReplaySingleStageRequest):
|
||||
|
||||
try:
|
||||
result = _replay(
|
||||
job_id=req.job_id,
|
||||
timeline_id=req.timeline_id,
|
||||
stage=req.stage,
|
||||
frame_refs=req.frame_refs,
|
||||
config_overrides=req.config_overrides,
|
||||
|
||||
@@ -25,11 +25,10 @@ from .grpc import (
|
||||
ProgressUpdate,
|
||||
WorkerStatus,
|
||||
)
|
||||
from .job import (
|
||||
Job, JobStatus, RunType,
|
||||
Timeline, Checkpoint,
|
||||
BrandSource, Brand,
|
||||
)
|
||||
from .job import Job, JobStatus, RunType
|
||||
from .timeline import Timeline
|
||||
from .checkpoint import Checkpoint
|
||||
from .brand import BrandSource, Brand
|
||||
from .media import AssetStatus, MediaAsset
|
||||
from .presets import BUILTIN_PRESETS, TranscodePreset
|
||||
from .detect import DETECT_VIEWS # noqa: F401 — discovered by modelgen generic loader
|
||||
|
||||
38
core/schema/models/brand.py
Normal file
38
core/schema/models/brand.py
Normal file
@@ -0,0 +1,38 @@
|
||||
"""Brand schema — source of truth for brand discovery."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
from uuid import UUID
|
||||
|
||||
|
||||
class BrandSource(str, Enum):
|
||||
OCR = "ocr"
|
||||
VLM = "local_vlm"
|
||||
CLOUD = "cloud_llm"
|
||||
MANUAL = "manual"
|
||||
|
||||
|
||||
@dataclass
|
||||
class Brand:
|
||||
"""
|
||||
A brand discovered or registered in the system.
|
||||
|
||||
Airings track where/when the brand appeared — each airing
|
||||
references a timeline and a frame range.
|
||||
"""
|
||||
|
||||
id: UUID
|
||||
canonical_name: str
|
||||
aliases: List[str] = field(default_factory=list)
|
||||
source: BrandSource = BrandSource.OCR # how first discovered
|
||||
confirmed: bool = False
|
||||
|
||||
# Airings — JSONB array of appearances
|
||||
# [{timeline_id, frame_start, frame_end, confidence, source, timestamp}]
|
||||
airings: List[Dict[str, Any]] = field(default_factory=list)
|
||||
total_airings: int = 0
|
||||
|
||||
created_at: Optional[datetime] = None
|
||||
updated_at: Optional[datetime] = None
|
||||
38
core/schema/models/checkpoint.py
Normal file
38
core/schema/models/checkpoint.py
Normal file
@@ -0,0 +1,38 @@
|
||||
"""Checkpoint schema — source of truth for pipeline state snapshots."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Optional
|
||||
from uuid import UUID
|
||||
|
||||
|
||||
@dataclass
|
||||
class Checkpoint:
|
||||
"""
|
||||
A snapshot of pipeline state on a timeline.
|
||||
|
||||
Stage outputs stored as JSONB — each stage serializes to JSON,
|
||||
the checkpoint stores it without knowing the shape.
|
||||
|
||||
parent_id forms a tree: multiple children from the same parent
|
||||
= different config tries from the same starting point.
|
||||
"""
|
||||
|
||||
id: UUID
|
||||
timeline_id: UUID
|
||||
parent_id: Optional[UUID] = None # null = root checkpoint
|
||||
|
||||
# Stage outputs — JSONB per stage, opaque to the checkpoint layer
|
||||
stage_outputs: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Config that produced this checkpoint
|
||||
config_overrides: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Pipeline state
|
||||
stats: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Scenario bookmark
|
||||
is_scenario: bool = False
|
||||
scenario_label: str = ""
|
||||
|
||||
created_at: Optional[datetime] = None
|
||||
@@ -1,177 +0,0 @@
|
||||
"""
|
||||
Detection Job and Checkpoint Schema Definitions
|
||||
|
||||
Source of truth for detection pipeline job tracking and stage checkpoints.
|
||||
Follows the TranscodeJob/ChunkJob pattern.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
from uuid import UUID
|
||||
|
||||
|
||||
class DetectJobStatus(str, Enum):
|
||||
PENDING = "pending"
|
||||
RUNNING = "running"
|
||||
PAUSED = "paused"
|
||||
COMPLETED = "completed"
|
||||
FAILED = "failed"
|
||||
CANCELLED = "cancelled"
|
||||
|
||||
|
||||
class RunType(str, Enum):
|
||||
INITIAL = "initial"
|
||||
REPLAY = "replay"
|
||||
RETRY = "retry"
|
||||
|
||||
|
||||
@dataclass
|
||||
class DetectJob:
|
||||
"""
|
||||
A detection pipeline job.
|
||||
|
||||
Each invocation of the pipeline (initial run, replay, retry) creates a DetectJob.
|
||||
Jobs for the same source video are linked via parent_job_id.
|
||||
"""
|
||||
|
||||
id: UUID
|
||||
|
||||
# Input
|
||||
source_asset_id: UUID
|
||||
video_path: str
|
||||
profile_name: str = "soccer_broadcast"
|
||||
|
||||
# Run lineage
|
||||
parent_job_id: Optional[UUID] = None # links all runs for the same source
|
||||
run_type: RunType = RunType.INITIAL
|
||||
replay_from_stage: Optional[str] = None # null for initial runs
|
||||
config_overrides: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Status
|
||||
status: DetectJobStatus = DetectJobStatus.PENDING
|
||||
current_stage: Optional[str] = None
|
||||
progress: float = 0.0
|
||||
error_message: Optional[str] = None
|
||||
|
||||
# Results summary
|
||||
total_detections: int = 0
|
||||
brands_found: int = 0
|
||||
cloud_llm_calls: int = 0
|
||||
estimated_cost_usd: float = 0.0
|
||||
|
||||
# Worker tracking
|
||||
celery_task_id: Optional[str] = None
|
||||
priority: int = 0
|
||||
|
||||
# Timestamps
|
||||
created_at: Optional[datetime] = None
|
||||
started_at: Optional[datetime] = None
|
||||
completed_at: Optional[datetime] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class Timeline:
|
||||
"""
|
||||
The frame sequence from a source video.
|
||||
|
||||
Independent of stages — exists before any stage runs.
|
||||
Stages annotate the timeline, they don't own it.
|
||||
Frames are stored in MinIO as JPEGs.
|
||||
"""
|
||||
|
||||
id: UUID
|
||||
source_asset_id: Optional[UUID] = None
|
||||
source_video: str = ""
|
||||
profile_name: str = ""
|
||||
fps: float = 2.0
|
||||
|
||||
# Frame metadata (images in MinIO, metadata here)
|
||||
frames_prefix: str = "" # s3: timelines/{id}/frames/
|
||||
frames_manifest: Dict[int, str] = field(default_factory=dict) # seq → s3 key
|
||||
frames_meta: List[Dict[str, Any]] = field(default_factory=list)
|
||||
|
||||
created_at: Optional[datetime] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class Checkpoint:
|
||||
"""
|
||||
A snapshot of pipeline state on a timeline.
|
||||
|
||||
Stage outputs stored as JSONB — each stage serializes to JSON,
|
||||
the checkpoint stores it without knowing the shape.
|
||||
|
||||
parent_id forms a tree: multiple children from the same parent
|
||||
= different config tries from the same starting point.
|
||||
"""
|
||||
|
||||
id: UUID
|
||||
timeline_id: UUID
|
||||
parent_id: Optional[UUID] = None # null = root checkpoint
|
||||
|
||||
# Stage outputs — JSONB per stage, opaque to the checkpoint layer
|
||||
stage_outputs: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Config that produced this checkpoint
|
||||
config_overrides: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Pipeline state
|
||||
stats: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Scenario bookmark
|
||||
is_scenario: bool = False
|
||||
scenario_label: str = ""
|
||||
|
||||
created_at: Optional[datetime] = None
|
||||
|
||||
|
||||
class BrandSource(str, Enum):
|
||||
"""How a brand was first identified."""
|
||||
OCR = "ocr"
|
||||
VLM = "local_vlm"
|
||||
CLOUD = "cloud_llm"
|
||||
MANUAL = "manual" # user-added via UI
|
||||
|
||||
|
||||
@dataclass
|
||||
class KnownBrand:
|
||||
"""
|
||||
A brand discovered or registered in the system.
|
||||
|
||||
Global — not per-source. Accumulates across all pipeline runs.
|
||||
Aliases enable fuzzy matching without re-escalating to VLM.
|
||||
"""
|
||||
|
||||
id: UUID
|
||||
canonical_name: str # normalized display name
|
||||
aliases: List[str] = field(default_factory=list) # known spellings/variants
|
||||
first_source: BrandSource = BrandSource.OCR
|
||||
total_occurrences: int = 0
|
||||
confirmed: bool = False # manually confirmed by user
|
||||
|
||||
created_at: Optional[datetime] = None
|
||||
updated_at: Optional[datetime] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class SourceBrandSighting:
|
||||
"""
|
||||
A brand seen in a specific source (video/asset).
|
||||
|
||||
Per-source session cache — avoids re-escalating the same brand
|
||||
on subsequent frames or re-runs of the same source.
|
||||
"""
|
||||
|
||||
id: UUID
|
||||
source_asset_id: UUID # the video this sighting belongs to
|
||||
brand_id: UUID # FK to KnownBrand
|
||||
brand_name: str # denormalized for fast lookup
|
||||
first_seen_timestamp: float = 0.0
|
||||
last_seen_timestamp: float = 0.0
|
||||
occurrences: int = 0
|
||||
detection_source: BrandSource = BrandSource.OCR
|
||||
avg_confidence: float = 0.0
|
||||
|
||||
created_at: Optional[datetime] = None
|
||||
@@ -1,14 +1,9 @@
|
||||
"""
|
||||
Job, Timeline, and Checkpoint Schema Definitions
|
||||
|
||||
Source of truth for pipeline jobs, timelines, and checkpoints.
|
||||
Generates: SQLModel (core/db/models.py), TypeScript via modelgen.
|
||||
"""
|
||||
"""Job schema — source of truth for pipeline jobs."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, Optional
|
||||
from uuid import UUID
|
||||
|
||||
|
||||
@@ -68,91 +63,3 @@ class Job:
|
||||
created_at: Optional[datetime] = None
|
||||
started_at: Optional[datetime] = None
|
||||
completed_at: Optional[datetime] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class Timeline:
|
||||
"""
|
||||
The frame sequence from a source video.
|
||||
|
||||
Independent of stages — exists before any stage runs.
|
||||
Frames stored in MinIO as JPEGs, metadata here.
|
||||
One timeline per job.
|
||||
"""
|
||||
|
||||
id: UUID
|
||||
source_asset_id: Optional[UUID] = None
|
||||
source_video: str = ""
|
||||
profile_name: str = ""
|
||||
fps: float = 2.0
|
||||
|
||||
frames_prefix: str = "" # s3: timeline/{id}/frames/
|
||||
frames_manifest: Dict[int, str] = field(default_factory=dict) # seq → s3 key
|
||||
frames_meta: List[Dict[str, Any]] = field(default_factory=list)
|
||||
|
||||
created_at: Optional[datetime] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class Checkpoint:
|
||||
"""
|
||||
A snapshot of pipeline state on a timeline.
|
||||
|
||||
Stage outputs stored as JSONB — each stage serializes to JSON,
|
||||
the checkpoint stores it without knowing the shape.
|
||||
|
||||
parent_id forms a tree: multiple children from the same parent
|
||||
= different config tries from the same starting point.
|
||||
"""
|
||||
|
||||
id: UUID
|
||||
timeline_id: UUID
|
||||
parent_id: Optional[UUID] = None # null = root checkpoint
|
||||
|
||||
# Stage outputs — JSONB per stage, opaque to the checkpoint layer
|
||||
stage_outputs: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Config that produced this checkpoint
|
||||
config_overrides: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Pipeline state
|
||||
stats: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Scenario bookmark
|
||||
is_scenario: bool = False
|
||||
scenario_label: str = ""
|
||||
|
||||
created_at: Optional[datetime] = None
|
||||
|
||||
|
||||
# --- Brands ---
|
||||
|
||||
class BrandSource(str, Enum):
|
||||
OCR = "ocr"
|
||||
VLM = "local_vlm"
|
||||
CLOUD = "cloud_llm"
|
||||
MANUAL = "manual"
|
||||
|
||||
|
||||
@dataclass
|
||||
class Brand:
|
||||
"""
|
||||
A brand discovered or registered in the system.
|
||||
|
||||
Airings track where/when the brand appeared — each airing
|
||||
references a timeline and a frame range.
|
||||
"""
|
||||
|
||||
id: UUID
|
||||
canonical_name: str
|
||||
aliases: List[str] = field(default_factory=list)
|
||||
source: BrandSource = BrandSource.OCR # how first discovered
|
||||
confirmed: bool = False
|
||||
|
||||
# Airings — JSONB array of appearances
|
||||
# [{timeline_id, frame_start, frame_end, confidence, source, timestamp}]
|
||||
airings: List[Dict[str, Any]] = field(default_factory=list)
|
||||
total_airings: int = 0
|
||||
|
||||
created_at: Optional[datetime] = None
|
||||
updated_at: Optional[datetime] = None
|
||||
|
||||
@@ -1,13 +1,9 @@
|
||||
"""
|
||||
Detection pipeline runtime models.
|
||||
|
||||
These are the data structures that flow between LangGraph nodes.
|
||||
They contain runtime types (np.ndarray) so they are NOT generated
|
||||
by modelgen — they live here for the schema to be the complete
|
||||
map of the application, but modelgen skips them.
|
||||
|
||||
Wire-format models (SSE events) are in detect.py.
|
||||
DB models (jobs, checkpoints) are in detect_jobs.py.
|
||||
These are the data structures that flow between pipeline stages.
|
||||
They contain runtime types (np.ndarray) so modelgen skips them —
|
||||
not generated to SQLModel or TypeScript.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -89,10 +85,3 @@ class DetectionReport:
|
||||
brands: dict[str, BrandStats] = field(default_factory=dict)
|
||||
timeline: list[BrandDetection] = field(default_factory=list)
|
||||
pipeline_stats: PipelineStats = field(default_factory=PipelineStats)
|
||||
|
||||
|
||||
# Not in DATACLASSES — modelgen skips these (they contain np.ndarray)
|
||||
RUNTIME_MODELS = [
|
||||
Frame, BoundingBox, TextCandidate, BrandDetection,
|
||||
BrandStats, PipelineStats, DetectionReport,
|
||||
]
|
||||
29
core/schema/models/timeline.py
Normal file
29
core/schema/models/timeline.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""Timeline schema — source of truth for frame sequences."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
from uuid import UUID
|
||||
|
||||
|
||||
@dataclass
|
||||
class Timeline:
|
||||
"""
|
||||
The frame sequence from a source video.
|
||||
|
||||
Independent of stages — exists before any stage runs.
|
||||
Frames stored in MinIO as JPEGs, metadata here.
|
||||
One timeline per job.
|
||||
"""
|
||||
|
||||
id: UUID
|
||||
source_asset_id: Optional[UUID] = None
|
||||
source_video: str = ""
|
||||
profile_name: str = ""
|
||||
fps: float = 2.0
|
||||
|
||||
frames_prefix: str = "" # s3: timeline/{id}/frames/
|
||||
frames_manifest: Dict[int, str] = field(default_factory=dict) # seq → s3 key
|
||||
frames_meta: List[Dict[str, Any]] = field(default_factory=list)
|
||||
|
||||
created_at: Optional[datetime] = None
|
||||
@@ -1,8 +1,6 @@
|
||||
"""
|
||||
Serializers for detection pipeline runtime models.
|
||||
|
||||
Mirrors core/schema/models/detect_pipeline.py.
|
||||
|
||||
Special handling:
|
||||
- Frame.image (np.ndarray → S3, excluded from JSON)
|
||||
- TextCandidate.frame (object ref → frame_sequence integer)
|
||||
@@ -13,7 +11,7 @@ from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
|
||||
from core.schema.models.detect_pipeline import (
|
||||
from core.schema.models.pipeline import (
|
||||
BoundingBox,
|
||||
BrandDetection,
|
||||
BrandStats,
|
||||
@@ -59,13 +57,12 @@ def deserialize_frames_with_download(meta: list[dict], manifest: dict, job_id: s
|
||||
|
||||
def serialize_text_candidate(tc: TextCandidate) -> dict:
|
||||
bbox_dict = dataclasses.asdict(tc.bbox)
|
||||
result = {
|
||||
return {
|
||||
"frame_sequence": tc.frame.sequence,
|
||||
"bbox": bbox_dict,
|
||||
"text": tc.text,
|
||||
"ocr_confidence": tc.ocr_confidence,
|
||||
}
|
||||
return result
|
||||
|
||||
|
||||
def serialize_text_candidates(candidates: list[TextCandidate]) -> list[dict]:
|
||||
@@ -75,13 +72,12 @@ def serialize_text_candidates(candidates: list[TextCandidate]) -> list[dict]:
|
||||
def deserialize_text_candidate(data: dict, frame_map: dict[int, Frame]) -> TextCandidate:
|
||||
frame = frame_map[data["frame_sequence"]]
|
||||
bbox = safe_construct(BoundingBox, data["bbox"])
|
||||
candidate = TextCandidate(
|
||||
return TextCandidate(
|
||||
frame=frame,
|
||||
bbox=bbox,
|
||||
text=data["text"],
|
||||
ocr_confidence=data["ocr_confidence"],
|
||||
)
|
||||
return candidate
|
||||
|
||||
|
||||
def deserialize_text_candidates(data: list[dict], frame_map: dict[int, Frame]) -> list[TextCandidate]:
|
||||
Reference in New Issue
Block a user