""" Internal data models for the chunker pipeline. These are pipeline-internal dataclasses, not schema models. Schema-level ChunkJob is in core/schema/models/jobs.py. Demonstrates: Core data structures (Interview Topic 5). """ from dataclasses import dataclass, field from typing import Any, Dict, List, Optional @dataclass class Chunk: """A time-based segment of the source media file.""" sequence: int start_time: float # seconds end_time: float # seconds source_path: str # path to source file duration: float # end_time - start_time checksum: str = "" # computed after extraction @dataclass class ChunkResult: """Result of processing a single chunk.""" sequence: int success: bool checksum_valid: bool = True processing_time: float = 0.0 error: Optional[str] = None retries: int = 0 worker_id: Optional[str] = None output_file: Optional[str] = None @dataclass class PipelineResult: """Aggregate result of the entire pipeline run.""" total_chunks: int = 0 processed: int = 0 failed: int = 0 retries: int = 0 elapsed_time: float = 0.0 throughput_mbps: float = 0.0 worker_stats: Dict[str, Any] = field(default_factory=dict) errors: List[str] = field(default_factory=list) chunks_in_order: bool = True output_dir: Optional[str] = None chunk_files: List[str] = field(default_factory=list)