"""
|
|
Internal data models for the chunker pipeline.
|
|
|
|
These are pipeline-internal dataclasses, not schema models.
|
|
Schema-level ChunkJob is in core/schema/models/jobs.py.
|
|
|
|
Demonstrates: Core data structures (Interview Topic 5).
|
|
"""
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class Chunk:
    """A time-based segment of the source media file.

    Produced by the splitting stage; ``checksum`` starts empty and is
    filled in once the chunk's bytes have actually been extracted.
    """

    sequence: int          # ordering index of this chunk within the source
    start_time: float      # seconds
    end_time: float        # seconds
    source_path: str       # path to source file
    duration: float        # end_time - start_time
    checksum: str = ""     # computed after extraction
@dataclass
class ChunkResult:
    """Result of processing a single chunk.

    ``sequence`` ties the result back to the originating Chunk; only the
    first two fields are required, everything else defaults to the
    "clean, no-retry" case.
    """

    sequence: int                      # which chunk this result belongs to
    success: bool                      # whether processing completed
    checksum_valid: bool = True        # integrity-check outcome
    processing_time: float = 0.0       # seconds spent on this chunk
    error: Optional[str] = None        # error message when success is False
    retries: int = 0                   # retry attempts consumed
    worker_id: Optional[str] = None    # identifier of the processing worker
    output_file: Optional[str] = None  # path to the produced output, if any
@dataclass
class PipelineResult:
    """Aggregate result of the entire pipeline run.

    All fields default to an "empty run" so the object can be created up
    front and filled in as the pipeline progresses. Mutable containers use
    ``field(default_factory=...)`` so instances never share state.
    """

    total_chunks: int = 0          # chunks planned for this run
    processed: int = 0             # chunks completed successfully
    failed: int = 0                # chunks that ultimately failed
    retries: int = 0               # total retry attempts across all chunks
    elapsed_time: float = 0.0      # wall-clock seconds for the whole run
    throughput_mbps: float = 0.0   # aggregate throughput
    worker_stats: Dict[str, Any] = field(default_factory=dict)  # per-worker metrics
    errors: List[str] = field(default_factory=list)             # collected error messages
    chunks_in_order: bool = True   # whether output chunks arrived in sequence
    output_dir: Optional[str] = None                            # directory holding outputs
    chunk_files: List[str] = field(default_factory=list)        # produced chunk file paths