chunker and ui
This commit is contained in:
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
0
tests/chunker/__init__.py
Normal file
0
tests/chunker/__init__.py
Normal file
76
tests/chunker/conftest.py
Normal file
76
tests/chunker/conftest.py
Normal file
@@ -0,0 +1,76 @@
|
||||
"""
|
||||
Shared fixtures for chunker tests.
|
||||
|
||||
Demonstrates: TDD and unit testing best practices (Interview Topic 8) — fixtures, temp files.
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker.models import Chunk, ChunkResult
|
||||
|
||||
|
||||
@pytest.fixture
def temp_file():
    """Yield a factory that writes temp files with known content.

    Every file the factory creates is deleted during teardown.
    """
    created = []

    def _create(content: bytes = b"x" * 4096):
        handle = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        handle.write(content)
        handle.close()
        created.append(handle.name)
        return handle.name

    yield _create

    # Teardown: remove everything the factory produced during the test.
    for name in created:
        if os.path.exists(name):
            os.unlink(name)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_chunk(temp_file):
    """A single time-based Chunk (0s-10s) backed by a small temp file."""
    source = temp_file(b"x" * 1024)
    return Chunk(
        sequence=0,
        start_time=0.0,
        end_time=10.0,
        source_path=source,
        duration=10.0,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def make_chunk(temp_file):
    """Factory for time-based chunks whose time range follows from the sequence number."""
    source = temp_file(b"x" * 1024)

    def _make(sequence: int, duration: float = 10.0) -> Chunk:
        offset = sequence * duration
        return Chunk(
            sequence=sequence,
            start_time=offset,
            end_time=offset + duration,
            source_path=source,
            duration=duration,
        )

    return _make
|
||||
|
||||
|
||||
@pytest.fixture
def make_result():
    """Factory for ChunkResult objects with configurable success flag and timing."""

    def _make(sequence: int, success: bool = True, processing_time: float = 0.01) -> ChunkResult:
        return ChunkResult(
            sequence=sequence,
            success=success,
            processing_time=processing_time,
        )

    return _make
|
||||
149
tests/chunker/test_chunker.py
Normal file
149
tests/chunker/test_chunker.py
Normal file
@@ -0,0 +1,149 @@
|
||||
"""
|
||||
Tests for Chunker — time-based segmentation, chunk counts, sequence numbers, generator behavior.
|
||||
|
||||
Demonstrates: TDD (Interview Topic 8) — parametrized tests, edge cases, mocking.
|
||||
"""
|
||||
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker import Chunker
|
||||
from core.chunker.exceptions import ChunkReadError
|
||||
|
||||
|
||||
def mock_probe(duration):
    """Build a MagicMock standing in for a probe_file result with the given duration."""
    probe = MagicMock()
    probe.duration = duration
    return probe
|
||||
|
||||
|
||||
class TestChunker:
    @patch("core.chunker.chunker.probe_file")
    def test_basic_chunking(self, mock_pf, temp_file):
        """A 30s file with 10s chunks splits into three chunks with correct bounds."""
        path = temp_file(b"x" * 1000)
        mock_pf.return_value = mock_probe(30.0)

        produced = list(Chunker(path, chunk_duration=10.0).chunks())

        assert len(produced) == 3
        assert produced[0].start_time == 0.0
        assert produced[0].end_time == 10.0
        assert produced[0].duration == 10.0
        assert produced[1].start_time == 10.0
        assert produced[2].start_time == 20.0

    @patch("core.chunker.chunker.probe_file")
    def test_sequence_numbers(self, mock_pf, temp_file):
        """Sequence numbers count up from zero with no gaps."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(40.0)

        produced = list(Chunker(path, chunk_duration=10.0).chunks())

        assert [c.sequence for c in produced] == [0, 1, 2, 3]

    @patch("core.chunker.chunker.probe_file")
    def test_time_ranges(self, mock_pf, temp_file):
        """Chunks tile the duration; the final chunk absorbs the remainder."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(25.0)

        produced = list(Chunker(path, chunk_duration=10.0).chunks())

        assert produced[0].start_time == 0.0
        assert produced[0].end_time == 10.0
        assert produced[1].start_time == 10.0
        assert produced[1].end_time == 20.0
        assert produced[2].start_time == 20.0
        assert produced[2].end_time == 25.0  # trailing partial chunk
        assert produced[2].duration == 5.0

    @patch("core.chunker.chunker.probe_file")
    def test_expected_chunks_property(self, mock_pf, temp_file):
        """expected_chunks is known before iterating: ceil(25 / 10) == 3."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(25.0)

        assert Chunker(path, chunk_duration=10.0).expected_chunks == 3

    @patch("core.chunker.chunker.probe_file")
    def test_source_path_on_chunks(self, mock_pf, temp_file):
        """Every chunk records the originating file path."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(10.0)

        produced = list(Chunker(path, chunk_duration=10.0).chunks())

        assert all(c.source_path == path for c in produced)

    def test_file_not_found(self):
        """A missing source file raises ChunkReadError at construction."""
        with pytest.raises(ChunkReadError, match="File not found"):
            Chunker("/nonexistent/file.mp4")

    @patch("core.chunker.chunker.probe_file")
    def test_invalid_chunk_duration(self, mock_pf, temp_file):
        """Non-positive chunk_duration values are rejected with ValueError."""
        path = temp_file(b"x" * 100)

        for bad_duration in (0, -1):
            with pytest.raises(ValueError, match="chunk_duration must be positive"):
                Chunker(path, chunk_duration=bad_duration)

    @patch("core.chunker.chunker.probe_file")
    def test_generator_laziness(self, mock_pf, temp_file):
        """chunks() is a generator: consuming one item leaves the rest pending."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(30.0)

        gen = Chunker(path, chunk_duration=10.0).chunks()
        assert next(gen).sequence == 0
        # Generator deliberately left unexhausted — remaining chunks still pending.

    @pytest.mark.parametrize("duration,chunk_dur,expected", [
        (10.0, 10.0, 1),
        (10.1, 10.0, 2),
        (1.0, 1.0, 1),
        (100.0, 1.0, 100),
        (5.0, 100.0, 1),
    ])
    @patch("core.chunker.chunker.probe_file")
    def test_expected_chunks_parametrized(self, mock_pf, temp_file, duration, chunk_dur, expected):
        """Chunk-count arithmetic across assorted duration combinations."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(duration)
        assert Chunker(path, chunk_duration=chunk_dur).expected_chunks == expected

    @patch("core.chunker.chunker.probe_file")
    def test_exact_multiple(self, mock_pf, temp_file):
        """When duration divides evenly, every chunk is full length."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(30.0)

        produced = list(Chunker(path, chunk_duration=10.0).chunks())

        assert len(produced) == 3
        assert all(c.duration == 10.0 for c in produced)

    @patch("core.chunker.chunker.probe_file")
    def test_probe_failure(self, mock_pf, temp_file):
        """A probe exception is wrapped in ChunkReadError."""
        path = temp_file(b"x" * 100)
        mock_pf.side_effect = Exception("ffprobe failed")

        with pytest.raises(ChunkReadError, match="Failed to probe"):
            Chunker(path, chunk_duration=10.0)
|
||||
103
tests/chunker/test_collector.py
Normal file
103
tests/chunker/test_collector.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""
|
||||
Tests for ResultCollector — ordered reassembly, out-of-order buffering, duplicates.
|
||||
|
||||
Demonstrates: TDD (Interview Topic 8) — testing algorithms (heapq reassembly).
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker.collector import ResultCollector
|
||||
from core.chunker.exceptions import ReassemblyError
|
||||
|
||||
|
||||
class TestResultCollector:
    def test_in_order_emission(self, make_result):
        """In-order arrivals are emitted immediately, one at a time."""
        collector = ResultCollector(total_chunks=3)

        first = collector.add(make_result(0))
        assert len(first) == 1
        assert first[0].sequence == 0

        assert len(collector.add(make_result(1))) == 1
        assert len(collector.add(make_result(2))) == 1
        assert collector.is_complete

    def test_out_of_order_buffering(self, make_result):
        """Out-of-order arrivals are held back until the gap is filled."""
        collector = ResultCollector(total_chunks=3)

        # Arrival order: 2, 0, 1
        assert len(collector.add(make_result(2))) == 0
        assert collector.buffered_count == 1

        assert len(collector.add(make_result(0))) == 1  # 0 flushes; 1 still missing

        assert len(collector.add(make_result(1))) == 2  # 1 unblocks buffered 2 as well
        assert collector.is_complete

    def test_reverse_order(self, make_result):
        """With fully reversed arrival, nothing emits until sequence 0 lands."""
        collector = ResultCollector(total_chunks=4)

        for seq in (3, 2, 1):
            assert len(collector.add(make_result(seq))) == 0

        assert len(collector.add(make_result(0))) == 4
        assert collector.is_complete

    def test_duplicate_raises(self, make_result):
        """Adding the same sequence twice raises ReassemblyError."""
        collector = ResultCollector(total_chunks=3)
        collector.add(make_result(0))

        with pytest.raises(ReassemblyError, match="Duplicate"):
            collector.add(make_result(0))

    def test_emitted_count(self, make_result):
        """emitted_count advances only when results are actually released."""
        collector = ResultCollector(total_chunks=3)
        assert collector.emitted_count == 0

        collector.add(make_result(0))
        assert collector.emitted_count == 1

        collector.add(make_result(2))  # held in the buffer
        assert collector.emitted_count == 1

        collector.add(make_result(1))  # flushes both 1 and 2
        assert collector.emitted_count == 3

    def test_get_ordered_results(self, make_result):
        """get_ordered_results returns everything in sequence order."""
        collector = ResultCollector(total_chunks=3)
        for seq in (2, 0, 1):
            collector.add(make_result(seq))

        assert [r.sequence for r in collector.get_ordered_results()] == [0, 1, 2]

    def test_avg_processing_time(self, make_result):
        """avg_processing_time is the mean over the sliding window."""
        collector = ResultCollector(total_chunks=2)
        collector.add(make_result(0, processing_time=0.1))
        collector.add(make_result(1, processing_time=0.3))

        assert abs(collector.avg_processing_time - 0.2) < 0.001

    def test_not_complete_when_partial(self, make_result):
        """is_complete stays False while chunks are outstanding."""
        collector = ResultCollector(total_chunks=3)
        collector.add(make_result(0))
        collector.add(make_result(1))
        assert not collector.is_complete
|
||||
69
tests/chunker/test_exceptions.py
Normal file
69
tests/chunker/test_exceptions.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""
|
||||
Tests for exception hierarchy — catch patterns, attributes.
|
||||
|
||||
Demonstrates: TDD (Interview Topic 8) — testing exception design.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker.exceptions import (
|
||||
ChunkChecksumError,
|
||||
ChunkError,
|
||||
ChunkReadError,
|
||||
PipelineError,
|
||||
ProcessingError,
|
||||
ProcessorFailureError,
|
||||
ProcessorTimeoutError,
|
||||
ReassemblyError,
|
||||
)
|
||||
|
||||
|
||||
class TestExceptionHierarchy:
    """Verify the exception class hierarchy and catch patterns."""

    def test_pipeline_error_is_base(self):
        """Every chunker exception family descends from PipelineError."""
        for exc_type in (ChunkError, ProcessingError, ReassemblyError):
            assert issubclass(exc_type, PipelineError)

    def test_chunk_error_subtypes(self):
        """Read and checksum errors are both ChunkErrors."""
        assert issubclass(ChunkReadError, ChunkError)
        assert issubclass(ChunkChecksumError, ChunkError)

    def test_processing_error_subtypes(self):
        """Timeout and failure errors are both ProcessingErrors."""
        assert issubclass(ProcessorTimeoutError, ProcessingError)
        assert issubclass(ProcessorFailureError, ProcessingError)

    def test_catch_pipeline_error_catches_all(self):
        """A PipelineError handler catches any subtype."""
        with pytest.raises(PipelineError):
            raise ChunkReadError("test")

        with pytest.raises(PipelineError):
            raise ReassemblyError("test")

    def test_checksum_error_attributes(self):
        """ChunkChecksumError exposes sequence, expected, and actual checksums."""
        err = ChunkChecksumError(sequence=5, expected="aaa", actual="bbb")
        assert err.sequence == 5
        assert err.expected == "aaa"
        assert err.actual == "bbb"
        assert "5" in str(err)

    def test_timeout_error_attributes(self):
        """ProcessorTimeoutError exposes sequence and timeout."""
        err = ProcessorTimeoutError(sequence=3, timeout=30.0)
        assert err.sequence == 3
        assert err.timeout == 30.0

    def test_failure_error_attributes(self):
        """ProcessorFailureError keeps the original exception and retry count."""
        original = RuntimeError("boom")
        err = ProcessorFailureError(sequence=1, retries=3, original_error=original)
        assert err.sequence == 1
        assert err.retries == 3
        assert err.original_error is original
        assert "boom" in str(err)
|
||||
144
tests/chunker/test_pipeline.py
Normal file
144
tests/chunker/test_pipeline.py
Normal file
@@ -0,0 +1,144 @@
|
||||
"""
|
||||
Tests for Pipeline — end-to-end orchestration, stats, error handling.
|
||||
|
||||
Demonstrates: TDD (Interview Topic 8) — integration testing with mocked FFmpeg probe.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker import Pipeline
|
||||
from core.chunker.exceptions import PipelineError
|
||||
|
||||
|
||||
def mock_probe(duration):
    """Build a MagicMock standing in for a ProbeResult with the given duration."""
    probe = MagicMock()
    probe.duration = duration
    return probe
|
||||
|
||||
|
||||
class TestPipeline:
    @patch("core.chunker.chunker.probe_file")
    def test_end_to_end(self, mock_pf, temp_file):
        """A 40s file with 10s chunks fully processes: 4 chunks, none failed, in order."""
        path = temp_file(b"x" * 4096)
        mock_pf.return_value = mock_probe(40.0)

        result = Pipeline(
            source=path,
            chunk_duration=10.0,
            num_workers=2,
            processor_type="checksum",
        ).run()

        assert result.total_chunks == 4
        assert result.processed == 4
        assert result.failed == 0
        assert result.elapsed_time > 0
        assert result.chunks_in_order is True

    @patch("core.chunker.chunker.probe_file")
    def test_throughput_calculated(self, mock_pf, temp_file):
        """The run result reports a positive throughput figure."""
        path = temp_file(b"x" * 10000)
        mock_pf.return_value = mock_probe(30.0)

        result = Pipeline(source=path, chunk_duration=10.0, num_workers=2).run()

        assert result.throughput_mbps > 0

    @patch("core.chunker.chunker.probe_file")
    def test_worker_stats(self, mock_pf, temp_file):
        """Per-worker stats include processed and error counters for each worker."""
        path = temp_file(b"x" * 4000)
        mock_pf.return_value = mock_probe(40.0)

        result = Pipeline(source=path, chunk_duration=10.0, num_workers=2).run()

        assert len(result.worker_stats) == 2
        for stats in result.worker_stats.values():
            assert "processed" in stats
            assert "errors" in stats

    def test_nonexistent_file(self):
        """A missing source file surfaces as PipelineError."""
        with pytest.raises(PipelineError):
            Pipeline(source="/nonexistent/file.mp4").run()

    @patch("core.chunker.chunker.probe_file")
    def test_event_callback(self, mock_pf, temp_file):
        """Start, queue, and completion events arrive through the callback."""
        path = temp_file(b"x" * 2048)
        mock_pf.return_value = mock_probe(20.0)
        events = []

        def capture(event_type, data):
            events.append(event_type)

        Pipeline(
            source=path,
            chunk_duration=10.0,
            num_workers=1,
            event_callback=capture,
        ).run()

        assert "pipeline_start" in events
        assert "pipeline_complete" in events
        assert "chunk_queued" in events

    @patch("core.chunker.chunker.probe_file")
    def test_simulated_decode_processor(self, mock_pf, temp_file):
        """The simulated_decode processor type completes without failures."""
        path = temp_file(b"x" * 2048)
        mock_pf.return_value = mock_probe(20.0)

        result = Pipeline(
            source=path,
            chunk_duration=10.0,
            num_workers=2,
            processor_type="simulated_decode",
        ).run()

        assert result.total_chunks == 2
        assert result.failed == 0

    @patch("core.chunker.chunker.probe_file")
    def test_single_chunk_file(self, mock_pf, temp_file):
        """A file shorter than chunk_duration becomes exactly one chunk."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(5.0)

        result = Pipeline(source=path, chunk_duration=10.0).run()

        assert result.total_chunks == 1
        assert result.processed == 1

    @patch("core.chunker.chunker.probe_file")
    def test_retries_tracked(self, mock_pf, temp_file):
        """The result exposes a non-negative total retry count."""
        path = temp_file(b"x" * 2048)
        mock_pf.return_value = mock_probe(20.0)

        result = Pipeline(source=path, chunk_duration=10.0).run()

        assert result.retries >= 0  # zero when nothing failed

    @patch("core.chunker.chunker.probe_file")
    def test_output_dir_and_chunk_files(self, mock_pf, temp_file):
        """Without an output_dir, no chunk files are recorded."""
        path = temp_file(b"x" * 1024)
        mock_pf.return_value = mock_probe(10.0)

        result = Pipeline(
            source=path,
            chunk_duration=10.0,
            processor_type="checksum",
        ).run()

        # output_dir was never configured, so nothing should be written or tracked.
        assert result.output_dir is None
        assert result.chunk_files == []
|
||||
98
tests/chunker/test_processor.py
Normal file
98
tests/chunker/test_processor.py
Normal file
@@ -0,0 +1,98 @@
|
||||
"""
|
||||
Tests for Processor implementations — ChecksumProcessor, SimulatedDecodeProcessor, CompositeProcessor.
|
||||
|
||||
Demonstrates: TDD (Interview Topic 8) — ABC contract, parametrized tests.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker.exceptions import ChunkChecksumError
|
||||
from core.chunker.models import Chunk
|
||||
from core.chunker.processor import (
|
||||
ChecksumProcessor,
|
||||
CompositeProcessor,
|
||||
Processor,
|
||||
SimulatedDecodeProcessor,
|
||||
)
|
||||
|
||||
|
||||
class TestChecksumProcessor:
    def test_valid_time_range(self, sample_chunk):
        """A chunk with a valid time range processes successfully."""
        result = ChecksumProcessor().process(sample_chunk)
        assert result.success is True
        assert result.checksum_valid is True
        assert result.processing_time > 0

    def test_invalid_time_range(self):
        """A zero-duration time range raises ChunkChecksumError carrying the sequence."""
        chunk = Chunk(
            sequence=0,
            start_time=10.0,
            end_time=10.0,  # start == end: zero-duration range
            source_path="/fake.mp4",
            duration=0.0,
        )
        with pytest.raises(ChunkChecksumError) as exc_info:
            ChecksumProcessor().process(chunk)
        assert exc_info.value.sequence == 0

    def test_sequence_preserved(self, make_chunk):
        """The result carries the chunk's sequence number unchanged."""
        result = ChecksumProcessor().process(make_chunk(42))
        assert result.sequence == 42
|
||||
|
||||
|
||||
class TestSimulatedDecodeProcessor:
    def test_processes_successfully(self, sample_chunk):
        """Simulated decoding always reports success with nonzero timing."""
        result = SimulatedDecodeProcessor(ms_per_second=1.0).process(sample_chunk)
        assert result.success is True
        assert result.processing_time > 0

    def test_time_proportional_to_duration(self):
        """Decoding a longer chunk takes longer than a shorter one."""
        shorter = Chunk(0, 0.0, 1.0, "/fake.mp4", 1.0)
        longer = Chunk(1, 0.0, 10.0, "/fake.mp4", 10.0)

        proc = SimulatedDecodeProcessor(ms_per_second=50.0)
        result_short = proc.process(shorter)
        result_long = proc.process(longer)

        assert result_long.processing_time > result_short.processing_time
|
||||
|
||||
|
||||
class TestCompositeProcessor:
    def test_chains_processors(self, sample_chunk):
        """All wrapped processors run in order over the same chunk."""
        composite = CompositeProcessor(
            [ChecksumProcessor(), SimulatedDecodeProcessor(ms_per_second=1.0)]
        )
        assert composite.process(sample_chunk).success is True

    def test_stops_on_failure(self):
        """An error from an earlier processor propagates out of the composite."""
        bad_chunk = Chunk(0, 10.0, 10.0, "/fake.mp4", 0.0)  # zero-duration range
        composite = CompositeProcessor(
            [ChecksumProcessor(), SimulatedDecodeProcessor(ms_per_second=1.0)]
        )
        with pytest.raises(ChunkChecksumError):
            composite.process(bad_chunk)

    def test_requires_at_least_one(self):
        """Constructing with an empty processor list raises ValueError."""
        with pytest.raises(ValueError, match="at least one"):
            CompositeProcessor([])

    def test_is_processor(self):
        """CompositeProcessor satisfies the Processor interface."""
        assert isinstance(CompositeProcessor([ChecksumProcessor()]), Processor)
|
||||
115
tests/chunker/test_queue.py
Normal file
115
tests/chunker/test_queue.py
Normal file
@@ -0,0 +1,115 @@
|
||||
"""
|
||||
Tests for ChunkQueue — backpressure, sentinel shutdown, timeout behavior.
|
||||
|
||||
Demonstrates: TDD (Interview Topic 8) — concurrency testing.
|
||||
"""
|
||||
|
||||
import queue
|
||||
import threading
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker.queue import ChunkQueue
|
||||
|
||||
|
||||
class TestChunkQueue:
    def test_put_and_get(self, make_chunk):
        """Basic put/get cycle round-trips a chunk."""
        q = ChunkQueue(maxsize=5)
        q.put(make_chunk(0))
        assert q.get(timeout=1.0).sequence == 0

    def test_fifo_order(self, make_chunk):
        """Items come out in FIFO order."""
        q = ChunkQueue(maxsize=5)
        for i in range(3):
            q.put(make_chunk(i))

        for i in range(3):
            assert q.get(timeout=1.0).sequence == i

    def test_close_returns_none(self, make_chunk):
        """After close(), queued items drain first, then get() yields the None sentinel."""
        q = ChunkQueue(maxsize=5)
        q.put(make_chunk(0))
        q.close()

        assert q.get(timeout=1.0).sequence == 0
        # The queued item is gone; the sentinel comes next.
        assert q.get(timeout=1.0) is None

    def test_close_propagates_to_multiple_consumers(self, make_chunk):
        """Sentinel propagates: every consumer sees None after close()."""
        q = ChunkQueue(maxsize=5)
        q.close()

        assert q.get(timeout=1.0) is None
        assert q.get(timeout=1.0) is None

    def test_is_closed(self):
        """is_closed reflects the open/closed state transition."""
        q = ChunkQueue()
        assert not q.is_closed
        q.close()
        assert q.is_closed

    def test_qsize(self, make_chunk):
        """qsize tracks approximate queue depth through puts and gets."""
        q = ChunkQueue(maxsize=10)
        assert q.qsize() == 0

        q.put(make_chunk(0))
        q.put(make_chunk(1))
        assert q.qsize() == 2

        q.get(timeout=1.0)
        assert q.qsize() == 1

    def test_backpressure_blocks(self, make_chunk):
        """put() on a full queue times out with queue.Full (backpressure)."""
        q = ChunkQueue(maxsize=2)
        q.put(make_chunk(0))
        q.put(make_chunk(1))

        with pytest.raises(queue.Full):
            q.put(make_chunk(2), timeout=0.05)

    def test_get_timeout(self):
        """get() on an empty queue raises queue.Empty after the timeout."""
        q = ChunkQueue(maxsize=5)

        with pytest.raises(queue.Empty):
            q.get(timeout=0.05)

    def test_concurrent_put_get(self, make_chunk):
        """Producer and consumer threads hand off all items through a small queue."""
        q = ChunkQueue(maxsize=3)
        results = []

        def producer():
            for i in range(10):
                q.put(make_chunk(i))
            q.close()

        def consumer():
            while True:
                item = q.get(timeout=2.0)
                if item is None:  # sentinel: producer closed the queue
                    break
                results.append(item.sequence)

        t1 = threading.Thread(target=producer)
        t2 = threading.Thread(target=consumer)
        t1.start()
        t2.start()
        t1.join(timeout=5.0)
        t2.join(timeout=5.0)

        # Fail loudly on a deadlock instead of asserting on a partial results list:
        # join(timeout=...) returns silently even if the thread is still stuck.
        assert not t1.is_alive(), "producer thread deadlocked"
        assert not t2.is_alive(), "consumer thread deadlocked"
        assert sorted(results) == list(range(10))
|
||||
127
tests/chunker/test_worker.py
Normal file
127
tests/chunker/test_worker.py
Normal file
@@ -0,0 +1,127 @@
|
||||
"""
|
||||
Tests for Worker — processing, retry with backoff, error handling.
|
||||
|
||||
Demonstrates: TDD (Interview Topic 8) — mocking processors, testing retry logic.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker.models import Chunk, ChunkResult
|
||||
from core.chunker.processor import Processor
|
||||
from core.chunker.queue import ChunkQueue
|
||||
from core.chunker.worker import Worker
|
||||
|
||||
|
||||
class FailNTimesProcessor(Processor):
    """Test processor that raises for the first N calls, then succeeds thereafter."""

    def __init__(self, fail_count: int):
        self.fail_count = fail_count  # failures to simulate before succeeding
        self.call_count = 0  # total process() invocations so far

    def process(self, chunk: Chunk) -> ChunkResult:
        self.call_count += 1
        if self.call_count <= self.fail_count:
            raise RuntimeError(f"Simulated failure #{self.call_count}")
        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            processing_time=0.001,
        )
|
||||
|
||||
|
||||
class AlwaysFailProcessor(Processor):
    """Test processor whose process() unconditionally raises RuntimeError."""

    def process(self, chunk: Chunk) -> ChunkResult:
        raise RuntimeError("Always fails")
|
||||
|
||||
|
||||
class TestWorker:
    def test_processes_chunks(self, make_chunk):
        """Worker drains the queue and returns one successful result per chunk."""
        q = ChunkQueue(maxsize=5)
        for i in range(3):
            q.put(make_chunk(i))
        q.close()

        from core.chunker.processor import ChecksumProcessor
        results = Worker("w-0", q, ChecksumProcessor(), max_retries=0).run()

        assert len(results) == 3
        assert all(r.success for r in results)

    def test_retry_on_failure(self, make_chunk):
        """Transient failures are retried until the processor succeeds."""
        q = ChunkQueue(maxsize=5)
        q.put(make_chunk(0))
        q.close()

        proc = FailNTimesProcessor(fail_count=2)
        results = Worker("w-0", q, proc, max_retries=3).run()

        assert len(results) == 1
        assert results[0].success is True
        assert results[0].retries == 2
        assert proc.call_count == 3  # two failures, then one success

    def test_max_retries_exceeded(self, make_chunk):
        """After exhausting retries the worker records a failed result."""
        q = ChunkQueue(maxsize=5)
        q.put(make_chunk(0))
        q.close()

        worker = Worker("w-0", q, AlwaysFailProcessor(), max_retries=2)
        results = worker.run()

        assert len(results) == 1
        assert results[0].success is False
        assert results[0].error is not None
        assert worker.error_count == 1

    def test_worker_id_on_results(self, make_chunk):
        """Each result is stamped with the worker's ID."""
        q = ChunkQueue(maxsize=5)
        q.put(make_chunk(0))
        q.close()

        from core.chunker.processor import ChecksumProcessor
        results = Worker("worker-7", q, ChecksumProcessor()).run()

        assert results[0].worker_id == "worker-7"

    def test_event_callback(self, make_chunk):
        """Lifecycle events are emitted through the callback."""
        q = ChunkQueue(maxsize=5)
        q.put(make_chunk(0))
        q.close()

        events = []
        callback = MagicMock(side_effect=lambda t, d: events.append((t, d)))

        from core.chunker.processor import ChecksumProcessor
        Worker("w-0", q, ChecksumProcessor(), event_callback=callback).run()

        seen = [kind for kind, _ in events]
        assert "worker_status" in seen
        assert "chunk_processing" in seen
        assert "chunk_done" in seen

    def test_processed_count(self, make_chunk):
        """processed_count equals the number of chunks consumed."""
        q = ChunkQueue(maxsize=10)
        for i in range(5):
            q.put(make_chunk(i))
        q.close()

        from core.chunker.processor import ChecksumProcessor
        worker = Worker("w-0", q, ChecksumProcessor())
        worker.run()

        assert worker.processed_count == 5
|
||||
Reference in New Issue
Block a user