chunker and ui
This commit is contained in:
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
0
tests/chunker/__init__.py
Normal file
0
tests/chunker/__init__.py
Normal file
76
tests/chunker/conftest.py
Normal file
76
tests/chunker/conftest.py
Normal file
@@ -0,0 +1,76 @@
|
||||
"""
|
||||
Shared fixtures for chunker tests.
|
||||
|
||||
Demonstrates: TDD and unit testing best practices (Interview Topic 8) — fixtures, temp files.
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker.models import Chunk, ChunkResult
|
||||
|
||||
|
||||
@pytest.fixture
def temp_file():
    """Yield a factory that writes temp files with known content.

    Every file the factory creates is deleted during teardown.
    """
    created = []

    def _create(content: bytes = b"x" * 4096):
        handle = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        handle.write(content)
        handle.close()
        created.append(handle.name)
        return handle.name

    yield _create

    # Teardown: remove everything the factory produced during the test.
    for name in created:
        if os.path.exists(name):
            os.unlink(name)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_chunk(temp_file):
    """A single time-based Chunk (0s-10s) backed by a small temp file."""
    source = temp_file(b"x" * 1024)
    return Chunk(
        sequence=0,
        start_time=0.0,
        end_time=10.0,
        source_path=source,
        duration=10.0,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def make_chunk(temp_file):
    """Factory for time-based chunks whose time range follows from the sequence number."""
    source = temp_file(b"x" * 1024)

    def _make(sequence: int, duration: float = 10.0) -> Chunk:
        offset = sequence * duration
        return Chunk(
            sequence=sequence,
            start_time=offset,
            end_time=offset + duration,
            source_path=source,
            duration=duration,
        )

    return _make
|
||||
|
||||
|
||||
@pytest.fixture
def make_result():
    """Factory for ChunkResult objects with configurable success flag and timing."""

    def _make(sequence: int, success: bool = True, processing_time: float = 0.01) -> ChunkResult:
        return ChunkResult(
            sequence=sequence,
            success=success,
            processing_time=processing_time,
        )

    return _make
|
||||
149
tests/chunker/test_chunker.py
Normal file
149
tests/chunker/test_chunker.py
Normal file
@@ -0,0 +1,149 @@
|
||||
"""
|
||||
Tests for Chunker — time-based segmentation, chunk counts, sequence numbers, generator behavior.
|
||||
|
||||
Demonstrates: TDD (Interview Topic 8) — parametrized tests, edge cases, mocking.
|
||||
"""
|
||||
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker import Chunker
|
||||
from core.chunker.exceptions import ChunkReadError
|
||||
|
||||
|
||||
def mock_probe(duration):
    """Build a MagicMock standing in for a probe_file result with the given duration."""
    probe = MagicMock()
    probe.duration = duration
    return probe
|
||||
|
||||
|
||||
class TestChunker:
    @patch("core.chunker.chunker.probe_file")
    def test_basic_chunking(self, mock_pf, temp_file):
        """A 30s file with 10s chunks splits into three chunks with correct bounds."""
        path = temp_file(b"x" * 1000)
        mock_pf.return_value = mock_probe(30.0)

        produced = list(Chunker(path, chunk_duration=10.0).chunks())

        assert len(produced) == 3
        assert produced[0].start_time == 0.0
        assert produced[0].end_time == 10.0
        assert produced[0].duration == 10.0
        assert produced[1].start_time == 10.0
        assert produced[2].start_time == 20.0

    @patch("core.chunker.chunker.probe_file")
    def test_sequence_numbers(self, mock_pf, temp_file):
        """Sequence numbers count up from zero with no gaps."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(40.0)

        produced = list(Chunker(path, chunk_duration=10.0).chunks())

        assert [c.sequence for c in produced] == [0, 1, 2, 3]

    @patch("core.chunker.chunker.probe_file")
    def test_time_ranges(self, mock_pf, temp_file):
        """Chunks tile the duration; the final chunk absorbs the remainder."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(25.0)

        produced = list(Chunker(path, chunk_duration=10.0).chunks())

        assert produced[0].start_time == 0.0
        assert produced[0].end_time == 10.0
        assert produced[1].start_time == 10.0
        assert produced[1].end_time == 20.0
        assert produced[2].start_time == 20.0
        assert produced[2].end_time == 25.0  # trailing partial chunk
        assert produced[2].duration == 5.0

    @patch("core.chunker.chunker.probe_file")
    def test_expected_chunks_property(self, mock_pf, temp_file):
        """expected_chunks is known before iterating: ceil(25 / 10) == 3."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(25.0)

        assert Chunker(path, chunk_duration=10.0).expected_chunks == 3

    @patch("core.chunker.chunker.probe_file")
    def test_source_path_on_chunks(self, mock_pf, temp_file):
        """Every chunk records the originating file path."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(10.0)

        produced = list(Chunker(path, chunk_duration=10.0).chunks())

        assert all(c.source_path == path for c in produced)

    def test_file_not_found(self):
        """A missing source file raises ChunkReadError at construction."""
        with pytest.raises(ChunkReadError, match="File not found"):
            Chunker("/nonexistent/file.mp4")

    @patch("core.chunker.chunker.probe_file")
    def test_invalid_chunk_duration(self, mock_pf, temp_file):
        """Non-positive chunk_duration values are rejected with ValueError."""
        path = temp_file(b"x" * 100)

        for bad_duration in (0, -1):
            with pytest.raises(ValueError, match="chunk_duration must be positive"):
                Chunker(path, chunk_duration=bad_duration)

    @patch("core.chunker.chunker.probe_file")
    def test_generator_laziness(self, mock_pf, temp_file):
        """chunks() is a generator: consuming one item leaves the rest pending."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(30.0)

        gen = Chunker(path, chunk_duration=10.0).chunks()
        assert next(gen).sequence == 0
        # Generator deliberately left unexhausted — remaining chunks still pending.

    @pytest.mark.parametrize("duration,chunk_dur,expected", [
        (10.0, 10.0, 1),
        (10.1, 10.0, 2),
        (1.0, 1.0, 1),
        (100.0, 1.0, 100),
        (5.0, 100.0, 1),
    ])
    @patch("core.chunker.chunker.probe_file")
    def test_expected_chunks_parametrized(self, mock_pf, temp_file, duration, chunk_dur, expected):
        """Chunk-count arithmetic across assorted duration combinations."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(duration)
        assert Chunker(path, chunk_duration=chunk_dur).expected_chunks == expected

    @patch("core.chunker.chunker.probe_file")
    def test_exact_multiple(self, mock_pf, temp_file):
        """When duration divides evenly, every chunk is full length."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(30.0)

        produced = list(Chunker(path, chunk_duration=10.0).chunks())

        assert len(produced) == 3
        assert all(c.duration == 10.0 for c in produced)

    @patch("core.chunker.chunker.probe_file")
    def test_probe_failure(self, mock_pf, temp_file):
        """A probe exception is wrapped in ChunkReadError."""
        path = temp_file(b"x" * 100)
        mock_pf.side_effect = Exception("ffprobe failed")

        with pytest.raises(ChunkReadError, match="Failed to probe"):
            Chunker(path, chunk_duration=10.0)
|
||||
103
tests/chunker/test_collector.py
Normal file
103
tests/chunker/test_collector.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""
|
||||
Tests for ResultCollector — ordered reassembly, out-of-order buffering, duplicates.
|
||||
|
||||
Demonstrates: TDD (Interview Topic 8) — testing algorithms (heapq reassembly).
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker.collector import ResultCollector
|
||||
from core.chunker.exceptions import ReassemblyError
|
||||
|
||||
|
||||
class TestResultCollector:
    def test_in_order_emission(self, make_result):
        """In-order arrivals are emitted immediately, one at a time."""
        collector = ResultCollector(total_chunks=3)

        first = collector.add(make_result(0))
        assert len(first) == 1
        assert first[0].sequence == 0

        assert len(collector.add(make_result(1))) == 1
        assert len(collector.add(make_result(2))) == 1
        assert collector.is_complete

    def test_out_of_order_buffering(self, make_result):
        """Out-of-order arrivals are held back until the gap is filled."""
        collector = ResultCollector(total_chunks=3)

        # Arrival order: 2, 0, 1
        assert len(collector.add(make_result(2))) == 0
        assert collector.buffered_count == 1

        assert len(collector.add(make_result(0))) == 1  # 0 flushes; 1 still missing

        assert len(collector.add(make_result(1))) == 2  # 1 unblocks buffered 2 as well
        assert collector.is_complete

    def test_reverse_order(self, make_result):
        """With fully reversed arrival, nothing emits until sequence 0 lands."""
        collector = ResultCollector(total_chunks=4)

        for seq in (3, 2, 1):
            assert len(collector.add(make_result(seq))) == 0

        assert len(collector.add(make_result(0))) == 4
        assert collector.is_complete

    def test_duplicate_raises(self, make_result):
        """Adding the same sequence twice raises ReassemblyError."""
        collector = ResultCollector(total_chunks=3)
        collector.add(make_result(0))

        with pytest.raises(ReassemblyError, match="Duplicate"):
            collector.add(make_result(0))

    def test_emitted_count(self, make_result):
        """emitted_count advances only when results are actually released."""
        collector = ResultCollector(total_chunks=3)
        assert collector.emitted_count == 0

        collector.add(make_result(0))
        assert collector.emitted_count == 1

        collector.add(make_result(2))  # held in the buffer
        assert collector.emitted_count == 1

        collector.add(make_result(1))  # flushes both 1 and 2
        assert collector.emitted_count == 3

    def test_get_ordered_results(self, make_result):
        """get_ordered_results returns everything in sequence order."""
        collector = ResultCollector(total_chunks=3)
        for seq in (2, 0, 1):
            collector.add(make_result(seq))

        assert [r.sequence for r in collector.get_ordered_results()] == [0, 1, 2]

    def test_avg_processing_time(self, make_result):
        """avg_processing_time is the mean over the sliding window."""
        collector = ResultCollector(total_chunks=2)
        collector.add(make_result(0, processing_time=0.1))
        collector.add(make_result(1, processing_time=0.3))

        assert abs(collector.avg_processing_time - 0.2) < 0.001

    def test_not_complete_when_partial(self, make_result):
        """is_complete stays False while chunks are outstanding."""
        collector = ResultCollector(total_chunks=3)
        collector.add(make_result(0))
        collector.add(make_result(1))
        assert not collector.is_complete
|
||||
69
tests/chunker/test_exceptions.py
Normal file
69
tests/chunker/test_exceptions.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""
|
||||
Tests for exception hierarchy — catch patterns, attributes.
|
||||
|
||||
Demonstrates: TDD (Interview Topic 8) — testing exception design.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker.exceptions import (
|
||||
ChunkChecksumError,
|
||||
ChunkError,
|
||||
ChunkReadError,
|
||||
PipelineError,
|
||||
ProcessingError,
|
||||
ProcessorFailureError,
|
||||
ProcessorTimeoutError,
|
||||
ReassemblyError,
|
||||
)
|
||||
|
||||
|
||||
class TestExceptionHierarchy:
    """Verify the exception class hierarchy and catch patterns."""

    def test_pipeline_error_is_base(self):
        """Every chunker exception family descends from PipelineError."""
        for exc_type in (ChunkError, ProcessingError, ReassemblyError):
            assert issubclass(exc_type, PipelineError)

    def test_chunk_error_subtypes(self):
        """Read and checksum errors are both ChunkErrors."""
        assert issubclass(ChunkReadError, ChunkError)
        assert issubclass(ChunkChecksumError, ChunkError)

    def test_processing_error_subtypes(self):
        """Timeout and failure errors are both ProcessingErrors."""
        assert issubclass(ProcessorTimeoutError, ProcessingError)
        assert issubclass(ProcessorFailureError, ProcessingError)

    def test_catch_pipeline_error_catches_all(self):
        """A PipelineError handler catches any subtype."""
        with pytest.raises(PipelineError):
            raise ChunkReadError("test")

        with pytest.raises(PipelineError):
            raise ReassemblyError("test")

    def test_checksum_error_attributes(self):
        """ChunkChecksumError exposes sequence, expected, and actual checksums."""
        err = ChunkChecksumError(sequence=5, expected="aaa", actual="bbb")
        assert err.sequence == 5
        assert err.expected == "aaa"
        assert err.actual == "bbb"
        assert "5" in str(err)

    def test_timeout_error_attributes(self):
        """ProcessorTimeoutError exposes sequence and timeout."""
        err = ProcessorTimeoutError(sequence=3, timeout=30.0)
        assert err.sequence == 3
        assert err.timeout == 30.0

    def test_failure_error_attributes(self):
        """ProcessorFailureError keeps the original exception and retry count."""
        original = RuntimeError("boom")
        err = ProcessorFailureError(sequence=1, retries=3, original_error=original)
        assert err.sequence == 1
        assert err.retries == 3
        assert err.original_error is original
        assert "boom" in str(err)
|
||||
144
tests/chunker/test_pipeline.py
Normal file
144
tests/chunker/test_pipeline.py
Normal file
@@ -0,0 +1,144 @@
|
||||
"""
|
||||
Tests for Pipeline — end-to-end orchestration, stats, error handling.
|
||||
|
||||
Demonstrates: TDD (Interview Topic 8) — integration testing with mocked FFmpeg probe.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker import Pipeline
|
||||
from core.chunker.exceptions import PipelineError
|
||||
|
||||
|
||||
def mock_probe(duration):
    """Build a MagicMock standing in for a ProbeResult with the given duration."""
    probe = MagicMock()
    probe.duration = duration
    return probe
|
||||
|
||||
|
||||
class TestPipeline:
    @patch("core.chunker.chunker.probe_file")
    def test_end_to_end(self, mock_pf, temp_file):
        """A 40s file with 10s chunks fully processes: 4 chunks, none failed, in order."""
        path = temp_file(b"x" * 4096)
        mock_pf.return_value = mock_probe(40.0)

        result = Pipeline(
            source=path,
            chunk_duration=10.0,
            num_workers=2,
            processor_type="checksum",
        ).run()

        assert result.total_chunks == 4
        assert result.processed == 4
        assert result.failed == 0
        assert result.elapsed_time > 0
        assert result.chunks_in_order is True

    @patch("core.chunker.chunker.probe_file")
    def test_throughput_calculated(self, mock_pf, temp_file):
        """The run result reports a positive throughput figure."""
        path = temp_file(b"x" * 10000)
        mock_pf.return_value = mock_probe(30.0)

        result = Pipeline(source=path, chunk_duration=10.0, num_workers=2).run()

        assert result.throughput_mbps > 0

    @patch("core.chunker.chunker.probe_file")
    def test_worker_stats(self, mock_pf, temp_file):
        """Per-worker stats include processed and error counters for each worker."""
        path = temp_file(b"x" * 4000)
        mock_pf.return_value = mock_probe(40.0)

        result = Pipeline(source=path, chunk_duration=10.0, num_workers=2).run()

        assert len(result.worker_stats) == 2
        for stats in result.worker_stats.values():
            assert "processed" in stats
            assert "errors" in stats

    def test_nonexistent_file(self):
        """A missing source file surfaces as PipelineError."""
        with pytest.raises(PipelineError):
            Pipeline(source="/nonexistent/file.mp4").run()

    @patch("core.chunker.chunker.probe_file")
    def test_event_callback(self, mock_pf, temp_file):
        """Start, queue, and completion events arrive through the callback."""
        path = temp_file(b"x" * 2048)
        mock_pf.return_value = mock_probe(20.0)
        events = []

        def capture(event_type, data):
            events.append(event_type)

        Pipeline(
            source=path,
            chunk_duration=10.0,
            num_workers=1,
            event_callback=capture,
        ).run()

        assert "pipeline_start" in events
        assert "pipeline_complete" in events
        assert "chunk_queued" in events

    @patch("core.chunker.chunker.probe_file")
    def test_simulated_decode_processor(self, mock_pf, temp_file):
        """The simulated_decode processor type completes without failures."""
        path = temp_file(b"x" * 2048)
        mock_pf.return_value = mock_probe(20.0)

        result = Pipeline(
            source=path,
            chunk_duration=10.0,
            num_workers=2,
            processor_type="simulated_decode",
        ).run()

        assert result.total_chunks == 2
        assert result.failed == 0

    @patch("core.chunker.chunker.probe_file")
    def test_single_chunk_file(self, mock_pf, temp_file):
        """A file shorter than chunk_duration becomes exactly one chunk."""
        path = temp_file(b"x" * 100)
        mock_pf.return_value = mock_probe(5.0)

        result = Pipeline(source=path, chunk_duration=10.0).run()

        assert result.total_chunks == 1
        assert result.processed == 1

    @patch("core.chunker.chunker.probe_file")
    def test_retries_tracked(self, mock_pf, temp_file):
        """The result exposes a non-negative total retry count."""
        path = temp_file(b"x" * 2048)
        mock_pf.return_value = mock_probe(20.0)

        result = Pipeline(source=path, chunk_duration=10.0).run()

        assert result.retries >= 0  # zero when nothing failed

    @patch("core.chunker.chunker.probe_file")
    def test_output_dir_and_chunk_files(self, mock_pf, temp_file):
        """Without an output_dir, no chunk files are recorded."""
        path = temp_file(b"x" * 1024)
        mock_pf.return_value = mock_probe(10.0)

        result = Pipeline(
            source=path,
            chunk_duration=10.0,
            processor_type="checksum",
        ).run()

        # output_dir was never configured, so nothing should be written or tracked.
        assert result.output_dir is None
        assert result.chunk_files == []
|
||||
98
tests/chunker/test_processor.py
Normal file
98
tests/chunker/test_processor.py
Normal file
@@ -0,0 +1,98 @@
|
||||
"""
|
||||
Tests for Processor implementations — ChecksumProcessor, SimulatedDecodeProcessor, CompositeProcessor.
|
||||
|
||||
Demonstrates: TDD (Interview Topic 8) — ABC contract, parametrized tests.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker.exceptions import ChunkChecksumError
|
||||
from core.chunker.models import Chunk
|
||||
from core.chunker.processor import (
|
||||
ChecksumProcessor,
|
||||
CompositeProcessor,
|
||||
Processor,
|
||||
SimulatedDecodeProcessor,
|
||||
)
|
||||
|
||||
|
||||
class TestChecksumProcessor:
    def test_valid_time_range(self, sample_chunk):
        """A chunk with a valid time range processes successfully."""
        result = ChecksumProcessor().process(sample_chunk)
        assert result.success is True
        assert result.checksum_valid is True
        assert result.processing_time > 0

    def test_invalid_time_range(self):
        """A zero-duration time range raises ChunkChecksumError carrying the sequence."""
        chunk = Chunk(
            sequence=0,
            start_time=10.0,
            end_time=10.0,  # start == end: zero-duration range
            source_path="/fake.mp4",
            duration=0.0,
        )
        with pytest.raises(ChunkChecksumError) as exc_info:
            ChecksumProcessor().process(chunk)
        assert exc_info.value.sequence == 0

    def test_sequence_preserved(self, make_chunk):
        """The result carries the chunk's sequence number unchanged."""
        result = ChecksumProcessor().process(make_chunk(42))
        assert result.sequence == 42
|
||||
|
||||
|
||||
class TestSimulatedDecodeProcessor:
    def test_processes_successfully(self, sample_chunk):
        """Simulated decoding always reports success with nonzero timing."""
        result = SimulatedDecodeProcessor(ms_per_second=1.0).process(sample_chunk)
        assert result.success is True
        assert result.processing_time > 0

    def test_time_proportional_to_duration(self):
        """Decoding a longer chunk takes longer than a shorter one."""
        shorter = Chunk(0, 0.0, 1.0, "/fake.mp4", 1.0)
        longer = Chunk(1, 0.0, 10.0, "/fake.mp4", 10.0)

        proc = SimulatedDecodeProcessor(ms_per_second=50.0)
        result_short = proc.process(shorter)
        result_long = proc.process(longer)

        assert result_long.processing_time > result_short.processing_time
|
||||
|
||||
|
||||
class TestCompositeProcessor:
    def test_chains_processors(self, sample_chunk):
        """All wrapped processors run in order over the same chunk."""
        composite = CompositeProcessor(
            [ChecksumProcessor(), SimulatedDecodeProcessor(ms_per_second=1.0)]
        )
        assert composite.process(sample_chunk).success is True

    def test_stops_on_failure(self):
        """An error from an earlier processor propagates out of the composite."""
        bad_chunk = Chunk(0, 10.0, 10.0, "/fake.mp4", 0.0)  # zero-duration range
        composite = CompositeProcessor(
            [ChecksumProcessor(), SimulatedDecodeProcessor(ms_per_second=1.0)]
        )
        with pytest.raises(ChunkChecksumError):
            composite.process(bad_chunk)

    def test_requires_at_least_one(self):
        """Constructing with an empty processor list raises ValueError."""
        with pytest.raises(ValueError, match="at least one"):
            CompositeProcessor([])

    def test_is_processor(self):
        """CompositeProcessor satisfies the Processor interface."""
        assert isinstance(CompositeProcessor([ChecksumProcessor()]), Processor)
|
||||
115
tests/chunker/test_queue.py
Normal file
115
tests/chunker/test_queue.py
Normal file
@@ -0,0 +1,115 @@
|
||||
"""
|
||||
Tests for ChunkQueue — backpressure, sentinel shutdown, timeout behavior.
|
||||
|
||||
Demonstrates: TDD (Interview Topic 8) — concurrency testing.
|
||||
"""
|
||||
|
||||
import queue
|
||||
import threading
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker.queue import ChunkQueue
|
||||
|
||||
|
||||
class TestChunkQueue:
    def test_put_and_get(self, make_chunk):
        """Basic put/get cycle round-trips a chunk."""
        q = ChunkQueue(maxsize=5)
        q.put(make_chunk(0))
        assert q.get(timeout=1.0).sequence == 0

    def test_fifo_order(self, make_chunk):
        """Items come out in FIFO order."""
        q = ChunkQueue(maxsize=5)
        for i in range(3):
            q.put(make_chunk(i))

        for i in range(3):
            assert q.get(timeout=1.0).sequence == i

    def test_close_returns_none(self, make_chunk):
        """After close(), queued items drain first, then get() yields the None sentinel."""
        q = ChunkQueue(maxsize=5)
        q.put(make_chunk(0))
        q.close()

        assert q.get(timeout=1.0).sequence == 0
        # The queued item is gone; the sentinel comes next.
        assert q.get(timeout=1.0) is None

    def test_close_propagates_to_multiple_consumers(self, make_chunk):
        """Sentinel propagates: every consumer sees None after close()."""
        q = ChunkQueue(maxsize=5)
        q.close()

        assert q.get(timeout=1.0) is None
        assert q.get(timeout=1.0) is None

    def test_is_closed(self):
        """is_closed reflects the open/closed state transition."""
        q = ChunkQueue()
        assert not q.is_closed
        q.close()
        assert q.is_closed

    def test_qsize(self, make_chunk):
        """qsize tracks approximate queue depth through puts and gets."""
        q = ChunkQueue(maxsize=10)
        assert q.qsize() == 0

        q.put(make_chunk(0))
        q.put(make_chunk(1))
        assert q.qsize() == 2

        q.get(timeout=1.0)
        assert q.qsize() == 1

    def test_backpressure_blocks(self, make_chunk):
        """put() on a full queue times out with queue.Full (backpressure)."""
        q = ChunkQueue(maxsize=2)
        q.put(make_chunk(0))
        q.put(make_chunk(1))

        with pytest.raises(queue.Full):
            q.put(make_chunk(2), timeout=0.05)

    def test_get_timeout(self):
        """get() on an empty queue raises queue.Empty after the timeout."""
        q = ChunkQueue(maxsize=5)

        with pytest.raises(queue.Empty):
            q.get(timeout=0.05)

    def test_concurrent_put_get(self, make_chunk):
        """Producer and consumer threads hand off all items through a small queue."""
        q = ChunkQueue(maxsize=3)
        results = []

        def producer():
            for i in range(10):
                q.put(make_chunk(i))
            q.close()

        def consumer():
            while True:
                item = q.get(timeout=2.0)
                if item is None:  # sentinel: producer closed the queue
                    break
                results.append(item.sequence)

        t1 = threading.Thread(target=producer)
        t2 = threading.Thread(target=consumer)
        t1.start()
        t2.start()
        t1.join(timeout=5.0)
        t2.join(timeout=5.0)

        # Fail loudly on a deadlock instead of asserting on a partial results list:
        # join(timeout=...) returns silently even if the thread is still stuck.
        assert not t1.is_alive(), "producer thread deadlocked"
        assert not t2.is_alive(), "consumer thread deadlocked"
        assert sorted(results) == list(range(10))
|
||||
127
tests/chunker/test_worker.py
Normal file
127
tests/chunker/test_worker.py
Normal file
@@ -0,0 +1,127 @@
|
||||
"""
|
||||
Tests for Worker — processing, retry with backoff, error handling.
|
||||
|
||||
Demonstrates: TDD (Interview Topic 8) — mocking processors, testing retry logic.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from core.chunker.models import Chunk, ChunkResult
|
||||
from core.chunker.processor import Processor
|
||||
from core.chunker.queue import ChunkQueue
|
||||
from core.chunker.worker import Worker
|
||||
|
||||
|
||||
class FailNTimesProcessor(Processor):
    """Test processor that raises for the first N calls, then succeeds thereafter."""

    def __init__(self, fail_count: int):
        self.fail_count = fail_count  # failures to simulate before succeeding
        self.call_count = 0  # total process() invocations so far

    def process(self, chunk: Chunk) -> ChunkResult:
        self.call_count += 1
        if self.call_count <= self.fail_count:
            raise RuntimeError(f"Simulated failure #{self.call_count}")
        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            processing_time=0.001,
        )
|
||||
|
||||
|
||||
class AlwaysFailProcessor(Processor):
    """Test processor whose process() unconditionally raises RuntimeError."""

    def process(self, chunk: Chunk) -> ChunkResult:
        raise RuntimeError("Always fails")
|
||||
|
||||
|
||||
class TestWorker:
    def test_processes_chunks(self, make_chunk):
        """Worker drains the queue and returns one successful result per chunk."""
        q = ChunkQueue(maxsize=5)
        for i in range(3):
            q.put(make_chunk(i))
        q.close()

        from core.chunker.processor import ChecksumProcessor
        results = Worker("w-0", q, ChecksumProcessor(), max_retries=0).run()

        assert len(results) == 3
        assert all(r.success for r in results)

    def test_retry_on_failure(self, make_chunk):
        """Transient failures are retried until the processor succeeds."""
        q = ChunkQueue(maxsize=5)
        q.put(make_chunk(0))
        q.close()

        proc = FailNTimesProcessor(fail_count=2)
        results = Worker("w-0", q, proc, max_retries=3).run()

        assert len(results) == 1
        assert results[0].success is True
        assert results[0].retries == 2
        assert proc.call_count == 3  # two failures, then one success

    def test_max_retries_exceeded(self, make_chunk):
        """After exhausting retries the worker records a failed result."""
        q = ChunkQueue(maxsize=5)
        q.put(make_chunk(0))
        q.close()

        worker = Worker("w-0", q, AlwaysFailProcessor(), max_retries=2)
        results = worker.run()

        assert len(results) == 1
        assert results[0].success is False
        assert results[0].error is not None
        assert worker.error_count == 1

    def test_worker_id_on_results(self, make_chunk):
        """Each result is stamped with the worker's ID."""
        q = ChunkQueue(maxsize=5)
        q.put(make_chunk(0))
        q.close()

        from core.chunker.processor import ChecksumProcessor
        results = Worker("worker-7", q, ChecksumProcessor()).run()

        assert results[0].worker_id == "worker-7"

    def test_event_callback(self, make_chunk):
        """Lifecycle events are emitted through the callback."""
        q = ChunkQueue(maxsize=5)
        q.put(make_chunk(0))
        q.close()

        events = []
        callback = MagicMock(side_effect=lambda t, d: events.append((t, d)))

        from core.chunker.processor import ChecksumProcessor
        Worker("w-0", q, ChecksumProcessor(), event_callback=callback).run()

        seen = [kind for kind, _ in events]
        assert "worker_status" in seen
        assert "chunk_processing" in seen
        assert "chunk_done" in seen

    def test_processed_count(self, make_chunk):
        """processed_count equals the number of chunks consumed."""
        q = ChunkQueue(maxsize=10)
        for i in range(5):
            q.put(make_chunk(i))
        q.close()

        from core.chunker.processor import ChecksumProcessor
        worker = Worker("w-0", q, ChecksumProcessor())
        worker.run()

        assert worker.processed_count == 5
|
||||
Reference in New Issue
Block a user