# mediaproc/tests/detect/test_brand_resolver.py

"""Tests for BrandResolver stage."""

import numpy as np
import pytest

from detect.models import BoundingBox, Frame, TextCandidate
from detect.profiles.base import BrandDictionary, ResolverConfig
from detect.stages.brand_resolver import resolve_brands, _exact_match, _fuzzy_match


# Shared brand dictionary used by every test in this module. Each key is the
# brand name the tests expect back from the resolver; the list holds the
# spellings/aliases registered for it (e.g. "fly emirates" -> "Emirates").
DICTIONARY = BrandDictionary(brands={
    "Nike": ["nike", "NIKE", "swoosh"],
    "Adidas": ["adidas", "ADIDAS"],
    "Coca-Cola": ["coca-cola", "coca cola", "coke", "COCA-COLA"],
    "Emirates": ["emirates", "fly emirates", "EMIRATES"],
})

# Resolver configuration shared by the resolve_brands tests. The value 75 is
# the same threshold that test_fuzzy_match passes to _fuzzy_match directly.
CONFIG = ResolverConfig(fuzzy_threshold=75)


def _make_candidate(text: str, confidence: float = 0.9) -> TextCandidate:
    """Build a TextCandidate for ``text`` on a placeholder frame and box.

    Args:
        text: The OCR text the candidate should carry.
        confidence: Value stored as the candidate's ``ocr_confidence``.

    Returns:
        A TextCandidate whose frame is an all-zero 10x10 RGB image and whose
        bounding box covers that whole image.
    """
    blank_image = np.zeros((10, 10, 3), dtype=np.uint8)
    placeholder_frame = Frame(
        sequence=0,
        chunk_id=0,
        timestamp=1.0,
        image=blank_image,
    )
    placeholder_box = BoundingBox(
        x=0, y=0, w=10, h=10, confidence=0.8, label="text",
    )
    return TextCandidate(
        frame=placeholder_frame,
        bbox=placeholder_box,
        text=text,
        ocr_confidence=confidence,
    )


def test_exact_match():
    """_exact_match maps known spellings to their brand and misses to None."""
    # (input text, brand expected back) pairs, checked in order.
    expectations = (
        ("Nike", "Nike"),
        ("nike", "Nike"),
        ("COCA-COLA", "Coca-Cola"),
        ("fly emirates", "Emirates"),
    )
    for raw_text, expected_brand in expectations:
        assert _exact_match(raw_text, DICTIONARY) == expected_brand

    # Text that is not in the dictionary yields no brand at all.
    assert _exact_match("unknown brand", DICTIONARY) is None


def test_fuzzy_match():
    """_fuzzy_match recovers near-miss spellings and rejects unrelated text."""
    # Digit substituted for a letter.
    matched_brand, match_score = _fuzzy_match("Nik3", DICTIONARY, threshold=75)
    assert matched_brand == "Nike"
    assert match_score >= 75

    # "l" read in place of "i".
    matched_brand, _ = _fuzzy_match("adldas", DICTIONARY, threshold=75)
    assert matched_brand == "Adidas"

    # Nothing in the dictionary resembles this string.
    matched_brand, _ = _fuzzy_match("xyzxyzxyz", DICTIONARY, threshold=75)
    assert matched_brand is None


def test_resolve_exact():
    """Candidates whose text is a known spelling are all matched, in order."""
    inputs = [_make_candidate(text) for text in ("Nike", "EMIRATES")]

    hits, misses = resolve_brands(inputs, DICTIONARY, CONFIG)

    assert len(hits) == 2
    assert len(misses) == 0
    # Matches are expected in the same order as the input candidates.
    for position, expected_brand in enumerate(("Nike", "Emirates")):
        assert hits[position].brand == expected_brand


def test_resolve_fuzzy():
    """A single OCR-misread candidate is resolved through fuzzy matching.

    The input uses a capital "I" where the brand has a lowercase "l", so it
    is not one of the spellings listed in DICTIONARY.
    """
    candidates = [_make_candidate("coca coIa")]  # OCR misread
    matched, unresolved = resolve_brands(candidates, DICTIONARY, CONFIG)
    assert len(matched) == 1
    # Check the other partition too, as the sibling resolve tests do: a
    # matched candidate must not also be reported as unresolved.
    assert len(unresolved) == 0
    assert matched[0].brand == "Coca-Cola"


def test_resolve_unresolved():
    """Text unrelated to any brand ends up in the unresolved list only."""
    inputs = [_make_candidate("random garbage text")]

    hits, misses = resolve_brands(inputs, DICTIONARY, CONFIG)

    assert len(hits) == 0
    assert len(misses) == 1


def test_resolve_mixed():
    """A batch mixing exact, unknown and misread text is split 2 / 1."""
    texts = ("Nike", "unknown", "adldas")
    inputs = [_make_candidate(text) for text in texts]

    hits, misses = resolve_brands(inputs, DICTIONARY, CONFIG)

    # "Nike" is an exact hit and "adldas" a fuzzy hit for Adidas.
    assert len(hits) == 2
    # "unknown" matches nothing.
    assert len(misses) == 1


def test_events_emitted(monkeypatch):
    """Resolving with a job_id pushes both "log" and "detection" events."""
    recorded = []

    def _record_event(job_id, etype, data):
        # Stand-in for the real emitter: keep only the event type and payload.
        recorded.append((etype, data))

    monkeypatch.setattr("detect.emit.push_detect_event", _record_event)

    resolve_brands(
        [_make_candidate("Nike")],
        DICTIONARY,
        CONFIG,
        job_id="test-job",
    )

    seen_types = [etype for etype, _ in recorded]
    assert "log" in seen_types
    assert "detection" in seen_types