def test_construct_composite_template():
    """Create a template from a composite image holding an 'x' followed by an 'i'.

    The test expects 'x' to be chosen as the anchor and 'i' to be the only member.
    """
    # Initialize images of child symbols. The builtin 'int' is used as the dtype because
    # the 'np.int' alias was deprecated in NumPy 1.20 and removed in NumPy 1.24.
    images: Dict[MathMl, Dict[FontSize, List[np.ndarray]]] = defaultdict(dict)
    x_img = np.array([[0, 255, 0], [255, 0, 255], [0, 255, 0]], dtype=int)
    images["x"]["normal"] = [x_img]
    i_img = np.array([[0], [255], [0]], dtype=int)
    images["i"]["script"] = [i_img]

    # Image is white, except for...
    composite_symbol_img = np.full((3, 5), 255, dtype=int)
    # 'x' appearing at [left=0, top=0, width=3, height=3]
    composite_symbol_img[0:3, 0:3] = x_img
    # 'i' appearing at [left=4, top=0, width=1, height=3]
    composite_symbol_img[0:3, 4:5] = i_img

    # List of children expected to be found in the composite symbol image.
    children = ["x", "i"]

    # Create the composite symbol template.
    composite_template = create_symbol_template(composite_symbol_img, images, children)
    assert composite_template.anchor == SymbolId("x", "normal")
    members = composite_template.members
    assert len(members) == 1
    assert members[0].symbol_id == SymbolId("i", "script")
    # The expected values equal the offset from the center of 'x' (1.5, 1.5) to the
    # center of 'i' (4.5, 1.5) in the composite image.
    assert members[0].center.x == pytest.approx(3.0)
    assert members[0].center.y == pytest.approx(0.0)
def test_do_not_match_composite_template_if_component_missing():
    """Expect no match when the index holds the anchor but not the member symbol."""
    anchor_id = SymbolId(mathml="x", level="normal")
    i_member = Component(SymbolId("i", "script"), center=p(3.0, 0.5))
    template = SymbolTemplate(anchor=anchor_id, members=[i_member])

    # Only the anchor 'x' is indexed; the symbol 'i' is deliberately left out.
    anchor_only = [instance("x", "normal", 100, 100, 3, 3)]
    index = TokenIndex(tokens=anchor_only)

    matches = list(find_symbols(template, index))
    assert matches == []
def test_do_not_match_composite_template_if_component_too_far_from_expected_position():
    """Expect no match when 'i' lies several pixels beyond its templated position."""
    anchor_id = SymbolId(mathml="x", level="normal")
    i_member = Component(SymbolId("i", "script"), center=p(3.0, 0.5))
    template = SymbolTemplate(anchor=anchor_id, members=[i_member])

    anchor_token = instance("x", "normal", 100, 100, 3, 3)  # center @ 101.5, 101.5
    # This 'i' sits too many pixels away to count as a match for the 'i' component.
    distant_token = instance("i", "script", 107, 101, 1, 2)  # center @ 107.5, 102
    index = TokenIndex(tokens=[anchor_token, distant_token])

    matches = list(find_symbols(template, index))
    assert matches == []
def test_exact_match_composite_template():
    """Expect one match when 'i' sits exactly at its templated offset from 'x'."""
    anchor_id = SymbolId(mathml="x", level="normal")
    i_member = Component(SymbolId("i", "script"), center=p(3.0, 0.5))
    template = SymbolTemplate(anchor=anchor_id, members=[i_member])

    anchor_token = instance("x", "normal", 100, 100, 3, 3)  # center @ 101.5, 101.5
    member_token = instance("i", "script", 104, 101, 1, 2)  # center @ 104.5, 102
    index = TokenIndex(tokens=[anchor_token, member_token])

    matches = list(find_symbols(template, index))
    assert len(matches) == 1
    # The expected rectangle spans both tokens.
    (only_match,) = matches
    assert only_match == Rectangle(100, 100, 5, 3)
def test_expect_blank_border_around_tokens():
    """Check that a token touching a stray pixel is rejected when a blank border is required.

    The composite image contains an 'x' and an 'i', but a junk black pixel directly borders
    the 'i'. With 'require_blank_border_around_tokens=True', the test expects the template
    to have 'x' as its anchor and no members.
    """
    # Initialize images of child symbols. The builtin 'int' is used as the dtype because
    # the 'np.int' alias was deprecated in NumPy 1.20 and removed in NumPy 1.24.
    images: Dict[MathMl, Dict[FontSize, List[np.ndarray]]] = defaultdict(dict)
    x_img = np.array([[0, 255, 0], [255, 0, 255], [0, 255, 0]], dtype=int)
    images["x"]["normal"] = [x_img]
    i_img = np.array([[0], [0]], dtype=int)
    images["i"]["script"] = [i_img]

    # Composite image is white, except for...
    composite_symbol_img = np.full((3, 6), 255, dtype=int)
    # 'x' appearing at [left=0, top=0, width=3, height=3]
    composite_symbol_img[0:3, 0:3] = x_img
    # 'i' appearing at [left=4, top=1, width=1, height=2]
    composite_symbol_img[1:3, 4:5] = i_img
    # And a junk black pixel bordering the 'i' on the right.
    composite_symbol_img[1, 5] = 0

    # List of children expected to be found in the composite symbol image.
    # (Though note that the 'i' should not be detected).
    children = ["x", "i"]

    # Create the composite symbol template.
    composite_template = create_symbol_template(
        composite_symbol_img, images, children, require_blank_border_around_tokens=True
    )
    assert composite_template.anchor == SymbolId("x", "normal")
    members = composite_template.members
    assert len(members) == 0
def test_fuzzy_match_composite_template():
    """Expect one match when 'i' is a single pixel off its templated position."""
    anchor_id = SymbolId(mathml="x", level="normal")
    i_member = Component(SymbolId("i", "script"), center=p(3.0, 0.5))
    template = SymbolTemplate(anchor=anchor_id, members=[i_member])

    anchor_token = instance("x", "normal", 100, 100, 3, 3)  # center @ 101.5, 101.5
    # This 'i' is one pixel off its expected position, which should fall
    # just inside the tolerated variance.
    shifted_token = instance("i", "script", 105, 101, 1, 2)  # center @ 105.5, 102
    index = TokenIndex(tokens=[anchor_token, shifted_token])

    matches = list(find_symbols(template, index))
    assert len(matches) == 1
    # The expected rectangle spans both tokens, including the one-pixel shift.
    (only_match,) = matches
    assert only_match == Rectangle(100, 100, 6, 3)
def test_find_symbols_in_index():
    """Query a TokenIndex holding two 'x' tokens and check how many results each query gives."""
    near_origin = SymbolInstance(
        id_=SymbolId(mathml="x", level="normal"),
        location=Rectangle(0, 0, 4, 2),  # Center: 2, 1
    )
    further_out = SymbolInstance(
        id_=SymbolId(mathml="x", level="normal"),
        location=Rectangle(10, 10, 4, 2),  # Center: 12, 11
    )
    index = TokenIndex(tokens=[near_origin, further_out])

    # Each row: (symbol to look up, query center, tolerance, expected number of results).
    queries = [
        # Find instances of 'x'.
        (SymbolId("x", "normal"), p(2, 1), p(1, 1), 1),
        (SymbolId("x", "normal"), p(12, 11), p(1, 1), 1),
        (SymbolId("x", "normal"), p(5, 5), p(10, 10), 2),
        # Fail to find when query keys do not match, or tolerance is too small.
        (SymbolId("y", "normal"), p(2, 1), p(1, 1), 0),
        (SymbolId("x", "script"), p(2, 1), p(1, 1), 0),
        (SymbolId("x", "normal"), p(0, 0), p(1, 1), 0),
    ]
    for symbol_id, center, tolerance, expected_count in queries:
        found = index.find(symbol_id, center, tolerance)
        assert len(found) == expected_count
def test_construct_composite_template_without_repeating_symbols():
    """Check that a subsymbol appearing twice does not produce duplicated components.

    An earlier version of the template creation function would duplicate components if a
    single subsymbol appeared multiple times in the composite symbol. With two 'i' glyphs
    in the image, the test expects one to be the anchor and exactly one member to remain.
    """
    # The builtin 'int' is used as the dtype because the 'np.int' alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    images: Dict[MathMl, Dict[FontSize, List[np.ndarray]]] = defaultdict(dict)
    i_img = np.array([[0], [255], [0]], dtype=int)
    images["i"]["normal"] = [i_img]

    # Composite image is white, except for...
    composite_symbol_img = np.full((3, 3), 255, dtype=int)
    # 'i' appearing at [left=0, top=0, width=1, height=3]
    composite_symbol_img[0:3, 0:1] = i_img
    # 'i' appearing at [left=2, top=0, width=1, height=3]
    composite_symbol_img[0:3, 2:3] = i_img

    # List of children expected to be found in the composite symbol image.
    children = ["i", "i"]

    # Create the composite symbol template.
    composite_template = create_symbol_template(composite_symbol_img, images, children)
    assert composite_template.anchor == SymbolId("i", "normal")
    members = composite_template.members
    assert len(members) == 1
def instance(
    mathml: str, level: FontSize, left: int, top: int, width: int, height: int
) -> SymbolInstance:
    """Build a SymbolInstance from a symbol's MathML, font level and bounding box.

    'mathml' and 'level' are combined into the SymbolId; 'left', 'top', 'width' and
    'height' are passed, in that order, to Rectangle for the instance's location.
    """
    symbol_id = SymbolId(mathml, level)
    bounds = Rectangle(left, top, width, height)
    return SymbolInstance(id_=symbol_id, location=bounds)