Exemplo n.º 1
0
 def test_should_fill_multi_token_begining_of_line_with_begin_prefix(self):
     """Two leading untagged tokens are filled from the following tag run.

     With extend-to-line enabled and merging disabled for TAG1, the first
     token is expected to become B_TAG1 and every later token I_TAG1.
     """
     token_tags = [None, None, B_TAG1, I_TAG1, I_TAG1, I_TAG1]
     expected_tags = [B_TAG1] + [I_TAG1] * 5
     actual_tags = get_extended_line_token_tags(
         token_tags,
         extend_to_line_enabled_map={TAG1: True},
         merge_enabled_map={TAG1: False},
     )
     assert actual_tags == expected_tags
def _merge_sub_tags(
        structured_document: AbstractStructuredDocument,
        tokens: List[Any],
        config: ReferenceAnnotatorConfig):
    """Map, transform and write back the sub tags of ``tokens``.

    The current sub tag of every token is read from the document, passed
    through ``config.sub_tag_map`` and then through
    ``get_extended_line_token_tags`` with merging switched on only for the
    tags listed in ``config.merge_enabled_sub_tags`` (line extension is
    disabled for all tags). Every truthy resulting tag is stored back on
    its token.

    Args:
        structured_document: document providing ``get_sub_tag`` and
            ``set_sub_tag`` for the tokens.
        tokens: tokens whose sub tags are processed, in order.
        config: supplies ``sub_tag_map`` and ``merge_enabled_sub_tags``.

    Returns:
        The same ``structured_document`` that was passed in.
    """
    # Collect all current sub tags up front, before anything is written back.
    original_sub_tags = []
    for current_token in tokens:
        original_sub_tags.append(structured_document.get_sub_tag(current_token))

    renamed_sub_tags = _map_tags(original_sub_tags, config.sub_tag_map)

    # Only the explicitly configured sub tags may be merged.
    merge_flags = dict.fromkeys(config.merge_enabled_sub_tags, True)
    final_sub_tags = get_extended_line_token_tags(
        renamed_sub_tags,
        extend_to_line_enabled_map={},
        merge_enabled_map=merge_flags,
        default_merge_enabled=False,
        default_extend_to_line_enabled=False
    )

    LOGGER.debug(
        'sub tokens, transformed: %s -> %s -> %s (tokens: %s)',
        original_sub_tags, renamed_sub_tags, final_sub_tags, tokens
    )

    # Tokens whose resulting sub tag is falsy are left untouched.
    for current_token, new_sub_tag in zip(tokens, final_sub_tags):
        if new_sub_tag:
            structured_document.set_sub_tag(current_token, new_sub_tag)
    return structured_document
Exemplo n.º 3
0
 def test_should_fill_begining_of_line_if_not_enabled_by_tag_config_with_begin_prefix(
         self):
     """The leading untagged token stays untagged when TAG1 is disabled.

     NOTE(review): the method name says "should_fill", but the assertion
     expects the tags to come back unchanged when extend-to-line is
     disabled for TAG1.
     """
     token_tags = [None, B_TAG1, I_TAG1]
     expected_tags = [None, B_TAG1, I_TAG1]
     actual_tags = get_extended_line_token_tags(
         token_tags,
         extend_to_line_enabled_map={TAG1: False},
     )
     assert actual_tags == expected_tags
Exemplo n.º 4
0
 def test_should_not_fill_line_if_minority_tag(self):
     """A single TAG1 among five tokens must not be spread over the line.

     The result is compared against the very list object passed in.
     """
     token_tags = [None, None, TAG1, None, None]
     actual_tags = get_extended_line_token_tags(
         token_tags,
         extend_to_line_enabled_map={TAG1: True},
     )
     assert actual_tags == token_tags
Exemplo n.º 5
0
 def test_should_not_fill_gaps_if_not_same_tag(self):
     """An untagged token between TAG1 and TAG2 is expected to stay untagged.

     Extend-to-line is enabled for both tags.
     """
     enabled_for_both = {TAG1: True, TAG2: True}
     actual_tags = get_extended_line_token_tags(
         [TAG1, None, TAG2],
         extend_to_line_enabled_map=enabled_for_both,
     )
     expected_tags = [TAG1, None, TAG2]
     assert actual_tags == expected_tags
Exemplo n.º 6
0
 def test_should_not_fill_gaps_if_same_tag_with_begin_prefix_but_merge_disabled(
         self):
     """The gap between two B_TAG1 tokens stays open when merging is off.

     Extend-to-line is enabled for TAG1 but merging is disabled, so the
     tags are expected to come back unchanged.
     """
     token_tags = [B_TAG1, None, B_TAG1]
     expected_tags = [B_TAG1, None, B_TAG1]
     actual_tags = get_extended_line_token_tags(
         token_tags,
         extend_to_line_enabled_map={TAG1: True},
         merge_enabled_map={TAG1: False},
     )
     assert actual_tags == expected_tags
Exemplo n.º 7
0
 def test_should_adjust_begin_inside_tag_prefix_if_merge_enabled(self):
     """With merging enabled, the second B_TAG1 is expected to become I_TAG1."""
     token_tags = [B_TAG1, I_TAG1, B_TAG1]
     expected_tags = [B_TAG1, I_TAG1, I_TAG1]
     actual_tags = get_extended_line_token_tags(
         token_tags,
         extend_to_line_enabled_map={TAG1: True},
         merge_enabled_map={TAG1: True},
     )
     assert actual_tags == expected_tags
Exemplo n.º 8
0
 def test_should_fill_end_of_line_with_begin_prefix(self):
     """A trailing untagged token after B_TAG1, I_TAG1 is expected to get I_TAG1."""
     token_tags = [B_TAG1, I_TAG1, None]
     expected_tags = [B_TAG1, I_TAG1, I_TAG1]
     actual_tags = get_extended_line_token_tags(
         token_tags,
         extend_to_line_enabled_map={TAG1: True},
     )
     assert actual_tags == expected_tags
Exemplo n.º 9
0
 def test_should_fill_end_of_line(self):
     """A trailing untagged token after two TAG1 tokens is expected to get TAG1."""
     token_tags = [TAG1, TAG1, None]
     expected_tags = [TAG1, TAG1, TAG1]
     actual_tags = get_extended_line_token_tags(
         token_tags,
         extend_to_line_enabled_map={TAG1: True},
     )
     assert actual_tags == expected_tags