def test_should_fill_multi_token_begining_of_line_with_begin_prefix(self):
    """Untagged leading tokens are filled from a multi-token B-/I- run.

    With extend-to-line enabled and merging disabled for TAG1, the two
    leading ``None`` entries are expected to be tagged, and the begin
    prefix is expected to move to the first token of the line.
    """
    line_token_tags = [None, None, B_TAG1, I_TAG1, I_TAG1, I_TAG1]
    expected_token_tags = [B_TAG1] + [I_TAG1] * 5

    actual_token_tags = get_extended_line_token_tags(
        line_token_tags,
        extend_to_line_enabled_map={TAG1: True},
        merge_enabled_map={TAG1: False}
    )

    assert actual_token_tags == expected_token_tags
def _merge_sub_tags(
        structured_document: AbstractStructuredDocument,
        tokens: List[Any],
        config: ReferenceAnnotatorConfig):
    """Map and merge the sub tags of ``tokens``, writing them back in place.

    The current sub tag of every token is read from ``structured_document``,
    passed through ``_map_tags`` using ``config.sub_tag_map`` and then through
    ``get_extended_line_token_tags`` with line extension disabled and merging
    enabled only for the tags listed in ``config.merge_enabled_sub_tags``.
    Every truthy resulting sub tag is stored on its token.

    Args:
        structured_document: Document providing ``get_sub_tag`` / ``set_sub_tag``.
        tokens: Tokens whose sub tags are processed together.
        config: Supplies ``sub_tag_map`` and ``merge_enabled_sub_tags``.

    Returns:
        The same ``structured_document`` that was passed in.
    """
    original_sub_tags = [structured_document.get_sub_tag(t) for t in tokens]
    mapped_sub_tags = _map_tags(original_sub_tags, config.sub_tag_map)

    # Merging is opt-in per sub tag; anything not listed uses the default
    # passed below (disabled).
    merge_enabled_map = dict.fromkeys(config.merge_enabled_sub_tags, True)
    merged_sub_tags = get_extended_line_token_tags(
        mapped_sub_tags,
        extend_to_line_enabled_map={},
        merge_enabled_map=merge_enabled_map,
        default_merge_enabled=False,
        default_extend_to_line_enabled=False
    )
    LOGGER.debug(
        'sub tokens, transformed: %s -> %s -> %s (tokens: %s)',
        original_sub_tags, mapped_sub_tags, merged_sub_tags, tokens
    )

    for token, merged_sub_tag in zip(tokens, merged_sub_tags):
        # Falsy results are skipped, leaving that token's existing sub tag
        # untouched.
        if merged_sub_tag:
            structured_document.set_sub_tag(token, merged_sub_tag)
    return structured_document
def test_should_fill_begining_of_line_if_not_enabled_by_tag_config_with_begin_prefix(
        self):
    """The line start stays untagged when extension is disabled for TAG1.

    NOTE: despite the "should_fill" in the test name, the assertion expects
    the input to come back unchanged (the leading ``None`` is kept).
    """
    line_token_tags = [None, B_TAG1, I_TAG1]
    expected_token_tags = [None, B_TAG1, I_TAG1]

    actual_token_tags = get_extended_line_token_tags(
        line_token_tags,
        extend_to_line_enabled_map={TAG1: False}
    )

    assert actual_token_tags == expected_token_tags
def test_should_not_fill_line_if_minority_tag(self):
    """A single tagged token among four untagged ones is not extended."""
    token_tags = [None, None, TAG1, None, None]

    actual_token_tags = get_extended_line_token_tags(
        token_tags,
        extend_to_line_enabled_map={TAG1: True}
    )

    # NOTE(review): the expected value is the very list that was passed in;
    # if the function ever mutated its input in place this comparison would
    # pass regardless - consider comparing against a copy.
    assert actual_token_tags == token_tags
def test_should_not_fill_gaps_if_not_same_tag(self):
    """A gap between two different tags is expected to stay untagged."""
    line_token_tags = [TAG1, None, TAG2]
    expected_token_tags = [TAG1, None, TAG2]

    actual_token_tags = get_extended_line_token_tags(
        line_token_tags,
        extend_to_line_enabled_map={TAG1: True, TAG2: True}
    )

    assert actual_token_tags == expected_token_tags
def test_should_not_fill_gaps_if_same_tag_with_begin_prefix_but_merge_disabled(
        self):
    """Two B-prefixed TAG1 tokens stay separate when merging is disabled.

    The ``None`` between them is expected to remain untagged even though
    extend-to-line is enabled for TAG1.
    """
    line_token_tags = [B_TAG1, None, B_TAG1]
    expected_token_tags = [B_TAG1, None, B_TAG1]

    actual_token_tags = get_extended_line_token_tags(
        line_token_tags,
        extend_to_line_enabled_map={TAG1: True},
        merge_enabled_map={TAG1: False}
    )

    assert actual_token_tags == expected_token_tags
def test_should_adjust_begin_inside_tag_prefix_if_merge_enabled(self):
    """A second B- prefix is expected to become I- when merging is enabled."""
    line_token_tags = [B_TAG1, I_TAG1, B_TAG1]
    expected_token_tags = [B_TAG1, I_TAG1, I_TAG1]

    actual_token_tags = get_extended_line_token_tags(
        line_token_tags,
        extend_to_line_enabled_map={TAG1: True},
        merge_enabled_map={TAG1: True}
    )

    assert actual_token_tags == expected_token_tags
def test_should_fill_end_of_line_with_begin_prefix(self):
    """A trailing untagged token is expected to continue the run as I-TAG1."""
    line_token_tags = [B_TAG1, I_TAG1, None]
    expected_token_tags = [B_TAG1, I_TAG1, I_TAG1]

    actual_token_tags = get_extended_line_token_tags(
        line_token_tags,
        extend_to_line_enabled_map={TAG1: True}
    )

    assert actual_token_tags == expected_token_tags
def test_should_fill_end_of_line(self):
    """A trailing untagged token is expected to take the line's TAG1."""
    line_token_tags = [TAG1, TAG1, None]
    expected_token_tags = [TAG1, TAG1, TAG1]

    actual_token_tags = get_extended_line_token_tags(
        line_token_tags,
        extend_to_line_enabled_map={TAG1: True},
    )

    assert actual_token_tags == expected_token_tags