def make_de_definitions_parser():
    """Build a ``UniversalDefinitionsParser`` configured for German text.

    The parser is given line-splitting parameters that use the German
    abbreviation list (matched case-insensitively), two shared matchers
    from ``CommonDefinitionPatterns`` and two matchers from
    ``DeutscheParsingMethods``. Its ``prohibited_words`` attribute is set
    to the German articles and conjunctions.

    Returns:
        The configured ``UniversalDefinitionsParser`` instance.
    """
    # Line-splitting setup: break characters plus the German abbreviations.
    line_params = LineSplitParams()
    line_params.line_breaks = {'\n', '.', ';', '!', '?'}
    line_params.abbreviations = DeLanguageTokens.abbreviations
    line_params.abbr_ignore_case = True

    # Matching functions, in the order they are handed to the parser.
    matchers = [
        CommonDefinitionPatterns.match_es_def_by_semicolon,
        CommonDefinitionPatterns.match_acronyms,
        DeutscheParsingMethods.match_ist_jeder,
        DeutscheParsingMethods.match_im_sinne,
    ]

    parser = UniversalDefinitionsParser(matchers, line_params)
    # Articles and conjunctions are merged into one de-duplicated set.
    parser.prohibited_words = set(
        DeLanguageTokens.articles + DeLanguageTokens.conjunctions
    )
    return parser
def make_es_definitions_parser():
    """Build a ``UniversalDefinitionsParser`` configured for Spanish text.

    The parser is given line-splitting parameters that use the Spanish
    abbreviation list (matched case-insensitively), two shared matchers
    from ``CommonDefinitionPatterns`` and three matchers from
    ``SpanishParsingMethods``.

    Returns:
        The configured ``UniversalDefinitionsParser`` instance.
    """
    # Line-splitting setup: break characters plus the Spanish abbreviations.
    line_params = LineSplitParams()
    line_params.line_breaks = {'\n', '.', ';', '!', '?'}
    line_params.abbreviations = EsLanguageTokens.abbreviations
    line_params.abbr_ignore_case = True

    # Matching functions, in the order they are handed to the parser.
    matchers = [
        CommonDefinitionPatterns.match_es_def_by_semicolon,
        CommonDefinitionPatterns.match_acronyms,
        SpanishParsingMethods.match_es_def_by_hereafter,
        SpanishParsingMethods.match_es_def_by_reffered,
        SpanishParsingMethods.match_first_word_is,
    ]

    parser = UniversalDefinitionsParser(matchers, line_params)
    return parser