def test_load_abbs(self):
    """Check abbreviation loading for a .json path and for the .csv file.

    The ``abbreviations.json`` path must raise ``IncorrectFileType``; the
    ``abbreviations.csv`` path must yield a mapping whose ``VOWEL`` entry is
    the five vowels.
    """
    mappings_dir = os.path.join(PUBLIC_DIR, 'mappings')

    # The .json path is expected to be refused.
    json_path = os.path.join(mappings_dir, 'abbreviations.json')
    with self.assertRaises(IncorrectFileType):
        utils.load_abbreviations_from_file(json_path)

    # The .csv path is expected to load and contain the VOWEL abbreviation.
    csv_path = os.path.join(mappings_dir, 'abbreviations.csv')
    loaded = utils.load_abbreviations_from_file(csv_path)
    self.assertIn("VOWEL", loaded)
    self.assertEqual(loaded['VOWEL'], ['a', 'e', 'i', 'o', 'u'])
def __init__(self, mapping=None, abbreviations: Union[str, DefaultDict[str, List[str]]] = False, **kwargs):
    """Build the mapping from a rule list, a file path, or a keyword lookup.

    Args:
        mapping: Selects where the rules come from.

            * ``list`` -- passed through ``validate`` and used directly.
            * ``str`` ending in ``yaml``/``yml`` -- loaded with
              ``load_mapping_from_path`` and handed to
              ``process_loaded_config``.
            * any other ``str`` -- read with ``load_from_file`` and then
              validated.
            * anything else (including the default ``None``) -- the
              configuration is looked up from ``in_lang``/``out_lang`` or,
              failing that, ``id`` in ``kwargs``.
        abbreviations: A ``defaultdict`` (or any falsy value) is stored
            unchanged; any other truthy value is passed to
            ``load_abbreviations_from_file``. A falsy value disables
            abbreviation expansion. The default is ``False``.
        **kwargs: Stored, in order, on ``self.kwargs``.

    Raises:
        exceptions.MalformedLookup: If ``mapping`` is neither a list nor a
            string and ``kwargs`` holds neither both ``in_lang`` and
            ``out_lang`` nor ``id``.
    """
    # should these just be explicit instead of kwargs...
    # yes, they should
    self.allowable_kwargs = [
        'language_name', 'display_name', 'mapping', 'in_lang', 'out_lang',
        'out_delimiter', 'as_is', 'case_sensitive', 'rule_ordering',
        'escape_special', 'norm_form', 'prevent_feeding', 'reverse'
    ]
    self.kwargs = OrderedDict(kwargs)
    self.processed = False

    if isinstance(abbreviations, defaultdict) or not abbreviations:
        self.abbreviations = abbreviations
    else:
        self.abbreviations = load_abbreviations_from_file(abbreviations)

    # Handle user-supplied list
    if isinstance(mapping, list):
        self.mapping = validate(mapping)
    elif isinstance(mapping, str) and mapping.endswith(('yaml', 'yml')):
        loaded_config = load_mapping_from_path(mapping)
        self.process_loaded_config(loaded_config)
    elif isinstance(mapping, str):
        self.mapping = validate(load_from_file(mapping))
    elif "in_lang" in self.kwargs and "out_lang" in self.kwargs:
        loaded_config = find_mapping(self.kwargs['in_lang'], self.kwargs['out_lang'])
        self.process_loaded_config(loaded_config)
    elif 'id' in self.kwargs:
        loaded_config = self.find_mapping_by_id(self.kwargs['id'])
        self.process_loaded_config(loaded_config)
    else:
        raise exceptions.MalformedLookup()

    if self.abbreviations:
        expandable_keys = ('in', 'out', 'context_before', 'context_after')
        # Expand the longest abbreviation names first: if one name is a
        # substring of another, substituting the shorter one first would
        # rewrite part of the longer name and it would never match.
        longest_first = sorted(self.abbreviations.items(),
                               key=lambda item: len(item[0]),
                               reverse=True)
        for abb, stands_for in longest_first:
            abb_match = re.compile(abb)
            abb_repl = '|'.join(stands_for)
            # Only rules whose first entry has no 'match_pattern' key are
            # rewritten.
            if self.mapping and 'match_pattern' not in self.mapping[0]:
                for io in self.mapping:
                    for key in io.keys():
                        if key in expandable_keys and abb_match.search(io[key]):
                            io[key] = abb_match.sub(unicode_escape(abb_repl), io[key])

    if not self.processed:
        self.mapping = self.process_kwargs(self.mapping)
def __init__(
    self,
    mapping=None,
    abbreviations: Union[str, DefaultDict[str, List[str]]] = False,
    **kwargs,
):
    """Set up the mapping rules and expand abbreviations inside them.

    Args:
        mapping: A list of rules (validated as "user-supplied mapping"), a
            path ending in ``yaml``/``yml`` (loaded as a configuration), or
            any other path string (loaded and validated). For any other
            value the rules are resolved from ``kwargs``: by
            ``in_lang`` + ``out_lang``, then by ``id``, then as an empty
            rule list when ``type`` equals ``"unidecode"``.
        abbreviations: Stored unchanged when it is a ``defaultdict`` or
            falsy; otherwise passed to ``load_abbreviations_from_file``.
        **kwargs: Kept, in order, on ``self.kwargs``.

    Raises:
        exceptions.MalformedLookup: If none of the ways of resolving the
            rules listed above applies.
    """
    # should these just be explicit instead of kwargs...
    # yes, they should
    self.allowable_kwargs = [
        "language_name",
        "display_name",
        "mapping",
        "in_lang",
        "out_lang",
        "out_delimiter",
        "as_is",
        "case_sensitive",
        "rule_ordering",
        "escape_special",
        "norm_form",
        "prevent_feeding",
        "reverse",
        "type",
    ]
    self.kwargs = OrderedDict(kwargs)
    self.processed = False

    # Only a truthy value that is not already a defaultdict is loaded.
    if abbreviations and not isinstance(abbreviations, defaultdict):
        self.abbreviations = load_abbreviations_from_file(abbreviations)
    else:
        self.abbreviations = abbreviations

    # Handle user-supplied list
    if isinstance(mapping, list):
        self.mapping = validate(mapping, path="user-supplied mapping")
    elif isinstance(mapping, str) and mapping.endswith(("yaml", "yml")):
        config = load_mapping_from_path(mapping)
        self.process_loaded_config(config)
    elif isinstance(mapping, str):
        self.mapping = validate(load_from_file(mapping), path=mapping)
    elif "in_lang" in self.kwargs and "out_lang" in self.kwargs:
        config = find_mapping(self.kwargs["in_lang"], self.kwargs["out_lang"])
        self.process_loaded_config(config)
    elif "id" in self.kwargs:
        config = self.find_mapping_by_id(self.kwargs["id"])
        self.process_loaded_config(config)
    elif self.kwargs.get("type", "") == "unidecode":
        self.mapping = []
    else:
        raise exceptions.MalformedLookup()

    if self.abbreviations:
        rewritable = ("in", "out", "context_before", "context_after")
        # Longest abbreviation names are handled first.
        ordered = sorted(
            self.abbreviations.items(),
            key=lambda pair: len(pair[0]),
            reverse=True,
        )
        for name, expansions in ordered:
            pattern = re.compile(name)
            alternation = "|".join(expansions)
            # Skip when there are no rules or the first rule carries a
            # "match_pattern" key.
            if not self.mapping or "match_pattern" in self.mapping[0]:
                continue
            for rule in self.mapping:
                for field in rule:
                    if field not in rewritable:
                        continue
                    if pattern.search(rule[field]):
                        rule[field] = pattern.sub(unicode_escape(alternation), rule[field])

    if not self.processed:
        self.mapping = self.process_kwargs(self.mapping)