示例#1
0
 def test_load_abbs(self):
     """Check abbreviation loading from the public ``mappings`` directory.

     Loading ``abbreviations.json`` is expected to raise
     ``IncorrectFileType``; loading ``abbreviations.csv`` is expected to
     yield a mapping in which ``VOWEL`` lists the five vowels.
     """
     mappings_dir = os.path.join(PUBLIC_DIR, 'mappings')
     with self.assertRaises(IncorrectFileType):
         utils.load_abbreviations_from_file(
             os.path.join(mappings_dir, 'abbreviations.json'))
     abbs = utils.load_abbreviations_from_file(
         os.path.join(mappings_dir, 'abbreviations.csv'))
     # assertIn shows the container in the failure message, whereas
     # assertTrue(x in y) only reports "False is not true".
     self.assertIn("VOWEL", abbs)
     self.assertEqual(abbs['VOWEL'], ['a', 'e', 'i', 'o', 'u'])
示例#2
0
 def __init__(self,
              mapping=None,
              abbreviations: Union[str, DefaultDict[str,
                                                    List[str]]] = False,
              **kwargs):
     """Set up the mapping rules and expand abbreviations inside them.

     Args:
         mapping: Where the rules come from.
             * ``list`` -- passed through ``validate``.
             * ``str`` ending in ``yaml``/``yml`` -- loaded with
               ``load_mapping_from_path`` and handed to
               ``process_loaded_config``.
             * any other ``str`` -- loaded with ``load_from_file`` and
               validated.
             * anything else -- the rules are looked up from ``kwargs``
               (``in_lang`` + ``out_lang``, or ``id``).
         abbreviations: A ``defaultdict`` or any falsy value is stored
             unchanged; any other value is passed to
             ``load_abbreviations_from_file``.
         **kwargs: Stored in order on ``self.kwargs``.

     Raises:
         exceptions.MalformedLookup: ``mapping`` is neither a list nor a
             string and ``kwargs`` has neither the ``in_lang``/``out_lang``
             pair nor ``id``.
     """
     # should these just be explicit instead of kwargs...
     # yes, they should
     self.allowable_kwargs = [
         'language_name', 'display_name', 'mapping', 'in_lang', 'out_lang',
         'out_delimiter', 'as_is', 'case_sensitive', 'rule_ordering',
         'escape_special', 'norm_form', 'prevent_feeding', 'reverse'
     ]
     self.kwargs = OrderedDict(kwargs)
     self.processed = False
     if isinstance(abbreviations, defaultdict) or not abbreviations:
         self.abbreviations = abbreviations
     else:
         self.abbreviations = load_abbreviations_from_file(abbreviations)
     # Handle user-supplied list
     if isinstance(mapping, list):
         self.mapping = validate(mapping)
     elif isinstance(mapping, str) and mapping.endswith(('yaml', 'yml')):
         loaded_config = load_mapping_from_path(mapping)
         self.process_loaded_config(loaded_config)
     elif isinstance(mapping, str):
         self.mapping = validate(load_from_file(mapping))
     elif "in_lang" in self.kwargs and "out_lang" in self.kwargs:
         loaded_config = find_mapping(self.kwargs['in_lang'],
                                      self.kwargs['out_lang'])
         self.process_loaded_config(loaded_config)
     elif 'id' in self.kwargs:
         loaded_config = self.find_mapping_by_id(self.kwargs['id'])
         self.process_loaded_config(loaded_config)
     else:
         raise exceptions.MalformedLookup()
     if self.abbreviations:
         # Rules are only rewritten when the mapping is non-empty and its
         # first rule carries no 'match_pattern'. The loop below changes
         # values, never keys or list length, so this can be checked once.
         expand_rules = bool(
             self.mapping) and 'match_pattern' not in self.mapping[0]
         expandable = ('in', 'out', 'context_before', 'context_after')
         # Expand the longest abbreviation names first. Otherwise a short
         # name that is a substring of a longer one (e.g. "V" inside
         # "VOWEL") would rewrite part of the longer name before it gets
         # its own turn.
         longest_first = sorted(self.abbreviations.items(),
                                key=lambda item: len(item[0]),
                                reverse=True)
         for abb, stands_for in longest_first:
             # The abbreviation name is used as a regular expression.
             abb_match = re.compile(abb)
             # Each occurrence becomes an alternation of its members.
             abb_repl = '|'.join(stands_for)
             if not expand_rules:
                 continue
             for io in self.mapping:
                 for key in io:
                     if key in expandable and abb_match.search(io[key]):
                         io[key] = abb_match.sub(unicode_escape(abb_repl),
                                                 io[key])
     # NOTE(review): process_loaded_config presumably sets self.processed;
     # its body is not visible here -- confirm.
     if not self.processed:
         self.mapping = self.process_kwargs(self.mapping)
示例#3
0
文件: __init__.py 项目: deltork/g2p
 def __init__(
     self,
     mapping=None,
     abbreviations: Union[str, DefaultDict[str, List[str]]] = False,
     **kwargs,
 ):
     """Resolve the rule source, then expand abbreviations in the rules.

     Args:
         mapping: A list of rules (validated directly), a path ending in
             ``yaml``/``yml`` (loaded as a config), or any other path
             (loaded and validated). For any other value the rules are
             located through ``kwargs``: ``in_lang`` + ``out_lang``, then
             ``id``, then ``type == "unidecode"`` (which yields an empty
             rule list).
         abbreviations: Stored unchanged when it is a ``defaultdict`` or
             falsy; otherwise passed to ``load_abbreviations_from_file``.
         **kwargs: Kept in order on ``self.kwargs``.

     Raises:
         exceptions.MalformedLookup: None of the rule sources above
             applies.
     """
     # should these just be explicit instead of kwargs...
     # yes, they should
     self.allowable_kwargs = [
         "language_name", "display_name", "mapping", "in_lang", "out_lang",
         "out_delimiter", "as_is", "case_sensitive", "rule_ordering",
         "escape_special", "norm_form", "prevent_feeding", "reverse",
         "type",
     ]
     self.kwargs = OrderedDict(kwargs)
     self.processed = False

     if not isinstance(abbreviations, defaultdict) and abbreviations:
         self.abbreviations = load_abbreviations_from_file(abbreviations)
     else:
         self.abbreviations = abbreviations

     # Work out where the rules come from.
     lookup = self.kwargs
     if isinstance(mapping, list):
         # Handle user-supplied list
         self.mapping = validate(mapping, path="user-supplied mapping")
     elif isinstance(mapping, str):
         if mapping.endswith(("yaml", "yml")):
             self.process_loaded_config(load_mapping_from_path(mapping))
         else:
             self.mapping = validate(load_from_file(mapping), path=mapping)
     elif "in_lang" in lookup and "out_lang" in lookup:
         self.process_loaded_config(
             find_mapping(lookup["in_lang"], lookup["out_lang"]))
     elif "id" in lookup:
         self.process_loaded_config(self.find_mapping_by_id(lookup["id"]))
     elif lookup.get("type", "") == "unidecode":
         self.mapping = []
     else:
         raise exceptions.MalformedLookup()

     if self.abbreviations:
         expandable_fields = ("in", "out", "context_before",
                              "context_after")
         # Longest abbreviation names are handled first.
         by_longest_name = sorted(
             self.abbreviations.items(),
             key=lambda pair: len(pair[0]),
             reverse=True,
         )
         for name, members in by_longest_name:
             # The abbreviation name itself serves as the regex.
             pattern = re.compile(name)
             alternation = "|".join(members)
             # Skip empty mappings and those whose first rule already has
             # a "match_pattern" entry.
             if not self.mapping or "match_pattern" in self.mapping[0]:
                 continue
             for rule in self.mapping:
                 for field in rule.keys():
                     if field not in expandable_fields:
                         continue
                     if pattern.search(rule[field]):
                         rule[field] = pattern.sub(
                             unicode_escape(alternation), rule[field])

     if not self.processed:
         self.mapping = self.process_kwargs(self.mapping)