def check_ipa_known_segs(mappings_to_check=False):
    """Warn about IPA mapping outputs that do not survive IPA segmentation.

    Every entry of ``MAPPINGS_AVAILABLE`` whose ``out_lang`` is selected and
    ends with ``-ipa`` is inspected. For each of its rules, the ``out`` string
    is split with ``Distance().fm.ipa_segs`` and joined back together; when
    the result differs from the original string a warning is logged. If at
    least one such warning was emitted, a closing warning pointing at the
    panphon project is logged too.

    Args:
        mappings_to_check: Container of ``out_lang`` values to restrict the
            check to. Any falsy value (the default) selects every mapping
            listed in ``MAPPINGS_AVAILABLE``.

    Returns:
        None. Findings are reported through ``LOGGER`` only.
    """
    ipa_checker = distance.Distance()
    if not mappings_to_check:
        mappings_to_check = [entry['out_lang'] for entry in MAPPINGS_AVAILABLE]

    # Resolve the selection up front, before any rule is examined.
    selected = [
        entry
        for entry in MAPPINGS_AVAILABLE
        if entry['out_lang'] in mappings_to_check
    ]

    found_error = False
    for mapping in selected:
        # Only mappings that target IPA are subject to this check.
        if not mapping['out_lang'].endswith('-ipa'):
            continue
        for rule in mapping['mapping_data']:
            segments = ipa_checker.fm.ipa_segs(rule['out'])
            # Characters dropped by the segmenter make the round trip lossy.
            if ''.join(segments) != rule['out']:
                LOGGER.warning(
                    f"Output '{rule['out']}' in rule {rule} in mapping between {mapping['in_lang']} and {mapping['out_lang']} is not recognized as valid IPA by panphon. You may ignore this warning if you know it gets remapped to IPA later."
                )
                found_error = True

    if found_error:
        LOGGER.warning(
            "Please refer to https://github.com/dmort27/panphon for information about panphon."
        )
def get_multitree(voc_fd, lang_id):
    """Build one BK-tree of vocabulary words per initial letter.

    Each line read from ``voc_fd`` must hold exactly two tab-separated
    fields; only the second one (the word) is used. Words are lower-cased
    and grouped under the lower-cased form of their first character, and
    each group is indexed in a ``pybktree.BKTree`` whose distance function
    is ``EditDistanceWrapper(lang_id, ...).edit_distance``.

    Args:
        voc_fd: Object exposing ``readlines()`` that yields the vocabulary
            lines.
        lang_id: Language identifier handed to ``EditDistanceWrapper``.

    Returns:
        dict mapping each initial letter to the BK-tree of the words that
        start with it.

    Raises:
        ValueError: If a line does not split into exactly two fields.
        IndexError: If the word field of a line is empty.
    """
    dst = distance.Distance()
    metric = EditDistanceWrapper(lang_id, dst, phonemise)

    # Group the lower-cased words by their lower-cased first character.
    words_by_initial = {}
    for raw_line in voc_fd.readlines():
        _first_field, word = raw_line.strip("\n").split("\t")
        words_by_initial.setdefault(word[0].lower(), []).append(word.lower())

    # One BK-tree per initial, all sharing the same distance function.
    return {
        initial: pybktree.BKTree(metric.edit_distance, words)
        for initial, words in words_by_initial.items()
    }
def setUp(self):
    """Attach a ``distance.Distance`` instance to the test case.

    The instance is built with the ``feature_model`` value taken from the
    enclosing scope and stored as ``self.dist``.
    """
    dist = distance.Distance(feature_model=feature_model)
    self.dist = dist
def setUp(self):
    """Attach the distance calculator and a feature table to the test case.

    ``self.dist`` is a ``distance.Distance`` built with the ``feature_model``
    value taken from the enclosing scope; ``self.ft`` is a default-constructed
    ``panphon.FeatureTable``.
    """
    dist = distance.Distance(feature_model=feature_model)
    self.dist = dist
    # Built only after ``self.dist`` is set, matching the original ordering.
    feature_table = panphon.FeatureTable()
    self.ft = feature_table