示例#1
0
def check_ipa_known_segs(mappings_to_check=False):
    """Warn about IPA mapping outputs that do not survive IPA segmentation.

    For every entry of ``MAPPINGS_AVAILABLE`` whose ``out_lang`` is selected
    and ends with ``-ipa``, each rule's ``out`` string is split with
    ``ipa_segs`` and the pieces are joined back together.  A rule whose
    re-joined segments differ from its ``out`` string is reported through
    ``LOGGER``; if at least one rule was reported, a final warning pointing
    to the panphon project is logged as well.

    Args:
        mappings_to_check: Collection of ``out_lang`` codes to inspect.  Any
            falsy value (the default) selects every available mapping.

    Returns:
        None.  Problems are only logged as warnings.
    """
    panphon_distance = distance.Distance()
    if not mappings_to_check:
        mappings_to_check = [
            entry['out_lang'] for entry in MAPPINGS_AVAILABLE
        ]

    # Resolve the full selection before any rule is inspected.
    selected = [
        entry for entry in MAPPINGS_AVAILABLE
        if entry['out_lang'] in mappings_to_check
    ]

    found_error = False
    for mapping in selected:
        # Only mappings that produce IPA are checked.
        if not mapping['out_lang'].endswith('-ipa'):
            continue
        for rule in mapping['mapping_data']:
            segments = panphon_distance.fm.ipa_segs(rule['out'])
            # A clean round trip means every character was segmented.
            if ''.join(segments) == rule['out']:
                continue
            LOGGER.warning(
                f"Output '{rule['out']}' in rule {rule} in mapping between {mapping['in_lang']} and {mapping['out_lang']} is not recognized as valid IPA by panphon. You may ignore this warning if you know it gets remapped to IPA later."
            )
            found_error = True

    if found_error:
        LOGGER.warning(
            "Please refer to https://github.com/dmort27/panphon for information about panphon."
        )
示例#2
0
def get_multitree(voc_fd, lang_id):
    """Get a multitree for the given language.

    The vocabulary is read line by line.  Words are lowercased and grouped
    by the lowercased first character of the word, and one BK tree is built
    per group using ``EditDistanceWrapper.edit_distance`` as the metric.

    Args:
        voc_fd: Open text file (or any iterable of lines).  Every non-blank
            line must contain exactly two tab-separated fields; the second
            field is the word and the first field is ignored.  Blank and
            whitespace-only lines are skipped.
        lang_id: Language identifier handed to ``EditDistanceWrapper``.

    Returns:
        A dict mapping each first letter to a ``pybktree.BKTree`` holding
        the lowercased words that start with that letter.

    Raises:
        ValueError: If a non-blank line does not contain exactly two
            tab-separated fields.
        IndexError: If the word field of a line is empty.
    """
    dst = distance.Distance()
    ed = EditDistanceWrapper(lang_id, dst, phonemise)

    # Group the lowercased words by their first letter.
    distractors = {}
    # Iterate the file lazily instead of loading every line with readlines().
    for line in voc_fd:
        # A trailing or stray blank line carries no entry; skip it rather
        # than failing on the two-field unpack below.
        if not line.strip():
            continue
        _, word = line.strip("\n").split("\t")
        # Lowercase the first character on its own (not word.lower()[0]):
        # lowercasing can expand a single character into several.
        first_letter = word[0].lower()
        distractors.setdefault(first_letter, []).append(word.lower())

    # One BK tree per first letter, all sharing the same distance function.
    return {
        letter: pybktree.BKTree(ed.edit_distance, words)
        for letter, words in distractors.items()
    }
示例#3
0
 def setUp(self):
     """Create the ``Distance`` object stored on ``self.dist``."""
     # ``feature_model`` is not a parameter; it is resolved from an
     # enclosing scope.
     dist = distance.Distance(feature_model=feature_model)
     self.dist = dist
示例#4
0
 def setUp(self):
     """Create the ``Distance`` and ``FeatureTable`` objects on ``self``."""
     # ``feature_model`` is not a parameter; it is resolved from an
     # enclosing scope.
     dist = distance.Distance(feature_model=feature_model)
     self.dist = dist
     # NOTE(review): the feature table is built with its default arguments
     # while the distance object receives ``feature_model`` - confirm that
     # the two are meant to differ.
     feature_table = panphon.FeatureTable()
     self.ft = feature_table