def fill_gaps(input_path,
              output_path,
              manner,
              voice,
              feature1='place',
              feature2='manner'):
    """
    For every gap listed in a JSON file, look for a fourth segment that
    completes the triple, and write the findings to another JSON file.

    @input_path is a JSON file; iterating the loaded object must yield keys
    whose first and last characters are delimiters and whose remainder
    splits on whitespace into exactly three segments.
    @output_path receives a JSON object mapping each key to the chosen
    segment, or to null when no candidate qualified.
    @manner and @voice are passed (each wrapped in a one-element list) to
    ql.get_manners and ql.get_voices to narrow down the candidate segments.
    @feature1 and @feature2 are the two features handed to ql.oppositions;
    a candidate is accepted when both calls yield exactly two items for
    the resulting quadruple.

    NOTE: `segments` is not a parameter; it is looked up in the enclosing
    module scope and must be defined there before this function is called.
    """
    print(input_path)
    with open(input_path, 'r', encoding='utf-8') as source:
        gaps = json.load(source)
    candidates = ql.get_voices(ql.get_manners(segments, [manner]), [voice])

    result = {}
    for key in gaps:
        a, b, c = key[1:-1].split()
        filler = None
        for candidate in candidates:
            if not different(a, b, c, candidate):
                continue
            quadruple = (a, b, c, candidate)
            # Both opposition lists are computed before either is checked.
            first_oppositions = list(ql.oppositions(quadruple, feature1))
            second_oppositions = list(ql.oppositions(quadruple, feature2))
            if len(first_oppositions) != 2 or len(second_oppositions) != 2:
                continue
            # The first qualifying candidate wins.
            filler = candidate
            print(f'{key} -> {candidate}')
            break
        else:
            print(f'{key} cannot be filled.')
        result[key] = filler

    with open(output_path, 'w', encoding='utf-8') as sink:
        json.dump(result, sink, indent=2, ensure_ascii=False)
def enumerate_triples_from_list(segments: List[str],
                                manners: List[str]) -> List[str]:
    """
    Return the triples of segments that no fourth segment completes.

    @segments is first narrowed with ql.get_manners(segments, manners).
    Every 3-combination of the remaining segments is kept as a candidate
    when ql.oppositions yields exactly one item for 'voice' and exactly one
    for 'place'. A candidate is dropped again if some other remaining
    segment turns it into a quadruple for which both features yield exactly
    two items.

    The result is a list of strings of the form '/a b c/' (the function
    returns a list, not a dict, so the annotation says so).
    """
    filtered = ql.get_manners(segments, manners)

    def opposition_count(items, feature):
        # Materialise first so that the count works whether ql.oppositions
        # hands back a list or a lazy iterable.
        return len(list(ql.oppositions(items, feature)))

    result = []
    for triple in combinations(filtered, 3):
        if (opposition_count(triple, 'voice') != 1
                or opposition_count(triple, 'place') != 1):
            continue
        # any() stops at the first segment that plugs the gap.
        plug_found = any(
            opposition_count((*triple, d), 'voice') == 2
            and opposition_count((*triple, d), 'place') == 2
            for d in filtered if d not in triple)
        if not plug_found:
            result.append(f'/{" ".join(triple)}/')
    return result
def enumerate_triples(data_frame: pd.DataFrame, manners: Tuple[str, str],
                      voice: str, direction: str) -> Dict[str, List[str]]:
    """
    Collect, per language, the place/manner triples that nothing completes.

    @manners must have exactly two elements
    @voice is passed to ql.get_voices (as a one-element list) to restrict
    each inventory before the manner filter is applied
    @direction can be either 'direct' or 'inverse'
    When @direction is 'direct', we take triples with two elements having the first manner.
    When @direction is 'inverse', we take triples with two elements having the second manner.

    Returns a mapping from '/a b c/' to the list of Glottocodes in which
    that triple occurs and has no completing fourth segment.
    Raises ValueError for any other @direction.
    """
    if direction not in {'direct', 'inverse'}:
        raise ValueError(f'Wrong direction: {direction}')

    gaps_by_triple = defaultdict(list)
    for language in data_frame.Glottocode.unique():
        inventory = ql.get_inventory(data_frame, language)
        same_voice = ql.get_voices(inventory, [voice])
        candidates = ql.get_manners(same_voice, manners)
        first_manner, second_manner = manners

        for triple in combinations(candidates, 3):
            # Tally how many members of the triple carry each manner.
            first_total = second_total = 0
            for segment in triple:
                features = parse_consonant(segment)
                if features['manner'] == first_manner:
                    first_total += 1
                elif features['manner'] == second_manner:
                    second_total += 1

            # @direction was validated above, so it is one of two values.
            relevant = first_total if direction == 'direct' else second_total
            if relevant != 2:
                continue

            if len(ql.oppositions(triple, 'place')) != 1:
                continue
            if len(ql.oppositions(triple, 'manner')) != 1:
                continue

            # Stop looking as soon as one segment completes the triple.
            has_plug = any(
                len(ql.oppositions((*triple, extra), 'place')) == 2
                and len(ql.oppositions((*triple, extra), 'manner')) == 2
                for extra in candidates if extra not in triple)
            if not has_plug:
                gaps_by_triple[f'/{" ".join(triple)}/'].append(language)
    return gaps_by_triple
# Example #4
# 0
def enumerate_triples(data_frame: pd.DataFrame,
                      manners: List[str]) -> Dict[str, List[str]]:
    """
    Collect, per language, the voice/place triples that nothing completes.

    For every Glottocode in @data_frame the Phoneme column of its rows is
    narrowed with ql.get_manners(..., manners). A 3-combination is a
    candidate when ql.oppositions yields exactly one item for 'voice' and
    one for 'place'; it is reported unless another segment of the same
    language makes both counts equal two for the resulting quadruple.

    Returns a mapping from '/a b c/' to the list of Glottocodes in which
    that triple was found without a completing segment.
    """
    unfilled = defaultdict(list)
    for language in data_frame.Glottocode.unique():
        rows = data_frame.loc[data_frame.Glottocode == language]
        inventory = list(rows.Phoneme)
        selected = ql.get_manners(inventory, manners)

        for triple in combinations(selected, 3):
            if len(list(ql.oppositions(triple, 'voice'))) != 1:
                continue
            if len(list(ql.oppositions(triple, 'place'))) != 1:
                continue

            # NOTE(review): the checks above materialise the result with
            # list(), the ones below call len() on it directly -- confirm
            # that ql.oppositions returns a sized container.
            for extra in selected:
                if extra in triple:
                    continue
                quadruple = (*triple, extra)
                if len(ql.oppositions(quadruple, 'voice')) == 2 and len(
                        ql.oppositions(quadruple, 'place')) == 2:
                    break
            else:
                # No segment completed the triple in this language.
                unfilled[f'/{" ".join(triple)}/'].append(language)
    return unfilled
        return False
    if ql.feature_difference(a, x) == {} or ql.feature_difference(
            b, x) == {} or ql.feature_difference(c, x) == {}:
        return False
    if places_congruent(a, x) or places_congruent(b, x) or places_congruent(
            c, x):
        return False
    return True


if __name__ == '__main__':
    # Gaps to fill: iterating the loaded JSON yields keys that split into
    # three whitespace-separated segments once the first and last
    # characters are stripped.
    with open('../json/affricate_gaps.json', 'r', encoding='utf-8') as inp:
        gaps = json.load(inp)
    d = pd.read_csv('../csv/phoible_working_sample.csv', low_memory=False)
    segments = list(d.Phoneme.unique())
    # Candidate fillers: every distinct phoneme in the sample that
    # ql.get_manners keeps for ['affricate'].
    reference_segments = ql.get_manners(segments, ['affricate'])

    result = {}
    for key in gaps:
        a, b, c = key[1:-1].split()
        # The loop target is deliberately not called `d`: a for-loop target
        # stays bound after the loop, so reusing `d` would replace the
        # DataFrame loaded above with the last candidate string.
        for candidate in filter(lambda x: different(a, b, c, x),
                                reference_segments):
            quadruple = a, b, c, candidate
            opps_list_voice = list(ql.oppositions(quadruple, 'voice'))
            opps_list_place = list(ql.oppositions(quadruple, 'place'))
            if len(opps_list_voice) == 2 and len(opps_list_place) == 2:
                # The first candidate that qualifies is recorded.
                print(f'{key} -> {candidate}')
                result[key] = candidate
                break
        else:
            # Reached only when no candidate triggered the break above.
            print(f'{key} cannot be filled.')
            result[key] = None
# Seed NumPy's global random generator; nothing in the visible part of this
# script draws random numbers, so this presumably serves later code.
np.random.seed(42)
d = pd.read_csv('phoible_working_sample.csv')

# Sample size
# (number of distinct Glottocode values in the table)
print('Sample size: ', end='')
print(len(set(d.Glottocode.unique())))

# Per-language differences between the place counts of two manner classes;
# filled in the loop below, one entry per Glottocode.
stops_fricatives = []
fricatives_affricates = []

# Statistics for different places of articulation
# `places` maps 'stops' / 'fricatives' / 'affricates' to a list holding one
# ql.count_places result per language.
places = defaultdict(list)
for gltc in d.Glottocode.unique():
    segments = ql.get_inventory(d, gltc)
    stops = ql.get_manners(segments, ['stop'])
    fricatives = ql.get_manners(segments, ['fricative'])
    affricates = ql.get_manners(segments, ['affricate'])
    # ql.count_places presumably returns a number (the results are
    # subtracted from each other below) -- TODO confirm.
    stops_places = ql.count_places(stops)
    fricatives_places = ql.count_places(fricatives)
    affricates_places = ql.count_places(affricates)
    places['stops'].append(stops_places)
    places['fricatives'].append(fricatives_places)
    places['affricates'].append(affricates_places)
    stops_fricatives.append(stops_places - fricatives_places)
    fricatives_affricates.append(fricatives_places - affricates_places)
# Summary statistics of the per-language counts for each manner class.
print('Stops:')
print(scipy.stats.describe(places['stops']))
print('Fricatives:')
print(scipy.stats.describe(places['fricatives']))
print('Affricates:')
# NOTE(review): the describe() call for places['affricates'] that would
# match the two above is not part of this excerpt.
# Example #7
# 0
    if x in (a, b, c):
        return False
    if ql.feature_difference(a, x) == {} or ql.feature_difference(
            b, x) == {} or ql.feature_difference(c, x) == {}:
        return False
    if alveolar_dental(a, x) or alveolar_dental(b, x) or alveolar_dental(c, x):
        return False
    return True


if __name__ == '__main__':
    # Gaps to fill: iterating the loaded JSON yields keys that split into
    # three whitespace-separated segments once the first and last
    # characters are stripped.
    with open('fricative_gaps.json', 'r', encoding='utf-8') as inp:
        gaps = json.load(inp)
    d = pd.read_csv('phoible_working_sample.csv', low_memory=False)
    segments = list(d.Phoneme.unique())
    # Candidate fillers: every distinct phoneme in the sample that
    # ql.get_manners keeps for ['fricative'].
    reference_segments = ql.get_manners(segments, ['fricative'])

    result = {}
    for key in gaps:
        a, b, c = key[1:-1].split()
        # The loop target is deliberately not called `d`: a for-loop target
        # stays bound after the loop, so reusing `d` would replace the
        # DataFrame loaded above with the last candidate string.
        for candidate in filter(lambda x: different(a, b, c, x),
                                reference_segments):
            quadruple = a, b, c, candidate
            opps_list_voice = list(ql.oppositions(quadruple, 'voice'))
            opps_list_place = list(ql.oppositions(quadruple, 'place'))
            if len(opps_list_voice) == 2 and len(opps_list_place) == 2:
                # The first candidate that qualifies is recorded.
                print(f'{key} -> {candidate}')
                result[key] = candidate
                break
        else:
            # Reached only when no candidate triggered the break above.
            print(f'{key} cannot be filled.')
            result[key] = None