def fill_gaps(input_path,
              output_path,
              manner,
              voice,
              feature1='place',
              feature2='manner'):
    """
    Look for a fourth segment completing every gap triple stored in a JSON file.

    The keys of the JSON object found at @input_path are read as triples: the
    first and the last character of a key are dropped and the rest is split
    on whitespace into three segments.
    Candidates are `segments` narrowed down to @manner and then to @voice.
    The first candidate that passes `different` and for which the quadruple
    yields exactly two oppositions for both @feature1 and @feature2 is taken
    as the filler.
    The key -> filler mapping (None when no candidate fits) is dumped
    to @output_path as JSON.

    NOTE(review): `segments` is neither a parameter nor a local of this
    function; presumably it is a module-level name -- confirm that it is
    defined before the function is called.
    """
    print(input_path)
    with open(input_path, 'r', encoding='utf-8') as inp:
        gaps = json.load(inp)
    candidates = ql.get_voices(ql.get_manners(segments, [manner]), [voice])

    fillers = {}
    for gap_key in gaps:
        a, b, c = gap_key[1:-1].split()
        for candidate in candidates:
            if not different(a, b, c, candidate):
                continue
            quadruple = (a, b, c, candidate)
            first_opps = list(ql.oppositions(quadruple, feature1))
            second_opps = list(ql.oppositions(quadruple, feature2))
            if len(first_opps) != 2 or len(second_opps) != 2:
                continue
            print(f'{gap_key} -> {candidate}')
            fillers[gap_key] = candidate
            break
        else:
            # The loop ran out of candidates without hitting `break`.
            print(f'{gap_key} cannot be filled.')
            fillers[gap_key] = None
    with open(output_path, 'w', encoding='utf-8') as out:
        json.dump(fillers, out, indent=2, ensure_ascii=False)
def enumerate_triples_from_list(segments: List[str],
                                manners: List[str]) -> List[str]:
    """
    List the triples from @segments that no fourth segment can plug.

    @segments is first narrowed down to @manners with `ql.get_manners`.
    A triple is considered when it yields exactly one 'voice' opposition and
    exactly one 'place' opposition. It is reported when no other segment
    from the narrowed-down list turns it into a quadruple with exactly two
    oppositions for each of the two features.

    Returns a list of strings of the form '/a b c/'.
    """
    filtered = ql.get_manners(segments, manners)
    result = []
    for triple in combinations(filtered, 3):
        # The results of ql.oppositions are wrapped in list() everywhere so
        # that len() works no matter whether a list or a lazy iterable is
        # returned.
        if (len(list(ql.oppositions(triple, 'voice'))) != 1
                or len(list(ql.oppositions(triple, 'place'))) != 1):
            continue
        a, b, c = triple
        plug_found = any(
            len(list(ql.oppositions((a, b, c, d), 'voice'))) == 2
            and len(list(ql.oppositions((a, b, c, d), 'place'))) == 2
            for d in filtered
            if d not in triple)
        if not plug_found:
            result.append(f'/{" ".join(triple)}/')
    return result
def enumerate_triples(data_frame: pd.DataFrame, manners: Tuple[str, str],
                      voice: str, direction: str) -> Dict[str, List[str]]:
    """
    Map every unpluggable triple to the glottocodes where it is attested.

    @manners must have exactly two elements
    @direction can be either 'direct' or 'inverse'; anything else raises ValueError
    When @direction is 'direct', only triples with exactly two elements of the first manner are taken.
    When @direction is 'inverse', only triples with exactly two elements of the second manner are taken.

    For every glottocode the inventory is narrowed down to @voice and @manners.
    A triple is considered when it yields exactly one 'place' and exactly one
    'manner' opposition; it is recorded when no other segment of the narrowed-down
    inventory gives a quadruple with exactly two oppositions for both features.
    Keys of the result have the form '/a b c/'.
    """
    if direction not in {'direct', 'inverse'}:
        raise ValueError(f'Wrong direction: {direction}')
    gaps = defaultdict(list)
    for gltc in data_frame.Glottocode.unique():
        inventory = ql.get_inventory(data_frame, gltc)
        segments = ql.get_manners(ql.get_voices(inventory, [voice]), manners)
        manner1, manner2 = manners
        for triple in combinations(segments, 3):
            first_hits = 0
            second_hits = 0
            for segment in triple:
                parse = parse_consonant(segment)
                if parse['manner'] == manner1:
                    first_hits += 1
                elif parse['manner'] == manner2:
                    second_hits += 1
            # @direction has been validated above, so it is one of the two.
            relevant_hits = first_hits if direction == 'direct' else second_hits
            if relevant_hits != 2:
                continue
            if len(ql.oppositions(triple, 'place')) != 1:
                continue
            if len(ql.oppositions(triple, 'manner')) != 1:
                continue
            a, b, c = triple
            for d in segments:
                if d in triple:
                    continue
                quadruple = (a, b, c, d)
                if len(ql.oppositions(quadruple, 'place')) == 2 and len(
                        ql.oppositions(quadruple, 'manner')) == 2:
                    break
            else:
                # No segment plugs the triple in this inventory.
                gaps[f'/{" ".join(triple)}/'].append(gltc)
    return gaps
def check_gaps(input_path, feature1='place', feature2='manner'):
    """
    Verify the gap fillers stored in the JSON file at @input_path.

    The file maps keys to fillers. A key is read as a triple: its first and
    last characters are dropped and the rest is split on whitespace.
    Entries whose filler is None are skipped.
    For every other entry the triple plus the filler must yield exactly two
    oppositions for @feature1 and exactly two for @feature2. The first
    quadruple that violates this is printed together with 'Error!' and the
    process exits with status 1. 'Passed.' is printed when all entries are
    fine.
    """
    with open(input_path, 'r', encoding='utf-8') as inp:
        gap_fillers = json.load(inp)

    for k, x in gap_fillers.items():
        if x is None:
            continue
        a, b, c = k[1:-1].split()
        quadruple = a, b, c, x
        # Both counts are tested explicitly: `not p and q` is parsed as
        # `(not p) and q`, which lets a quadruple through whenever the
        # second feature is the one that fails.
        if (len(ql.oppositions(quadruple, feature1)) != 2
                or len(ql.oppositions(quadruple, feature2)) != 2):
            print(quadruple)
            print('Error!')
            sys.exit(1)
    print('Passed.')
# Example no. 5
# 0
def enumerate_triples(data_frame: pd.DataFrame,
                      manners: List[str]) -> Dict[str, List[str]]:
    """
    Map every unpluggable triple to the glottocodes where it is attested.

    For every glottocode of @data_frame the phonemes of that language are
    narrowed down to @manners. A triple is considered when it yields exactly
    one 'voice' and exactly one 'place' opposition; it is recorded when no
    other narrowed-down segment gives a quadruple with exactly two
    oppositions for both features.
    Keys of the result have the form '/a b c/'.
    """
    gaps = defaultdict(list)
    for gltc in data_frame.Glottocode.unique():
        inventory = list(data_frame.loc[data_frame.Glottocode == gltc].Phoneme)
        stops = ql.get_manners(inventory, manners)
        for triple in combinations(stops, 3):
            if len(list(ql.oppositions(triple, 'voice'))) != 1:
                continue
            if len(list(ql.oppositions(triple, 'place'))) != 1:
                continue
            a, b, c = triple
            for d in stops:
                if d in triple:
                    continue
                quadruple = (a, b, c, d)
                if len(ql.oppositions(quadruple, 'voice')) == 2 and len(
                        ql.oppositions(quadruple, 'place')) == 2:
                    break
            else:
                # No segment plugs the triple in this inventory.
                gaps[f'/{" ".join(triple)}/'].append(gltc)
    return gaps
        return False
    if places_congruent(a, x) or places_congruent(b, x) or places_congruent(
            c, x):
        return False
    return True


if __name__ == '__main__':
    # Read the gap triples; only the keys of the JSON object are used below.
    with open('../json/affricate_gaps.json', 'r', encoding='utf-8') as inp:
        gaps = json.load(inp)
    sample = pd.read_csv('../csv/phoible_working_sample.csv',
                         low_memory=False)
    segments = list(sample.Phoneme.unique())
    reference_segments = ql.get_manners(segments, ['affricate'])

    # For every gap take the first candidate that passes `different` and
    # gives exactly two 'voice' and two 'place' oppositions.
    result = {}
    for key in gaps:
        a, b, c = key[1:-1].split()
        for candidate in reference_segments:
            if not different(a, b, c, candidate):
                continue
            quadruple = (a, b, c, candidate)
            voice_opps = list(ql.oppositions(quadruple, 'voice'))
            place_opps = list(ql.oppositions(quadruple, 'place'))
            if len(voice_opps) != 2 or len(place_opps) != 2:
                continue
            print(f'{key} -> {candidate}')
            result[key] = candidate
            break
        else:
            # The loop ran out of candidates without hitting `break`.
            print(f'{key} cannot be filled.')
            result[key] = None
    with open('../json/affricate_gaps_fillers.json', 'w',
              encoding='utf-8') as out:
        json.dump(result, out, indent=2, ensure_ascii=False)
# Example no. 7
# 0
import json
import sys
import query_lib as ql

# Sanity check of the fillers: every triple together with its filler has to
# yield exactly two 'voice' and exactly two 'place' oppositions.
with open('affricate_gaps_fillers.json', 'r', encoding='utf-8') as inp:
    gap_fillers = json.load(inp)

for k, x in gap_fillers.items():
    if x is None:
        # No filler was recorded for this triple; nothing to check.
        continue
    # A key is read as a triple: the first and the last character are
    # dropped and the rest is split on whitespace.
    a, b, c = k[1:-1].split()
    quadruple = a, b, c, x
    print(quadruple)
    # Both counts are tested explicitly: `not p and q` is parsed as
    # `(not p) and q`, which lets a quadruple through whenever the 'place'
    # count is the one that is wrong.
    if (len(ql.oppositions(quadruple, 'voice')) != 2
            or len(ql.oppositions(quadruple, 'place')) != 2):
        sys.exit(1)