def fill_gaps(input_path, output_path, manner, voice, feature1='place', feature2='manner'):
    """Look for a filler segment for every gap in a JSON file and save the result.

    The JSON object read from ``input_path`` is iterated by key; each key is
    expected to look like ``/a b c/`` (three whitespace-separated segments
    between two delimiter characters).  Candidate fillers are taken from the
    ``segments`` name of the enclosing module scope, narrowed down with
    ``ql.get_manners`` to ``manner`` and with ``ql.get_voices`` to ``voice``.

    For each key the candidates are scanned in order.  The first candidate
    ``d`` that is accepted by ``different(a, b, c, d)`` and for which the
    quadruple ``(a, b, c, d)`` yields exactly two oppositions for
    ``feature1`` and exactly two for ``feature2`` is stored as the filler.
    Keys without such a candidate are stored with ``None``.

    The resulting ``{key: filler-or-None}`` mapping is written to
    ``output_path`` as indented, non-ASCII-escaped JSON.  Progress is printed
    to stdout.
    """
    print(input_path)
    with open(input_path, 'r', encoding='utf-8') as inp:
        gaps = json.load(inp)

    # Candidate pool: first restrict by manner, then by voicing.
    candidates = ql.get_voices(ql.get_manners(segments, [manner]), [voice])

    result = {}
    for key in gaps:
        # Drop the first and last character of the key and split the rest
        # into the three segments of the triple.
        a, b, c = key[1:-1].split()

        filled = False
        for d in candidates:
            if not different(a, b, c, d):
                continue
            quadruple = a, b, c, d
            first_count = len(list(ql.oppositions(quadruple, feature1)))
            second_count = len(list(ql.oppositions(quadruple, feature2)))
            if first_count == 2 and second_count == 2:
                print(f'{key} -> {d}')
                result[key] = d
                filled = True
                break

        if not filled:
            print(f'{key} cannot be filled.')
            result[key] = None

    with open(output_path, 'w', encoding='utf-8') as out:
        json.dump(result, out, indent=2, ensure_ascii=False)
def enumerate_triples_from_list(segments: List[str], manners: List[str]) -> List[str]:
    """Return the unfilled voice/place triples found in a list of segments.

    ``segments`` is first restricted to the given ``manners`` with
    ``ql.get_manners``.  Every 3-combination of the remaining segments that
    has exactly one ``'voice'`` opposition and exactly one ``'place'``
    opposition is a candidate gap.  A candidate is considered filled when
    some other segment of the same restricted list extends it to a quadruple
    with exactly two ``'voice'`` and two ``'place'`` oppositions.

    Returns:
        A list with one ``'/a b c/'`` label per triple that could not be
        filled, in the order the combinations were generated.
    """
    filtered = ql.get_manners(segments, manners)
    result = []
    for triple in combinations(filtered, 3):
        # Only triples forming exactly one opposition per feature are gaps.
        if (len(list(ql.oppositions(triple, 'voice'))) != 1
                or len(list(ql.oppositions(triple, 'place'))) != 1):
            continue

        # The oppositions are materialised with list() before len(), the same
        # way as in the check above, so the count works for any iterable.
        plug_found = any(
            len(list(ql.oppositions((*triple, d), 'voice'))) == 2
            and len(list(ql.oppositions((*triple, d), 'place'))) == 2
            for d in filtered
            if d not in triple
        )
        if not plug_found:
            result.append(f'/{" ".join(triple)}/')
    return result
def enumerate_triples(data_frame: pd.DataFrame,
                      manners: Tuple[str, str],
                      voice: str,
                      direction: str) -> Dict[str, List[str]]:
    """
    Collect, per language, the place/manner triples that have no fourth
    segment completing them.

    @manners must have exactly two elements.
    @direction must be either 'direct' or 'inverse'; anything else raises
    ValueError.
    With 'direct', only triples in which exactly two elements have the first
    manner are considered; with 'inverse', only triples in which exactly two
    elements have the second manner.

    For every Glottocode in @data_frame the inventory is restricted to @voice
    and to @manners.  A triple qualifies when it has exactly one 'place' and
    exactly one 'manner' opposition; it is reported unless another segment of
    the same restricted inventory extends it to a quadruple with exactly two
    oppositions for each of the two features.

    Returns a mapping from the '/a b c/' label of each unfilled triple to the
    list of Glottocodes in which it was found.
    """
    if direction not in {'direct', 'inverse'}:
        raise ValueError(f'Wrong direction: {direction}')

    gaps_by_triple = defaultdict(list)
    for gltc in data_frame.Glottocode.unique():
        inventory = ql.get_inventory(data_frame, gltc)
        segments = ql.get_manners(ql.get_voices(inventory, [voice]), manners)
        manner1, manner2 = manners

        for triple in combinations(segments, 3):
            triple_manners = [parse_consonant(el)['manner'] for el in triple]
            manner1_count = sum(1 for m in triple_manners if m == manner1)
            # An element is counted for the second manner only when it did
            # not already match the first one.
            manner2_count = sum(
                1 for m in triple_manners if m != manner1 and m == manner2)

            relevant_count = (
                manner1_count if direction == 'direct' else manner2_count)
            if relevant_count != 2:
                continue

            if (len(ql.oppositions(triple, 'place')) != 1
                    or len(ql.oppositions(triple, 'manner')) != 1):
                continue

            # Stop at the first segment that completes the triple.
            plugged = any(
                len(ql.oppositions((*triple, d), 'place')) == 2
                and len(ql.oppositions((*triple, d), 'manner')) == 2
                for d in segments
                if d not in triple
            )
            if not plugged:
                gaps_by_triple[f'/{" ".join(triple)}/'].append(gltc)
    return gaps_by_triple
def enumerate_triples(data_frame: pd.DataFrame, manners: List[str]) -> Dict[str, List[str]]:
    """Collect, per language, the voice/place triples that nothing completes.

    For every Glottocode in ``data_frame`` the values of the ``Phoneme``
    column are restricted to ``manners`` with ``ql.get_manners``.  A
    3-combination of those segments qualifies when it has exactly one
    ``'voice'`` and exactly one ``'place'`` opposition.  It is reported
    unless another segment of the same restricted inventory extends it to a
    quadruple with exactly two oppositions for each of the two features.

    Returns:
        A mapping from the ``'/a b c/'`` label of each unfilled triple to the
        list of Glottocodes in which it was found.
    """
    gaps_by_triple = defaultdict(list)
    for gltc in data_frame.Glottocode.unique():
        rows = data_frame.Glottocode == gltc
        inventory = list(data_frame.loc[rows, 'Phoneme'])
        stops = ql.get_manners(inventory, manners)

        for triple in combinations(stops, 3):
            if (len(list(ql.oppositions(triple, 'voice'))) != 1
                    or len(list(ql.oppositions(triple, 'place'))) != 1):
                continue

            # Stop at the first segment that completes the triple.
            plugged = any(
                len(ql.oppositions((*triple, d), 'voice')) == 2
                and len(ql.oppositions((*triple, d), 'place')) == 2
                for d in stops
                if d not in triple
            )
            if not plugged:
                gaps_by_triple[f'/{" ".join(triple)}/'].append(gltc)
    return gaps_by_triple
return False if ql.feature_difference(a, x) == {} or ql.feature_difference( b, x) == {} or ql.feature_difference(c, x) == {}: return False if places_congruent(a, x) or places_congruent(b, x) or places_congruent( c, x): return False return True if __name__ == '__main__': with open('../json/affricate_gaps.json', 'r', encoding='utf-8') as inp: gaps = json.load(inp) d = pd.read_csv('../csv/phoible_working_sample.csv', low_memory=False) segments = list(d.Phoneme.unique()) reference_segments = ql.get_manners(segments, ['affricate']) result = {} for key in gaps: a, b, c = key[1:-1].split() for d in filter(lambda x: different(a, b, c, x), reference_segments): quadruple = a, b, c, d opps_list_voice = list(ql.oppositions(quadruple, 'voice')) opps_list_place = list(ql.oppositions(quadruple, 'place')) if len(opps_list_voice) == 2 and len(opps_list_place) == 2: print(f'{key} -> {d}') result[key] = d break else: print(f'{key} cannot be filled.') result[key] = None
np.random.seed(42)
d = pd.read_csv('phoible_working_sample.csv')

# Sample size: number of distinct Glottocodes in the table.
print(f'Sample size: {len(set(d.Glottocode.unique()))}')

# Per-language differences between the place counts of adjacent manners.
stops_fricatives = []
fricatives_affricates = []

# Statistics for different places of articulation, one entry per language.
places = defaultdict(list)
for gltc in d.Glottocode.unique():
    segments = ql.get_inventory(d, gltc)

    stops = ql.get_manners(segments, ['stop'])
    fricatives = ql.get_manners(segments, ['fricative'])
    affricates = ql.get_manners(segments, ['affricate'])

    stops_places = ql.count_places(stops)
    fricatives_places = ql.count_places(fricatives)
    affricates_places = ql.count_places(affricates)

    for manner_key, place_count in (
            ('stops', stops_places),
            ('fricatives', fricatives_places),
            ('affricates', affricates_places)):
        places[manner_key].append(place_count)

    stops_fricatives.append(stops_places - fricatives_places)
    fricatives_affricates.append(fricatives_places - affricates_places)

for heading, manner_key in (('Stops:', 'stops'), ('Fricatives:', 'fricatives')):
    print(heading)
    print(scipy.stats.describe(places[manner_key]))
print('Affricates:')
if x in (a, b, c): return False if ql.feature_difference(a, x) == {} or ql.feature_difference( b, x) == {} or ql.feature_difference(c, x) == {}: return False if alveolar_dental(a, x) or alveolar_dental(b, x) or alveolar_dental(c, x): return False return True if __name__ == '__main__': with open('fricative_gaps.json', 'r', encoding='utf-8') as inp: gaps = json.load(inp) d = pd.read_csv('phoible_working_sample.csv', low_memory=False) segments = list(d.Phoneme.unique()) reference_segments = ql.get_manners(segments, ['fricative']) result = {} for key in gaps: a, b, c = key[1:-1].split() for d in filter(lambda x: different(a, b, c, x), reference_segments): quadruple = a, b, c, d opps_list_voice = list(ql.oppositions(quadruple, 'voice')) opps_list_place = list(ql.oppositions(quadruple, 'place')) if len(opps_list_voice) == 2 and len(opps_list_place) == 2: print(f'{key} -> {d}') result[key] = d break else: print(f'{key} cannot be filled.') result[key] = None