def oppositions(consonants, feature, others_same=True):
    """
    Return pairs of consonants opposed by the value of a feature, while
    the values of all other features are either kept fixed (default) or
    are free to vary.

    Every unordered pair from @consonants is parsed with parse_consonant.
    Pairs where either parse lacks @feature, or where both parses carry
    the same value for it, are skipped.

    When @others_same is true, a pair is kept only if every other key of
    the first parse (except 'glyph') agrees with the second parse:
    'additional articulations' and 'pre-features' are compared as sorted
    sequences, everything else through congruent().

    The result maps (c1, c2) to (value of @feature in c1, value in c2).
    """
    order_insensitive = {'additional articulations', 'pre-features'}
    found = {}
    for first, second in combinations(consonants, 2):
        left = parse_consonant(first)
        right = parse_consonant(second)
        if feature not in left or feature not in right:
            continue
        if left[feature] == right[feature]:
            # Same value: the pair is not opposed by this feature.
            continue
        if others_same:
            mismatch = False
            # Only the keys of the first parse are walked.
            for key in left:
                if key in (feature, 'glyph'):
                    continue
                if key in order_insensitive:
                    agree = sorted(left[key]) == sorted(right[key])
                else:
                    agree = congruent(left[key], right[key])
                if not agree:
                    mismatch = True
                    break
            if mismatch:
                continue
        found[(first, second)] = (left[feature], right[feature])
    return found
def all_segments_parsable(inventory):
    """
    Return True when parse_consonant accepts every segment of @inventory.

    Any Exception raised by parse_consonant counts as a failed parse and
    makes the function return False immediately. An empty inventory
    yields True.
    """
    for segment in inventory:
        try:
            parse_consonant(segment)
        except Exception:
            # Deliberately not a bare `except:`: KeyboardInterrupt and
            # SystemExit must propagate instead of being reported as an
            # unparsable segment.
            return False
    return True
def feature_difference(p1, p2):
    """
    Return the features on which the parses of @p1 and @p2 differ.

    Both segments are parsed with parse_consonant. The keys of the first
    parse are walked, skipping 'glyph'; for each key whose values differ
    the result holds {'p1': value in the first parse, 'p2': value in the
    second parse}.
    """
    first = parse_consonant(p1)
    second = parse_consonant(p2)
    return {
        key: {'p1': first[key], 'p2': second[key]}
        for key in first
        if key != 'glyph' and first[key] != second[key]
    }
def get_html_for_consonants(cons_list):
    """
    Return the text that get_tables_consonants and dump_unparsed write
    for the consonants in @cons_list.

    Each consonant is parsed with parse_consonant. Those raising
    ValueError are collected as unparsed; the rest are stored in a dict
    keyed by the consonant itself. Both collections are written to an
    in-memory stream whose content is returned.
    """
    parse_by_consonant = {}
    failures = []
    for consonant in cons_list:
        try:
            parse = parse_consonant(consonant)
        except ValueError:
            failures.append(consonant)
        else:
            parse_by_consonant[consonant] = parse

    with StringIO() as buffer:
        get_tables_consonants(parse_by_consonant, buffer)
        dump_unparsed(failures, buffer)
        # Read the value before the stream is closed on leaving the block.
        rendered = buffer.getvalue()
    return rendered
def enumerate_triples(data_frame: pd.DataFrame,
                      manners: Tuple[str, str],
                      voice: str,
                      direction: str) -> Dict[str, List[str]]:
    """
    Collect, per triple of segments, the glottocodes in which the triple
    cannot be completed to a quadruple.

    @manners must have exactly two elements.
    @direction can be either 'direct' or 'inverse'; anything else raises
    ValueError.
    When @direction is 'direct', we take triples with two elements
    having the first manner.
    When @direction is 'inverse', we take triples with two elements
    having the second manner.

    For every glottocode the inventory is narrowed to @voice and to
    @manners. A triple qualifies when ql.oppositions finds exactly one
    'place' and exactly one 'manner' opposition in it. It is recorded
    unless some other segment of the narrowed inventory extends it to a
    quadruple with exactly two oppositions of each kind.

    The result maps '/a b c/' to the list of glottocodes where that
    triple was recorded.
    """
    if direction not in {'direct', 'inverse'}:
        raise ValueError(f'Wrong direction: {direction}')

    result = defaultdict(list)
    for gltc in data_frame.Glottocode.unique():
        inventory = ql.get_inventory(data_frame, gltc)
        same_voice = ql.get_voices(inventory, [voice])
        segments = ql.get_manners(same_voice, manners)
        first_manner, second_manner = manners

        for triple in combinations(segments, 3):
            first_hits = 0
            second_hits = 0
            for segment in triple:
                manner = parse_consonant(segment)['manner']
                if manner == first_manner:
                    first_hits += 1
                elif manner == second_manner:
                    second_hits += 1

            # direction was validated above, so it is one of the two.
            relevant_hits = first_hits if direction == 'direct' else second_hits
            if relevant_hits != 2:
                continue

            if len(ql.oppositions(triple, 'place')) != 1:
                continue
            if len(ql.oppositions(triple, 'manner')) != 1:
                continue

            # Look for a fourth segment that plugs the gap.
            for extra in segments:
                if extra in triple:
                    continue
                quadruple = triple + (extra,)
                if (len(ql.oppositions(quadruple, 'place')) == 2
                        and len(ql.oppositions(quadruple, 'manner')) == 2):
                    break
            else:
                # No plug found: the triple stays incomplete here.
                result[f'/{" ".join(triple)}/'].append(gltc)
    return result
def get_voices(inventory, voices):
    """
    Return the sorted segments of @inventory whose parsed 'voice' value
    is contained in @voices.

    Segments whose parse has no 'voice' key are looked up as None.
    """
    return sorted(
        segment
        for segment in inventory
        if parse_consonant(segment).get('voice', None) in voices
    )
def get_manners(inventory, manners):
    """
    Return the sorted segments of @inventory whose parsed 'manner' value
    is contained in @manners.

    Segments whose parse has no 'manner' key are looked up as None.
    """
    return sorted(
        segment
        for segment in inventory
        if parse_consonant(segment).get('manner', None) in manners
    )
def count_places(inventory):
    """
    Return the number of distinct 'place' values among the parses of the
    segments in @inventory.

    Segments whose parse has no 'place' key all count as the single
    value 'na'.
    """
    places = {
        parse_consonant(segment).get('place', 'na')
        for segment in inventory
    }
    return len(places)
from collections import Counter

import pandas as pd

import query_lib as ql
from IPAParser_2_0 import parse_consonant

# Tally voiced, non-nasal bilabial stops per Glottocode, restricted to
# inventories where at least one of those stops is parsed as implosive.
d = pd.read_csv('../csv/phoible_working_sample.csv', low_memory=False)

# Number of matching stops per inventory -> number of inventories.
voiced_stops = Counter()
# The single matching stop -> number of inventories having only that one.
voiced_stops_single = Counter()
# Sorted tuple of matching stops -> number of inventories with that set.
voiced_stop_combs = Counter()

for gltc in d.Glottocode.unique():
    inv = ql.get_inventory(d, gltc)
    tmp = 0
    tmp_stops = []
    has_implosives = False

    for p in inv:
        parse = parse_consonant(p)
        if not (
            parse.get('voice', '') == 'voiced'
            and parse.get('manner', '') == 'stop'
            and parse['nasal'] == False
            and parse['place'] == 'bilabial'
        ):
            continue
        tmp_stops.append(p)
        tmp += 1
        if parse.get('implosive', False) == True:
            has_implosives = True

    # Inventories without an implosive among the matching stops are
    # left out of all three tallies.
    if has_implosives:
        voiced_stops[tmp] += 1
        if tmp == 1:
            voiced_stops_single[tmp_stops[0]] += 1
        else:
            voiced_stop_combs[tuple(sorted(tmp_stops))] += 1
if all_segments_parsable(inventory): parsable_sample.add(inv_id) print(f'Sample size: {len(parsable_sample)}') d = d.loc[ d.apply(lambda row: row.InventoryID in parsable_sample, axis=1) ] for gltc in d.Glottocode.unique(): inv_id = list(d.loc[ d.Glottocode == gltc ].InventoryID)[0] inv = get_inventory(d, gltc) if not voice_opp_in(inv, ['stop']): continue opps = voice_opp_in(inv, ['affricate']) if opps: fricatives = get_manners(inv, ['fricative']) affricates = get_manners(inv, ['affricate']) voiced_affricates = list(filter( lambda x: parse_consonant(x)['voice'] == 'voiced', affricates)) voiceless_affricates = list(filter( lambda x: parse_consonant(x)['voice'] == 'voiceless', affricates)) # Check for voiced affricates that have paired # voiceless affricates and voiceless fricatives # but do not have paired voiced fricatives. result = [] for affr_vcd in voiced_affricates: if oppositions([affr_vcd] + fricatives, 'manner'): continue # Find the corresponding voiceless affricate. opps_tmp = oppositions([affr_vcd] + voiceless_affricates, 'voice') if opps_tmp: for _, affr_vcl in opps_tmp: