# NOTE(review): this line is whitespace-collapsed source; it holds two things.
# 1. The tail of a definition whose `def` line is outside this view: it takes
#    one character out of `code`, calls phila_system() with pc = "3", and
#    returns True when the resulting code is "33" and False when it is "3"
#    (no explicit return for any other code).
# 2. The `__main__` script: it builds `pron` from a file named "dict"
#    (skipping lines that start with ';' and splitting each line on its first
#    space), then runs is_tense_old() and is_tense() on a fixed word list.
# NOTE(review): because of the collapsed layout it is unclear whether the
# trailing `print` belongs to the commented-out `if` -- confirm against the
# unflattened file before editing.
fs = code.split(".")[1][4] pc = "3" pcode = phila_system(i, phones, trans, fm, fp, fv, ps, fs, pc, phoneset) if pcode == "33": return True elif pcode == "3": return False if __name__ == "__main__": pron = {} for line in open("dict", "r"): if line.startswith(';'): continue (word, pron_string) = line.rstrip().split(' ', 1) pron[word] = pron_string.split() words = ['MADDER', 'BADNESS', 'BADDEST', 'RAN', 'SWAM', 'MATH', 'SAD', 'HAND', 'HAM', 'HALF', 'PATH', 'PASS', 'CASH', 'BANG', 'BAT', 'PAL', 'BAG', 'CAB', 'HAMMER', 'MANAGE', 'PLANET', 'PLANETS', 'MANNING', 'CLASSES', 'ASKING', 'CAN', 'BEGAN', 'ANNE', 'ASPECT', 'ASPIRIN', 'CASKET', 'ASKED', 'BASKETBALL', 'BANDSTAND', 'BACKSTROKE', 'TRANSFER', "GLADDEN", "TRANSMISSION", 'SANTA', 'GRANDMOTHER', 'RASCAL', "ASKIN'", "PASSIN'", 'PASSING', 'TRAFFIC', 'CATHOLIC', 'MARRY', 'SPANISH'] for word in words: word_pron = pron[word] old_coding = is_tense_old(word, word_pron) new_coding = is_tense(word, word_pron) #if old_coding != new_coding: print word, old_coding, new_coding
# Words whose /AA/ vowel is reassigned to the /oh/ class (code '53').
_PHILA_OH_WORDS = frozenset([
    'LAW', 'LAWS', "LAW'S", 'LAWFUL', 'UNLAWFUL',
    'DOG', 'DOGS', "DOG'S", 'DOGGED',
    'ALL', "ALL'S", 'CALL', 'CALLS', "CALL'S", 'CALLING', 'CALLED',
    'FALL', 'FALLS', "FALL'S", 'FALLING',
    'AUDIENCE', 'AUDIENCES', "AUDIENCE'S",
    'ON', 'ONTO', 'GONNA', 'GONE', 'BOSTON', "BOSTON'S",
    'AWFUL', 'AWFULLY', 'AWFULNESS',
    'AWKWARD', 'AWKWARDLY', 'AWKWARDNESS', 'AWESOME', 'AUGUST',
    'COUGH', 'COUGHS', 'COUGHED', 'COUGHING',
])

# Words whose /AO/ vowel is reassigned to the /o/ class (code '5').
_PHILA_O_WORDS = frozenset([
    'CHOCOLATE', 'CHOCOLATES', "CHOCOLATE'S",
    'WALLET', 'WALLETS', 'WARRANT', 'WARRANTS',
    'WATCH', 'WATCHES', 'WATCHED', 'WATCHING',
    'WANDER', 'WANDERS', 'WANDERED', 'WANDERING',
    'CONNIE', 'CATHOLICISM', 'WANT', 'WANTED',
    'PONG', 'GONG', 'KONG',
    'FLORIDA', 'ORANGE', 'HORRIBLE', 'MAJORITY',
])


def phila_system(i, phones, trans, fm, fp, fv, ps, fs, pc, phoneset):
    """Redefine the Plotnik vowel class of one vowel for Philadelphia.

    The rules are applied in order, so a later rule overrides an earlier one.

    Args:
        i: index of the target vowel in ``phones``.
        phones: the word's phones; passed through ``split_stress_digit``,
            after which each item is read via ``.label`` and ``.arpa``
            (presumably ``.arpa`` is the label without its stress digit --
            confirm against ``split_stress_digit``).
        trans: orthographic form of the word; compared case-insensitively.
        fm: environment code; a value of ``'0'`` disables the short-a
            reclassification (exact meaning of the code is defined by the
            caller -- TODO confirm).
        fp, fv, ps, fs: accepted for interface compatibility; not used here.
        pc: incoming vowel class code (string).
        phoneset: mapping from ARPABET symbol to an object with a ``cvox``
            attribute; ``cvox == '0'`` is treated as "vowel" by the
            r-related rules below.

    Returns:
        The (possibly reassigned) vowel class code as a string.
    """
    # separate Arpabet coding from stress digit for vowels
    phones = split_stress_digit(phones)
    word = trans.upper()
    vowel = phones[i]
    n_phones = len(phones)

    # 1. New short-a coding using Kyle Gorman's syllabification &
    # classification functions.
    if pc == '3' and vowel.label in {'AE0', 'AE1', 'AE2', 'AE'} and fm != '0':
        # The second argument is the list of ARPABET phones for the word
        # starting with the target vowel; this does not affect correctness of
        # syllabification.  is_tense returns True if tense, False if lax, and
        # None if variable/"unclassifiable".
        tenseness = is_tense(trans, [ph.label for ph in phones[i:]])
        pc = {True: '33', False: '3'}.get(tenseness, '39')

    # convert dictionary entries to LAX short-a for "-arry" words
    if pc == '2' and 'ARRY' in word:
        if (n_phones > i + 2 and phones[i + 1].arpa == 'R'
                and phoneset[phones[i + 2].arpa].cvox == '0'):
            pc = '3'
    # random dictionary inaccuracies
    if pc == '5' and word == 'MARIO':
        pc = '3'

    # 2. /e/
    if word in ("CATCH", "KEPT"):
        pc = '2'

    # 3. /oh/
    if vowel.arpa == 'AA' and word in _PHILA_OH_WORDS:
        pc = '53'

    # 4. /o/
    if vowel.arpa == 'AO' and word in _PHILA_O_WORDS:
        pc = '5'
    if vowel.arpa == 'AE' and word in ('LANZA', "LANZA'S"):
        pc = '5'

    # 4. /ah/ -- rule disabled in the original; kept for reference:
    # if pc == '5' and phones[i].arpa == 'AA' and (i > 0 and phones[i-1].arpa != 'W') and (len(phones) != i+1 and phones[i+1].arpa != 'R'):
    #     x1 = max(0, i - 3)
    #     x2 = min(len(trans), i + 3)
    #     for t in ['AU', 'AW', 'AL']:
    #         if 'A' in trans[x1:x2].upper() and t not in trans[x1:x2].upper():
    #             pc = '43'

    # 5. /iw/
    if vowel.label == "UW1":
        # UW1 preceded by /y/
        if i > 0 and phones[i - 1].arpa == 'Y':
            pc = '82'
        # words spelled with "-ew", e.g. "threw", "new", "brew"
        if 'EW' in word:
            pc = '82'
        # words spelled with "-u" after /t/, /d/, /n/, /l/, /s/, e.g.
        # "Tuesday", "nude", "duty", "new"
        if i > 0 and phones[i - 1].arpa in ('T', 'D', 'N', 'L', 'S'):
            # make sure -u spelling is adjacent to consonant in orthography
            if any(t in word for t in ('TU', 'DU', 'NU', 'LU', 'SU')):
                pc = '82'

    # 6. /Tuw/
    # The original tested `trans.upper in [...]` (the bound method, never
    # called), so this rule could never fire; call upper() as intended.
    if vowel.label == "UW1" and word in ('THROUGH',):
        pc = '73'

    # 7. front vowels before r
    if (n_phones > i + 1 and vowel.arpa in ('EH', 'AE')
            and phones[i + 1].arpa == 'R'):
        # word-final /r/, or not word-final but also NOT intervocalic r
        if n_phones == i + 2 or phoneset[phones[i + 2].arpa].cvox != '0':
            pc = '24'

    return pc
# NOTE(review): this line is whitespace-collapsed source; it holds two things.
# 1. The tail of a definition whose `def` line is outside this view: it calls
#    phila_system() and returns True when the resulting code is "33" and
#    False when it is "3" (no explicit return for any other code).
# 2. The `__main__` script: it builds `pron` from a file named "dict"
#    (skipping lines that start with ';' and splitting each line on its first
#    space), then runs is_tense_old() and is_tense() on a fixed word list.
# NOTE(review): because of the collapsed layout it is unclear whether the
# trailing `print` belongs to the commented-out `if` -- confirm against the
# unflattened file before editing.
pcode = phila_system(i, phones, trans, fm, fp, fv, ps, fs, pc, phoneset) if pcode == "33": return True elif pcode == "3": return False if __name__ == "__main__": pron = {} for line in open("dict", "r"): if line.startswith(';'): continue (word, pron_string) = line.rstrip().split(' ', 1) pron[word] = pron_string.split() words = [ 'MADDER', 'BADNESS', 'BADDEST', 'RAN', 'SWAM', 'MATH', 'SAD', 'HAND', 'HAM', 'HALF', 'PATH', 'PASS', 'CASH', 'BANG', 'BAT', 'PAL', 'BAG', 'CAB', 'HAMMER', 'MANAGE', 'PLANET', 'PLANETS', 'MANNING', 'CLASSES', 'ASKING', 'CAN', 'BEGAN', 'ANNE', 'ASPECT', 'ASPIRIN', 'CASKET', 'ASKED', 'BASKETBALL', 'BANDSTAND', 'BACKSTROKE', 'TRANSFER', "GLADDEN", "TRANSMISSION", 'SANTA', 'GRANDMOTHER', 'RASCAL', "ASKIN'", "PASSIN'", 'PASSING', 'TRAFFIC', 'CATHOLIC', 'MARRY', 'SPANISH' ] for word in words: word_pron = pron[word] old_coding = is_tense_old(word, word_pron) new_coding = is_tense(word, word_pron) #if old_coding != new_coding: print word, old_coding, new_coding
# Words whose /AA/ vowel is reassigned to the /oh/ class (code '53').
_PHILA_OH_WORDS = frozenset([
    'LAW', 'LAWS', "LAW'S", 'LAWFUL', 'UNLAWFUL',
    'DOG', 'DOGS', "DOG'S", 'DOGGED',
    'ALL', "ALL'S", 'CALL', 'CALLS', "CALL'S", 'CALLING', 'CALLED',
    'FALL', 'FALLS', "FALL'S", 'FALLING',
    'AUDIENCE', 'AUDIENCES', "AUDIENCE'S",
    'ON', 'ONTO', 'GONNA', 'GONE', 'BOSTON', "BOSTON'S",
    'AWFUL', 'AWFULLY', 'AWFULNESS',
    'AWKWARD', 'AWKWARDLY', 'AWKWARDNESS', 'AWESOME', 'AUGUST',
    'COUGH', 'COUGHS', 'COUGHED', 'COUGHING',
])

# Words whose /AO/ vowel is reassigned to the /o/ class (code '5').
_PHILA_O_WORDS = frozenset([
    'CHOCOLATE', 'CHOCOLATES', "CHOCOLATE'S",
    'WALLET', 'WALLETS', 'WARRANT', 'WARRANTS',
    'WATCH', 'WATCHES', 'WATCHED', 'WATCHING',
    'WANDER', 'WANDERS', 'WANDERED', 'WANDERING',
    'CONNIE', 'CATHOLICISM', 'WANT', 'WANTED',
    'PONG', 'GONG', 'KONG',
    'FLORIDA', 'ORANGE', 'HORRIBLE', 'MAJORITY',
])


def phila_system(i, phones, trans, fm, fp, fv, ps, fs, pc, phoneset):
    """Redefine the Plotnik vowel class of one vowel for Philadelphia.

    The rules are applied in order, so a later rule overrides an earlier one.

    Args:
        i: index of the target vowel in ``phones``.
        phones: the word's phones; passed through ``split_stress_digit``,
            after which each item is read via ``.label`` and ``.arpa``
            (presumably ``.arpa`` is the label without its stress digit --
            confirm against ``split_stress_digit``).
        trans: orthographic form of the word; compared case-insensitively.
        fm: environment code; a value of ``'0'`` disables the short-a
            reclassification (exact meaning of the code is defined by the
            caller -- TODO confirm).
        fp, fv, ps, fs: accepted for interface compatibility; not used here.
        pc: incoming vowel class code (string).
        phoneset: mapping from ARPABET symbol to an object with a ``cvox``
            attribute; ``cvox == '0'`` is treated as "vowel" by the
            r-related rules below.

    Returns:
        The (possibly reassigned) vowel class code as a string.
    """
    # separate Arpabet coding from stress digit for vowels
    phones = split_stress_digit(phones)
    word = trans.upper()
    vowel = phones[i]
    n_phones = len(phones)

    # 1. New short-a coding using Kyle Gorman's syllabification &
    # classification functions.
    if pc == '3' and vowel.label in {'AE0', 'AE1', 'AE2', 'AE'} and fm != '0':
        # The second argument is the list of ARPABET phones for the word
        # starting with the target vowel; this does not affect correctness of
        # syllabification.  is_tense returns True if tense, False if lax, and
        # None if variable/"unclassifiable".
        tenseness = is_tense(trans, [ph.label for ph in phones[i:]])
        pc = {True: '33', False: '3'}.get(tenseness, '39')

    # convert dictionary entries to LAX short-a for "-arry" words
    if pc == '2' and 'ARRY' in word:
        if (n_phones > i + 2 and phones[i + 1].arpa == 'R'
                and phoneset[phones[i + 2].arpa].cvox == '0'):
            pc = '3'
    # random dictionary inaccuracies
    if pc == '5' and word == 'MARIO':
        pc = '3'

    # 2. /e/
    if word in ("CATCH", "KEPT"):
        pc = '2'

    # 3. /oh/
    if vowel.arpa == 'AA' and word in _PHILA_OH_WORDS:
        pc = '53'

    # 4. /o/
    if vowel.arpa == 'AO' and word in _PHILA_O_WORDS:
        pc = '5'
    if vowel.arpa == 'AE' and word in ('LANZA', "LANZA'S"):
        pc = '5'

    # 4. /ah/ -- rule disabled in the original; kept for reference:
    # if pc == '5' and phones[i].arpa == 'AA' and (i > 0 and phones[i-1].arpa != 'W') and (len(phones) != i+1 and phones[i+1].arpa != 'R'):
    #     x1 = max(0, i - 3)
    #     x2 = min(len(trans), i + 3)
    #     for t in ['AU', 'AW', 'AL']:
    #         if 'A' in trans[x1:x2].upper() and t not in trans[x1:x2].upper():
    #             pc = '43'

    # 5. /iw/
    if vowel.label == "UW1":
        # UW1 preceded by /y/
        if i > 0 and phones[i - 1].arpa == 'Y':
            pc = '82'
        # words spelled with "-ew", e.g. "threw", "new", "brew"
        if 'EW' in word:
            pc = '82'
        # words spelled with "-u" after /t/, /d/, /n/, /l/, /s/, e.g.
        # "Tuesday", "nude", "duty", "new"
        if i > 0 and phones[i - 1].arpa in ('T', 'D', 'N', 'L', 'S'):
            # make sure -u spelling is adjacent to consonant in orthography
            if any(t in word for t in ('TU', 'DU', 'NU', 'LU', 'SU')):
                pc = '82'

    # 6. /Tuw/
    # The original tested `trans.upper in [...]` (the bound method, never
    # called), so this rule could never fire; call upper() as intended.
    if vowel.label == "UW1" and word in ('THROUGH',):
        pc = '73'

    # 7. front vowels before r
    if (n_phones > i + 1 and vowel.arpa in ('EH', 'AE')
            and phones[i + 1].arpa == 'R'):
        # word-final /r/, or not word-final but also NOT intervocalic r
        if n_phones == i + 2 or phoneset[phones[i + 2].arpa].cvox != '0':
            pc = '24'

    return pc