Пример #1
0
    fs = code.split(".")[1][4]
    pc = "3"
    pcode = phila_system(i, phones, trans, fm, fp, fv, ps, fs, pc, phoneset)
    if pcode == "33":
        return True
    elif pcode == "3":
        return False

if __name__ == "__main__":
    # Smoke test: print the old and the new short-a tenseness coding side by
    # side for a fixed list of test words.

    # Load the pronouncing dictionary from the file "dict" in the current
    # directory.  Lines starting with ';' are skipped; every other line is
    # split on its first double space into the word and a space-separated
    # phone string.
    pron = {}
    # The `with` block guarantees the file handle is closed once the
    # dictionary has been read, even if a malformed line raises.
    with open("dict", "r") as dict_file:
        for line in dict_file:
            if line.startswith(';'):
                continue
            (word, pron_string) = line.rstrip().split('  ', 1)
            pron[word] = pron_string.split()

    words = ['MADDER', 'BADNESS', 'BADDEST', 'RAN', 'SWAM', 'MATH', 'SAD',
             'HAND', 'HAM', 'HALF', 'PATH', 'PASS', 'CASH', 'BANG', 'BAT',
             'PAL', 'BAG', 'CAB', 'HAMMER', 'MANAGE', 'PLANET', 'PLANETS',
             'MANNING', 'CLASSES', 'ASKING', 'CAN', 'BEGAN', 'ANNE',
             'ASPECT', 'ASPIRIN', 'CASKET', 'ASKED', 'BASKETBALL', 'BANDSTAND',
             'BACKSTROKE', 'TRANSFER', "GLADDEN", "TRANSMISSION",
             'SANTA', 'GRANDMOTHER', 'RASCAL', "ASKIN'", "PASSIN'", 'PASSING',
             'TRAFFIC', 'CATHOLIC', 'MARRY', 'SPANISH']
    for word in words:
        word_pron = pron[word]
        old_coding = is_tense_old(word, word_pron)
        new_coding = is_tense(word, word_pron)
        # To list only the disagreements, guard the print below with
        # `if old_coding != new_coding:`.
        # Formatting the line up front produces the same "word old new"
        # output under both the Python 2 print statement and the Python 3
        # print function.
        print("%s %s %s" % (word, old_coding, new_coding))
Пример #2
0
# Orthographic words whose AA vowel is recoded to class '53' (/oh/).
_PHILA_OH_WORDS = frozenset([
    'LAW', 'LAWS', "LAW'S", 'LAWFUL', 'UNLAWFUL', 'DOG', 'DOGS', "DOG'S",
    'DOGGED', 'ALL', "ALL'S", 'CALL', 'CALLS', "CALL'S", 'CALLING',
    'CALLED', 'FALL', 'FALLS', "FALL'S", 'FALLING', 'AUDIENCE',
    'AUDIENCES', "AUDIENCE'S", 'ON', 'ONTO', 'GONNA', 'GONE', 'BOSTON',
    "BOSTON'S", 'AWFUL', 'AWFULLY', 'AWFULNESS', 'AWKWARD', 'AWKWARDLY',
    'AWKWARDNESS', 'AWESOME', 'AUGUST', 'COUGH', 'COUGHS', 'COUGHED',
    'COUGHING',
])

# Orthographic words whose AO vowel is recoded to class '5' (/o/).
_PHILA_O_WORDS = frozenset([
    'CHOCOLATE', 'CHOCOLATES', "CHOCOLATE'S", 'WALLET', 'WALLETS',
    'WARRANT', 'WARRANTS', 'WATCH', 'WATCHES', 'WATCHED', 'WATCHING',
    'WANDER', 'WANDERS', 'WANDERED', 'WANDERING', 'CONNIE', 'CATHOLICISM',
    'WANT', 'WANTED', 'PONG', 'GONG', 'KONG', 'FLORIDA', 'ORANGE',
    'HORRIBLE', 'MAJORITY',
])


def phila_system(i, phones, trans, fm, fp, fv, ps, fs, pc, phoneset):
    """Redefine the Plotnik vowel class of one vowel for Philadelphia.

    The rules are applied in a fixed order and later rules override earlier
    ones, so their sequence matters.

    Args:
        i: index of the target vowel within ``phones``.
        phones: the phones of the word.  They are passed through
            ``split_stress_digit`` first; afterwards each phone is read
            through its ``label`` and ``arpa`` attributes.
        trans: orthographic form of the word; compared case-insensitively.
        fm: only compared against ``'0'``; the short-a recoding is skipped
            when it equals ``'0'``.  (Presumably the Plotnik
            following-manner code -- confirm against the caller.)
        fp, fv, ps, fs: accepted for interface compatibility; not read by
            this function.
        pc: vowel class code (a string such as ``'3'``) as returned by
            arpabet2plotnik.
        phoneset: mapping from an ARPABET symbol to an object exposing a
            ``cvox`` attribute.

    Returns:
        The vowel class code as a string, either ``pc`` unchanged or the
        code assigned by the last matching rule.
    """
    # separate Arpabet coding from stress digit for vowels
    phones = split_stress_digit(phones)
    vowel = phones[i]
    n_phones = len(phones)
    # Every spelling-based rule below is case-insensitive; uppercase once.
    word = trans.upper()

    # 1. New short-a coding using Kyle Gorman's syllabification &
    # classification functions.
    if pc == '3' and vowel.label in {'AE0', 'AE1', 'AE2', 'AE'} and fm != '0':
        # The second argument is the list of ARPABET phones of the word
        # starting with the target vowel; this does not affect correctness
        # of the syllabification.  is_tense returns True if tense, False if
        # lax, and None if variable/"unclassifiable".
        tenseness = is_tense(trans, [ph.label for ph in phones[i:]])
        if tenseness is True:
            pc = '33'
        elif tenseness is False:
            pc = '3'
        else:
            pc = '39'

    # Convert dictionary entries to LAX short-a for "-arry" words: the vowel
    # must be followed by R plus a segment whose cvox is '0' (presumably a
    # vowel, i.e. intervocalic r -- confirm against the phoneset definition).
    if pc == '2' and 'ARRY' in word:
        if (n_phones > i + 2 and phones[i + 1].arpa == 'R'
                and phoneset[phones[i + 2].arpa].cvox == '0'):
            pc = '3'

    # random dictionary inaccuracies
    if pc == '5' and word == 'MARIO':
        pc = '3'

    # 2. /e/
    if word in ('CATCH', 'KEPT'):
        pc = '2'

    # 3. /oh/
    if vowel.arpa == 'AA' and word in _PHILA_OH_WORDS:
        pc = '53'

    # 4. /o/
    if vowel.arpa == 'AO' and word in _PHILA_O_WORDS:
        pc = '5'

    if vowel.arpa == 'AE' and word in ('LANZA', "LANZA'S"):
        pc = '5'

    # 4b. /ah/ -- DISABLED.  The spelling-based rule below is kept for
    # reference only:
    #   if pc == '5' and phones[i].arpa == 'AA'
    #           and (i > 0 and phones[i-1].arpa != 'W')
    #           and (len(phones) != i+1 and phones[i+1].arpa != 'R'):
    #       x1 = max(0, i - 3)
    #       x2 = min(len(trans), i + 3)
    #       for t in ['AU', 'AW', 'AL']:
    #           if 'A' in trans[x1:x2].upper() and t not in trans[x1:x2].upper():
    #               pc = '43'

    # 5. /iw/
    if vowel.label == "UW1":
        # UW1 preceded by /y/
        if i > 0 and phones[i - 1].arpa == 'Y':
            pc = '82'
        # words spelled with "-ew", e.g. "threw", "new", "brew"
        if 'EW' in word:
            pc = '82'
        # words spelled with "-u" after /t/, /d/, /n/, /l/, /s/, e.g.
        # "Tuesday", "nude", "duty", "new": the preceding phone must be one
        # of those consonants and the spelling must contain a matching
        # consonant + U digraph.
        if i > 0 and phones[i - 1].arpa in ('T', 'D', 'N', 'L', 'S'):
            if any(t in word for t in ('TU', 'DU', 'NU', 'LU', 'SU')):
                pc = '82'

    # 6. /Tuw/
    # Compare the uppercased word itself: the method has to be *called*,
    # a bare `trans.upper` is a method object and never equals a string.
    if vowel.label == "UW1" and word == 'THROUGH':
        pc = '73'

    # 7. front vowels before r
    if (n_phones > i + 1 and vowel.arpa in ('EH', 'AE')
            and phones[i + 1].arpa == 'R'):
        if n_phones == i + 2:  # word-final /r/
            pc = '24'
        # not word-final but also NOT intervocalic r
        if n_phones > i + 2 and phoneset[phones[i + 2].arpa].cvox != '0':
            pc = '24'

    return pc
Пример #3
0
    pcode = phila_system(i, phones, trans, fm, fp, fv, ps, fs, pc, phoneset)
    if pcode == "33":
        return True
    elif pcode == "3":
        return False


if __name__ == "__main__":
    # Smoke test: print the old and the new short-a tenseness coding side by
    # side for a fixed list of test words.

    # Load the pronouncing dictionary from the file "dict" in the current
    # directory.  Lines starting with ';' are skipped; every other line is
    # split on its first double space into the word and a space-separated
    # phone string.
    pron = {}
    # The `with` block guarantees the file handle is closed once the
    # dictionary has been read, even if a malformed line raises.
    with open("dict", "r") as dict_file:
        for line in dict_file:
            if line.startswith(';'):
                continue
            (word, pron_string) = line.rstrip().split('  ', 1)
            pron[word] = pron_string.split()

    words = [
        'MADDER', 'BADNESS', 'BADDEST', 'RAN', 'SWAM', 'MATH', 'SAD', 'HAND',
        'HAM', 'HALF', 'PATH', 'PASS', 'CASH', 'BANG', 'BAT', 'PAL', 'BAG',
        'CAB', 'HAMMER', 'MANAGE', 'PLANET', 'PLANETS', 'MANNING', 'CLASSES',
        'ASKING', 'CAN', 'BEGAN', 'ANNE', 'ASPECT', 'ASPIRIN', 'CASKET',
        'ASKED', 'BASKETBALL', 'BANDSTAND', 'BACKSTROKE', 'TRANSFER',
        "GLADDEN", "TRANSMISSION", 'SANTA', 'GRANDMOTHER', 'RASCAL', "ASKIN'",
        "PASSIN'", 'PASSING', 'TRAFFIC', 'CATHOLIC', 'MARRY', 'SPANISH'
    ]
    for word in words:
        word_pron = pron[word]
        old_coding = is_tense_old(word, word_pron)
        new_coding = is_tense(word, word_pron)
        # To list only the disagreements, guard the print below with
        # `if old_coding != new_coding:`.
        # Formatting the line up front produces the same "word old new"
        # output under both the Python 2 print statement and the Python 3
        # print function.
        print("%s %s %s" % (word, old_coding, new_coding))
Пример #4
0
# Orthographic words whose AA vowel is recoded to class '53' (/oh/).
_PHILA_OH_WORDS = frozenset([
    'LAW', 'LAWS', "LAW'S", 'LAWFUL', 'UNLAWFUL', 'DOG', 'DOGS', "DOG'S",
    'DOGGED', 'ALL', "ALL'S", 'CALL', 'CALLS', "CALL'S", 'CALLING',
    'CALLED', 'FALL', 'FALLS', "FALL'S", 'FALLING', 'AUDIENCE',
    'AUDIENCES', "AUDIENCE'S", 'ON', 'ONTO', 'GONNA', 'GONE', 'BOSTON',
    "BOSTON'S", 'AWFUL', 'AWFULLY', 'AWFULNESS', 'AWKWARD', 'AWKWARDLY',
    'AWKWARDNESS', 'AWESOME', 'AUGUST', 'COUGH', 'COUGHS', 'COUGHED',
    'COUGHING',
])

# Orthographic words whose AO vowel is recoded to class '5' (/o/).
_PHILA_O_WORDS = frozenset([
    'CHOCOLATE', 'CHOCOLATES', "CHOCOLATE'S", 'WALLET', 'WALLETS',
    'WARRANT', 'WARRANTS', 'WATCH', 'WATCHES', 'WATCHED', 'WATCHING',
    'WANDER', 'WANDERS', 'WANDERED', 'WANDERING', 'CONNIE', 'CATHOLICISM',
    'WANT', 'WANTED', 'PONG', 'GONG', 'KONG', 'FLORIDA', 'ORANGE',
    'HORRIBLE', 'MAJORITY',
])


def phila_system(i, phones, trans, fm, fp, fv, ps, fs, pc, phoneset):
    """Redefine the Plotnik vowel class of one vowel for Philadelphia.

    The rules are applied in a fixed order and later rules override earlier
    ones, so their sequence matters.

    Args:
        i: index of the target vowel within ``phones``.
        phones: the phones of the word.  They are passed through
            ``split_stress_digit`` first; afterwards each phone is read
            through its ``label`` and ``arpa`` attributes.
        trans: orthographic form of the word; compared case-insensitively.
        fm: only compared against ``'0'``; the short-a recoding is skipped
            when it equals ``'0'``.  (Presumably the Plotnik
            following-manner code -- confirm against the caller.)
        fp, fv, ps, fs: accepted for interface compatibility; not read by
            this function.
        pc: vowel class code (a string such as ``'3'``) as returned by
            arpabet2plotnik.
        phoneset: mapping from an ARPABET symbol to an object exposing a
            ``cvox`` attribute.

    Returns:
        The vowel class code as a string, either ``pc`` unchanged or the
        code assigned by the last matching rule.
    """
    # separate Arpabet coding from stress digit for vowels
    phones = split_stress_digit(phones)
    vowel = phones[i]
    n_phones = len(phones)
    # Every spelling-based rule below is case-insensitive; uppercase once.
    word = trans.upper()

    # 1. New short-a coding using Kyle Gorman's syllabification &
    # classification functions.
    if pc == '3' and vowel.label in {'AE0', 'AE1', 'AE2', 'AE'} and fm != '0':
        # The second argument is the list of ARPABET phones of the word
        # starting with the target vowel; this does not affect correctness
        # of the syllabification.  is_tense returns True if tense, False if
        # lax, and None if variable/"unclassifiable".
        tenseness = is_tense(trans, [ph.label for ph in phones[i:]])
        if tenseness is True:
            pc = '33'
        elif tenseness is False:
            pc = '3'
        else:
            pc = '39'

    # Convert dictionary entries to LAX short-a for "-arry" words: the vowel
    # must be followed by R plus a segment whose cvox is '0' (presumably a
    # vowel, i.e. intervocalic r -- confirm against the phoneset definition).
    if pc == '2' and 'ARRY' in word:
        if (n_phones > i + 2 and phones[i + 1].arpa == 'R'
                and phoneset[phones[i + 2].arpa].cvox == '0'):
            pc = '3'

    # random dictionary inaccuracies
    if pc == '5' and word == 'MARIO':
        pc = '3'

    # 2. /e/
    if word in ('CATCH', 'KEPT'):
        pc = '2'

    # 3. /oh/
    if vowel.arpa == 'AA' and word in _PHILA_OH_WORDS:
        pc = '53'

    # 4. /o/
    if vowel.arpa == 'AO' and word in _PHILA_O_WORDS:
        pc = '5'

    if vowel.arpa == 'AE' and word in ('LANZA', "LANZA'S"):
        pc = '5'

    # 4b. /ah/ -- DISABLED.  The spelling-based rule below is kept for
    # reference only:
    #   if pc == '5' and phones[i].arpa == 'AA'
    #           and (i > 0 and phones[i-1].arpa != 'W')
    #           and (len(phones) != i+1 and phones[i+1].arpa != 'R'):
    #       x1 = max(0, i - 3)
    #       x2 = min(len(trans), i + 3)
    #       for t in ['AU', 'AW', 'AL']:
    #           if 'A' in trans[x1:x2].upper() and t not in trans[x1:x2].upper():
    #               pc = '43'

    # 5. /iw/
    if vowel.label == "UW1":
        # UW1 preceded by /y/
        if i > 0 and phones[i - 1].arpa == 'Y':
            pc = '82'
        # words spelled with "-ew", e.g. "threw", "new", "brew"
        if 'EW' in word:
            pc = '82'
        # words spelled with "-u" after /t/, /d/, /n/, /l/, /s/, e.g.
        # "Tuesday", "nude", "duty", "new": the preceding phone must be one
        # of those consonants and the spelling must contain a matching
        # consonant + U digraph.
        if i > 0 and phones[i - 1].arpa in ('T', 'D', 'N', 'L', 'S'):
            if any(t in word for t in ('TU', 'DU', 'NU', 'LU', 'SU')):
                pc = '82'

    # 6. /Tuw/
    # Compare the uppercased word itself: the method has to be *called*,
    # a bare `trans.upper` is a method object and never equals a string.
    if vowel.label == "UW1" and word == 'THROUGH':
        pc = '73'

    # 7. front vowels before r
    if (n_phones > i + 1 and vowel.arpa in ('EH', 'AE')
            and phones[i + 1].arpa == 'R'):
        if n_phones == i + 2:  # word-final /r/
            pc = '24'
        # not word-final but also NOT intervocalic r
        if n_phones > i + 2 and phoneset[phones[i + 2].arpa].cvox != '0':
            pc = '24'

    return pc