Пример #1
0
def findLineStress(line):
    '''Find the possible accentual stress patterns of a line, based on CMU dict.

    The line is cleaned with removeMarkupWords and prepString, split on
    whitespace, and every word's stress pattern(s) are appended to every parse
    built so far.  Words without a dictionary pronunciation are skipped: they
    add no syllables to any parse but are still included in wordCount.

    _parameters_
    line: line of text

    _returns_
    parselist: list of distinct potential stress strings after parsing.
        0 is unstressed, 1 is primary stress, 2 is secondary stress (middle).
        The order of the list is arbitrary (it comes from a set).
    syllableLengths: list of syllable lengths, aligned index-for-index with parselist
    wordCount: count of words in the cleaned line
    '''
    line = prepString(removeMarkupWords(line))
    words = line.split()
    wordCount = len(words)
    parses = ['']
    for word in words:
        pros = pronouncing.phones_for_word(word)
        if not pros:
            continue
        # Several pronunciations often share one stress pattern; keeping only
        # the distinct patterns stops the parse list from multiplying with
        # duplicates for every such word.
        wordStresses = list(dict.fromkeys(
            pronouncing.stresses(pronunciation) for pronunciation in pros))
        parses = [parse + wordStress
                  for wordStress in wordStresses
                  for parse in parses]

    # Deduplicate once so both returned lists are guaranteed to line up.
    parselist = list(set(parses))
    return parselist, [len(parse) for parse in parselist], wordCount
Пример #2
0
def findLineStress(tokenized_line):
    '''
    Find the possible accentual stress patterns of a tokenized line, based on
    CMU dict.  Uses relative stress per word, so somewhat limited.

    Tokens without a dictionary pronunciation are skipped and contribute no
    syllables to any parse.

    Parameters
    ----------
    tokenized_line : list
        list of tokens from line, usually preprocessed to remove non-words

    Returns
    -------
    parselist: list of distinct potential stresses after parsing.
        0 is unstressed, 1 is primary stress, 2 is secondary stress (middle).
        The order of the list is arbitrary (it comes from a set); an empty
        input yields [''].
    '''

    parses = ['']
    for word in tokenized_line:
        pros = pronouncing.phones_for_word(word)
        if not pros:
            continue
        # Several pronunciations often share one stress pattern; keeping only
        # the distinct patterns stops the parse list from multiplying with
        # duplicates for every such word.
        wordStresses = list(dict.fromkeys(
            pronouncing.stresses(pronunciation) for pronunciation in pros))
        parses = [parse + wordStress
                  for wordStress in wordStresses
                  for parse in parses]

    return list(set(parses))
Пример #3
0
def getStressStruct(line):
    """Build a stress string for a sequence of words.

    For every word in ``line``:

    * a word with no dictionary pronunciation contributes ``"X"``;
    * a word with a single pronunciation contributes its stress digits;
    * a word with several pronunciations contributes, per syllable, the stress
      digit when every pronunciation agrees on it and ``"*"`` otherwise.  If
      the pronunciations differ in syllable count, the whole word becomes
      ``"*"`` repeated for the syllable count of the first pronunciation.

    The previous implementation tried to compute the agreement mask with a
    binary XNOR, but compared characters of a ``bin()`` string against the
    integer 1, so the "agree" branch could never run and some inputs raised
    IndexError.  The agreement is now computed position by position.

    Parameters:
        line: iterable of word strings.

    Returns:
        str: the concatenated stress markers for all words.
    """
    pieces = []
    for word in line:
        phones = pronouncing.phones_for_word(word)
        if not phones:
            pieces.append("X")
            continue

        stress_list = [pronouncing.stresses(phone) for phone in phones]
        first = stress_list[0]
        if len(stress_list) == 1:
            pieces.append(first)
        elif any(len(pattern) != len(first) for pattern in stress_list):
            # Syllables cannot be aligned across pronunciations.
            pieces.append("*" * len(first))
        else:
            # zip(*stress_list) yields one tuple of digits per syllable.
            pieces.append("".join(
                column[0] if len(set(column)) == 1 else "*"
                for column in zip(*stress_list)))
    return "".join(pieces)
Пример #4
0
def is_iambic(phrase):
    """
    Check that a phrase satisfies iambic meter (unstressed, stressed, ...).

    Each word is stripped of surrounding whitespace/punctuation, lower-cased
    and looked up; only the first listed pronunciation is used.  A
    one-syllable word with primary stress is treated as ambiguous (stress 2)
    so it may sit in either position.

    Returns 1 if no even-indexed syllable carries primary stress and no
    odd-indexed syllable is unstressed, otherwise 0.  Definitely an imperfect
    check.  If a word is not in the dictionary, returns 0.
    """
    meter = []
    for word in phrase.split():
        word = word.strip().strip(string.punctuation).lower()
        phones_list = pronouncing.phones_for_word(word)
        if not phones_list:
            return 0  # word not found
        # just default to the first pronunciation if > 1 given
        stresses = pronouncing.stresses(phones_list[0])
        if stresses == '1':
            stresses = '2'  # allow ambiguity for 1-syllable words with stress 1
        meter.extend(int(x) for x in stresses)

    # Odd positions (2nd, 4th, ... syllable) must not be unstressed.
    if 0 in meter[1::2]:
        return 0
    # Even positions (1st, 3rd, ... syllable) must not carry primary stress.
    if 1 in meter[0::2]:
        return 0
    return 1
Пример #5
0
def rhyme_same_stress(word):
    """Pick a rhyme for ``word`` whose stress pattern matches it.

    Up to eleven attempts are made.  Each attempt picks one pronunciation of
    ``word`` at random, asks rhyme_type_random for a candidate rhyme, and
    returns the candidate if any of its pronunciations has the same stress
    pattern.  After a failed attempt the attempt counter is printed.  If all
    attempts fail, the last candidate is returned regardless of stress.
    """
    # print('in the stress loop')
    for timeout_timer in range(11):
        word_stress = pronouncing.stresses(
            random.choice(pronouncing.phones_for_word(word)))
        rhyme = rhyme_type_random(word)
        if any(word_stress == pronouncing.stresses(candidate)
               for candidate in pronouncing.phones_for_word(rhyme)):
            return rhyme
        print(timeout_timer)
    return rhyme
Пример #6
0
def is_final_syllable_stressed(word):
    """Return True when the last syllable of ``word`` has primary stress.

    Only the first pronunciation returned for the word is examined; a word
    with no pronunciation raises IndexError.
    """
    first_pronunciation = pronouncing.phones_for_word(word)[0]
    return pronouncing.stresses(first_pronunciation)[-1] == '1'
Пример #7
0
 def is_iambic(self, new_sent):
     """Return True when the sentence's stress string equals ``self.iamb_pat``.

     The stress string is the concatenation of the stress digits of the first
     pronunciation of every whitespace-separated word.  A word with no
     pronunciation raises IndexError.
     """
     sent_pat = "".join(
         pronouncing.stresses(pronouncing.phones_for_word(word)[0])
         for word in new_sent.split()
     )
     return sent_pat == self.iamb_pat
Пример #8
0
def get_stresses_oneword(word):
    """Return a stress string for one word, using "3" as the "unknown" marker.

    * Word has a pronunciation: the stress digits of its first pronunciation,
      or "3" when that pattern has zero or one syllable.
    * Word has no pronunciation: "3" repeated dirtysyllables(word) times.
    """
    candidates = pronouncing.phones_for_word(word)
    if not candidates:
        return '3' * dirtysyllables(word)

    pattern = pronouncing.stresses(candidates[0])
    return "3" if len(pattern) <= 1 else pattern
Пример #9
0
def getWordStresses(word: str):
    """Return the stress pattern of a single word.

    The word is first passed through numbersToWords.  If the result contains
    a space, the list of its whitespace-separated parts is returned instead
    of a stress string.  Otherwise the stress digits of the first
    pronunciation are returned, or "1111111111" when the word has no
    pronunciation.
    """
    word = numbersToWords(word)
    if " " in word:
        return word.split()

    phones = pronouncing.phones_for_word(word)
    # Hacky way of discarding candidate title
    return pronouncing.stresses(phones[0]) if phones else "1111111111"
Пример #10
0
def stress(sentence):

    """
    Split a sentence into phrases based on the stress patterns of its words.

    Each word gets one stress pattern: among all its pronunciations, the one
    whose digit string has the smallest integer value. Words with no
    pronunciation get the pattern '0'.

    :param sentence: list(str)
    :return: list(str) of space-joined phrases, or None when the first
        word's stress pattern starts with '0'
    """

    stresses = []

    for word in sentence:

        phones = pronouncing.phones_for_word(word)
        homonyms = [pronouncing.stresses(p) for p in phones]
        # Patterns are compared as integers, e.g. '01' -> 1 and '10' -> 10.
        homonyms_int = [int(p) for p in homonyms]

        # Unknown word: fall back to a single unstressed syllable.
        if len(homonyms_int) == 0:
            homonyms = ['0']
            homonyms_int = [0]

        word_stress = homonyms[homonyms_int.index(min(homonyms_int))]

        stresses.append(word_stress)

    # NOTE(review): an empty sentence raises IndexError here - confirm
    # whether callers can pass one.
    if stresses[0][0] == str(0):
        return None

    # `words` and `stresses` are re-sliced after every completed phrase so
    # that index len(current_phrase) always points at the next unused word.
    words = sentence[:]
    phrases = []
    current_phrase = []

    for word in sentence:

        # if re.search("[a-zA-Z0-9]", word) is None:
        #     continue

        current_phrase.append(word)

        # Only consider closing the phrase once it has at least MIN_LENGTH
        # syllables.
        if len("".join(stresses[:len(current_phrase)])) >= MIN_LENGTH:

            next_stresses = "".join(stresses[len(current_phrase):])

            # Close the phrase when more than one syllable remains, the next
            # syllable has primary stress, and the next word contains a letter.
            if len(next_stresses) > 1 and next_stresses[0] == str(1) and \
                    re.search("[a-zA-Z]", words[len(current_phrase)]):

                phrases.append(current_phrase)
                words = words[len(current_phrase):]
                stresses = stresses[len(current_phrase):]
                current_phrase = []

    # Whatever is left over forms the final phrase.
    if len(current_phrase) > 0:
        phrases.append(current_phrase)

    return([" ".join(p) for p in phrases])
Пример #11
0
def stress_pattern():
    """Demonstrate stress lookup and stress-pattern search with pronouncing.

    Prints the stress pattern of "snappiest", then the results of two
    stress-pattern searches.  The print statements were converted to print()
    calls so the function also parses on Python 3; output is unchanged.
    """
    phones_list = pronouncing.phones_for_word("snappiest")
    meter = pronouncing.stresses(phones_list[0])
    print(meter)  # 102
    # 1 : primary stress, 2: secondary stress , 0: unstressed
    ## search by stress pattern
    stress_first = pronouncing.search_stresses('100100')
    stress_either = pronouncing.search_stresses(
        '^00[12]00[12]$')  ## either 1 or 2 in the []
    print(stress_first)
    print(stress_either)
Пример #12
0
def count_syllables(words):
    """Estimate the total number of syllables in a whitespace-separated string.

    Each word is stripped of surrounding whitespace and punctuation.  A word
    with a dictionary pronunciation counts the syllables of its first
    pronunciation; an unknown word is approximated as one syllable per three
    characters.  Every word's contribution is capped at
    MAX_SYLLABLES_PER_WORD.

    Returns the summed count (0 for an empty or all-whitespace string).
    """
    syllables = 0
    for word in words.split():
        word = word.strip().strip(string.punctuation)
        phones_list = pronouncing.phones_for_word(word)
        if phones_list:
            estimate = len(pronouncing.stresses(phones_list[0]))
        else:
            # if we don't know, just do a quick approximation here; it
            # shouldn't come up too often
            estimate = round(len(word) / 3)
        syllables += min(MAX_SYLLABLES_PER_WORD, estimate)
    return syllables
Пример #13
0
def convert_to_word(token: str) -> Word:
    """Wrap a token and a stress pattern in a Word.

    When the token has at least one pronunciation, the stress pattern of the
    first one is used, with secondary stress ("2") folded into primary ("1").
    Otherwise the pattern is one "?" per run of the letters a/e/i/o/u in the
    token, as a syllable-count guess.
    """
    pronunciations = p.phones_for_word(token)
    if pronunciations:
        # pick one arbitrarily
        return Word(token, p.stresses(pronunciations[0]).replace("2", "1"))

    vowel_runs = re.findall(r"[aeiou]+", token)
    return Word(token, "?" * len(vowel_runs))
Пример #14
0
def generate_joo_joo_eyeball(syllable_count):
    """Build a random phrase with the stress shape of a stored template.

    Entries are drawn at random from joo_joo_eyeball until one whose first
    element equals ``syllable_count`` is found; the loop does not terminate
    if no entry matches.  Every word of the entry's text (second element) is
    then replaced with a random word whose stress pattern exactly matches the
    stress pattern of that word's first pronunciation.
    """
    entry = [0]
    while entry[0] != syllable_count:
        entry = random.choice(joo_joo_eyeball)
    template = entry[1]  # Discard count, we don't need it

    def same_stress_word(word):
        pattern = pronouncing.stresses(pronouncing.phones_for_word(word)[0])
        return random.choice(pronouncing.search_stresses("^" + pattern + "$"))

    return ' '.join(same_stress_word(word) for word in template.split())
Пример #15
0
    def calculate_scores(poem_lines):
        """Score a two-line poem on rhyme and on stress-pattern similarity.

        Args:
            poem_lines: exactly two lines; each is either a string (split on
                whitespace) or an already-split sequence of words.

        Returns:
            A list ``[combined, rhyme, stress]`` where ``rhyme`` is
            rhyme_score of the two lines' last words, ``stress`` is the
            difflib.SequenceMatcher ratio of the two lines' stress strings,
            and ``combined`` weights them with CoupletScorer.rhyme_weight and
            CoupletScorer.stress_weight.

        Raises:
            ValueError: if ``poem_lines`` does not contain exactly two lines.
        """
        ### keep only two-lines poem
        if len(poem_lines) != 2:
            raise ValueError("can only score 2-line poems/couplets")

        ### calculate informations needed for scoring
        stress_strings = []
        last_words = []

        for pl in poem_lines:
            try:
                pwords = pl.split()
            except AttributeError:
                # Already a sequence of words.
                pwords = pl

            last_words.append(pwords[-1])

            word_stresses = []
            for pword in pwords:
                try:
                    first_phones = pronouncing.phones_for_word(pword)[0]
                except IndexError:
                    # Word has no pronunciation: it adds nothing to the
                    # line's stress string.
                    continue
                word_stresses.append(pronouncing.stresses(first_phones))

            stress_strings.append("".join(word_stresses))

        ### rhyme score
        rhyme_score_ = rhyme_score(
            last_words[0], last_words[1], penalize_short_word=False
        )

        ### stress score
        stress_string_score = difflib.SequenceMatcher(
            None, stress_strings[0], stress_strings[1]
        ).ratio()

        ### combined score
        ret = (
            CoupletScorer.rhyme_weight * rhyme_score_
            + CoupletScorer.stress_weight * stress_string_score
        )
        return [
            ret,
            rhyme_score_,
            stress_string_score,
        ]
Пример #16
0
def unstressed(word, syll, cache={}):
    """Tell whether syllable ``syll`` of ``word`` may be treated as unstressed.

    ``'*'`` always counts as unstressed.  Otherwise the stress pattern of
    ``prons[word.lower()]`` is used; if it contains no "0" at all, secondary
    stresses ("2") are demoted to "0".  The result is True for one-syllable
    patterns or when the digit at index ``syll`` is "0".

    Results are memoized in ``cache`` under the key ``(word, syll)``; the
    default dict is shared across calls on purpose.
    """
    if word == '*':
        return True

    key = (word, syll)
    try:
        return cache[key]
    except KeyError:
        pass

    pattern = pronouncing.stresses(prons[word.lower()])
    if '0' not in pattern:
        pattern = pattern.replace('2', '0')

    cache[key] = len(pattern) == 1 or pattern[syll] == '0'
    return cache[key]
Пример #17
0
 def cut_into_iamb(self, new_sent):
     """Cut a sentence into chunks of at least ten syllables each.

     Words are accumulated (each followed by a single space) and their
     syllable counts, taken from the first pronunciation, are summed.  Once
     the running total reaches ten the chunk is emitted and the counters
     reset.  Trailing words that never reach ten syllables are not returned.
     """
     sents = []
     pending = []
     syllables = 0
     for word in new_sent.split():
         pat = pronouncing.stresses(pronouncing.phones_for_word(word)[0])
         pending.append(word + " ")
         syllables += len(pat)
         if syllables < 10:
             continue
         sents.append("".join(pending))
         pending = []
         syllables = 0
     return sents
Пример #18
0
def getWordStresses(word: str):
    """Return the stress pattern of a single word.

    The word is first passed through numbersToWords.  If the result contains
    a space, the list of its whitespace-separated parts is returned instead
    of a stress string.  Next, PRONUNCIATION_OVERRIDES is consulted
    (case-insensitive match on the word).  Otherwise the stress digits of the
    first pronunciation are returned, or "1111111111" when the word has no
    pronunciation.
    """
    word = numbersToWords(word)
    if " " in word:
        return word.split()

    lowered = word.lower()
    for override, stresses in PRONUNCIATION_OVERRIDES:
        if override.lower() == lowered:
            return stresses

    phones = pronouncing.phones_for_word(word)
    # Hacky way of discarding candidate title
    return pronouncing.stresses(phones[0]) if phones else "1111111111"
Пример #19
0
def getWordStresses(word: str):
    """Return the stress pattern of a single word.

    The word is first passed through numbersToWords.  If the result contains
    a space, the list of its whitespace-separated parts is returned instead
    of a stress string.  Next, PRONUNCIATION_OVERRIDES is consulted (exact,
    case-sensitive match on the word).  Otherwise the stress digits of the
    first pronunciation are returned, or "1111111111" when the word has no
    pronunciation.
    """
    word = numbersToWords(word)
    if " " in word:
        return word.split()

    for override, stresses in PRONUNCIATION_OVERRIDES:
        if word == override:
            return stresses

    phones = pronouncing.phones_for_word(word)
    try:
        first_pronunciation = phones[0]
    except IndexError:
        # Hacky way of discarding candidate title
        return "1111111111"
    return pronouncing.stresses(first_pronunciation)
def summon_seas():
    """Return "<Word> Sea" names built from words that sound like "caspian".

    Candidates come from a stress-pattern search using the stress digits of
    the first pronunciation of "caspian".  Only candidates whose first
    pronunciation has exactly three syllables are kept; each is suffixed with
    " sea" and capitalized with string.capwords.
    """
    # Find words with matching stress patterns
    caspian_stresses = pr.stresses(pr.phones_for_word("caspian")[0])

    # Keep only the words with the correct syllable count
    return [
        string.capwords(candidate + " sea")
        for candidate in pr.search_stresses(caspian_stresses)
        if pr.syllable_count(pr.phones_for_word(candidate)[0]) == 3
    ]
Пример #21
0
    def stresses(self):
        """
        Return a string of the stresses for the given word.

        Consumers of this string make the following assumptions:
         - syllables with a "1" should be stressed by the meter
         - syllables with a "2" can be stressed or unstressed by the meter
         - syllables with a "0" should be unstressed by the meter
        """
        # Poets often signal syllables that would normally be silent with "è",
        # so such a word gets one extra flexible syllable.
        extra_syllable = "2" if "è" in self.word else ""
        pattern = stresses(self.phones) + extra_syllable
        # Words of one syllable can usually be pronounced either way.
        return "2" if pattern in ("1", "0") else pattern
Пример #22
0
def stresses_for_word_sequence(word_sequence):
    """Gets the CMUdict stress sequence for a given word sequence.

    Only the first pronunciation of each word is used.

    Args:
        word_sequence (list): A list of words.

    Returns:
        string: A stress sequence where 0 is zero stress, 1 is primary stress,
            and 2 is secondary stress.  The empty string is returned as soon
            as any word has no pronunciation.

    """
    combined = ''
    for word in word_sequence:
        pronunciations = pronouncing.phones_for_word(word)
        if not pronunciations:
            return ''
        combined += pronouncing.stresses(pronunciations[0])
    return combined
Пример #23
0
def word_matches_stress(word: str, stress_pattern_match: str) -> bool:
    '''
    Tell whether any pronunciation of ``word`` has the given stress pattern.

    eg: stress_pattern_match = "010"

    A word can have more than one pronunciation (eg: the lead of a pencil vs.
    to lead someone), so one matching pronunciation is enough.  Both 1 and 2
    count as a stressed syllable: every "2" is folded into "1" before the
    comparison, because the pattern to match only ever contains 1s and 0s.
    '''
    return any(
        p.stresses(pronunciation).replace("2", "1") == stress_pattern_match
        for pronunciation in p.phones_for_word(word)
    )
Пример #24
0
def find_words():
    """Return three-syllable words that sound like "beluga".

    A word qualifies when it appears in the stress-pattern search for the
    stress digits of "beluga" (first pronunciation), also appears in the
    phone search for words ending in the last phone of "beluga", and its own
    first pronunciation has exactly three syllables.  The result order is
    arbitrary because the two searches are intersected as sets.
    """
    beluga = pr.phones_for_word("beluga")[0]
    final_phone = beluga.split(" ")[-1]

    # Find words with matching stress patterns
    stress_matches = pr.search_stresses(pr.stresses(beluga))

    # Find words with matching end phone
    ending_matches = pr.search(final_phone + "$")

    # Keep the shared words with the correct syllable count
    shared = set(stress_matches).intersection(ending_matches)
    return [
        candidate
        for candidate in shared
        if pr.syllable_count(pr.phones_for_word(candidate)[0]) == 3
    ]
Пример #25
0
def get_places():
    """Collect place names with a particular stress shape from pycorpora.

    Names are gathered from several pycorpora.geography collections.  A name
    is kept when it has a pronunciation and its first pronunciation is either
    three syllables with primary stress on the second, or two syllables with
    primary stress on the first.
    """
    geography = pycorpora.geography
    place_lists = [
        [place["city"] for place in geography.us_cities['cities']],
        [place["city"] for place in geography.norwegian_cities['cities']],
        geography.english_towns_cities['towns'],
        geography.english_towns_cities['cities'],
        [river["name"] for river in geography.rivers["rivers"]],
        geography.countries['countries'],
        [place["name"] for place in geography.canadian_municipalities["municipalities"]],
        [place['name'] for place in geography.london_underground_stations['stations']],
    ]

    # Syllable count -> index of the syllable that must carry primary stress.
    required_stress_index = {3: 1, 2: 0}

    keepers = []
    for place in [name for group in place_lists for name in group]:
        pronunciation_list = pronouncing.phones_for_word(place)
        if not pronunciation_list:
            continue

        first_pronunciation = pronunciation_list[0]
        syllable_count = pronouncing.syllable_count(first_pronunciation)
        stresses = pronouncing.stresses(first_pronunciation)

        stressed_at = required_stress_index.get(syllable_count)
        if stressed_at is not None and stresses[stressed_at] == '1':
            keepers.append(place)

    return keepers
Пример #26
0
def get_word_stresses(word: str) -> str:
    """
    Using the pronouncing library, get the stress pattern of a single word.

    The word is first passed through numbers_to_words, which is expected to
    spell out numerals (e.g. 10 -> ten, and four-digit numbers as years,
    1918 -> "nineteen eighteen").

    Parameters:
      word (str): The word to check.

    Returns:
      A string of 0s, 1s, or 2s, representing the stress pattern of the
      word. If the word isn't recognized, the string "A" is returned for
      easy checking. If numbers_to_words produced several words (e.g. "500"
      -> "five hundred"), a list of those words is returned instead, so
      get_title_stresses() can go over them again.

    """
    word = numbers_to_words(word)
    if " " in word:
        return word.split()

    # We want to forcibly change the stress for certain words (found in
    # constants.py).
    lowered = word.lower()
    for override, stresses in PRONUNCIATION_OVERRIDES:
        if override.lower() == lowered:
            return stresses

    phones = pronouncing.phones_for_word(word)
    return pronouncing.stresses(phones[0]) if phones else "A"
Пример #27
0
def stress_pattern(phones):
    """Return the stress digits found in an iterable of phone strings.

    The phones are concatenated without a separator and handed to
    pronouncing.stresses.
    """
    joined = ''.join(phones)
    return pronouncing.stresses(joined)
Пример #28
0
 def test_stresses(self):
     """pronouncing.stresses returns the stress digits of a phone string."""
     cases = (
         ('P ER0 M IH1 T', '01'),
         ('P ER1 M IH2 T', '12'),
     )
     for phones, expected in cases:
         self.assertEqual(expected, pronouncing.stresses(phones))
Пример #29
0
def stress_pattern(phones):
    """Return the stress digits found in an iterable of phone strings.

    The phones are concatenated without a separator and handed to
    pronouncing.stresses.
    """
    joined = ''.join(phones)
    return pronouncing.stresses(joined)
def rhyme_degree(target_word, test_word):
    """Returns a number between 0 and 1 as the degree of rhyming between two
    words, with 1 being an exact rhyme and 0 being no similarity at all.

    Returns 1 immediately when the pronouncing library lists test_word as a
    rhyme of target_word, and 0 when either word has no pronunciation or
    cannot be syllabified. Otherwise the rhyming parts of both words (from
    the last stressed syllable on, without that syllable's onset) are
    compared phone by phone. Progress messages are printed for library
    rhymes and for degrees above 0.7."""

    if test_word in pnc.rhymes(target_word):
        print('\rFound rhyme pair from the pronouncing library:')
        print(target_word, 'and', test_word)
        return 1

    # extract word part from last stressed syllable excluding that syll's onset
    # NOTE(review): if both words are the same string this dict has a single
    # key - confirm callers never pass identical words.
    rhymes = {target_word: None, test_word: None}
    for word in rhymes:
        try:
            # get pronunciation for word (only the first one is used)
            pron = pnc.phones_for_word(word)[0]
        except IndexError:  # in case one of the words is not in the dictionary
            return 0
        # get stress pattern and find last stressed syllables
        stress = pnc.stresses(pron)
        # rfind returns -1 when the digit is absent, so last_stress is -1 for
        # a pattern with neither '1' nor '2'; the slice below then keeps only
        # the final syllable.
        last_stress = max([stress.rfind('1'), stress.rfind('2')])
        try:
            sylls = ARPA.syllabifyARPA(pron, return_list=True)
        except ValueError:  # in case the word cannot be syllabified
            return 0
        # NOTE(review): uses the stress-digit index as a syllable index, which
        # presumably relies on syllabifyARPA returning one syllable per vowel
        # - confirm.
        sylls = sylls[last_stress:]
        # strip the onset (everything before the first vowel) of the first
        # kept syllable
        first_onset = re.split(ARPA.VOWELS_REGEX, sylls[0])[0]
        sylls[0] = sylls[0].replace(first_onset, '', 1)
        rhymes[word] = sylls

    # test for matching vowels and consonant clusters in onset and coda
    # the stressed vowel weighs double
    # denominator: phone count of the longer rhyming part, plus one for the
    # extra stress point
    phones = 1 + max([
        sum(len(syll.split()) for syll in rhyme) for rhyme in rhymes.values()
    ])
    matches = 0
    # zip stops at the shorter rhyming part; surplus syllables only count
    # against the score through the denominator
    for target_syll, test_syll in zip(rhymes[target_word], rhymes[test_word]):
        target_vowel = [
            phone for phone in target_syll.split()
            if re.match(ARPA.VOWELS_REGEX, phone)
        ][0]
        test_vowel = [
            phone for phone in test_syll.split()
            if re.match(ARPA.VOWELS_REGEX, phone)
        ][0]
        # splitting on the vowel gives [onset, coda]
        target_clusters = target_syll.split(target_vowel)
        test_clusters = test_syll.split(test_vowel)
        # measure match of syllable onsets
        matches += len(
            set(target_clusters[0].strip().split()).intersection(
                set(test_clusters[0].strip().split())))
        # measure match of vowels
        if target_vowel[:2] == test_vowel[:2]:  # test for the vowel itself
            matches += 1
            # test for similar stress
            if (target_vowel[-1] in ['1', '2']
                    and target_vowel[-1] == test_vowel[-1]):
                matches += 1
        # measure match of syllable codas
        matches += len(
            set(target_clusters[1].strip().split()).intersection(
                set(test_clusters[1].strip().split())))
    degree = matches / phones
    if degree > 0.7:
        print('\rFound rhyme pair with a rhyming degree of: ', degree)
        print(rhymes)
    return degree
Пример #31
0
import enchant

import pronouncing

# Spelling suggestions for contracted/archaic forms.  The print statements
# were converted to print() calls so the script also parses on Python 3.
dictionary = enchant.request_dict("en_US")
print(dictionary.suggest("untrimm'd"))
print(dictionary.suggest("don't"))

# Look up the apostrophe-less spelling and, if it has a pronunciation, show
# the stress pattern of the first one.
phones = pronouncing.phones_for_word("dont")
print(phones)
if phones:
    first_phone = phones[0]
    stresses = pronouncing.stresses(first_phone)
    print(stresses)
Пример #32
0
 def test_stresses(self):
     """pronouncing.stresses returns the stress digits of a phone string."""
     self.assertEqual("01", pronouncing.stresses("P ER0 M IH1 T"))
     self.assertEqual("12", pronouncing.stresses("P ER1 M IH2 T"))
Пример #33
0
 def test_stresses(self):
     """pronouncing.stresses returns the stress digits of a phone string."""
     phone_strings = ('P ER0 M IH1 T', 'P ER1 M IH2 T')
     expected_patterns = ('01', '12')
     for expected, phones in zip(expected_patterns, phone_strings):
         self.assertEqual(expected, pronouncing.stresses(phones))
Пример #34
0
import enchant

import pronouncing

# Print spelling suggestions for contracted/archaic forms.
dictionary = enchant.request_dict("en_US")
print(dictionary.suggest("untrimm'd"))
print(dictionary.suggest("don't"))

# Look up the apostrophe-less spelling and, if it has a pronunciation, print
# the stress pattern of the first one.
phones = pronouncing.phones_for_word("dont")
print(phones)
if phones:
    first_phone = phones[0]
    stresses = pronouncing.stresses(first_phone)
    print(stresses)