def findLineStress(line):
    '''Find the candidate accentual stress patterns of a line via the CMU dict.

    The line is cleaned with ``removeMarkupWords`` and ``prepString`` and then
    split on whitespace. Every word known to ``pronouncing`` contributes each
    of its distinct stress patterns; words without a pronunciation are
    skipped, so they add no syllables to any parse.

    _parameters_
    line: line of text

    _returns_
    parselist: list of distinct potential stress strings for the whole line.
        0 is unstressed, 1 is primary stress, 2 is secondary stress (middle)
    syllableLengths: list of syllable counts, aligned index-for-index with
        parselist
    wordCount: count of words in the line (skipped words included)
    '''
    words = prepString(removeMarkupWords(line)).split()
    parses = ['']
    for word in words:
        pronunciations = pronouncing.phones_for_word(word)
        if not pronunciations:
            continue
        # Several pronunciations often share one stress pattern. Extending the
        # parses with duplicates only multiplies work that the final
        # de-duplication would throw away again.
        wordStresses = []
        for pronunciation in pronunciations:
            wordStress = pronouncing.stresses(pronunciation)
            if wordStress not in wordStresses:
                wordStresses.append(wordStress)
        parses = [
            parse + wordStress
            for wordStress in wordStresses
            for parse in parses
        ]
    # Materialise the unique parses once so both returned lists share an order.
    parselist = list(set(parses))
    return parselist, [len(parse) for parse in parselist], len(words)
def findLineStress(tokenized_line):
    '''
    Find the candidate accentual stress patterns of a tokenized line, based
    on the CMU dict. Uses relative stress per word, so somewhat limited.

    Tokens without a pronunciation in ``pronouncing`` are skipped and add no
    syllables to any parse.

    Parameters
    ----------
    tokenized_line : list
        list of tokens from line, usually preprocessed to remove non-words

    Returns
    -------
    parselist: list of distinct potential stress strings after parsing.
        0 is unstressed, 1 is primary stress, 2 is secondary stress (middle).
        An empty input yields ``['']``.
    '''
    parses = ['']
    for word in tokenized_line:
        pronunciations = pronouncing.phones_for_word(word)
        if not pronunciations:
            continue
        # Only distinct stress patterns matter; duplicated patterns would
        # just inflate the parse list before the final de-duplication.
        word_stresses = []
        for pronunciation in pronunciations:
            word_stress = pronouncing.stresses(pronunciation)
            if word_stress not in word_stresses:
                word_stresses.append(word_stress)
        parses = [
            parse + word_stress
            for word_stress in word_stresses
            for parse in parses
        ]
    return list(set(parses))
def getStressStruct(line):
    """Build one stress string for a sequence of words.

    Each word contributes:

    * ``"X"`` (a single character) when ``pronouncing`` has no entry for it;
    * its stress pattern when it has exactly one pronunciation;
    * otherwise one character per syllable of its first pronunciation: the
      stress digit where every pronunciation agrees on that syllable, and
      ``"*"`` where they disagree.

    When the pronunciations cannot be compared syllable by syllable (their
    patterns differ in length, or any of them contains a secondary stress
    ``2``), every syllable of the first pronunciation is reported as ``"*"``.

    Args:
        line: iterable of words.

    Returns:
        str: the concatenated per-word stress markers.
    """
    pieces = []
    for word in line:
        phones = pronouncing.phones_for_word(word)
        if not phones:
            pieces.append("X")
            continue

        stress_list = [pronouncing.stresses(phone) for phone in phones]
        first = stress_list[0]
        if len(stress_list) == 1:
            pieces.append(first)
            continue

        comparable = all(
            len(pattern) == len(first) and '2' not in pattern
            for pattern in stress_list
        )
        if not comparable:
            pieces.append("*" * len(first))
            continue

        # zip(*...) walks the patterns syllable by syllable; a syllable is
        # kept only if all pronunciations carry the same stress digit there.
        for syllable in zip(*stress_list):
            pieces.append(syllable[0] if len(set(syllable)) == 1 else "*")
    return "".join(pieces)
def is_iambic(phrase):
    """
    Check that a phrase satisfies iambic meter; return 1 if so, otherwise 0.

    Definitely an imperfect check: only the first pronunciation of each word
    is considered, and a one-syllable word with primary stress is treated as
    secondary stress (``2``) so that it may fall on either kind of beat.

    The phrase passes when no syllable at an even position (0-based) carries
    primary stress and no syllable at an odd position is unstressed. A phrase
    with no words passes trivially.

    If a word is not in the CMU dictionary, 0 is returned.
    """
    meter = []
    for raw_word in phrase.split():
        word = raw_word.strip().strip(string.punctuation).lower()
        phones_list = pronouncing.phones_for_word(word)
        if not phones_list:
            return 0  # word not found
        # just default to the first pronunciation if > 1 given
        stresses = pronouncing.stresses(phones_list[0])
        if stresses == '1':
            stresses = '2'  # allow ambiguity for 1-syllable words with stress 1
        meter.extend(int(digit) for digit in stresses)

    if 0 in meter[1::2]:
        return 0
    if 1 in meter[0::2]:
        return 0
    return 1
def rhyme_same_stress(word):
    """Draw random rhymes for *word* until one shares its stress pattern.

    On every attempt a pronunciation of *word* is picked at random and a
    rhyme is drawn with ``rhyme_type_random``. The rhyme is returned as soon
    as any of its pronunciations has the same stress pattern as the picked
    pronunciation. After the attempt counter reaches 10 without a match, the
    most recently drawn rhyme is returned anyway. The attempt counter is
    printed after each failed attempt.
    """
    attempts = 0
    # print('in the stress loop')
    while True:
        target_stress = pronouncing.stresses(
            random.choice(pronouncing.phones_for_word(word))
        )
        rhyme = rhyme_type_random(word)
        rhyme_patterns = (
            pronouncing.stresses(pronunciation)
            for pronunciation in pronouncing.phones_for_word(rhyme)
        )
        if any(pattern == target_stress for pattern in rhyme_patterns):
            return rhyme
        print(attempts)
        if attempts == 10:
            return rhyme
        attempts += 1
def is_final_syllable_stressed(word):
    """Return True when the last stress digit of *word* is ``'1'``.

    Only the first pronunciation returned by ``pronouncing`` is inspected.
    An IndexError propagates when the word has no pronunciation or its stress
    string is empty.
    """
    first_pronunciation = pronouncing.phones_for_word(word)[0]
    return pronouncing.stresses(first_pronunciation)[-1] == '1'
def is_iambic(self, new_sent):
    """Return True when the sentence's stress pattern equals ``self.iamb_pat``.

    The pattern is the concatenation of the stress strings of the first
    pronunciation of every whitespace-separated word. An IndexError
    propagates for a word that has no pronunciation.
    """
    pattern = "".join(
        pronouncing.stresses(pronouncing.phones_for_word(token)[0])
        for token in new_sent.split()
    )
    return pattern == self.iamb_pat
def get_stresses_oneword(word):
    """Return a stress string for a single word.

    Words unknown to ``pronouncing`` get one ``'3'`` per syllable as counted
    by ``dirtysyllables``. Known words get the stress string of their first
    pronunciation, except that a string of length zero or one is replaced
    by ``"3"``.
    """
    phones_list = pronouncing.phones_for_word(word)
    if not phones_list:
        return '3' * dirtysyllables(word)
    pattern = pronouncing.stresses(phones_list[0])
    return "3" if len(pattern) <= 1 else pattern
def getWordStresses(word: str):
    """Return the stress string of *word*, after ``numbersToWords`` conversion.

    If the converted text contains a space, the list of its
    whitespace-separated parts is returned instead of a stress string. For a
    word without a pronunciation the sentinel ``"1111111111"`` is returned.
    """
    word = numbersToWords(word)
    if " " in word:
        return word.split()
    phones = pronouncing.phones_for_word(word)
    if not phones:
        # Hacky way of discarding candidate title
        return "1111111111"
    return pronouncing.stresses(phones[0])
def stress(sentence):
    """
    Split a tokenised sentence into phrases at stress boundaries.

    Each word is assigned the stress string (from ``pronouncing``) whose
    integer value is smallest among its pronunciations; a word with no
    pronunciation is assigned ``'0'``.  Words are accumulated into a phrase,
    and the phrase is closed once it holds at least ``MIN_LENGTH`` stress
    digits, more than one stress digit remains after it, the first remaining
    digit is ``1``, and the following word contains an ASCII letter.

    :param sentence: list(str)
    :return: list(str) of space-joined phrases, or None when the stress
        string of the first word starts with ``0``
    """
    stresses = []
    for word in sentence:
        phones = pronouncing.phones_for_word(word)
        homonyms = [pronouncing.stresses(p) for p in phones]
        # NOTE(review): int() raises ValueError on an empty stress string;
        # confirm that every pronunciation has at least one stress digit.
        homonyms_int = [int(p) for p in homonyms]
        if len(homonyms_int) == 0:
            # Unknown word: treat it as a single unstressed syllable.
            homonyms = ['0']
            homonyms_int = [0]
        # Keep the pronunciation whose stress digits form the smallest number.
        word_stress = homonyms[homonyms_int.index(min(homonyms_int))]
        stresses.append(word_stress)
    # NOTE(review): an empty sentence raises IndexError on the next line.
    if stresses[0][0] == str(0):
        return None
    words = sentence[:]
    phrases = []
    current_phrase = []
    for word in sentence:
        # if re.search("[a-zA-Z0-9]", word) is None:
        #     continue
        current_phrase.append(word)
        # `stresses` and `words` are re-sliced whenever a phrase is closed,
        # so len(current_phrase) always indexes relative to the open phrase.
        if len("".join(stresses[:len(current_phrase)])) >= MIN_LENGTH:
            next_stresses = "".join(stresses[len(current_phrase):])
            if len(next_stresses) > 1 and next_stresses[0] == str(1) and \
                    re.search("[a-zA-Z]", words[len(current_phrase)]):
                phrases.append(current_phrase)
                words = words[len(current_phrase):]
                stresses = stresses[len(current_phrase):]
                current_phrase = []
    # Whatever is left over forms the final phrase.
    if len(current_phrase) > 0:
        phrases.append(current_phrase)
    return([" ".join(p) for p in phrases])
def stress_pattern():
    """Print example stress lookups made with ``pronouncing``.

    Prints the stress string of the first pronunciation of "snappiest", then
    the results of two ``search_stresses`` queries.
    """
    phones_list = pronouncing.phones_for_word("snappiest")
    meter = pronouncing.stresses(phones_list[0])
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(meter)  # 102
    # 1 : primary stress, 2: secondary stress , 0: unstressed

    ## search by stress pattern
    stress_first = pronouncing.search_stresses('100100')
    stress_either = pronouncing.search_stresses(
        '^00[12]00[12]$')  ## either 1 or 2 in the []
    print(stress_first)
    print(stress_either)
def count_syllables(words):
    """Count the syllables in a whitespace-separated string of words.

    Each word is stripped of surrounding whitespace and punctuation. A word
    known to ``pronouncing`` counts one syllable per stress digit of its
    first pronunciation; an unknown word is approximated as one syllable per
    three characters. Either way the per-word count is capped at
    ``MAX_SYLLABLES_PER_WORD``.

    Args:
        words: text to measure.

    Returns:
        The total syllable count (0 for text without words).
    """
    syllables = 0
    for raw_word in words.split():
        word = raw_word.strip().strip(string.punctuation)
        phones_list = pronouncing.phones_for_word(word)
        if phones_list:
            estimate = len(pronouncing.stresses(phones_list[0]))
        else:
            # if we don't know, just do a quick approximation here; it
            # shouldn't come up too often
            estimate = round(len(word) / 3)
        syllables += min(MAX_SYLLABLES_PER_WORD, estimate)
    return syllables
def convert_to_word(token: str) -> Word:
    """Wrap *token* in a ``Word`` together with its stress pattern.

    For a token known to the pronunciation dictionary, the pattern is the
    stress string of its first pronunciation with secondary stress (``2``)
    folded into primary stress (``1``). For an unknown token the pattern is
    one ``"?"`` per run of vowels in the token, as a syllable-count guess.
    """
    pronunciations = p.phones_for_word(token)
    if pronunciations:
        # pick one arbitrarily
        return Word(token, p.stresses(pronunciations[0]).replace("2", "1"))
    # Match vowels case-insensitively so that capitalised tokens are not
    # under-counted.
    number_syllables_guess = len(
        re.findall(r"[aeiou]+", token, flags=re.IGNORECASE)
    )
    return Word(token, "?" * number_syllables_guess)
def generate_joo_joo_eyeball(syllable_count):
    """Build a random phrase that mirrors the stresses of a template line.

    A template is chosen at random among the ``joo_joo_eyeball`` entries
    whose first element equals *syllable_count*; the entry's second element
    is the template text. Every word of the template is replaced by a random
    word whose stress pattern matches that of the word's first pronunciation.

    Args:
        syllable_count: syllable count the template must have.

    Returns:
        str: the replacement words joined by single spaces.

    Raises:
        ValueError: if no entry has the requested syllable count (the former
            rejection-sampling loop never terminated in that case).
    """
    candidates = [
        entry for entry in joo_joo_eyeball if entry[0] == syllable_count
    ]
    if not candidates:
        raise ValueError(
            "no joo_joo_eyeball entry with syllable count %r"
            % (syllable_count,)
        )
    text = random.choice(candidates)[1]  # Discard count, we don't need it

    result = []
    for word in text.split():
        pronunciations = pronouncing.phones_for_word(word)
        pat = pronouncing.stresses(pronunciations[0])
        # Anchor the pattern so only words with exactly this stress match.
        replacement = random.choice(
            pronouncing.search_stresses("^" + pat + "$")
        )
        result.append(replacement)
    return ' '.join(result)
def calculate_scores(poem_lines):
    """Score a couplet on rhyme and on similarity of stress patterns.

    Args:
        poem_lines: exactly two lines; each is either a string (split on
            whitespace) or an already-tokenised sequence of words.

    Returns:
        list: ``[combined, rhyme_score, stress_score]``. ``rhyme_score`` is
        ``rhyme_score`` applied to the last word of each line,
        ``stress_score`` is the ``difflib.SequenceMatcher`` ratio of the two
        lines' stress strings, and ``combined`` weights the two with
        ``CoupletScorer.rhyme_weight`` and ``CoupletScorer.stress_weight``.

    Raises:
        ValueError: if ``poem_lines`` does not hold exactly two lines.
    """
    ### keep only two-lines poem
    if len(poem_lines) != 2:
        raise ValueError("can only score 2-line poems/couplets")

    ### calculate informations needed for scoring
    stress_strings = []
    last_words = []
    for pl in poem_lines:
        try:
            pwords = pl.split()
        except AttributeError:
            # Not a string: assume the line is already a sequence of words.
            pwords = pl
        last_words.append(pwords[-1])

        stress_parts = []
        for pword in pwords:
            pronunciations = pronouncing.phones_for_word(pword)
            # Words missing from the dictionary contribute no stresses.
            if pronunciations:
                stress_parts.append(pronouncing.stresses(pronunciations[0]))
        stress_strings.append("".join(stress_parts))

    ### rhyme score
    rhyme_score_ = rhyme_score(
        last_words[0], last_words[1], penalize_short_word=False
    )

    ### stress score
    stress_string_score = difflib.SequenceMatcher(
        None, stress_strings[0], stress_strings[1]
    ).ratio()

    ### combined score
    ret = (
        CoupletScorer.rhyme_weight * rhyme_score_
        + CoupletScorer.stress_weight * stress_string_score
    )

    return [
        ret,
        rhyme_score_,
        stress_string_score,
    ]
def unstressed(word, syll, cache={}):
    """Tell whether syllable *syll* of *word* may be left unstressed.

    The wildcard word ``'*'`` is always unstressed. Otherwise the stress
    string of ``prons[word.lower()]`` is examined; when it contains no ``0``
    at all, every ``2`` is treated as ``0``. The syllable counts as
    unstressed when the word has a single syllable or the digit at index
    *syll* is ``'0'``.

    The mutable default ``cache`` is intentional: it memoises results per
    ``(word, syll)`` across calls.
    """
    if word == '*':
        return True

    key = (word, syll)
    if key in cache:
        return cache[key]

    pattern = pronouncing.stresses(prons[word.lower()])
    if '0' not in pattern:
        pattern = pattern.replace('2', '0')
    answer = len(pattern) == 1 or pattern[syll] == '0'
    cache[key] = answer
    return answer
def cut_into_iamb(self, new_sent):
    """Cut a sentence into chunks of at least ten syllables.

    Words are collected until the stress strings of their first
    pronunciations total ten or more digits; the collected words are then
    emitted as one chunk, each word followed by a single space. A trailing
    group that never reaches ten is not returned.
    """
    chunks = []
    pending = []
    syllables = 0
    for token in new_sent.split():
        first_pronunciation = pronouncing.phones_for_word(token)[0]
        syllables += len(pronouncing.stresses(first_pronunciation))
        pending.append(token)
        if syllables >= 10:
            chunks.append(" ".join(pending) + " ")
            pending = []
            syllables = 0
    return chunks
def getWordStresses(word: str):
    """Return the stress string of *word*, after ``numbersToWords`` conversion.

    If the converted text contains a space, the list of its
    whitespace-separated parts is returned instead. An entry of
    ``PRONUNCIATION_OVERRIDES`` whose key equals the word, ignoring case,
    takes precedence over the dictionary. For a word without a pronunciation
    the sentinel ``"1111111111"`` is returned.
    """
    word = numbersToWords(word)
    if " " in word:
        return word.split()

    lowered = word.lower()
    for override, stresses in PRONUNCIATION_OVERRIDES:
        if override.lower() == lowered:
            return stresses

    phones = pronouncing.phones_for_word(word)
    # Hacky way of discarding candidate title
    return pronouncing.stresses(phones[0]) if phones else "1111111111"
def getWordStresses(word: str):
    """Return the stress string of *word*, after ``numbersToWords`` conversion.

    If the converted text contains a space, the list of its
    whitespace-separated parts is returned instead. An entry of
    ``PRONUNCIATION_OVERRIDES`` whose key equals the word, ignoring case,
    takes precedence over the dictionary. For a word without a pronunciation
    the sentinel ``"1111111111"`` is returned.
    """
    word = numbersToWords(word)
    if " " in word:
        return word.split()

    # Compare case-insensitively so that a capitalised word still picks up
    # its override.
    lowered = word.lower()
    for override, stresses in PRONUNCIATION_OVERRIDES:
        if lowered == override.lower():
            return stresses

    phones = pronouncing.phones_for_word(word)
    if not phones:
        # Hacky way of discarding candidate title
        return "1111111111"
    return pronouncing.stresses(phones[0])
def summon_seas():
    """Return "<Word> Sea" names built from words stressed like "caspian".

    Candidates come from ``search_stresses`` on the stress string of the
    first pronunciation of "caspian"; only those whose own first
    pronunciation has three syllables are kept, each passed through
    ``string.capwords`` with " sea" appended.
    """
    # Find words with matching stress patterns
    reference_stress = pr.stresses(pr.phones_for_word("caspian")[0])
    candidates = pr.search_stresses(reference_stress)

    # Keep only the words with the correct syllable count
    return [
        string.capwords(candidate + " sea")
        for candidate in candidates
        if pr.syllable_count(pr.phones_for_word(candidate)[0]) == 3
    ]
def stresses(self):
    """
    Return a string of the stresses for the given word.

    Consumers of this string make the following assumptions:
    - syllables with a "1" should be stressed by the meter
    - syllables with a "2" can be stressed or unstressed by the meter
    - syllables with a "0" should be unstressed by the meter
    """
    pattern = stresses(self.phones)

    # Poets often signal syllables that would normally be silent this way.
    if "è" in self.word:
        pattern = pattern + "2"

    # Words of one syllable can usually be pronounced either way.
    return "2" if pattern in ("1", "0") else pattern
def stresses_for_word_sequence(word_sequence):
    """Gets the CMUdict stress sequence for a given word sequence.

    Only the first pronunciation of each word is used.

    Args:
        word_sequence (list): A list of words.

    Returns:
        string: A stress sequence where 0 is zero stress, 1 is primary stress,
        and 2 is secondary stress. The empty string is returned as soon as a
        word without a pronunciation is met.
    """
    collected = ''
    for word in word_sequence:
        phones = pronouncing.phones_for_word(word)
        if not phones:
            return ''
        collected += pronouncing.stresses(phones[0])
    return collected
def word_matches_stress(word: str, stress_pattern_match: str) -> bool:
    '''
    Tell whether any pronunciation of ``word`` has the given stress pattern.

    eg: stress_pattern_match = "010"

    Both 1 and 2 count as a stressed syllable, so every 2 is read as 1 before
    comparing; the pattern to match is expected to hold only 1s and 0s.
    '''
    # A word can have more than 1 pronunciation (eg: the lead of a pencil,
    # someone lead someone), and a single matching one is enough.
    return any(
        p.stresses(pronunciation).replace("2", "1") == stress_pattern_match
        for pronunciation in p.phones_for_word(word)
    )
def find_words():
    """Return words that sound like "beluga".

    A word qualifies when ``search_stresses`` finds it for the stress string
    of "beluga", ``search`` finds it for the final phone of "beluga" anchored
    at the end, and its first pronunciation has three syllables.
    """
    beluga = pr.phones_for_word("beluga")[0]
    final_phone = beluga.split(" ")[-1]

    # Find words with matching stress patterns
    stress_matches = pr.search_stresses(pr.stresses(beluga))
    # Find words with matching end phone
    ending_matches = pr.search(final_phone + "$")

    candidates = list(set(stress_matches).intersection(ending_matches))
    # Keep only the words with the correct syllable count
    return [
        candidate
        for candidate in candidates
        if pr.syllable_count(pr.phones_for_word(candidate)[0]) == 3
    ]
def get_places():
    """Collect place names from ``pycorpora`` with a chosen stress shape.

    A name is kept when ``pronouncing`` knows it and its first pronunciation
    either has three syllables with primary stress on the second, or two
    syllables with primary stress on the first.
    """
    geography = pycorpora.geography
    sources = [
        [entry["city"] for entry in geography.us_cities['cities']],
        [entry["city"] for entry in geography.norwegian_cities['cities']],
        geography.english_towns_cities['towns'],
        geography.english_towns_cities['cities'],
        [entry["name"] for entry in geography.rivers["rivers"]],
        geography.countries['countries'],
        [entry["name"]
         for entry in geography.canadian_municipalities["municipalities"]],
        [entry['name']
         for entry in geography.london_underground_stations['stations']],
    ]
    # Syllable count -> index of the syllable that must carry primary stress.
    required_stress_index = {3: 1, 2: 0}

    keepers = []
    for place in [name for source in sources for name in source]:
        pronunciation_list = pronouncing.phones_for_word(place)
        if not pronunciation_list:
            continue
        first = pronunciation_list[0]
        syllable_count = pronouncing.syllable_count(first)
        stresses = pronouncing.stresses(first)
        index = required_stress_index.get(syllable_count)
        if index is not None and stresses[index] == '1':
            keepers.append(place)
    return keepers
def get_word_stresses(word: str) -> "str | list[str]":
    """
    Using the pronouncing library, get the stress pattern of a single word.

    Numbers will be changed into words, e.g. 10 -> ten, and then the stress
    of that checked. If a number is 4 digits, it will be treated as a year,
    e.g. 1918 -> "nineteen eighteen".

    Parameters:
        word (str): The word to check.

    Returns:
        Normally a string of 0s, 1s, or 2s, representing the stress pattern
        of the word. If the word isn't recognized, the string "A" is returned
        for easy checking. If number conversion turned the input into several
        words, a list of those words is returned instead, so the caller can
        look each of them up in turn.
    """
    # If the word is "500", numbers_to_words changes that to "five hundred",
    # two separate words. Return a list of every word, so
    # get_title_stresses() can go over them again.
    word = numbers_to_words(word)
    if " " in word:
        return word.split()

    # We want to forcibly change the stress for certain words (found in
    # constants.py).
    lowered = word.lower()
    for override, stresses in PRONUNCIATION_OVERRIDES:
        if lowered == override.lower():
            return stresses

    phones = pronouncing.phones_for_word(word)
    if not phones:
        return "A"
    return pronouncing.stresses(phones[0])
def stress_pattern(phones):
    """Return ``pronouncing.stresses`` of the phones joined with no separator."""
    joined = ''.join(phones)
    return pronouncing.stresses(joined)
def test_stresses(self):
    """Check that ``pronouncing.stresses`` extracts the stress digits in order."""
    cases = (
        ('P ER0 M IH1 T', '01'),
        ('P ER1 M IH2 T', '12'),
    )
    for phones, expected in cases:
        self.assertEqual(expected, pronouncing.stresses(phones))
def rhyme_degree(target_word, test_word):
    """Returns a number between 0 and 1 as the degree of rhyming between two
    words, with 1 being an exact rhyme and 0 being no similarity at all.

    Returns 1 straight away when ``test_word`` is listed by ``pnc.rhymes``
    for ``target_word``, and 0 when either word has no pronunciation or
    cannot be syllabified. Otherwise the words are compared from their last
    stressed syllable onwards (only the first pronunciation of each is used)
    and the share of matching phones is returned. Progress messages are
    printed for library rhymes and for degrees above 0.7.
    """
    if test_word in pnc.rhymes(target_word):
        print('\rFound rhyme pair from the pronouncing library:')
        print(target_word, 'and', test_word)
        return 1
    # extract word part from last stressed syllable excluding that syll's onset
    rhymes = {target_word: None, test_word: None}
    for word in rhymes:
        try:
            # get pronunciation for word
            pron = pnc.phones_for_word(word)[0]
        except IndexError:
            # in case one of the words is not in the dictionary
            return 0
        # get stress pattern and find last stressed syllables
        stress = pnc.stresses(pron)
        # rfind gives -1 when the digit is absent, so this is the index of
        # the last syllable carrying primary or secondary stress.
        # NOTE(review): if neither digit occurs this is -1 and the slice
        # below keeps only the final syllable - confirm that is intended.
        last_stress = max([stress.rfind('1'), stress.rfind('2')])
        try:
            sylls = ARPA.syllabifyARPA(pron, return_list=True)
        except ValueError:
            # in case the word cannot be syllabified
            return 0
        sylls = sylls[last_stress:]
        # Drop whatever precedes the first vowel of the first kept syllable.
        first_onset = re.split(ARPA.VOWELS_REGEX, sylls[0])[0]
        sylls[0] = sylls[0].replace(first_onset, '', 1)
        rhymes[word] = sylls
    # test for matching vowels and consonant clusters in onset and coda
    # the stressed vowel weighs double
    phones = 1 + max([
        sum(len(syll.split()) for syll in rhyme) for rhyme in rhymes.values()
    ])
    matches = 0
    for target_syll, test_syll in zip(rhymes[target_word], rhymes[test_word]):
        # First phone of each syllable that matches the vowel pattern.
        target_vowel = [
            phone for phone in target_syll.split()
            if re.match(ARPA.VOWELS_REGEX, phone)
        ][0]
        test_vowel = [
            phone for phone in test_syll.split()
            if re.match(ARPA.VOWELS_REGEX, phone)
        ][0]
        # Splitting on the vowel leaves [onset, coda].
        target_clusters = target_syll.split(target_vowel)
        test_clusters = test_syll.split(test_vowel)
        # measure match of syllable onsets
        matches += len(
            set(target_clusters[0].strip().split()).intersection(
                set(test_clusters[0].strip().split())))
        # measure match of vowels
        if target_vowel[:2] == test_vowel[:2]:  # test for the vowel itself
            matches += 1
        # test for similar stress
        if (target_vowel[-1] in ['1', '2']
                and target_vowel[-1] == test_vowel[-1]):
            matches += 1
        # measure match of syllable codas
        matches += len(
            set(target_clusters[1].strip().split()).intersection(
                set(test_clusters[1].strip().split())))
    degree = matches / phones
    if degree > 0.7:
        print('\rFound rhyme pair with a rhyming degree of: ', degree)
        print(rhymes)
    return degree
import enchant
import pronouncing

# Show the spelling suggestions enchant offers for two contractions.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
dictionary = enchant.request_dict("en_US")
print(dictionary.suggest("untrimm'd"))
print(dictionary.suggest("don't"))

# Look up the apostrophe-less spelling and, if it is known, show the stress
# string of its first pronunciation.
phones = pronouncing.phones_for_word("dont")
print(phones)
if phones:
    first_phone = phones[0]
    stresses = pronouncing.stresses(first_phone)
    print(stresses)
def test_stresses(self):
    """Check that ``pronouncing.stresses`` extracts the stress digits in order."""
    expectations = [
        ("P ER0 M IH1 T", "01"),
        ("P ER1 M IH2 T", "12"),
    ]
    for phone_string, wanted in expectations:
        self.assertEqual(wanted, pronouncing.stresses(phone_string))
import enchant
import pronouncing

# Show the spelling suggestions enchant offers for two contractions.
dictionary = enchant.request_dict("en_US")
for contraction in ("untrimm'd", "don't"):
    print(dictionary.suggest(contraction))

# Look up the apostrophe-less spelling and, if it is known, show the stress
# string of its first pronunciation.
phones = pronouncing.phones_for_word("dont")
print(phones)
if phones:
    first_phone = phones[0]
    stresses = pronouncing.stresses(first_phone)
    print(stresses)