def test_rhyming_part(self):
    """Check ``pronouncing.rhyming_part`` against three fixed phoneme strings."""
    # (pronunciation, expected rhyming part). The last pronunciation has
    # only 0-stress vowels and is expected to come back unchanged.
    expectations = (
        ("S L IY1 P ER0", "IY1 P ER0"),
        ("S L IY1 P AH0 L IY0", "IY1 P AH0 L IY0"),
        ("M ER0 M AE0 N S K", "M ER0 M AE0 N S K"),
    )
    for phones, expected in expectations:
        self.assertEqual(pronouncing.rhyming_part(phones), expected)
def pick_rhyme_for_word(self, word, forbidden_pronunciations=None, line=True):
    """Pick a random corpus line (or word) that rhymes with ``word``.

    A pronunciation of ``word`` is chosen at random and its rhyming part is
    looked up in ``self.by_rhyming_part``. If that pronunciation yields no
    candidate words, it is discarded and another one is tried.

    :param word: the word to rhyme with
    :param forbidden_pronunciations: pronunciations of ``word`` that must not
        be used (default: none)
    :param line: if true return a whole line ending in the rhyming word,
        otherwise return only the rhyming word
    :return: a rhyming line or word; ``word`` itself when no usable
        pronunciation is left
    """
    forbidden = list(forbidden_pronunciations or ())
    # Build a *new* list instead of calling .remove() on the lookup result:
    # phones_for_word may hand back the list stored in the library's own
    # table, and removing from it would drop that pronunciation for every
    # later call (the original "how can this happen?" symptom).
    pronunciations = [
        phones for phones in pronouncing.phones_for_word(word)
        if phones not in forbidden
    ]

    while pronunciations:
        # choose a pronunciation of the word at random and extract the rhyme phonemes
        chosen_pronunciation = random.choice(pronunciations)
        rhyming_part = pronouncing.rhyming_part(chosen_pronunciation)

        # consider the other words which rhyme with these phonemes
        various_rhymes = self.by_rhyming_part[rhyming_part]
        rhyme_words = list(various_rhymes.keys())
        # The word sometimes doesn't show up, if it's the only line with that
        # ending; when it is the sole entry it is deliberately kept.
        if len(rhyme_words) > 1 and word in rhyme_words:
            rhyme_words.remove(word)  # don't rhyme it with itself

        if rhyme_words:
            rhyme_word = random.choice(rhyme_words)
            if line:
                # return a whole line
                return random.choice(various_rhymes[rhyme_word])
            # just return a word
            return rhyme_word

        # sadness. try another pronunciation (local list only)
        pronunciations.remove(chosen_pronunciation)

    # if we don't have any pronunciations... just return the word
    return word
def __init__(self, seed_word):
    """Load the corpus, index it by rhyming part and remember the seed word.

    The first pronunciation returned for ``seed_word`` is indexed without a
    check, so a word with no pronunciations raises ``IndexError``.
    """
    self.all_lines = generate_poetry_corpus_lines()
    self.by_rhyming_part = self.generate_rhyming_part_defaultdict()

    # Set up ability to seed by word, TODO neaten
    self.seed_word = seed_word
    seed_pronunciations = pronouncing.phones_for_word(self.seed_word)
    self.rhyming_part_for_word = pronouncing.rhyming_part(
        seed_pronunciations[0])
def rhyme(word, phones=None):
    """
    Returns a list of rhymes for a word.

    Words are looked up in ``pronouncing.rhyme_lookup`` under the rhyming
    part of ``phones``; ``word`` itself is removed from the result.

    If ``phones`` is not given, the value of ``first_phones_for_word(word)``
    is used; when that is the empty string an empty list is returned.

    :param word: a word
    :param phones: specific CMUdict phonemes string for word (default None)
    :return: a list of rhymes for word (empty if none are found)
    :raises ValueError: if ``phones`` is given but is not one of the
        pronunciations of ``word``, or if the phonemes string is empty
    """
    if phones is not None:
        # caller-supplied pronunciation must belong to the word
        if phones not in pronouncing.phones_for_word(word):
            raise ValueError(phones + " not phones for " + word)
    else:
        phones = first_phones_for_word(word)
        if phones == "":
            return []

    if not phones:
        raise ValueError("phonemes string is empty")

    lookup_key = pronouncing.rhyming_part(phones)
    candidates = pronouncing.rhyme_lookup.get(lookup_key, [])
    return [candidate for candidate in candidates if candidate != word]
def extract_rhyming_lines(cache):
    """Group usable comments by the rhyming part of their last word.

    A comment is kept when its length is strictly between 20 and 100, its
    last alphabetic word has an entry in the CMU dictionary, that word does
    not end in "ing", "ed" or "ion", and the comment does not end in a digit
    followed only by non-letters.

    Returns a dict mapping a rhyming part to a list of ``(comment, word)``
    pairs; within one list every word appears at most once.
    """
    last_word_re = re.compile(r"([a-zA-Z]+)[^a-zA-Z]*$")
    excluded_suffix_re = re.compile(r"(?:ing|ed|ion)$")
    trailing_digit_re = re.compile(r"\d[^a-zA-Z]*$")
    phoneme_dict = pronouncing.cmudict.dict()

    # Pass 1: filter the comments and pair each with its final word.
    cleaned = []
    for comment in cache:
        if not 20 < len(comment) < 100:
            continue
        found = last_word_re.search(comment)
        if found is None:
            continue
        word = found.group(1).lower()
        if not phoneme_dict[word]:
            continue
        if excluded_suffix_re.search(word) or trailing_digit_re.search(comment):
            continue
        cleaned.append((comment, word))

    # Pass 2: bucket by rhyming part, e.g. {-ight: [might, right, tight]}
    # (a simplified illustration, not the exact representation).
    rhyme_subsets = {}
    for comment_pair in cleaned:
        word = comment_pair[1]
        first_phones = pronouncing.phones_for_word(word)[0]
        bucket = rhyme_subsets.setdefault(
            pronouncing.rhyming_part(first_phones), [])
        # only the first comment for any given word is kept
        if all(word != seen_word for _, seen_word in bucket):
            bucket.append(comment_pair)
    return rhyme_subsets
def get_rhyming_groups(group_size, number_groups, pool):
    """Returns a list of rhyming groups of the given size from the given
    candidate pool.

    Sentences are popped from ``pool`` (which is therefore consumed) and
    clustered by the rhyming part of their last space-separated word until
    ``number_groups`` clusters hold at least ``group_size`` sentences.
    Sentences whose last word has no known pronunciation are skipped.

    Args:
        group_size (int): number of lines in the rhyming group.
        number_groups (int): number of rhyming groups.
        pool (list) : candidate pool from which to draw lines.

    Raises:
        InsufficientSentencesError: if the candidate pool is not rich enough.
    """
    clusters = defaultdict(list)

    def complete_clusters():
        return [c for c in clusters.values() if len(c) >= group_size]

    while len(complete_clusters()) < number_groups:
        try:
            sentence = pool.pop()
        except (IndexError, KeyError):
            # list.pop() raises IndexError when empty, set.pop() KeyError.
            raise InsufficientSentencesError(
                'Candidate pool is not rich enough!')
        last_word = sentence.split(" ")[-1]
        pronunciations = pronouncing.phones_for_word(last_word)
        if not pronunciations:
            # cannot be rhymed; drop it rather than crash on [0]
            continue
        rhyming_part = pronouncing.rhyming_part(pronunciations[0])
        cluster = clusters[rhyming_part]
        # never put two sentences ending in the same word in one cluster
        if all(other.split(" ")[-1] != last_word for other in cluster):
            cluster.append(sentence)

    groups = complete_clusters()
    random.shuffle(groups)
    return [random.sample(group, group_size) for group in groups]
def get_rhymes(word):
    """Collect the rhymes of ``word`` across all of its pronunciations.

    For each pronunciation, its rhyming part is searched with a
    ``$``-anchored pattern via ``pronouncing.search``. The results are merged
    into one set; ``word`` itself is not filtered out.
    """
    collected = set()
    for phones in pronouncing.phones_for_word(word):
        pattern = pronouncing.rhyming_part(phones) + "$"
        collected |= set(pronouncing.search(pattern))
    return collected
def __init__(self, seed_word, min_line_len=32, max_line_len=48):
    """Load the corpus, index it by rhyming part and remember the seed word.

    The seed word is lower-cased. Its first pronunciation is indexed without
    a check, so a word with no pronunciations raises ``IndexError``.

    NOTE(review): ``max_line_len`` is accepted but never read; the upper
    length bound is drawn at random from a fixed list instead. Confirm
    whether that is intended.
    """
    max_line_choices = [48, 65, 80, 120]
    self.all_lines = generate_poetry_corpus_lines()
    upper_bound = random.choice(max_line_choices)
    self.by_rhyming_part = self.generate_rhyming_part_defaultdict(
        min_line_len, upper_bound)

    # Set up ability to seed by word, TODO neaten
    self.seed_word = seed_word.lower()
    seed_pronunciations = pronouncing.phones_for_word(self.seed_word)
    self.rhyming_part_for_word = pronouncing.rhyming_part(
        seed_pronunciations[0])
def rhyme_scheme(self):
    """Compute the poem's rhyme scheme and rhyme count.

    Writes two attributes on ``self.poem``:

    * ``rhyme_scheme`` - a list with one entry per verse: ``" "`` for a verse
      equal to ``""``, a shared letter for verses whose last items rhyme, and
      otherwise the lower-cased result of ``cardinal_n_to_ordinal_letter``.
    * ``rhyme_count`` - incremented once per matching pair of rhyming-part
      variants whose last items differ.

    NOTE(review): the nesting below was reconstructed from a flattened
    source line; confirm the indentation against the original file.
    """
    self.poem.rhyme_count = 0
    rhyme_parts = []
    rhyme_scheme = [''] * len(self.poem.verses)
    rhyme_ordinal = 0
    # Pass 1: collect, per verse, the rhyming part of every pronunciation of
    # its last item ("" when the verse is empty or has no pronunciation).
    for i, verse in enumerate(self.poem.verses):
        if verse == "":
            rhyme_parts.append("")
            rhyme_scheme[i] = " "
            continue
        # presumably verse is a sequence of words, so verse[-1] is the last
        # word - TODO confirm
        phones = pr.phones_for_word(verse[-1])
        if len(phones) == 0:
            rhyme_parts.append("")
            continue
        rhyming_parts = [
            *map(lambda phone: pr.rhyming_part(phone), phones)
        ]
        rhyme_parts.append(rhyming_parts)
    # god of complexity forgive me
    # Pass 2: compare every verse i with every later verse j, over all
    # pronunciation variants of both.
    for i, i_part_variants in enumerate(rhyme_parts):
        for i_part in i_part_variants:
            for j, j_part_variants in enumerate(rhyme_parts[i + 1:]):
                # convert the slice-relative index into an absolute index
                j = j + i + 1
                for j_part in j_part_variants:
                    if i_part == j_part:
                        # the rhyming part matches, now let's check if it's the same word
                        i_word = self.poem.verses[i][-1]
                        j_word = self.poem.verses[j][-1]
                        # reuse verse i's letter if it has one, else allocate the next
                        if len(rhyme_scheme[i]) != 0:
                            letter = rhyme_scheme[i]
                        else:
                            rhyme_ordinal += 1
                            letter = self.cardinal_n_to_ordinal_letter(
                                rhyme_ordinal)
                        rhyme_scheme[i] = letter
                        rhyme_scheme[j] = letter
                        # identical end words share a letter but are not counted
                        if i_word == j_word:
                            continue
                        self.poem.rhyme_count += 1
    # fills empty cells in the rhyme scheme
    for i, letter in enumerate(rhyme_scheme):
        if letter == '':
            rhyme_ordinal += 1
            rhyme_scheme[i] = self.cardinal_n_to_ordinal_letter(
                rhyme_ordinal).lower()
    self.poem.rhyme_scheme = rhyme_scheme
def rhyming_part(self):
    """Return the rhyming part of the original word, without stress digits.

    Returns ``None`` when ``self.phones`` is the empty string.
    """
    phones = self.phones
    if phones == '':
        return None
    with_stress = rhyming_part(phones)
    # drop every stress marker (0, 1, 2) from the phoneme string
    return "".join(ch for ch in with_stress if ch not in "012")
def rhyme(word, cache={}):
    """Return the rhyming part of ``word``, treating a final M as N.

    Results are memoised in ``cache`` under ``word`` exactly as given. The
    shared default dict is what keeps the memo alive between calls. The
    pronunciation is read from ``prons`` using the lower-cased word.
    """
    try:
        return cache[word]
    except KeyError:
        pass  # not memoised yet

    part = pronouncing.rhyming_part(prons[word.lower()])
    normalised = part[:-1] + 'N' if part.endswith('M') else part
    cache[word] = normalised
    return normalised
def lines_by_rhyme(line_phones_pairs):
    """Group lines by the rhyming part of their phones.

    ``line_phones_pairs`` yields ``(line, phones)`` pairs. The result is a
    plain dict mapping each rhyming part to the set of lines that have it.
    """
    grouped = {}
    for text, phones in line_phones_pairs:
        key = pronouncing.rhyming_part(phones)
        grouped.setdefault(key, set()).add(text)
    return grouped
def get_rhyme(word):
    """Return a random word that rhymes with ``word`` and is not ``word``.

    Only the first pronunciation of ``word`` is used. Candidates come from
    ``pronouncing.search`` with the rhyming part anchored by ``$``.

    :param word: the word to rhyme with
    :return: a rhyming word different from ``word.lower()``
    :raises IndexError: if ``word`` has no known pronunciation, or if no
        rhyming word other than ``word`` itself exists
    """
    phones = pronouncing.phones_for_word(word)
    phone_parts = pronouncing.rhyming_part(phones[0])

    # Filter the given word out up front. Re-drawing until a different word
    # comes up never terminates when the word is the only match.
    target = word.lower()
    candidates = [
        rhyme for rhyme in pronouncing.search(phone_parts + "$")
        if rhyme != target
    ]
    # random.choice raises IndexError on an empty list, the same exception
    # type an unknown word already produces above.
    return random.choice(candidates)
def rhymes(word1, word2):
    # This function has been transformed and is my own now. Sorry hyperreality.
    """Return True if any pronunciation of ``word1`` rhymes with any of ``word2``.

    The rhyming part of every pronunciation of each word is split into
    phonemes, a few equivalent phonemes are normalised, and the sequences are
    compared. The outcome is also printed.

    :param word1: first word
    :param word2: second word
    :return: True when a pair of pronunciations has the same (normalised)
        rhyming part; False otherwise, including when either word has no
        pronunciation
    """
    # Work around some limitations of CMU
    equivalents = {"ER0": "R"}

    def replace_syllables(syllables):
        return [equivalents.get(syl, syl) for syl in syllables]

    def normalised_rhyming_parts(word):
        # rhyming_part returns one space-separated string. It must be split,
        # otherwise the substitution runs over single characters and a
        # multi-character key such as "ER0" can never match.
        return [
            replace_syllables(pronouncing.rhyming_part(pronunciation).split())
            for pronunciation in pronouncing.phones_for_word(word)
        ]

    pronunciations = normalised_rhyming_parts(word1)
    pronunciations2 = normalised_rhyming_parts(word2)
    if not (pronunciations and pronunciations2):
        print("no pronunciation for at least one of", word1, word2)
        return False

    for syllables in pronunciations:
        for syllables2 in pronunciations2:
            if syllables == syllables2:
                print(word1, "rhymes with", word2)
                return True
    print(word1, "does not rhyme with", word2)
    return False
def get_rhyme_dict(pruned_df):
    """Index the lines of ``pruned_df.text`` by rhyming part and last word.

    Returns a nested defaultdict:
    ``{rhyming part: {lower-cased last word: [lines ending in that word]}}``.
    Only the first pronunciation of each last word is used; lines whose last
    word has no pronunciation are left out. A progress message from
    ``thinking_messages()`` is printed every 500000 lines, starting with the
    first.
    """
    # this data structure is ripped almost exactly from https://github.com/aparrish/pronouncingpy
    by_rhyming_part = defaultdict(lambda: defaultdict(list))
    final_word = re.compile(r'(\b\w+\b)\W*$')
    for i, line in enumerate(pruned_df.text):
        if i % 500000 == 0:
            print(thinking_messages())
        match = final_word.search(line)
        if not match:
            continue
        # group(1) is the word alone. group() would also include trailing
        # punctuation ("love!"), which has no pronunciation and silently
        # dropped every line ending in punctuation.
        last_word = match.group(1)
        pronunciations = pronouncing.phones_for_word(last_word)
        if pronunciations:
            rhyming_part = pronouncing.rhyming_part(pronunciations[0])
            # group by rhyming phones (for rhymes) and words (to avoid duplicate words)
            by_rhyming_part[rhyming_part][last_word.lower()].append(line)
    return by_rhyming_part
def _determine_rhyme_from_line(line): """ Return the rhyming part of a string. If no rhyming part is detected, return None. """ end_word = line.split()[-1] end_phone = pronouncing.phones_for_word(end_word) if end_phone != []: end_rhyme = pronouncing.rhyming_part(end_phone[0]) else: end_rhyme = None return end_rhyme
def rhymes_all(word):
    """
    Return rhymes of ``word`` over every one of its pronunciations.

    According to the original author, pronouncing.rhymes only considers the
    first pronunciation, which makes 'live' rhyme with 'five' but not with
    'give'. This function looks up the rhyming part of each pronunciation in
    ``pronouncing.rhyme_lookup`` and concatenates the results, leaving out
    ``word`` itself. Words found through several pronunciations appear once
    per pronunciation. An unknown word gives an empty list.
    """
    found = []
    for phone in pronouncing.phones_for_word(word):
        key = pronouncing.rhyming_part(phone)
        found.extend(
            w for w in pronouncing.rhyme_lookup.get(key, []) if w != word)
    return found
def build_rhyming_dict(self):
    """Index ``self.all_lines`` by rhyming part and last word.

    Only lines whose text (``line['s']``) is longer than 32 and shorter than
    48 characters are used. Every pronunciation of the last word is
    considered, so a line can be filed under several rhyming parts.

    Returns a nested defaultdict:
    ``{rhyming part: {lower-cased last word: [texts ending in that word]}}``.
    """
    by_rhyming_part = defaultdict(lambda: defaultdict(list))
    final_word = re.compile(r'(\b\w+\b)\W*$')
    for line in self.all_lines:
        text = line['s']
        if not (32 < len(text) < 48):  # only use lines of uniform lengths
            continue
        match = final_word.search(text)
        if not match:
            continue
        # group(1) is the word alone. group() would also include trailing
        # punctuation, which has no pronunciation and dropped the line.
        last_word = match.group(1)
        pronunciations = pronouncing.phones_for_word(last_word)
        # Several pronunciations can share one rhyming part; dedupe (keeping
        # order) so the same text is not appended twice to one bucket.
        rhyming_parts = dict.fromkeys(
            pronouncing.rhyming_part(p) for p in pronunciations)
        for rhyming_part in rhyming_parts:
            # group by rhyming phones (for rhymes) and words (to avoid duplicate words)
            by_rhyming_part[rhyming_part][last_word.lower()].append(text)
    return by_rhyming_part
def get_fortune(word, fortunes, actions):
    """Return the first fortune or action whose last word rhymes with ``word``.

    Fortunes are checked before actions. The last word is whatever follows
    the final space of the entry and is compared as-is against the results of
    ``pronouncing.search`` for the rhyming part of the first pronunciation of
    ``word``.

    :param word: the word to rhyme with
    :param fortunes: iterable of fortune strings
    :param actions: iterable of action strings
    :return: ``(text, "fortune")``, ``(text, "action")`` or
        ``("Sorry!", "error")`` when nothing rhymes or ``word`` has no known
        pronunciation
    """
    phones = pronouncing.phones_for_word(word)
    if not phones:
        # unknown word: use the function's own error result, not IndexError
        return "Sorry!", "error"
    phone_parts = pronouncing.rhyming_part(phones[0])
    # a set makes the repeated membership tests below O(1)
    rhymes = set(pronouncing.search(phone_parts + "$"))

    for fortune in fortunes:
        if fortune.split(" ")[-1] in rhymes:
            return fortune, "fortune"
    for action in actions:
        if action.split(" ")[-1] in rhymes:
            return action, "action"
    return "Sorry!", "error"
def random_general_rhyme(word, phones=None, search_option="end"):
    """
    Return a list of rhymes where a random combination of phonemes match.

    Search patterns are produced by ``wildcard_mix_phones_regex_searches``
    from the rhyming part of ``phones``. They are tried in random order until
    one gives results; ``word`` itself is removed from the returned list.

    If phones argument not given, phones/pronunciation used will default to
    the value of ``first_phones_for_word(word)``. If no rhyme is found, an
    empty list is returned (and a message is printed once every pattern has
    been tried).

    :param word: a word
    :param phones: specific CMUdict phonemes string for word (default None)
    :param search_option: how the pattern is anchored: "end" (pattern + "$"),
        "begin" ("^" + pattern) or "whole" (both). (default "end")
    :return: a list of rhymes for word, where specific rhyme is random
    :raises ValueError: if ``phones`` is given but is not a pronunciation of
        ``word``, if the phonemes string is empty, or if ``search_option`` is
        not one of the three accepted values
    """
    if phones is None:
        phones = first_phones_for_word(word)
        if phones == "":
            return []
    else:
        if phones not in pronouncing.phones_for_word(word):
            # message previously had a stray "+" before the word
            raise ValueError(phones + " not phones for " + word)
    if not phones:
        raise ValueError("phonemes string is empty")

    rp = pronouncing.rhyming_part(phones)
    search_combos = wildcard_mix_phones_regex_searches(rp)
    while search_combos:
        search = random.choice(search_combos)
        if search_option == "end":
            pattern = search + "$"
        elif search_option == "begin":
            pattern = "^" + search
        elif search_option == "whole":
            pattern = "^" + search + "$"
        else:
            raise ValueError(
                "search_option should be 'end', 'begin', or 'whole'")
        rhymes = pronouncing.search(pattern)
        if rhymes:
            rhymes = unique(rhymes)
            if word in rhymes:
                rhymes.remove(word)
            return rhymes
        # this pattern matched nothing; do not draw it again
        search_combos.remove(search)
    print("random general rhyme: tried all combos, didn't find anything!")
    return []
def generate_rhyming_part_defaultdict() -> defaultdict:
    """Returns a default dict structure of
    keys: Rhyming parts (strs)
    values: defaultdicts, of
        words corresponding to that rhyming part (strs) :
        lists of lines that end with those words (lists of strs)

    Lines come from ``generate_poetry_corpus_lines()``. Only texts longer
    than 32 and shorter than 48 characters are used, and only the first
    pronunciation of each last word is considered."""
    all_lines = generate_poetry_corpus_lines()
    by_rhyming_part = defaultdict(lambda: defaultdict(list))
    final_word = re.compile(r'(\b\w+\b)\W*$')
    for line in all_lines:
        text = line['s']
        if not (32 < len(text) < 48):  # only use lines of uniform lengths
            continue
        match = final_word.search(text)
        if not match:
            continue
        # group(1) is the word alone. group() would also include trailing
        # punctuation, which has no pronunciation and dropped the line.
        last_word = match.group(1)
        pronunciations = pronouncing.phones_for_word(last_word)
        if pronunciations:
            rhyming_part = pronouncing.rhyming_part(pronunciations[0])
            # group by rhyming phones (for rhymes) and words (to avoid duplicate words)
            by_rhyming_part[rhyming_part][last_word.lower()].append(text)
    return by_rhyming_part
def near_rhyme(word, phones=None, stress=True, consonant_tail=0):
    """
    Returns a list of words that almost rhyme.

    Search patterns are produced by
    ``wildcard_mix_phones_regex_searches(rhyming_part, stress)``. The results
    of all patterns are combined, de-duplicated with ``unique`` and returned
    without ``word`` itself.

    If phones argument not given, phones/pronunciation used will default to
    the value of ``first_phones_for_word(word)``. If no rhyme is found, an
    empty list is returned (and a message is printed).

    :param word: a word
    :param phones: specific CMUdict phonemes string for word (default None)
    :param stress: passed on to the pattern generator; presumably whether
        vowels must match stress (default True)
    :param consonant_tail: maximum number of extra trailing phonemes (each 1
        to 3 characters long) allowed after the matched pattern (default 0)
    :return: a list of near rhymes for word
    :raises ValueError: if ``phones`` is given but is not a pronunciation of
        ``word``, or if the phonemes string is empty
    """
    if phones is None:
        phones = first_phones_for_word(word)
        if phones == "":
            return []
    else:
        if phones not in pronouncing.phones_for_word(word):
            # message previously ran "for" and the word together
            raise ValueError(phones + " not phones for " + word)
    if not phones:
        raise ValueError("phonemes string is empty")

    rp = pronouncing.rhyming_part(phones)
    search_combos = wildcard_mix_phones_regex_searches(rp, stress)
    # optional tail of up to consonant_tail additional phonemes
    tail = "( .{1,3}){0," + str(consonant_tail) + "}$"
    rhymes = []
    for search in search_combos:
        rhymes += pronouncing.search(search + tail)
    if rhymes:
        rhymes = unique(rhymes)
        if word in rhymes:
            rhymes.remove(word)
        return rhymes
    # message previously named a different function
    print("near rhyme: tried all combos, didn't find anything!")
    return []
def generate_rhyming_part_defaultdict(self, min_len, max_len) -> defaultdict:
    """Returns a default dict structure of
    keys: Rhyming parts (strs)
    values: defaultdicts, of
        words corresponding to that rhyming part (strs) :
        lists of lines that end with those words (lists of strs)

    Lines are read from ``self.all_lines``. Only texts longer than
    ``min_len`` and shorter than ``max_len`` characters are used, and only
    the first pronunciation of each last word is considered.

    Code borrowed directly from Allison Parrish's examples."""
    by_rhyming_part = defaultdict(lambda: defaultdict(list))
    final_word = re.compile(r'(\b\w+\b)\W*$')
    for line in self.all_lines:
        text = line['s']
        # Uniform lengths original: if not(32 < len(text) < 48)
        if not (min_len < len(text) < max_len):  # only use lines of uniform lengths
            continue
        match = final_word.search(text)
        if not match:
            continue
        # group(1) is the word alone. group() would also include trailing
        # punctuation, which has no pronunciation and dropped the line.
        last_word = match.group(1)
        pronunciations = pronouncing.phones_for_word(last_word)
        if pronunciations:
            rhyming_part = pronouncing.rhyming_part(pronunciations[0])
            # group by rhyming phones (for rhymes) and words (to avoid duplicate words)
            by_rhyming_part[rhyming_part][last_word.lower()].append(text)
    return by_rhyming_part
def _pick_rhyming_line(last_word, rhyme_dict, fallback_lines):
    """Return a random line rhyming with ``last_word``, else a random fallback line."""
    try:
        phones = pronouncing.phones_for_word(last_word)
        rhyming_part = pronouncing.rhyming_part(phones[0])
        random_key = random.choice(list(rhyme_dict[rhyming_part].keys()))
        return random.choice(rhyme_dict[rhyming_part][random_key])
    except (IndexError, KeyError, AttributeError, TypeError):
        # Best effort: unknown word (IndexError on [0]), no line for that
        # rhyming part (IndexError from random.choice / KeyError), or a
        # last_word that is not a string.
        return random.choice(fallback_lines)


def write_poem():
    """Print a randomly assembled poem to stdout.

    The structure (number of lines, number of blank-line breaks and rhyme
    scheme) comes from ``define_structure()``. A first line is picked with
    ``pick_first_line``, then the corpus is pruned to lines with the same
    meter and a similar syllable count, and further lines are drawn from it
    according to the rhyme scheme ("random", "AABB" or "ABAB").

    :raises ValueError: if the rhyme scheme is not one of the three above
    """
    df = load_data()
    print("Here I go! \n")
    length, line_breaks, rhyme_scheme = define_structure()

    # pick the first line (its last word is re-derived from the line later)
    index, first_line, _last_word = pick_first_line(df)

    # prune the dataframe so that we restrict the number of syllables and the
    # meter. Both conditions are combined into one mask; previously the
    # syllable filter was applied to the full frame and discarded the meter
    # filter.
    same_meter = df.meter == df.meter[index]
    similar_syllables = ((df.syllables > df.syllables[index] - 3)
                         & (df.syllables < df.syllables[index] + 2))
    pruned_df = df[same_meter & similar_syllables]

    # get the rhyme_dict for the pruned df so we can rhyme lines
    rhyme_dict = get_rhyme_dict(pruned_df)
    # materialise the candidate lines once instead of on every draw
    candidate_lines = list(pruned_df.text)

    # Frankenbot's done
    print("\n VOILA!! \n")
    print("*********************************************************")
    print("\n")

    # print the first line
    print(first_line)

    # set break variable False so we don't line break before the first line
    break_here = False

    # now make the rest of the poem
    line = first_line
    while length > 0:
        if break_here and line_breaks > 0:
            print("\n")
            line_breaks -= 1
            break_here = False

        # the random number (two dice) will determine what we do...
        magic_number = random.randint(1, 6) + random.randint(1, 6)

        # line break on the next line
        if magic_number < 6:
            break_here = True

        if rhyme_scheme == "random":
            if magic_number >= 8:
                # rolled 8 or more: all hell breaks loose - no rhyming
                line = random.choice(candidate_lines)
            else:
                # rhyme with the previous line. The new line is printed;
                # previously the *previous* line was printed again and the
                # final line never appeared.
                line = _pick_rhyming_line(
                    get_last_word(line), rhyme_dict, candidate_lines)
            print(line)
            length -= 1
        elif rhyme_scheme == "AABB":
            # get line which rhymes with last line
            print(_pick_rhyming_line(
                get_last_word(line), rhyme_dict, candidate_lines))
            # new couplet starting
            line = random.choice(candidate_lines)
            print(line)
            length -= 2
        elif rhyme_scheme == "ABAB":
            new_line_a = _pick_rhyming_line(
                get_last_word(line), rhyme_dict, candidate_lines)
            line_b = random.choice(candidate_lines)
            new_line_b = _pick_rhyming_line(
                get_last_word(line_b), rhyme_dict, candidate_lines)
            print(line_b)
            print(new_line_a)
            print(new_line_b)
            line = random.choice(candidate_lines)
            length -= 3
        else:
            # without this the loop would never decrement length
            raise ValueError(
                "unknown rhyme scheme: {!r}".format(rhyme_scheme))
    print("\n")
def calcRhymeDensity(text,
                     rhymeType='perfect',
                     rhymeLocation='all',
                     lineStartStop=(1, -2),
                     printExamples=False):
    '''calculates rhyme density (count of rhymes over n-1 words). \n\n

    _parameters_
    text: input text for measurement
    rhymeType: 'perfect' is a perfect rhyme, 'vowel' is a rhyming in the
        vowel sound + stress only (first phoneme of the rhyming part); any
        other value counts nothing
    rhymeLocation: choose to look at 'all' text, 'section' by line numbers,
        or 'end' (last word in each line)
    lineStartStop: tuple of (start, stop) line indices for 'section'; stop is
        inclusive and may be negative (-1 means the last line)
    printExamples: if True, print most common values of the selected rhymeType

    _returns_ (in this order)
    rhymeDensity: rhyme_cnt/float(wordCount-1), or 0.0 when wordCount <= 1
    rhyme_cnt: count of rhymes of specified rhymeType and rhymeLocation
    wordCount: count of words of specified rhymeType and rhymeLocation

    _raises_
    ValueError: if rhymeLocation is not 'all', 'end' or 'section'
    '''
    rhymePart_cnt = Counter()

    # prepare data
    text = prepString(removeMarkupWords(text))

    # restrict location to (end=last word, section=line range, all=full text)
    if rhymeLocation == 'all':
        words = text.split()
    elif rhymeLocation in ('end', 'section'):
        lines = text.split("\n")
        if rhymeLocation == 'section':
            start, stop = lineStartStop[0], lineStartStop[1] + 1
            # An inclusive stop of -1 becomes 0 here, which as a slice end
            # would select nothing; None means "through the last line".
            lines = lines[start:(stop if stop != 0 else None)]
        words = [line.split()[-1] for line in lines if line.split()]
    else:
        raise ValueError(
            "rhymeLocation should be 'all', 'end', or 'section'")

    wordCount = len(words)
    for word in words:
        pros = pronouncing.phones_for_word(word)
        if not pros or not pros[0]:
            continue
        phonelist = pros[0]  # using first pronunciation for now
        if rhymeType == 'perfect':
            rhymePart_cnt[pronouncing.rhyming_part(phonelist)] += 1
        elif rhymeType == 'vowel':
            rhymePart_cnt[pronouncing.rhyming_part(phonelist).split()[0]] += 1

    # every occurrence of a rhyming part beyond the first counts as a rhyme
    rhyme_cnt = sum(count - 1 for count in rhymePart_cnt.values())

    if wordCount > 1:
        rhymeDensity = rhyme_cnt / float(wordCount - 1)
    else:
        rhymeDensity = 0.0

    if printExamples:
        print(rhymePart_cnt.most_common(5))

    return rhymeDensity, rhyme_cnt, wordCount
def syllrhyme(word):
    """Return ``(syllable count, rhyming part)`` for ``word``.

    The pronunciation is read from ``prons`` using the lower-cased word. A
    word that raises ``KeyError`` there gives ``(0, "")``.
    """
    key = word.lower()
    try:
        phones = prons[key]
    except KeyError:
        return 0, ""
    else:
        return (pronouncing.syllable_count(phones),
                pronouncing.rhyming_part(phones))
def calculate_rhyme_density(tokens, rhymeType='perfect', rhymeLocation='all'):
    """
    Computes rhyme density for a list of tokens.

    The density is the number of repeated rhyme keys divided by
    ``len(tokens) - 1``, where tokens are those left after parsing. ``None``
    is returned when that denominator is not positive. Only the first
    pronunciation of each token is used; tokens without one add no key but
    still count in the denominator.

    Parameters:
    -----------
    rhymeType : str
        - 'perfect' is a perfect rhyme (whole rhyming part)
        - 'stressed' is a rhyming in the vowel sound + stress only (first
          phoneme of the rhyming part ending in 1 or 2)
        - 'allVowels' is a rhyming at all vowel syllables (every phoneme of
          the rhyming part ending in a digit)
    rhymeLocation : str
        choose to look at 'all' text or 'end' (last word in each line)
    """
    assert rhymeType in ['perfect', 'stressed', 'allVowels'], "Unexpected value for rhymeType"
    assert rhymeLocation in ['all', 'end'], "Unexpected value for rhymeLocation"

    if rhymeLocation == 'all':
        tokens = parse_tokens(tokens, lines=False, tags=False, contraction=True)
    elif rhymeLocation == 'end':
        parsed_lines = parse_tokens(tokens, lines=True, tags=False, contraction=True)
        tokens = [parsed_line[-1] for parsed_line in parsed_lines if parsed_line]

    # only the first pronunciation of each token is considered; this could
    # be enhanced by doing permutations of pronunciations
    first_pronunciations = []
    for token in tokens:
        candidates = pronouncing.phones_for_word(token)
        if candidates:
            first_pronunciations.append(candidates[0])

    key_counts = Counter()
    for pro in first_pronunciations:
        rhyming_part = pronouncing.rhyming_part(pro)
        if rhymeType == 'perfect':
            key_counts[rhyming_part] += 1
            continue

        phonemes = rhyming_part.split()
        if rhymeType == 'stressed':
            # look at only stressed syllables
            # slightly modified logic from JP implementation
            stressed = [p for p in phonemes if p[-1] in ['1', '2']]
            if stressed:
                key_counts[stressed[0]] += 1
        elif rhymeType == 'allVowels':
            # look at all vowel parts - new method
            for vowel in (p for p in phonemes if p[-1].isdigit()):
                key_counts[vowel] += 1

    # every occurrence of a key beyond the first counts as one rhyme
    rhyme_cnt = sum(occurrences - 1 for occurrences in key_counts.values())

    # denominator - word for 'perfect'; vowel syllables for 'vowel'
    denominator = len(tokens) - 1
    if denominator > 0:
        return rhyme_cnt / denominator
    return None