Пример #1
0
 def test_rhyming_part(self):
     """Check ``pronouncing.rhyming_part`` against three fixed phone strings.

     The first two inputs contain a primary-stressed vowel (``IY1``) and the
     expected result starts at that vowel.  The third input has only
     ``0``-stress vowels and the expected result is the input unchanged.
     """
     expectations = (
         ("S L IY1 P ER0", "IY1 P ER0"),
         ("S L IY1 P AH0 L IY0", "IY1 P AH0 L IY0"),
         ("M ER0 M AE0 N S K", "M ER0 M AE0 N S K"),
     )
     for phones, expected in expectations:
         self.assertEqual(pronouncing.rhyming_part(phones), expected)
Пример #2
0
 def test_rhyming_part(self):
     """Verify the rhyming part extracted from known phone strings.

     Each case pairs a phone string with the value the test expects
     ``pronouncing.rhyming_part`` to return for it; the last case has no
     stressed vowel and is expected to come back unchanged.
     """
     cases = [
         ("S L IY1 P ER0", "IY1 P ER0"),
         ("S L IY1 P AH0 L IY0", "IY1 P AH0 L IY0"),
         ("M ER0 M AE0 N S K", "M ER0 M AE0 N S K"),
     ]
     for source_phones, wanted in cases:
         part = pronouncing.rhyming_part(source_phones)
         self.assertEqual(part, wanted)
Пример #3
0
 def pick_rhyme_for_word(self, word, forbidden_pronunciations=None, line=True):
     """Pick a random rhyme for ``word`` out of ``self.by_rhyming_part``.

     A pronunciation of ``word`` is chosen at random, its rhyming part is
     looked up in ``self.by_rhyming_part`` and one of the words stored under
     it is selected.  If nothing is stored for that rhyming part, the
     pronunciation is added to the forbidden list and the search is retried.

     :param word: the word to find a rhyme for
     :param forbidden_pronunciations: pronunciations of ``word`` that must
         not be used (default None, meaning none are forbidden)
     :param line: if true, return a random line stored for the rhyme word;
         otherwise return the rhyme word itself
     :return: a line or a word; ``word`` itself when no usable
         pronunciation is left
     """
     if forbidden_pronunciations is None:
         forbidden_pronunciations = []
     else:
         forbidden_pronunciations = list(forbidden_pronunciations)

     # Work on a copy: phones_for_word may hand back the list object held in
     # pronouncing's own lookup table, and removing entries from it in place
     # would silently delete pronunciations for every later caller.
     pronunciations = list(pronouncing.phones_for_word(word))
     for forbidden in forbidden_pronunciations:
         if forbidden in pronunciations:
             pronunciations.remove(forbidden)
         else:
             # A forbidden entry that is not a pronunciation of this word.
             print("WHAAAT?? word: {} pronunciations: {} forbidden: {}".format(word, pronunciations, forbidden))

     if not pronunciations:
         # if we don't have any pronunciations... just return the word
         return word

     # choose a pronunciation of the word at random and extract the rhyme phonemes
     chosen_pronunciation = random.choice(pronunciations)
     rhyming_part = pronouncing.rhyming_part(chosen_pronunciation)
     # consider the other words which rhyme with these phonemes
     various_rhymes = self.by_rhyming_part[rhyming_part]
     rhyme_words = list(various_rhymes.keys())
     if len(rhyme_words) > 1 and word in rhyme_words:
         # don't rhyme it with itself, unless it is the only candidate
         rhyme_words.remove(word)
     if not rhyme_words:
         # sadness. try another pronunciation
         return self.pick_rhyme_for_word(
             word,
             forbidden_pronunciations + [chosen_pronunciation],
             line=line)
     rhyme_word = random.choice(rhyme_words)
     if line:
         # return a whole line
         return random.choice(various_rhymes[rhyme_word])
     # just return a word
     return rhyme_word
Пример #4
0
 def __init__(self, seed_word):
     """Load the corpus, index it by rhyming part and remember the seed word.

     :param seed_word: word whose first listed pronunciation supplies
         ``self.rhyming_part_for_word``; indexing ``[0]`` raises IndexError
         when ``pronouncing.phones_for_word`` returns no pronunciations
     """
     self.all_lines = generate_poetry_corpus_lines()
     self.by_rhyming_part = self.generate_rhyming_part_defaultdict()
     # Set up ability to seed by word, TODO neaten
     self.seed_word = seed_word
     seed_pronunciations = pronouncing.phones_for_word(self.seed_word)
     self.rhyming_part_for_word = pronouncing.rhyming_part(
         seed_pronunciations[0])
Пример #5
0
def rhyme(word, phones=None):
    """ Return the list of 'normal' rhymes for a word.

    Two words rhyme here when the rhyming part of their pronunciations (as
    computed by ``pronouncing.rhyming_part``) is the same.  Candidates are
    read from ``pronouncing.rhyme_lookup`` and ``word`` itself is left out of
    the result.

    If ``phones`` is not given, the value of ``first_phones_for_word(word)``
    is used; when that is the empty string an empty list is returned.

    :param word: a word
    :param phones: specific CMUdict phonemes string for word (default None)
    :return: a list of rhymes for word (possibly empty)
    :raises ValueError: if ``phones`` is given but is not one of the
        pronunciations of ``word``, or if the phonemes string is empty
    """
    if phones is not None:
        if phones not in pronouncing.phones_for_word(word):
            raise ValueError(phones + " not phones for " + word)
    else:
        phones = first_phones_for_word(word)
        if phones == "":
            return []
    if not phones:
        raise ValueError("phonemes string is empty")

    candidates = pronouncing.rhyme_lookup.get(
        pronouncing.rhyming_part(phones), [])
    return [candidate for candidate in candidates if candidate != word]
Пример #6
0
def extract_rhyming_lines(cache):
    """Group comments by the rhyming part of their final word.

    A comment is kept only if its length is strictly between 20 and 100, a
    final alphabetic word can be found, that word has an entry in the CMU
    dictionary, the word does not end in "ing", "ed" or "ion", and the
    comment does not end with a digit followed only by non-letters.

    :param cache: iterable of comment strings
    :return: dict mapping a rhyming part to a list of ``(comment, word)``
        pairs; within one list every ``word`` appears at most once
    """
    phoneme_dict = pronouncing.cmudict.dict()
    last_word_pattern = re.compile(r"([a-zA-Z]+)[^a-zA-Z]*$")
    banned_suffix_pattern = re.compile(r"(?:ing|ed|ion)$")
    trailing_digit_pattern = re.compile(r"\d[^a-zA-Z]*$")

    # Keep only the comments in range whose last word has a phonetic entry.
    candidates = []
    for comment in cache:
        if not 20 < len(comment) < 100:
            continue
        found = last_word_pattern.search(comment)
        if not found:
            continue
        word = found.group(1).lower()
        if not phoneme_dict[word]:
            continue
        if banned_suffix_pattern.search(word):
            continue
        if trailing_digit_pattern.search(comment):
            continue
        candidates.append((comment, word))

    # Bucket the pairs by rhyming part, e.g. roughly {-ight: [might, right]}.
    rhyme_subsets = {}
    for pair in candidates:
        word = pair[1]
        key = pronouncing.rhyming_part(pronouncing.phones_for_word(word)[0])
        bucket = rhyme_subsets.get(key)
        if bucket is None:
            rhyme_subsets[key] = [pair]
        elif all(word != existing[1] for existing in bucket):
            # Only add the pair when its word is not already in the bucket.
            bucket.append(pair)

    return rhyme_subsets
Пример #7
0
def get_rhyming_groups(group_size, number_groups, pool):
    """Returns a list of rhyming groups of the given size from the given candidate pool.

    Sentences are popped from ``pool`` (which is therefore consumed) and
    clustered by the rhyming part of their last word until enough clusters
    hold at least ``group_size`` sentences.  A sentence is not added to a
    cluster that already has a sentence ending in the same word, and
    sentences whose last word has no known pronunciation are skipped.

    Args:
        group_size (int): number of lines in the rhyming group.
        number_groups (int): number of rhyming groups.
        pool (list) : candidate pool from which to draw lines.

    Returns:
        list: shuffled list of groups, each a random sample of
        ``group_size`` sentences from one cluster.

    Raises:
         InsufficientSentencesError: if the candidate pool is not rich enough.
    """
    clusters = defaultdict(list)

    def complete_clusters():
        return [c for c in clusters.values() if len(c) >= group_size]

    while len(complete_clusters()) < number_groups:
        try:
            sentence = pool.pop()
        except (IndexError, KeyError) as err:
            # An exhausted list raises IndexError, an exhausted set KeyError.
            raise InsufficientSentencesError(
                'Candidate pool is not rich enough!') from err
        last_word = sentence.split(" ")[-1]
        pronunciations = pronouncing.phones_for_word(last_word)
        if not pronunciations:
            # Unknown (or empty) last word: it cannot be placed in a cluster.
            continue
        rhyming_part = pronouncing.rhyming_part(pronunciations[0])
        cluster = clusters[rhyming_part]
        if last_word not in [s.split(" ")[-1] for s in cluster]:
            cluster.append(sentence)

    groups = complete_clusters()
    random.shuffle(groups)
    return [random.sample(group, group_size) for group in groups]
Пример #8
0
def get_rhymes(word):
    """Return the set of words matching the rhyming part of any pronunciation.

    For every pronunciation of ``word`` the rhyming part, anchored at the end
    with ``$``, is passed to ``pronouncing.search`` and all results are
    merged into one set.
    """
    return {
        match
        for pronunciation in pronouncing.phones_for_word(word)
        for match in pronouncing.search(
            pronouncing.rhyming_part(pronunciation) + "$")
    }
Пример #9
0
 def __init__(self, seed_word, min_line_len=32, max_line_len=48):
     """Load the corpus, index it by rhyming part and remember the seed word.

     :param seed_word: seed word; stored lower-cased, and its first listed
         pronunciation supplies ``self.rhyming_part_for_word``
     :param min_line_len: lower length bound passed to
         ``generate_rhyming_part_defaultdict``
     :param max_line_len: NOTE(review): accepted but never read here; the
         upper bound is drawn at random from 48, 65, 80 and 120 instead
     """
     self.all_lines = generate_poetry_corpus_lines()
     upper_bound = random.choice([48, 65, 80, 120])
     self.by_rhyming_part = self.generate_rhyming_part_defaultdict(
         min_line_len, upper_bound)
     # Set up ability to seed by word, TODO neaten
     self.seed_word = seed_word.lower()
     seed_pronunciations = pronouncing.phones_for_word(self.seed_word)
     self.rhyming_part_for_word = pronouncing.rhyming_part(
         seed_pronunciations[0])
Пример #10
0
    def rhyme_scheme(self):
        """Compute the rhyme scheme and rhyme count of ``self.poem``.

        Writes two attributes on ``self.poem``:

        * ``rhyme_scheme``: one entry per verse. Empty verses get ``" "``;
          verses whose last word shares a rhyming part with another verse
          share a letter from ``self.cardinal_n_to_ordinal_letter``; every
          remaining verse gets its own lower-cased letter.
        * ``rhyme_count``: number of matching rhyming-part pairs whose last
          words differ.

        Assumes each non-empty verse is a sequence whose last element is the
        verse's final word -- TODO confirm against the poem parser.
        """
        self.poem.rhyme_count = 0
        rhyme_parts = []
        rhyme_scheme = [''] * len(self.poem.verses)
        rhyme_ordinal = 0

        for i, verse in enumerate(self.poem.verses):
            if verse == "":
                rhyme_parts.append("")
                rhyme_scheme[i] = " "
                continue

            phones = pr.phones_for_word(verse[-1])

            if len(phones) == 0:
                # Unknown word: nothing to compare, letter is filled in later.
                rhyme_parts.append("")
                continue

            rhyme_parts.append([pr.rhyming_part(phone) for phone in phones])

        # Compare every pronunciation variant of verse i with every variant
        # of each later verse j.
        for i, i_part_variants in enumerate(rhyme_parts):
            for i_part in i_part_variants:
                for j, j_part_variants in enumerate(rhyme_parts[i + 1:],
                                                    start=i + 1):
                    for j_part in j_part_variants:
                        if i_part != j_part:
                            continue

                        # Reuse the letter verse i already has, otherwise
                        # allocate the next one.
                        if len(rhyme_scheme[i]) != 0:
                            letter = rhyme_scheme[i]
                        else:
                            rhyme_ordinal += 1
                            letter = self.cardinal_n_to_ordinal_letter(
                                rhyme_ordinal)
                        rhyme_scheme[i] = letter
                        rhyme_scheme[j] = letter

                        # The rhyming part matches; a verse ending in the
                        # very same word is lettered but not counted.
                        i_word = self.poem.verses[i][-1]
                        j_word = self.poem.verses[j][-1]
                        if i_word != j_word:
                            self.poem.rhyme_count += 1

        # fills empty cells in the rhyme scheme
        for i, letter in enumerate(rhyme_scheme):
            if letter == '':
                rhyme_ordinal += 1
                rhyme_scheme[i] = self.cardinal_n_to_ordinal_letter(
                    rhyme_ordinal).lower()

        # Assigned after the loop so the attribute is set even for a poem
        # with no verses.
        self.poem.rhyme_scheme = rhyme_scheme
Пример #11
0
 def rhyming_part(self):
     """Return the rhyming part of the original word with stress digits removed.

     Returns None when ``self.phones`` is the empty string.  Otherwise the
     module-level ``rhyming_part`` is applied to ``self.phones`` and every
     "0", "1" and "2" character is deleted from its result.
     """
     phones = self.phones
     if phones == '':
         return None
     strip_stress = str.maketrans('', '', "012")
     return rhyming_part(phones).translate(strip_stress)
Пример #12
0
def rhyme(word, cache={}):
    """Return the (memoised) rhyming part of ``word``.

    The pronunciation is read from ``prons`` under the lower-cased word.  A
    rhyming part ending in 'M' has that last character replaced by 'N'.

    :param word: word to look up; a word missing from ``prons`` raises
        KeyError
    :param cache: memo dict keyed by ``word`` exactly as passed in; the
        shared mutable default is deliberate so that results persist
        between calls
    """
    if word in cache:
        return cache[word]
    phones = prons[word.lower()]
    part = pronouncing.rhyming_part(phones)
    cache[word] = part[:-1] + 'N' if part.endswith('M') else part
    return cache[word]
Пример #13
0
def lines_by_rhyme(line_phones_pairs):
    """Group lines by the rhyming part of their phones.

    :param line_phones_pairs: iterable of ``(line, phones)`` pairs
    :return: dict mapping each rhyming part to the set of lines having it
    """
    grouped = {}
    for text, phones in line_phones_pairs:
        key = pronouncing.rhyming_part(phones)
        grouped.setdefault(key, set()).add(text)
    return grouped
Пример #14
0
def get_rhyme(word):
    """Return a random word rhyming with ``word``, or None if there is none.

    The rhyming part of the first pronunciation of ``word``, anchored at the
    end with ``$``, is passed to ``pronouncing.search``; the lower-cased
    ``word`` itself is excluded from the candidates.

    :param word: the word to rhyme
    :return: a rhyming word different from ``word``, or None when the search
        yields nothing but the word itself
    :raises IndexError: if ``word`` has no known pronunciation
    """
    phones = pronouncing.phones_for_word(word)
    phone_parts = pronouncing.rhyming_part(phones[0])

    # Filter up front instead of re-drawing until the result differs: the
    # re-draw loop never terminates when the word is its own only rhyme.
    own_word = word.lower()
    candidates = [
        candidate for candidate in pronouncing.search(phone_parts + "$")
        if candidate != own_word
    ]
    if not candidates:
        return None
    return random.choice(candidates)
Пример #15
0
def rhymes(
    word1, word2
):  # This function has been transformed and is my own now. Sorry hyperreality.
    """Report whether two words probably rhyme.

    The rhyming part of every pronunciation of each word is split into its
    phones, the phones are normalised through a small equivalence table, and
    the words are taken to rhyme if any normalised rhyming part of ``word1``
    equals any normalised rhyming part of ``word2``.

    The outcome is also printed.

    :param word1: first word
    :param word2: second word
    :return: True if the words rhyme; False if they do not or if either
        word has no known pronunciation
    """
    # Work around some limitations of CMU
    equivalents = {"ER0": "R"}

    def normalise(rhyming_part):
        # The rhyming part is a space-separated string of phones; split it so
        # the table is applied per phone rather than per character.
        return [equivalents.get(phone, phone) for phone in rhyming_part.split()]

    parts1 = [
        normalise(pronouncing.rhyming_part(pronunciation))
        for pronunciation in pronouncing.phones_for_word(word1)
    ]
    parts2 = [
        normalise(pronouncing.rhyming_part(pronunciation))
        for pronunciation in pronouncing.phones_for_word(word2)
    ]
    if not (parts1 and parts2):
        print("no pronunciation for at least one of", word1, word2)
        return False

    if any(part1 == part2 for part1 in parts1 for part2 in parts2):
        print(word1, "rhymes with", word2)
        return True
    print(word1, "does not rhyme with", word2)
    return False
Пример #16
0
def get_rhyme_dict(pruned_df):
    """Index the lines of ``pruned_df.text`` by rhyming part and last word.

    :param pruned_df: object whose ``text`` attribute iterates over line
        strings
    :return: nested defaultdict
        ``{rhyming part: {lower-cased last word: [lines]}}`` built from the
        first pronunciation of each line's last word; lines whose last word
        has no known pronunciation are left out
    """
    # this data structure is ripped almost exactly from https://github.com/aparrish/pronouncingpy
    by_rhyming_part = defaultdict(lambda: defaultdict(list))
    final_word = re.compile(r'(\b\w+\b)\W*$')
    for i, line in enumerate(pruned_df.text):
        if i % 500000 == 0:
            # progress message on the first line and every 500000th after it
            print(thinking_messages())
        match = final_word.search(line)
        if not match:
            continue
        # group(1) is the bare word; group() would also carry the trailing
        # punctuation and make the pronunciation lookup fail.
        last_word = match.group(1)
        pronunciations = pronouncing.phones_for_word(last_word)
        if pronunciations:
            rhyming_part = pronouncing.rhyming_part(pronunciations[0])
            # group by rhyming phones (for rhymes) and words (to avoid duplicate words)
            by_rhyming_part[rhyming_part][last_word.lower()].append(line)
    return by_rhyming_part
Пример #17
0
def _determine_rhyme_from_line(line):
    """
    Return the rhyming part of a string.

    If no rhyming part is detected, return None.
    """
    end_word = line.split()[-1]
    end_phone = pronouncing.phones_for_word(end_word)

    if end_phone != []:
        end_rhyme = pronouncing.rhyming_part(end_phone[0])
    else:
        end_rhyme = None

    return end_rhyme
Пример #18
0
def rhymes_all(word):
    """
    Return the rhymes of ``word`` across all of its pronunciations.

    The original function pronouncing.rhymes only looks at the first
    (primary?) phonetic pronunciation to find rhyme words. This makes, for
    example, 'live' rhyme only with 'five' and not with 'give'. This
    function loops over all pronunciations and collects the rhyme words of
    each, so 'live' rhymes with both 'five' and 'give'. ``word`` itself is
    left out; a word with no pronunciations yields an empty list.
    """
    collected = []
    for phone in pronouncing.phones_for_word(word):
        key = pronouncing.rhyming_part(phone)
        collected.extend(
            candidate for candidate in pronouncing.rhyme_lookup.get(key, [])
            if candidate != word)
    return collected
Пример #19
0
 def build_rhyming_dict(self):
     """Index ``self.all_lines`` by rhyming part and last word.

     Only lines whose ``'s'`` text is strictly between 32 and 48 characters
     long are used.  A line is filed under the rhyming part of every
     pronunciation of its last word.

     :return: nested defaultdict
         ``{rhyming part: {lower-cased last word: [texts]}}``
     """
     by_rhyming_part = defaultdict(lambda: defaultdict(list))
     final_word = re.compile(r'(\b\w+\b)\W*$')
     for line in self.all_lines:
         text = line['s']
         if not (32 < len(text) < 48):  # only use lines of uniform lengths
             continue
         match = final_word.search(text)
         if not match:
             continue
         # group(1) is the bare word; group() would also carry the trailing
         # punctuation and make the pronunciation lookup fail.
         last_word = match.group(1)
         for pronunciation in pronouncing.phones_for_word(last_word):
             rhyming_part = pronouncing.rhyming_part(pronunciation)
             # group by rhyming phones (for rhymes) and words (to avoid duplicate words)
             by_rhyming_part[rhyming_part][last_word.lower()].append(text)
     return by_rhyming_part
Пример #20
0
def get_fortune(word, fortunes, actions):
    """Return the first fortune or action whose last word rhymes with ``word``.

    Rhymes are the results of ``pronouncing.search`` for the rhyming part of
    the first pronunciation of ``word``, anchored at the end.  Fortunes are
    checked before actions, each in the given order; the last word of a
    candidate is whatever follows its final space.

    :param word: the word to rhyme with
    :param fortunes: iterable of fortune strings
    :param actions: iterable of action strings
    :return: ``(text, "fortune")``, ``(text, "action")``, or
        ``("Sorry!", "error")`` when nothing rhymes or ``word`` has no known
        pronunciation
    """
    phones = pronouncing.phones_for_word(word)
    if not phones:
        # Unknown word: use the regular "nothing found" answer.
        return "Sorry!", "error"

    phone_parts = pronouncing.rhyming_part(phones[0])
    # A set makes the repeated membership tests below constant-time.
    rhymes = set(pronouncing.search(phone_parts + "$"))

    for label, candidates in (("fortune", fortunes), ("action", actions)):
        for candidate in candidates:
            if candidate.split(" ")[-1] in rhymes:
                return candidate, label

    return "Sorry!", "error"
Пример #21
0
def random_general_rhyme(word, phones=None, search_option="end"):
    """ Return a list of rhymes where a random combination of phonemes match

    The conditions for a general rhyme between words are:
    (1) Any possible phonetic similarity between the final stressed vowel and
        subsequent phonemes.
    If phones argument not given, phones/pronunciation used will default to the
    first in the list of phones returned for word. If no rhyme is found, an
    empty list is returned.

    Search patterns come from ``wildcard_mix_phones_regex_searches`` applied
    to the rhyming part; they are tried in random order and the results of
    the first pattern that matches anything are returned.

    :param word: a word
    :param phones: specific CMUdict phonemes string for word (default None)
    :param search_option: where the pattern is anchored: "end", "begin" or
        "whole" (default "end")
    :return: a list of rhymes for word, where specific rhyme is random
    :raises ValueError: if ``phones`` is not a pronunciation of ``word``, if
        the phonemes string is empty, or if ``search_option`` is invalid
    """
    if phones is None:
        phones = first_phones_for_word(word)
        if phones == "":
            return []
    else:
        if phones not in pronouncing.phones_for_word(word):
            raise ValueError(phones + " not phones for " + word)
    if not phones:
        raise ValueError("phonemes string is empty")

    # (prefix, suffix) wrapped around each search pattern.  Checked before
    # searching so that a bad option is reported even when there are no
    # patterns to try.
    anchors = {"end": ("", "$"), "begin": ("^", ""), "whole": ("^", "$")}
    if search_option not in anchors:
        raise ValueError(
            "search_option should be 'end', 'begin', or 'whole'")
    prefix, suffix = anchors[search_option]

    rp = pronouncing.rhyming_part(phones)
    search_combos = wildcard_mix_phones_regex_searches(rp)
    while search_combos:
        search = random.choice(search_combos)
        rhymes = pronouncing.search(prefix + search + suffix)
        if rhymes:
            rhymes = unique(rhymes)
            if word in rhymes:
                rhymes.remove(word)
            return rhymes
        # This pattern matched nothing; do not try it again.
        search_combos.remove(search)
    print("random general rhyme: tried all combos, didn't find anything!")
    return []
Пример #22
0
def generate_rhyming_part_defaultdict() -> defaultdict:
    """Index the poetry corpus by rhyming part and last word.

    Returns a defaultdict structure of
    keys: rhyming parts (strs)
    values: defaultdicts mapping
    words with that rhyming part (lower-cased strs)
    to lists of lines that end with those words (lists of strs).

    Only lines whose ``'s'`` text is strictly between 32 and 48 characters
    long are used, and only the first pronunciation of the last word is
    considered.
    """
    by_rhyming_part = defaultdict(lambda: defaultdict(list))
    final_word = re.compile(r'(\b\w+\b)\W*$')
    for line in generate_poetry_corpus_lines():
        text = line['s']
        if not (32 < len(text) < 48):  # only use lines of uniform lengths
            continue
        match = final_word.search(text)
        if not match:
            continue
        # group(1) is the bare word; group() would also carry the trailing
        # punctuation and make the pronunciation lookup fail.
        last_word = match.group(1)
        pronunciations = pronouncing.phones_for_word(last_word)
        if pronunciations:
            rhyming_part = pronouncing.rhyming_part(pronunciations[0])
            # group by rhyming phones (for rhymes) and words (to avoid duplicate words)
            by_rhyming_part[rhyming_part][last_word.lower()].append(text)
    return by_rhyming_part
Пример #23
0
def near_rhyme(word, phones=None, stress=True, consonant_tail=0):
    """ Returns a list of words that almost rhyme

    The conditions for a near rhyme between words are:
    (1) At least one of the phonemes after and including the last stressed
        syllable match, except for the case where they all do.
    If phones argument not given, phones/pronunciation used will default to the
    first in the list of phones returned for word. If no rhyme is found, an
    empty list is returned.

    Search patterns come from ``wildcard_mix_phones_regex_searches`` applied
    to the rhyming part; the results of all patterns are combined.

    :param word: a word
    :param phones: specific CMUdict phonemes string for word (default None)
    :param stress: if vowels will match stress (default True)
    :param consonant_tail: maximum number of extra trailing phonemes (each
        1 to 3 characters long) allowed after the matched pattern
        (default 0)
    :return: a list of near rhymes for word
    :raises ValueError: if ``phones`` is not a pronunciation of ``word`` or
        the phonemes string is empty
    """
    if phones is None:
        phones = first_phones_for_word(word)
        if phones == "":
            return []
    else:
        if phones not in pronouncing.phones_for_word(word):
            raise ValueError(phones + " not phones for " + word)
    if not phones:
        raise ValueError("phonemes string is empty")

    rp = pronouncing.rhyming_part(phones)
    search_combos = wildcard_mix_phones_regex_searches(rp, stress)
    # Optional tail: up to ``consonant_tail`` further phonemes before the end.
    tail = "( .{1,3}){0," + str(consonant_tail) + "}$"
    rhymes = []
    for search in search_combos:
        rhymes += pronouncing.search(search + tail)
    if rhymes:
        rhymes = unique(rhymes)
        if word in rhymes:
            rhymes.remove(word)
        return rhymes
    print("near rhyme: tried all combos, didn't find anything!")
    return []
Пример #24
0
 def generate_rhyming_part_defaultdict(self, min_len, max_len) -> defaultdict:
     """Index ``self.all_lines`` by rhyming part and last word.

     Returns a defaultdict structure of
     keys: rhyming parts (strs)
     values: defaultdicts mapping
     words with that rhyming part (lower-cased strs)
     to lists of lines that end with those words (lists of strs).
     Code borrowed directly from Allison Parrish's examples.

     :param min_len: exclusive lower bound on the line length
     :param max_len: exclusive upper bound on the line length
     """
     by_rhyming_part = defaultdict(lambda: defaultdict(list))
     final_word = re.compile(r'(\b\w+\b)\W*$')
     for line in self.all_lines:
         text = line['s']
         # Uniform lengths original: if not(32 < len(text) < 48)
         if not (min_len < len(text) < max_len):  # only use lines of uniform lengths
             continue
         match = final_word.search(text)
         if not match:
             continue
         # group(1) is the bare word; group() would also carry the trailing
         # punctuation and make the pronunciation lookup fail.
         last_word = match.group(1)
         pronunciations = pronouncing.phones_for_word(last_word)
         if pronunciations:
             rhyming_part = pronouncing.rhyming_part(pronunciations[0])
             # group by rhyming phones (for rhymes) and words (to avoid duplicate words)
             by_rhyming_part[rhyming_part][last_word.lower()].append(text)
     return by_rhyming_part
Пример #25
0
def _pick_rhyming_line(anchor_line, rhyme_dict, fallback_lines):
    """Return a random line rhyming with ``anchor_line``, else a random fallback line."""
    last_word = get_last_word(anchor_line)
    try:
        phones = pronouncing.phones_for_word(last_word)
        rhyming_part = pronouncing.rhyming_part(phones[0])
        rhyme_word = random.choice(list(rhyme_dict[rhyming_part].keys()))
        return random.choice(rhyme_dict[rhyming_part][rhyme_word])
    except Exception:
        # Best effort: no pronunciation, no stored rhyme, or any other lookup
        # problem falls back to an arbitrary line.  Unlike a bare ``except``
        # this lets KeyboardInterrupt and SystemExit through.
        return random.choice(fallback_lines)


def write_poem():
    """Generate a poem from the loaded corpus and print it to stdout.

    The structure (number of lines, number of stanza breaks, rhyme scheme)
    comes from ``define_structure``.  The corpus is restricted to rows with
    the same meter as the first line and a similar syllable count, and
    rhyming lines are drawn from the rhyme dictionary built over that
    restricted corpus.

    Supported rhyme schemes are "random", "AABB" and "ABAB".

    :raises ValueError: if lines remain to be written and the rhyme scheme
        is not one of the supported values
    """
    df = load_data()

    print("Here I go! \n")

    length, line_breaks, rhyme_scheme = define_structure()

    # pick the first line
    index, first_line, _ = pick_first_line(df)

    # prune the dataframe so that we restrict the meter AND the number of
    # syllables (both conditions must hold at once)
    pruned_df = df[(df.meter == df.meter[index])
                   & (df.syllables > df.syllables[index] - 3)
                   & (df.syllables < df.syllables[index] + 2)]

    # get the rhyme_dict for the pruned df so we can rhyme lines
    rhyme_dict = get_rhyme_dict(pruned_df)
    candidate_lines = list(pruned_df.text)

    # Frankenbot's done
    print("\n VOILA!! \n")
    print("*********************************************************")
    print("\n")

    # print the first line
    print(first_line)

    # set break variable False so we don't line break before the first line
    break_here = False

    # now make the rest of the poem
    line = first_line
    while length > 0:

        if break_here and line_breaks > 0:
            print("\n")
            line_breaks -= 1
            break_here = False

        # the sum of two dice determines what we do...
        magic_number = random.randint(1, 6) + random.randint(1, 6)

        # line break on the next line
        if magic_number < 6:
            break_here = True

        if rhyme_scheme == "random":
            if magic_number >= 8:
                # if we roll 8 or more all hell breaks loose - no rhyming
                line = random.choice(candidate_lines)
            else:
                # otherwise rhyme with the previously printed line
                line = _pick_rhyming_line(line, rhyme_dict, candidate_lines)
            # print the line just chosen (not the previous one again)
            print(line)
            length -= 1

        elif rhyme_scheme == "AABB":
            # get line which rhymes with last line
            print(_pick_rhyming_line(line, rhyme_dict, candidate_lines))

            # new couplet starting
            line = random.choice(candidate_lines)
            print(line)
            length -= 2

        elif rhyme_scheme == "ABAB":
            new_line_a = _pick_rhyming_line(line, rhyme_dict, candidate_lines)
            line_b = random.choice(candidate_lines)
            new_line_b = _pick_rhyming_line(line_b, rhyme_dict,
                                            candidate_lines)

            print(line_b)
            print(new_line_a)
            print(new_line_b)

            # anchor for the next stanza's A rhyme
            line = random.choice(candidate_lines)
            length -= 3

        else:
            # Without this the loop would never make progress.
            raise ValueError(
                "unsupported rhyme scheme: {!r}".format(rhyme_scheme))

    print("\n")
Пример #26
0
def calcRhymeDensity(text,
                     rhymeType='perfect',
                     rhymeLocation='all',
                     lineStartStop=(1, -2),
                     printExamples=False):
    '''calculates rhyme density (count of rhymes over n-1 words). \n\n

       Only the first pronunciation of each word is used; words without a
       known pronunciation still count towards wordCount.

       _parameters_
       text: input text for measurement
       rhymeType: 'perfect' is a perfect rhyme, 'vowel' is a rhyming in the vowel sound + stress only;
                  any other value counts no rhymes
       rhymeLocation: 'all' uses every word of the text, 'end' the last word of each line,
                      'section' the last word of each line within lineStartStop
       lineStartStop: tuple of (start,stop) line indices, stop inclusive (used for 'section' only)
       printExamples: if True, print most common values of the selected rhymeType

       _returns_
       a tuple (rhymeDensity, rhyme_cnt, wordCount):
       rhymeDensity: rhyme_cnt/float(wordCount-1), or 0.0 when there are fewer than two words
       rhyme_cnt: count of rhymes of specified rhymeType and rhymeLocation
       wordCount: count of words of specified rhymeLocation

       _raises_
       ValueError: if rhymeLocation is not 'all', 'end' or 'section'
    '''
    # prepare data
    text = prepString(removeMarkupWords(text))

    # restrict location to (end=last word, section=last word in a line range, all=full text)
    if rhymeLocation == 'all':
        words = text.split()
    elif rhymeLocation in ('end', 'section'):
        lines = text.split("\n")
        if rhymeLocation == 'section':
            start, stop = lineStartStop[0], lineStartStop[1]
            # stop is inclusive; stop == -1 means "through the last line",
            # which a plain stop + 1 (== 0) would turn into an empty slice.
            upper = None if stop == -1 else stop + 1
            lines = lines[start:upper]
        words = [line.split()[-1] for line in lines if len(line.split()) > 0]
    else:
        raise ValueError(
            "rhymeLocation should be 'all', 'end', or 'section'")

    # count tokens
    wordCount = len(words)

    rhymePart_cnt = Counter()
    for word in words:
        pros = pronouncing.phones_for_word(word)
        if not pros:
            continue
        phonelist = pros[0]  # using first pronunciation for now
        if len(phonelist) == 0:
            continue
        if rhymeType == 'perfect':
            rhymePart_cnt[pronouncing.rhyming_part(phonelist)] += 1
        elif rhymeType == 'vowel':
            # first phone of the rhyming part only
            rhymePart_cnt[pronouncing.rhyming_part(phonelist).split()[0]] += 1

    # every occurrence of a rhyming part beyond the first counts as a rhyme
    rhyme_cnt = sum(count - 1 for count in rhymePart_cnt.values())

    if wordCount > 1:
        rhymeDensity = rhyme_cnt / float(wordCount - 1)
    else:
        rhymeDensity = 0.0

    if printExamples == True:
        print(rhymePart_cnt.most_common(5))

    return rhymeDensity, rhyme_cnt, wordCount
Пример #27
0
def syllrhyme(word):
    """Return ``(syllable count, rhyming part)`` for ``word``.

    The pronunciation is read from ``prons`` under the lower-cased word; a
    word that is not in ``prons`` yields ``(0, "")``.
    """
    try:
        phones = prons[word.lower()]
    except KeyError:
        return 0, ""
    else:
        syllables = pronouncing.syllable_count(phones)
        return syllables, pronouncing.rhyming_part(phones)
Пример #28
0
def calculate_rhyme_density(tokens, rhymeType='perfect', rhymeLocation='all'):
    """
    Compute the rhyme density of a list of tokens.

    Tokens are first run through ``parse_tokens``.  Only the first
    pronunciation of each token is used and tokens without a pronunciation
    contribute no rhymes, although they still count in the denominator.

    Parameters:
    -----------
    rhymeType : str
        - 'perfect' counts whole rhyming parts
        - 'stressed' counts the first phone of the rhyming part that ends
          in stress digit 1 or 2
        - 'allVowels' counts every phone of the rhyming part that ends in a
          digit

    rhymeLocation : str
        choose to look at 'all' text or 'end' (last word in each line)

    Returns:
    --------
    rhyme count divided by (number of tokens - 1), or None when there are
    fewer than two tokens
    """

    assert rhymeType in ['perfect', 'stressed',
                         'allVowels'], "Unexpected value for rhymeType"
    assert rhymeLocation in ['all',
                             'end'], "Unexpected value for rhymeLocation"

    if rhymeLocation == 'all':
        tokens = parse_tokens(tokens,
                              lines=False,
                              tags=False,
                              contraction=True)
    elif rhymeLocation == 'end':
        parsed_lines = parse_tokens(tokens,
                                    lines=True,
                                    tags=False,
                                    contraction=True)
        tokens = [parsed_line[-1] for parsed_line in parsed_lines if parsed_line]

    rhymePart_cnt = Counter()
    for token in tokens:
        # only the first pronunciation is used; this could be enhanced by
        # trying permutations of pronunciations
        pronunciations = pronouncing.phones_for_word(token)
        if not pronunciations:
            continue
        rhyming_part = pronouncing.rhyming_part(pronunciations[0])

        if rhymeType == 'perfect':
            rhymePart_cnt[rhyming_part] += 1
        elif rhymeType == 'stressed':
            # look at only stressed syllables
            stressed = [
                phone for phone in rhyming_part.split()
                if phone[-1] in ['1', '2']
            ]
            if stressed:
                rhymePart_cnt[stressed[0]] += 1
        elif rhymeType == 'allVowels':
            # look at all vowel parts
            for phone in rhyming_part.split():
                if phone[-1].isdigit():
                    rhymePart_cnt[phone] += 1

    # every occurrence of a part beyond the first counts as one rhyme
    rhyme_cnt = sum(count - 1 for count in rhymePart_cnt.values())

    # the denominator is the number of tokens, not the number of counted parts
    denominator = len(tokens) - 1

    return rhyme_cnt / denominator if denominator > 0 else None