示例#1
0
def generate_line(syllable_number, rhyme_word, word_list):
    """Build a line ending in ``rhyme_word`` with ``syllable_number`` syllables.

    Random words from ``word_list`` are prepended to the rhyme word until the
    running syllable total matches the target.  When the total overshoots,
    the line is reset to the bare rhyme word and the search starts again.

    Args:
        syllable_number: Target number of syllables for the whole line.
        rhyme_word: Word the line must end with; one of its pronunciations
            is picked at random to determine its syllable count.
        word_list: Sequence of candidate words (trailing whitespace is
            stripped, so raw lines read from a file are accepted).

    Returns:
        The generated line.  If no exact fit is found within the attempt
        budget, the line as it stood when the budget ran out is returned,
        so its syllable count may differ from the target.

    Raises:
        IndexError: If ``rhyme_word`` has no known pronunciation or
            ``word_list`` is empty (``random.choice`` on an empty sequence).
    """
    max_attempts = 100000

    rhyme_phones = random.choice(pronouncing.phones_for_word(rhyme_word))
    rhyme_syllabi_count = pronouncing.syllable_count(rhyme_phones)
    line_syllabi_count = rhyme_syllabi_count
    line = rhyme_word

    # The rhyme word alone is already too long: no amount of resetting can
    # help, so give up immediately instead of burning the attempt budget.
    if rhyme_syllabi_count > syllable_number:
        return line

    timeout_counter = 0
    while line_syllabi_count != syllable_number:
        # Checked on every pass so a word list without any pronounceable
        # words cannot keep the loop running forever.
        if timeout_counter > max_attempts:
            return line

        if line_syllabi_count > syllable_number:
            # Overshot the target: start over from the rhyme word.
            line_syllabi_count = rhyme_syllabi_count
            line = rhyme_word
        else:
            word = random.choice(word_list).rstrip()
            phones = pronouncing.phones_for_word(word)
            if phones:
                # Count the syllables of the word just drawn, not of the
                # rhyme word.
                line_syllabi_count += pronouncing.syllable_count(phones[0])
                line = word + " " + line

        timeout_counter += 1
    return line
示例#2
0
    def _new_sentence2(self, syls):
        """Keep generating sentences until one has exactly ``syls`` syllables.

        Each candidate comes from ``self.text_model.make_short_sentence``.
        Its last character is dropped before the remaining words are looked
        up; a candidate containing a word with no pronunciation counts as
        zero syllables.  The current candidate and its distance from the
        target are printed before every attempt.

        Returns the accepted sentence with all ``string.punctuation``
        characters removed.  There is no retry limit.
        """
        target = int(syls)
        sentence = None
        word_phones = []

        while True:
            total = sum(pnc.syllable_count(p) for p in word_phones)
            if sentence is not None and total == target:
                break

            print(sentence)
            print(total - target)

            sentence = self.text_model.make_short_sentence(
                target * self.config.poem_avg_char_per_syl,
                tries=100,
                max_overlap_ratio=self.config.markovify_max_overlap_ratio,
                max_overlap_total=self.config.markovify_max_overlap_total)
            if sentence is None:
                # No sentence produced this round; word_phones keeps its
                # previous value.
                continue

            tokens = sentence[:-1].split()
            try:
                word_phones = [pnc.phones_for_word(tok)[0] for tok in tokens]
            except IndexError:
                # At least one word is missing from the dictionary.
                word_phones = []

        return ''.join(ch for ch in sentence if ch not in string.punctuation)
 def test_syllable_count(self):
     """syllable_count returns the expected count for 1- to 5-syllable phone strings."""
     cases = (
         ("CH IY1 Z", 1),
         ("CH EH1 D ER0", 2),
         ("AE1 F T ER0 W ER0 D", 3),
         ("IH2 N T ER0 M IH1 T AH0 N T", 4),
         ("IH2 N T ER0 M IH1 T AH0 N T L IY0", 5),
     )
     for phones, expected in cases:
         self.assertEqual(pronouncing.syllable_count(phones), expected)
 def test_syllable_count(self):
     """Check syllable_count against known phone strings, shortest first."""
     phones_by_count = {
         1: "CH IY1 Z",
         2: "CH EH1 D ER0",
         3: "AE1 F T ER0 W ER0 D",
         4: "IH2 N T ER0 M IH1 T AH0 N T",
         5: "IH2 N T ER0 M IH1 T AH0 N T L IY0",
     }
     for expected in sorted(phones_by_count):
         actual = pronouncing.syllable_count(phones_by_count[expected])
         self.assertEqual(actual, expected)
示例#5
0
def syllables(word):
    """
    Get the number of syllables in a word.

    Single upper-case vowel letters count as one syllable.  Hyphenated words
    are split on the hyphens and the counts of the parts are added up.  Any
    other word is looked up with the ``pronouncing`` package; when it has no
    dictionary entry the count is estimated from its lower-case vowels, with
    each pair of adjacent vowels treated as a single vowel.

    :param word: string
    :return: number_syllables: int

    >>> syllables('dog')
    1
    >>> syllables('well-known')
    2
    >>> syllables('mr.')
    2
    >>> syllables('ms.')
    1
    """
    single_letter_vowels = ('A', 'E', 'I', 'O', 'U')
    if word in single_letter_vowels:
        return 1

    if '-' in word:
        # Split on the hyphen itself so every part is hyphen-free and the
        # recursion is guaranteed to terminate.
        return sum(syllables(part.strip()) for part in word.split('-'))

    # Syllables using pronouncing package
    pronunciations = pronouncing.phones_for_word(word)
    if pronunciations:
        return pronouncing.syllable_count(pronunciations[0])

    # Word is not in the dictionary: fall back to counting vowels.
    collapsed = re.sub('[aeiou]{2}', 'a', word)
    return len(re.findall('[aeiou]', collapsed))
示例#6
0
 def get_syllables(self, word):
     """Return the syllable count of ``word``, defaulting to 1 when it is unknown.

     ``word`` is converted with ``str`` first; the count comes from the first
     pronunciation returned by ``pronouncing.phones_for_word``.
     """
     candidates = pronouncing.phones_for_word(str(word))
     if candidates:
         return pronouncing.syllable_count(candidates[0])
     return 1
def get_foods():
    """Return food names whose first pronunciation has at most two syllables.

    Names are drawn, in order, from a fixed set of ``pycorpora.foods``
    corpora; names without a pronunciation entry are left out.
    """
    foods = pycorpora.foods
    sources = (
        foods.fruits["fruits"],
        foods.sausages["sausages"],
        foods.vegetables["vegetables"],
        foods.pizzaToppings["pizzaToppings"],
        foods.breads_and_pastries["breads"],
        foods.breads_and_pastries["pastries"],
        foods.beer_styles["beer_styles"],
        foods.bad_beers["bad_beers"],
        foods.tea["teas"],
        foods.apple_cultivars["cultivars"],
        foods.condiments["condiments"],
        foods.iba_cocktails["cocktails"],
        [entry["name"] for entry in foods.sandwiches["sandwiches"]],
    )

    short_names = []
    for source in sources:
        for name in source:
            phones = pronouncing.phones_for_word(name)
            if phones and pronouncing.syllable_count(phones[0]) <= 2:
                short_names.append(name)
    return short_names
示例#8
0
    def get_speed2(self, df2, df4):
        """Compute syllable-based speech-rate and articulation-rate figures.

        Args:
            df2: Table with an 'end' column; only its last value is read.
                # NOTE(review): the 10**(-7) factor suggests 'end' is in
                # 100 ns units and is being converted to seconds - confirm.
            df4: Table with 'arpa' and 'word' columns.
                # NOTE(review): presumably one row per phone, with 'word'
                # set only on the first phone of each word - confirm.

        Returns:
            A 7-tuple ``(nsyll_per_sec, ASD, tokens_per_sec2,
            words_per_sec2, nsyll_per_sec2, ASD2, df5)`` where ``df5`` is
            ``df4`` without the rows whose 'arpa' is 'sp', re-indexed from 0.

        # NOTE(review): an empty ``words_index`` makes ``words_index[i]``
        # fail, and ``nsyll == 0`` makes ASD / ASD2 divide by zero - confirm
        # callers guarantee at least one word and one syllable.
        """
        # get speech rate (syllables per second/mins)
        arpa_li = []
        # Drop the 'sp' rows and renumber so labels run 0..len(df5)-1.
        df5 = df4[df4['arpa'] != 'sp']
        df5.index = range(len(df5))

        words_series = df5['word'].notnull()  # word indicated by 'True'
        words_index = words_series[
            words_series].index.values  # get row indices for words
        # For each pair of consecutive word starts, concatenate the 'arpa'
        # values in between into one string per word.
        i = 0
        while i < len(words_index) - 1:
            arpa_li.append(''.join(
                df5['arpa'][words_index[i]:words_index[i + 1]].to_string(
                    header=False, index=False).split('\n')))
            i += 1
        # The last word runs from its start to the end of the table.
        arpa_li.append(''.join(df5['arpa'][words_index[i]:len(df5)].to_string(
            header=False, index=False).split('\n')))

        # Syllables per word, then the total over all words.
        syll = [pronouncing.syllable_count(str(p)) for p in arpa_li]
        nsyll = sum(syll)
        # Rate over the scaled final 'end' value, and its inverse.
        nsyll_per_sec = nsyll / (df2['end'].iloc[-1] * 10**(-7))
        #nsyll_per_min = nsyll/(df2['end'].iloc[-1] * 10**(-7)) * 60
        ASD = (df2['end'].iloc[-1] * 10**(-7)) / nsyll  # for seconds

        # get articulation rate (denominator is total phonation time)
        tokens_per_sec2 = self.ntokens / self.length_pho
        #tokens_per_min2 = ntokens/length_pho * 60
        words_per_sec2 = len(self.token_counts) / self.length_pho
        #words_per_min2 = len(token_counts)/length_pho * 60
        nsyll_per_sec2 = nsyll / self.length_pho
        #nsyll_per_min2 =
        ASD2 = self.length_pho / nsyll

        return nsyll_per_sec, ASD, tokens_per_sec2, words_per_sec2, nsyll_per_sec2, ASD2, df5
示例#9
0
def total_syllables_in_string(text):
    """Return the total syllable count of the whitespace-separated words in ``text``.

    The first pronunciation of each word is used.  If any word has no
    pronunciation, an error is logged and ``False`` is returned instead of
    a count.
    """
    running_total = 0
    try:
        for token in text.split():
            first_pronunciation = pronouncing.phones_for_word(token)[0]
            running_total += pronouncing.syllable_count(first_pronunciation)
    except IndexError:
        logging.error("Syllables could not be found for %s.", text)
        return False
    return running_total
    def _new_sentence(self, syls):
        """Generate one sentence and accept it only if it has ``syls`` syllables.

        The sentence comes from ``self.text_model.make_short_sentence``.  Its
        last character is dropped before the words are looked up, and a
        sentence containing a word without a pronunciation counts as zero
        syllables.

        Returns the sentence stripped of ``string.punctuation`` characters,
        or ``None`` when no sentence was produced, the sentence is empty, or
        the syllable count does not match.
        """
        target = int(syls)
        candidate = self.text_model.make_short_sentence(
            target * self.config.poem_avg_char_per_syl,
            tries=100,
            max_overlap_ratio=self.config.markovify_max_overlap_ratio,
            max_overlap_total=self.config.markovify_max_overlap_total)
        if candidate is None:
            return None

        # Might be double work checking for punctuation
        tokens = candidate[:-1].split()
        try:
            word_phones = [pnc.phones_for_word(tok)[0] for tok in tokens]
        except IndexError:
            # Word not found in dictionary
            word_phones = []

        total = sum(pnc.syllable_count(p) for p in word_phones)
        if candidate and total == target:
            return ''.join(
                ch for ch in candidate if ch not in string.punctuation)
        return None
def get_syllable_count(word):
    """Return the syllable count of ``word``, or the string "invalid" if it is unknown.

    The count is taken from the first pronunciation returned by
    ``pronouncing.phones_for_word``.
    """
    candidates = pronouncing.phones_for_word(word)
    return pronouncing.syllable_count(candidates[0]) if candidates else "invalid"
示例#12
0
def word_syllables(word):
    """Return the syllable count of ``word``, or 0 when it cannot be counted.

    The word is lower-cased and passed through ``strip_punc`` before the
    lookup; the first pronunciation found is used.

    Args:
        word: The word to count.  A falsy value (``""``, ``None``) yields 0.

    Returns:
        The number of syllables, or 0 when the word is empty or has no
        pronunciation entry (previously the latter raised ``IndexError``).
    """
    if not word:
        return 0

    phones = pronouncing.phones_for_word(strip_punc(word.lower()))
    if not phones:
        # Unknown word (or nothing left after stripping punctuation).
        return 0
    return pronouncing.syllable_count(phones[0])
示例#13
0
 def phones_count(self, new_sent):
     """Return the total syllable count of ``new_sent``, or 0 if it cannot be counted.

     Args:
         new_sent: Sentence whose whitespace-separated words are looked up;
             the first pronunciation of each word is used.

     Returns:
         The summed syllable count, or 0 when ``new_sent`` is not a string
         (no ``split`` method) or contains a word without a pronunciation.
     """
     try:
         phones = [
             pronouncing.phones_for_word(p)[0] for p in new_sent.split()
         ]
     except (AttributeError, IndexError):
         # AttributeError: new_sent is not a string (e.g. None).
         # IndexError: a word has no dictionary entry.
         # Anything else (KeyboardInterrupt included) now propagates.
         return 0
     return sum(pronouncing.syllable_count(p) for p in phones)
def count_syllables(word):
    """Return the largest syllable count among all pronunciations of ``word``.

    Returns 0 when the word has no pronunciation.
    """
    return max(
        (pronouncing.syllable_count(variant)
         for variant in pronouncing.phones_for_word(word)),
        default=0,
    )
示例#15
0
def syllable_count(sentence):
    """Return the total number of syllables in ``sentence``.

    Args:
        sentence: Text whose whitespace-separated words are counted using
            the first pronunciation of each word.

    Returns:
        The summed syllable count.  Words without a pronunciation
        contribute 0.
    """
    total = 0
    for token in sentence.split():
        # One lookup per word; unknown words are skipped rather than being
        # pushed through the counter as an empty phone string.
        pronunciations = pronouncing.phones_for_word(token)
        if pronunciations:
            total += pronouncing.syllable_count(pronunciations[0])
    return total
示例#16
0
 def get_pronouncing_num(word):
     """Return the syllable count of ``word``, falling back to 2 on any error.

     The count comes from the first pronunciation of the word.  When the
     lookup or count fails, a message (Chinese for "syllable count error,
     offending word:") followed by the word is printed and 2 is returned.
     """
     try:
         return pronouncing.syllable_count(pronouncing.phones_for_word(word)[0])
     except Exception:
         print("音节计算异常,异常单词:" + word)
         return math.ceil(2)
示例#17
0
def get_syllable_count(word):
    """Return the syllable count of the first pronunciation of ``word``.

    Words without a pronunciation yield 0.
    """
    candidates = pronouncing.phones_for_word(word)
    if not candidates:
        return 0
    return pronouncing.syllable_count(candidates[0])
示例#18
0
def get_entry():
    """Return the room names from pycorpora whose first pronunciation has two syllables.

    Names without a pronunciation entry are left out; corpus order is kept.
    """
    def _has_two_syllables(name):
        phones = pronouncing.phones_for_word(name)
        return bool(phones) and pronouncing.syllable_count(phones[0]) == 2

    return [
        room for room in pycorpora.architecture.rooms['rooms']
        if _has_two_syllables(room)
    ]
示例#19
0
def syllable_counts(sentence):
    """Return the total syllable count over ``sentence.words``.

    Each word is lower-cased and counted by its first pronunciation; words
    without a pronunciation contribute nothing.
    """
    per_word = []
    for token in sentence.words:
        pronunciations = pr.phones_for_word(token.lower())
        if pronunciations:
            per_word.append(pr.syllable_count(pronunciations[0]))
    return sum(per_word)
示例#20
0
文件: rhyme.py 项目: jsmilan/LyricGen
def getSyllableCount(word):
    """Return the syllable count of the first pronunciation of ``word``.

    Words without a pronunciation default to 1 syllable.
    """
    candidates = pronouncing.phones_for_word(word)
    return pronouncing.syllable_count(candidates[0]) if candidates else 1
示例#21
0
def get_object():
    """Return the object names from pycorpora whose first pronunciation has two syllables.

    Names without a pronunciation entry are skipped; corpus order is kept.
    """
    two_syllable_names = []
    for name in pycorpora.objects.objects['objects']:
        pronunciations = pronouncing.phones_for_word(name)
        if not pronunciations:
            continue
        if pronouncing.syllable_count(pronunciations[0]) == 2:
            two_syllable_names.append(name)
    return two_syllable_names
示例#22
0
def count_syllables_in_word(word):
    """Return the number of syllables in ``word``.

    Args:
        word: The word to count; it is lower-cased before the lookup.

    Returns:
        0 when the word contains no ASCII letter at all, the syllable count
        of its first pronunciation when it has one, and ``None`` when the
        word is not in the pronouncing dictionary.
    """
    word = word.lower()

    if not re.search('[a-z]', word):  # contains only non-alpha characters
        return 0

    phones = pronouncing.phones_for_word(word)
    if not phones:
        return None
    # Count the phone string as a whole instead of calling the counter once
    # for every character in it.
    return pronouncing.syllable_count(phones[0])
示例#23
0
def get_animal():
    """Return a random common animal name of at most two syllables.

    Candidates come from the pycorpora common-animals list; names without a
    pronunciation entry are excluded.  ``random.choice`` raises
    ``IndexError`` if no candidate qualifies.
    """
    short_names = []
    for name in pycorpora.animals.common["animals"]:
        pronunciations = pronouncing.phones_for_word(name)
        if not pronunciations:
            continue
        if pronouncing.syllable_count(pronunciations[0]) <= 2:
            short_names.append(name)
    return random.choice(short_names)
示例#24
0
def get_syllables(word):
    """Return the number of syllables in ``word``.

    Args:
        word: The word to count.

    Returns:
        The syllable count of the word's first pronunciation, or the result
        of ``estimate_syllables(word)`` when the word has no pronunciation
        entry.  (The dictionary count used to be computed and then
        discarded, so known words always returned 0.)
    """
    pronunciation_list = pronouncing.phones_for_word(word)
    if pronunciation_list:
        return pronouncing.syllable_count(pronunciation_list[0])
    return estimate_syllables(word)
示例#25
0
文件: train_hmm.py 项目: joshc/sonnet
def est_num_syllables(word):
    """
    Estimate how many syllables ``word`` has.

    :param word: A string.
    :return: The syllable count of the word's first pronunciation when it
        has one; otherwise the result of ``len(word) / 4``.
    """
    pronunciations = pronouncing.phones_for_word(word)
    if not pronunciations:
        return len(word) / 4
    return pronouncing.syllable_count(pronunciations[0])
示例#26
0
def countSyllables(sentence):
    """Count the syllables in a sentence.

    Args:
        sentence: Text whose whitespace-separated words are counted.  The
            characters ``!.,?;:)(`` are removed from each word before the
            lookup.

    Returns:
        The summed syllable count of every word's first pronunciation.
        Words without a pronunciation are skipped.
    """
    # Built once: the table does not depend on the word being processed.
    punctuation_table = str.maketrans(dict.fromkeys('!.,?;:)('))

    total = 0
    for raw_word in sentence.split():
        pronunciation_list = pronouncing.phones_for_word(
            raw_word.translate(punctuation_table))
        if pronunciation_list:
            total += pronouncing.syllable_count(pronunciation_list[0])
    return total
示例#27
0
文件: sonnet.py 项目: chrstnb/sonnet
def newWord(grammar, dic):
	"""Pick a random pronounceable word and add its syllables to the running total.

	Args:
		grammar: Key selecting the list of candidate words in ``dic``.
		dic: Mapping from grammar key to a list of words.

	Returns:
		The chosen word, or the string '-1' when adding its syllables pushes
		the module-level ``syllables`` counter above 10.  The counter is
		updated in both cases.

	Raises:
		ValueError: If ``dic[grammar]`` is empty (``randrange`` on an empty
			range).

	Note:
		Words are redrawn until one has a pronunciation, so a list without
		any pronounceable word never returns.
	"""
	global syllables
	candidates = dic[grammar]
	while True:
		# randrange excludes its upper bound, so len(candidates) is the right
		# limit; len(candidates) - 1 could never select the last word.
		word = candidates[randrange(len(candidates))]
		p = pronouncing.phones_for_word(word)
		if p:
			break
	syllables += pronouncing.syllable_count(p[0])
	if syllables > 10:
		return '-1'
	return word
示例#28
0
def syllable_counter(lines):
    '''
    Count the syllables in a list of strings.

    Every whitespace-separated word is counted by its first pronunciation.
    A word that is missing from the pronouncing dictionary is counted as
    one syllable (the phoneme for 'I', 'AY1', stands in for it).

    NOTE: Multi-syllabic digits, times (i.e. 1:03) and most likely other
    non-"word" words are not counted properly.


    Input
    -----
    lines : list (str)
        List of strings to count.


    Output
    ------
    int
        Total number of syllables in the input list.


    [Modified from Allison Parrish's example in the documentation
     for her library, pronouncing]:
    https://pronouncing.readthedocs.io/en/latest/tutorial.html

    '''
    placeholder = 'AY1'
    grand_total = 0

    for text in lines:
        for token in text.split():
            known = pronouncing.phones_for_word(token)
            phoneme = known[0] if known else placeholder
            grand_total += pronouncing.syllable_count(phoneme)

    return grand_total
示例#29
0
    def _syllables_in_word(word):
        """Count the syllables of a single word.

        Arguments:
          word (:py:class:`str`): The word to look up.

        Returns:
          :py:class:`int`: The syllable count of the word's first
            pronunciation, or ``None`` when the word has no
            pronunciation.

        """
        pronunciations = phones_for_word(word)
        if not pronunciations:
            return None
        return syllable_count(pronunciations[0])
 def to_syl_count(self, wordl):
     """Yield the syllable count of each word in ``wordl`` that has a pronunciation.

     Words are lower-cased and counted by their first entry in the CMU
     dictionary of (American) pronunciations.  Words without an entry yield
     nothing.  ``self.stats`` records a hit ('readingage_in_cmu_dict') or a
     miss ('readingage_not_in_cmu_dict') for every word.
     """
     for token in wordl:
         try:
             first_pronunciation = phones_for_word(token.lower())[0]
             n_syllables = syllable_count(first_pronunciation)
             self.stats.inc_value('readingage_in_cmu_dict')
             yield n_syllables
         except IndexError:
             # TODO: Consider a fallback rule/letter-based syllable counter
             #       for words the CMU pronouncing dictionary lacks. Not
             #       disastrous: average sentence length uses the full word
             #       count, and average syllables uses only the words for
             #       which a syllable count is available.
             self.stats.inc_value('readingage_not_in_cmu_dict')
示例#31
0
def algorithm_accuracy():
    """Compare ``estimate_syllables`` against the pronouncing dictionary.

    Reads the whitespace-separated words in ``common_words.txt`` and sums,
    over all of them, the estimated syllable count and the count of the
    word's first dictionary pronunciation.

    Returns:
        ``(actual_sum - estimate_sum) / actual_sum``: the relative amount by
        which the estimates fall short of (positive) or exceed (negative)
        the dictionary total.

    Raises:
        IndexError: If a word in the file has no pronunciation entry.
        ZeroDivisionError: If the file yields a dictionary total of 0
            (for example, an empty file).
    """
    # Context manager so the file handle is closed once the words are read.
    with open('common_words.txt', encoding="utf8") as word_file:
        words = word_file.read().split()

    estimate_sum = 0
    actual_sum = 0

    for word in words:
        estimate_sum += estimate_syllables(word)
        pronunciation_list = pronouncing.phones_for_word(word)
        actual_sum += pronouncing.syllable_count(pronunciation_list[0])

    return (actual_sum - estimate_sum) / actual_sum
示例#32
0
def get_random_word(part_of_speech, syllables=None, skip_plurals=False):
    """
    Get this kind of word with this many syllables.
    If none found with this many syllables, any'll do.

    Args:
        part_of_speech: Passed to ``get_random_words_from_wordnik`` to fetch
            the candidate words.
        syllables: Wanted syllable count; a word matches when any of its
            pronunciations has exactly this many syllables.
        skip_plurals: When true, words ending in a single "s" are never
            returned as a match (they may still be the random fallback).

    Returns:
        The first matching candidate, or a random candidate when none
        matches.

    Each pronunciation examined is printed together with its syllable count.
    """
    words = get_random_words_from_wordnik(part_of_speech)
    for word in words:
        # Depends only on the word, so decide it once per word. The length
        # guard keeps the one-letter word "s" from raising IndexError on
        # word[-2].
        maybe_plural = (
            skip_plurals
            and len(word) > 1
            and word[-1].lower() == "s"
            and word[-2] != "s"
        )

        for pronunciation in pronouncing.phones_for_word(word):
            count = pronouncing.syllable_count(pronunciation)

            print(count, word)
            if maybe_plural:
                # Might be plural, just skip it
                print("Plural? Skip!")
                continue

            if count == syllables:
                # Bingo!
                return word

    # Any'll do
    return random.choice(words)
示例#33
0
文件: generate.py 项目: mewo2/syllpos
import nltk
from nltk.corpus import brown
import pronouncing

from collections import defaultdict

# Load the CMU pronouncing data before reading pronouncing.pronunciations.
pronouncing.init_cmu()

print "Building syllable count DB"
# word -> syllable count; only the first pronunciation listed for a word is
# kept, later entries for the same word are ignored.
sylcount = {}
for word, phones in pronouncing.pronunciations:
    if word in sylcount: continue
    sylcount[word] = pronouncing.syllable_count(phones)

print "Counting syllables"
# (tag, syllable count) -> set of Brown corpus words with that tag and count.
by_pos_count = defaultdict(set)
for word, tag in brown.tagged_words():
    # Skip capitalised words unless their tag starts with "NP".
    # NOTE(review): presumably "NP" marks proper nouns in the Brown tagset,
    # so this drops sentence-initial capitalisation - confirm.
    if word[0].isupper() and not tag.startswith("NP"):
        continue
    # Keep only the part of the tag before the first hyphen.
    tag = tag.split('-')[0]
    try:
        count = sylcount[word.lower()]
    except:
        # No syllable count for this word: leave it out.
        continue
    by_pos_count[tag, count].add(word)

print "Writing output"
for tag, count in by_pos_count.keys():
    filename = "wordlists/%s-%d.txt" % (tag, count)
    with open(filename, "w") as f:
        for word in sorted(by_pos_count[tag, count]):
示例#34
0
def my(word):
    """Return the syllable count of ``word``.

    Uses the word's first pronunciation when it has one, otherwise falls
    back to ``syllables_en.count``.
    """
    pronunciations = pronouncing.phones_for_word(word)
    if not pronunciations:
        return syllables_en.count(word)
    return pronouncing.syllable_count(pronunciations[0])
示例#35
0
wjdata_list = wjdata['results'][0]['captions']

# Captions bucketed by their total syllable count: exactly 5, exactly 7,
# and 2 or 3 syllables.
syllables5 = []
syllables7 = []
syllables23 = []

# Check every caption (using pronouncingpy + the CMU pronouncing dictionary)
# and file it under the matching bucket. Iterating over the list itself
# covers all captions, however many there are, instead of a fixed 82.
for text in wjdata_list:
	try:
		phones = [pronouncing.phones_for_word(p)[0] for p in text.split()]
	except IndexError:
		# Skip captions containing a word with no dictionary entry, such as
		# the non-indexed <UNK> token.
		continue

	count = sum(pronouncing.syllable_count(p) for p in phones)
	if count == 5:
		syllables5.append(text)
	elif count == 7:
		syllables7.append(text)
	elif count in (2, 3):
		syllables23.append(text)