def generate_line(syllable_number, rhyme_word, word_list):
    """Build a line of ``syllable_number`` syllables that ends in ``rhyme_word``.

    Random words from ``word_list`` are prepended to ``rhyme_word`` until the
    running syllable total equals ``syllable_number``.  When the total
    overshoots, the line is reset to just ``rhyme_word`` and building restarts.

    :param syllable_number: target syllable count for the finished line.
    :param rhyme_word: word placed at the end of the line.  It needs at least
        one pronunciation; otherwise ``random.choice`` raises ``IndexError``
        on the empty pronunciation list.
    :param word_list: sequence of candidate words; trailing whitespace is
        stripped from each pick.
    :return: the generated line.  If an overshoot happens after more than
        100000 loop iterations, the overshooting line is returned as is.
    """
    max_iterations = 100000
    rhyme_phones = pronouncing.phones_for_word(rhyme_word)
    rhyme_syllabi_count = pronouncing.syllable_count(random.choice(rhyme_phones))
    line_syllabi_count = rhyme_syllabi_count
    line = rhyme_word
    timeout_counter = 0
    while line_syllabi_count != syllable_number:
        if line_syllabi_count > syllable_number:
            if timeout_counter > max_iterations:
                return line
            # Overshot the target: start again from the bare rhyme word.
            line_syllabi_count = rhyme_syllabi_count
            line = rhyme_word
        else:
            word = random.choice(word_list).rstrip()
            word_phones = pronouncing.phones_for_word(word)
            if word_phones:
                # Count the syllables of the word just picked.  The earlier
                # code reused the rhyme word's pronunciation here, so every
                # added word was counted as if it were the rhyme word.
                line_syllabi_count += pronouncing.syllable_count(word_phones[0])
                line = word + " " + line
        timeout_counter += 1
    return line
def _new_sentence2(self, syls):
    """Generate Markovify sentences until one has exactly ``syls`` syllables.

    Each candidate is split on whitespace and every token is looked up in the
    pronouncing dictionary with its surrounding punctuation removed.  A
    candidate containing a token with no pronunciation is scored as 0
    syllables, so it is rejected unless ``syls`` is 0.

    The loop only ends once a matching sentence is produced.  On every pass
    the current candidate and its syllable difference are printed.

    :param syls: target syllable count (anything ``int()`` accepts).
    :return: the accepted sentence with all punctuation characters removed.
    """
    syls = int(syls)
    sent = None
    total = 0
    while sent is None or total != syls:
        print(sent)
        print(total - syls)
        sent = self.text_model.make_short_sentence(
            syls * self.config.poem_avg_char_per_syl,
            tries=100,
            max_overlap_ratio=self.config.markovify_max_overlap_ratio,
            max_overlap_total=self.config.markovify_max_overlap_total)
        if sent is None:
            continue
        total = 0
        for token in sent.split():
            # Strip punctuation per token instead of chopping the sentence's
            # last character: trailing punctuation first (keeps forms such as
            # "'tis"), then leading punctuation as well.
            pronunciations = (
                pnc.phones_for_word(token.rstrip(string.punctuation))
                or pnc.phones_for_word(token.strip(string.punctuation)))
            if not pronunciations:
                # Word not found in dictionary: score the sentence as 0.
                total = 0
                break
            total += pnc.syllable_count(pronunciations[0])
    return ''.join(c for c in sent if c not in string.punctuation)
def test_syllable_count(self):
    # Check syllable_count against known phone strings of one to five
    # syllables.
    expected_counts = [
        ("CH IY1 Z", 1),
        ("CH EH1 D ER0", 2),
        ("AE1 F T ER0 W ER0 D", 3),
        ("IH2 N T ER0 M IH1 T AH0 N T", 4),
        ("IH2 N T ER0 M IH1 T AH0 N T L IY0", 5),
    ]
    for phones, expected in expected_counts:
        self.assertEqual(pronouncing.syllable_count(phones), expected)
def test_syllable_count(self):
    # Phone strings paired with the syllable count syllable_count must
    # report for them, checked in order from one to five syllables.
    cases = (
        ("CH IY1 Z", 1),
        ("CH EH1 D ER0", 2),
        ("AE1 F T ER0 W ER0 D", 3),
        ("IH2 N T ER0 M IH1 T AH0 N T", 4),
        ("IH2 N T ER0 M IH1 T AH0 N T L IY0", 5),
    )
    for phone_string, syllable_total in cases:
        actual = pronouncing.syllable_count(phone_string)
        self.assertEqual(actual, syllable_total)
def syllables(word):
    """
    Get the number of syllables in a word

    A single upper-case vowel counts as one syllable.  A word containing a
    hyphen is split on hyphens and apostrophes and the counts of its parts
    are summed.  Any other word is looked up with the pronouncing package;
    a word it does not know is estimated by counting vowel groups.

    :param word: string
    :return: number_syllables: int

    >>> syllables('dog')
    1
    >>> syllables('well-known')
    2
    >>> syllables('mr.')
    2
    >>> syllables('ms.')
    1
    """
    vowels = ('A', 'E', 'I', 'O', 'U')
    if word in vowels:
        return 1
    if '-' in word:
        # The earlier pattern "- | ' " only matched separators next to
        # spaces, so a plain hyphenated word came back unsplit and the
        # function recursed on the same word until RecursionError.
        return sum(syllables(part) for part in re.split(r"[-']", word))
    # Syllables using pronouncing package
    pronunciations = pronouncing.phones_for_word(word)
    if pronunciations:
        return pronouncing.syllable_count(pronunciations[0])
    # Word is not known to pronouncing: collapse each pair of adjacent
    # vowels into one vowel, then count the vowels that remain.
    collapsed = re.sub('[aeiou]{2}', 'a', word)
    return len(re.findall('[aeiou]', collapsed))
def get_syllables(self, word):
    """Return the syllable count of the first pronunciation found for ``word``.

    ``word`` is converted with ``str()`` before lookup.  A word with no
    pronunciation is counted as one syllable.
    """
    candidates = pronouncing.phones_for_word(str(word))
    if not candidates:
        return 1
    return pronouncing.syllable_count(candidates[0])
def get_foods():
    """Return every pycorpora food name whose first pronunciation has at most two syllables.

    Names with no pronunciation are left out.  Order follows the source
    lists, and duplicates across lists are kept.
    """
    foods = pycorpora.foods
    sources = [
        foods.fruits["fruits"],
        foods.sausages["sausages"],
        foods.vegetables["vegetables"],
        foods.pizzaToppings["pizzaToppings"],
        foods.breads_and_pastries["breads"],
        foods.breads_and_pastries["pastries"],
        foods.beer_styles["beer_styles"],
        foods.bad_beers["bad_beers"],
        foods.tea["teas"],
        foods.apple_cultivars["cultivars"],
        foods.condiments["condiments"],
        foods.iba_cocktails["cocktails"],
        [sandwich["name"] for sandwich in foods.sandwiches["sandwiches"]],
    ]

    all_names = []
    for source in sources:
        all_names.extend(source)

    def is_short(name):
        # True when the name is pronounceable in two syllables or fewer.
        found = pronouncing.phones_for_word(name)
        return bool(found) and pronouncing.syllable_count(found[0]) <= 2

    return [name for name in all_names if is_short(name)]
def get_speed2(self, df2, df4):
    """Compute syllable-based speech rate and articulation rate.

    Reads the 'arpa' and 'word' columns of ``df4`` and the last 'end' value
    of ``df2``, plus ``self.ntokens``, ``self.token_counts`` and
    ``self.length_pho``.

    Returns the tuple ``(nsyll_per_sec, ASD, tokens_per_sec2,
    words_per_sec2, nsyll_per_sec2, ASD2, df5)``, where ``df5`` is ``df4``
    without its 'sp' rows and with a fresh 0..n-1 index.

    NOTE(review): raises ZeroDivisionError when no syllables are counted or
    ``self.length_pho`` is 0, and IndexError when no row has a word.
    """
    # get speech rate (syllables per second/mins)
    arpa_li = []
    # Drop rows whose 'arpa' label is 'sp' and renumber the remaining rows.
    df5 = df4[df4['arpa'] != 'sp']
    df5.index = range(len(df5))
    words_series = df5['word'].notnull()  # word indicated by 'True'
    words_index = words_series[
        words_series].index.values  # get row indices for words
    # For each word, join the 'arpa' values from its row up to (not
    # including) the next word's row into one string.
    i = 0
    while i < len(words_index) - 1:
        arpa_li.append(''.join(
            df5['arpa'][words_index[i]:words_index[i + 1]].to_string(
                header=False, index=False).split('\n')))
        i += 1
    # The last word runs to the end of the frame.
    arpa_li.append(''.join(df5['arpa'][words_index[i]:len(df5)].to_string(
        header=False, index=False).split('\n')))
    syll = [pronouncing.syllable_count(str(p)) for p in arpa_li]
    nsyll = sum(syll)
    # NOTE(review): the 10**(-7) factor presumably converts df2['end'] from
    # 100-nanosecond units to seconds -- confirm against the data source.
    nsyll_per_sec = nsyll / (df2['end'].iloc[-1] * 10**(-7))
    #nsyll_per_min = nsyll/(df2['end'].iloc[-1] * 10**(-7)) * 60
    # Inverse of nsyll_per_sec (presumably "average syllable duration").
    ASD = (df2['end'].iloc[-1] * 10**(-7)) / nsyll  # for seconds
    # get articulation rate (denominator is total phonation time)
    tokens_per_sec2 = self.ntokens / self.length_pho
    #tokens_per_min2 = ntokens/length_pho * 60
    words_per_sec2 = len(self.token_counts) / self.length_pho
    #words_per_min2 = len(token_counts)/length_pho * 60
    nsyll_per_sec2 = nsyll / self.length_pho
    #nsyll_per_min2 =
    ASD2 = self.length_pho / nsyll
    return nsyll_per_sec, ASD, tokens_per_sec2, words_per_sec2, nsyll_per_sec2, ASD2, df5
def total_syllables_in_string(text):
    """Return the total syllable count of the whitespace-separated words in ``text``.

    Each word is counted using its first pronunciation.  If any word has no
    pronunciation, an error is logged and ``False`` is returned instead of a
    count.
    """
    total = 0
    try:
        for token in text.split():
            first_pronunciation = pronouncing.phones_for_word(token)[0]
            total += pronouncing.syllable_count(first_pronunciation)
    except IndexError:
        logging.error("Syllables could not be found for %s.", text)
        return False
    return total
def _new_sentence(self, syls):
    """Create one sentence with Markovify and accept it only if it has
    exactly ``syls`` syllables.

    The sentence is split on whitespace and every token is looked up in the
    pronouncing dictionary with its surrounding punctuation removed.  A
    sentence containing a token with no pronunciation is scored as 0
    syllables.

    :param syls: target syllable count (anything ``int()`` accepts).
    :return: the sentence with all punctuation characters removed, or
        ``None`` when no sentence was produced, the sentence is empty, or
        its syllable count differs from ``syls``.
    """
    syls = int(syls)
    sent = self.text_model.make_short_sentence(
        syls * self.config.poem_avg_char_per_syl,
        tries=100,
        max_overlap_ratio=self.config.markovify_max_overlap_ratio,
        max_overlap_total=self.config.markovify_max_overlap_total)
    if not sent:
        # Covers both "nothing generated" (None) and an empty sentence.
        return None
    total = 0
    for token in sent.split():
        # Strip punctuation per token instead of chopping the sentence's
        # last character: trailing punctuation first (keeps forms such as
        # "'tis"), then leading punctuation as well.
        pronunciations = (
            pnc.phones_for_word(token.rstrip(string.punctuation))
            or pnc.phones_for_word(token.strip(string.punctuation)))
        if not pronunciations:
            # Word not found in dictionary: score the sentence as 0.
            total = 0
            break
        total += pnc.syllable_count(pronunciations[0])
    if total != syls:
        return None
    return ''.join(c for c in sent if c not in string.punctuation)
def get_syllable_count(word):
    """Return the syllable count of ``word``'s first pronunciation.

    Returns the string ``"invalid"`` when the word has no pronunciation.
    """
    candidates = pronouncing.phones_for_word(word)
    if candidates:
        return pronouncing.syllable_count(candidates[0])
    return "invalid"
def word_syllables(word):
    """Return the syllable count of ``word``, or 0 for a falsy ``word``.

    The word is lower-cased and passed through ``strip_punc`` before lookup,
    and its first pronunciation is counted.  Raises ``IndexError`` when the
    cleaned word has no pronunciation.
    """
    if not word:
        return 0
    cleaned = strip_punc(word.lower())
    first_pronunciation = pronouncing.phones_for_word(cleaned)[0]
    return pronouncing.syllable_count(first_pronunciation)
def phones_count(self, new_sent):
    """Return the total syllable count of the whitespace-separated words in ``new_sent``.

    Each word is counted using its first pronunciation.  This is best
    effort: 0 is returned if any word has no pronunciation or the input
    cannot be processed at all.
    """
    try:
        return sum(
            pronouncing.syllable_count(pronouncing.phones_for_word(word)[0])
            for word in new_sent.split())
    except Exception:
        # Still deliberately broad, but unlike the earlier bare ``except:``
        # this no longer swallows KeyboardInterrupt or SystemExit.
        return 0
def count_syllables(word):
    """Return the largest syllable count among all pronunciations of ``word``.

    Returns 0 when the word has no pronunciation.
    """
    return max(
        (pronouncing.syllable_count(p)
         for p in pronouncing.phones_for_word(word)),
        default=0)
def syllable_count(sentence):
    """Return the total syllable count of the whitespace-separated words in ``sentence``.

    Each word is counted using its first pronunciation.  A word with no
    pronunciation is counted as the empty phone string.
    """
    total = 0
    for token in sentence.split():
        matches = pronouncing.phones_for_word(token.strip())
        first_pronunciation = matches[0] if matches else ""
        total += pronouncing.syllable_count(first_pronunciation)
    return total
def get_pronouncing_num(word):
    """Return the syllable count of ``word``'s first pronunciation.

    On any failure (for example, no pronunciation found) a message naming
    the word is printed and ``math.ceil(2)`` is returned as the default.
    """
    try:
        first_pronunciation = pronouncing.phones_for_word(word)[0]
        num = pronouncing.syllable_count(first_pronunciation)
    except Exception:
        # Message reads: "Syllable count failed, offending word:".
        print("音节计算异常,异常单词:" + word)
        return math.ceil(2)
    return num
def get_syllable_count(word):
    """Return the syllable count of ``word``'s first pronunciation, or 0 when it has none."""
    candidates = pronouncing.phones_for_word(word)
    if not candidates:
        return 0
    return pronouncing.syllable_count(candidates[0])
def get_entry():
    """Return the pycorpora room names whose first pronunciation has exactly two syllables.

    Names with no pronunciation are left out.
    """
    def first_count(name):
        # Syllable count of the first pronunciation, or None if there is none.
        found = pronouncing.phones_for_word(name)
        return pronouncing.syllable_count(found[0]) if found else None

    rooms = pycorpora.architecture.rooms['rooms']
    return [room for room in rooms if first_count(room) == 2]
def syllable_counts(sentence):
    """Return the total syllable count of the words in ``sentence.words``.

    Each word is lower-cased and counted using its first pronunciation.
    Words with no pronunciation add nothing to the total.
    """
    total = 0
    for token in sentence.words:
        found = pr.phones_for_word(token.lower())
        if found:
            total += pr.syllable_count(found[0])
    return total
def getSyllableCount(word):
    """Return the syllable count of ``word``'s first pronunciation.

    A word with no pronunciation defaults to one syllable.
    """
    candidates = pronouncing.phones_for_word(word)
    if candidates:
        return pronouncing.syllable_count(candidates[0])
    # Return 1 syllable as default
    return 1
def get_object():
    """Return the pycorpora object names whose first pronunciation has exactly two syllables.

    Names with no pronunciation are left out.
    """
    two_syllable_names = []
    for name in pycorpora.objects.objects['objects']:
        found = pronouncing.phones_for_word(name)
        if not found:
            continue
        if pronouncing.syllable_count(found[0]) == 2:
            two_syllable_names.append(name)
    return two_syllable_names
def count_syllables_in_word(word):
    """Return the syllable count of ``word``'s first pronunciation.

    The word is lower-cased first.

    :return: 0 when the word contains no letter a-z, ``None`` when it has no
        pronunciation, otherwise the syllable count.
    """
    word = word.lower()
    if not re.match('.*[a-z]', word):
        # contains only non-alpha characters
        return 0
    phones = pronouncing.phones_for_word(word)
    if not phones:
        return None
    # Count the pronunciation string in one call.  The earlier code looped
    # over its individual characters and summed a syllable_count call for
    # each one, which only produced the right total by accident.
    return pronouncing.syllable_count(phones[0])
def get_animal():
    """Return a random pycorpora common animal whose first pronunciation has at most two syllables.

    Animals with no pronunciation are never picked.  ``random.choice``
    raises ``IndexError`` if no animal qualifies.
    """
    def is_short(name):
        # True when the name is pronounceable in two syllables or fewer.
        found = pronouncing.phones_for_word(name)
        return bool(found) and pronouncing.syllable_count(found[0]) <= 2

    short_names = [
        name for name in pycorpora.animals.common["animals"]
        if is_short(name)
    ]
    return random.choice(short_names)
def get_syllables(word):
    """Return the syllable count of ``word``'s first pronunciation.

    Falls back to ``estimate_syllables(word)`` when the word has no
    pronunciation.
    """
    candidates = pronouncing.phones_for_word(word)
    if candidates:
        return 0 + pronouncing.syllable_count(candidates[0])
    return 0 + estimate_syllables(word)
def est_num_syllables(word):
    """
    :param word: A string.
    :return: The syllable count of the word's first pronunciation when one
        is found; otherwise an estimate of # characters / 4.
    """
    candidates = pronouncing.phones_for_word(word)
    if not candidates:
        return len(word) / 4
    return pronouncing.syllable_count(candidates[0])
def countSyllables(sentence):
    """Count the syllables in a sentence.

    The characters ``!.,?;:)(`` are removed from each whitespace-separated
    word before lookup, and the first pronunciation is counted.  Words that
    cannot be counted are skipped.
    """
    strip_table = str.maketrans(dict.fromkeys('!.,?;:)('))
    total = 0
    for token in sentence.split():
        cleaned = token.translate(strip_table)
        try:
            first_pronunciation = pronouncing.phones_for_word(cleaned)[0]
            total += pronouncing.syllable_count(first_pronunciation)
        except Exception:
            continue
    return total
def newWord(grammar, dic):
    """Pick a random pronounceable word from ``dic[grammar]`` and add its
    syllables to the module-level ``syllables`` counter.

    Words with no pronunciation are re-drawn.  The loop does not end if no
    word in the list has a pronunciation.

    :param grammar: key into ``dic`` selecting the word list.
    :param dic: mapping from grammar key to a sequence of words.
    :return: the chosen word, or the string ``'-1'`` when the updated
        ``syllables`` total exceeds 10.
    """
    global syllables
    candidates = dic[grammar]
    while True:
        # randrange excludes its stop value, so use len(candidates) itself:
        # the earlier ``len(...) - 1`` bound could never pick the last word
        # and raised ValueError on a one-word list.
        word = candidates[randrange(len(candidates))]
        pronunciations = pronouncing.phones_for_word(word)
        if pronunciations:
            break
    syllables += pronouncing.syllable_count(pronunciations[0])
    if syllables > 10:
        return '-1'
    return word
def syllable_counter(lines):
    '''
    Count all syllables in a list of strings.

    NOTE: This does not factor in multi-syllabic digits, times
    (i.e. 1:03), and most likely other non-"word" words.

    Input
    -----
    lines : list (str)
        List of strings to count.

    Output
    ------
    int
        Total number of syllables in the input list.

    [Modified from Allison Parrish's example in the documentation for her
    library, pronouncing]:
    https://pronouncing.readthedocs.io/en/latest/tutorial.html
    '''
    # Words without a pronunciation are counted with the phoneme for 'I',
    # so that every word contributes at least one syllable.
    placeholder = 'AY1'
    grand_total = 0
    for line in lines:
        for word in line.split():
            found = pronouncing.phones_for_word(word)
            phoneme = found[0] if found else placeholder
            grand_total += pronouncing.syllable_count(phoneme)
    return grand_total
def _syllables_in_word(word):
    """Returns the number of syllables in the word.

    Arguments:
      word (:py:class:`str`): The word to calculate the syllable count for.

    Returns:
      :py:class:`int`: The syllable count of the word's first pronunciation
      (or ``None`` if no pronunciation was found).

    """
    candidates = phones_for_word(word)
    return syllable_count(candidates[0]) if candidates else None
def to_syl_count(self, wordl):
    """Yield the syllable count of each word in ``wordl`` that has a pronunciation.

    Each word is lower-cased and counted using its first pronunciation.
    ``self.stats`` is incremented under 'readingage_in_cmu_dict' for every
    word counted and under 'readingage_not_in_cmu_dict' for every word
    skipped.
    """
    for word in wordl:
        # This uses the CMU dictionary of (American) pronunciations
        candidates = phones_for_word(word.lower())
        if not candidates:
            # TODO: Should consider using a fallback rule/letter-based
            # syllable counter in case the CMU pronouncing dictionary
            # doesn't have the word. Not disastrous, as we use the
            # full word count when calculating average sentence
            # length, and only the words for which we have syllable
            # counts when calculating average syllables.
            self.stats.inc_value('readingage_not_in_cmu_dict')
            continue
        count = syllable_count(candidates[0])
        self.stats.inc_value('readingage_in_cmu_dict')
        yield count
def algorithm_accuracy():
    """Compare ``estimate_syllables`` against the pronouncing dictionary.

    Reads whitespace-separated words from ``common_words.txt`` (UTF-8) and
    sums, over every word that has a pronunciation, the estimated and the
    actual syllable counts.  Words with no pronunciation are skipped because
    there is no reference count to compare against.

    :return: ``(actual_sum - estimate_sum) / actual_sum``.
    :raises ZeroDivisionError: when no word contributes any actual syllables.
    """
    # ``with`` closes the file; the earlier code left the handle open.
    with open('common_words.txt', encoding="utf8") as word_file:
        words = word_file.read().split()
    estimate_sum = 0
    actual_sum = 0
    for word in words:
        pronunciation_list = pronouncing.phones_for_word(word)
        if not pronunciation_list:
            continue
        estimate_sum += estimate_syllables(word)
        actual_sum += pronouncing.syllable_count(pronunciation_list[0])
    return (actual_sum - estimate_sum) / actual_sum
def get_random_word(part_of_speech, syllables=None, skip_plurals=False):
    """
    Get this kind of word with this many syllables.
    If none found with this many syllables, any'll do.

    Every pronunciation of every candidate word is checked, and each check
    prints the count and the word.  With ``skip_plurals`` set, a word ending
    in a single "s" is treated as a possible plural and never returned from
    the search (the random fallback may still return it).

    :param part_of_speech: passed to ``get_random_words_from_wordnik``.
    :param syllables: wanted syllable count.
    :param skip_plurals: skip words that look like plurals.
    :return: the first matching word, otherwise ``random.choice`` of all
        candidates (which raises ``IndexError`` when there are none).
    """
    words = get_random_words_from_wordnik(part_of_speech)
    for word in words:
        # Slices rather than indexes, so a one-letter word such as "s"
        # cannot raise IndexError as ``word[-2]`` did.
        maybe_plural = word[-1:].lower() == "s" and word[-2:-1] != "s"
        for pronunciation in pronouncing.phones_for_word(word):
            count = pronouncing.syllable_count(pronunciation)
            print(count, word)
            if skip_plurals and maybe_plural:
                # Might be plural, just skip it
                print("Plural? Skip!")
                continue
            if count == syllables:
                # Bingo!
                return word
    # Any'll do
    return random.choice(words)
import nltk from nltk.corpus import brown import pronouncing from collections import defaultdict pronouncing.init_cmu() print "Building syllable count DB" sylcount = {} for word, phones in pronouncing.pronunciations: if word in sylcount: continue sylcount[word] = pronouncing.syllable_count(phones) print "Counting syllables" by_pos_count = defaultdict(set) for word, tag in brown.tagged_words(): if word[0].isupper() and not tag.startswith("NP"): continue tag = tag.split('-')[0] try: count = sylcount[word.lower()] except: continue by_pos_count[tag, count].add(word) print "Writing output" for tag, count in by_pos_count.keys(): filename = "wordlists/%s-%d.txt" % (tag, count) with open(filename, "w") as f: for word in sorted(by_pos_count[tag, count]):
def my(word):
    """Return the syllable count of ``word``'s first pronunciation.

    Falls back to ``syllables_en.count(word)`` when the word has no
    pronunciation.
    """
    candidates = pronouncing.phones_for_word(word)
    if not candidates:
        return syllables_en.count(word)
    return pronouncing.syllable_count(candidates[0])
# All captions returned for the first result.
wjdata_list = wjdata['results'][0]['captions']

# Storage for captions grouped by syllable count: exactly 5, exactly 7, and
# 2 or 3 syllables.
syllables5 = []
syllables7 = []
syllables23 = []

# Check every caption for a fitting syllable count (using pronouncingpy + CMU
# pronouncing dictionary).  Iterating the list itself replaces the earlier
# hard-coded range(1, 83), which ignored captions past the 82nd and relied on
# the IndexError handler to hide out-of-range indexes on shorter lists.
for text in wjdata_list:
    try:
        phones = [pronouncing.phones_for_word(p)[0] for p in text.split()]
    except IndexError:
        # skip captions containing a word with no pronunciation, such as the
        # non-indexed word <UNK>
        continue
    count = sum(pronouncing.syllable_count(p) for p in phones)
    if count == 5:
        syllables5.append(text)
    elif count == 7:
        syllables7.append(text)
    elif count in (2, 3):
        syllables23.append(text)