def generate_unigram_sentences(unigram_model, number):
    """Print ``number`` sentences built from uniformly sampled unigrams.

    Each sentence is grown one token at a time: a token is drawn with
    ``random.choice`` from ``unigram_model``, stripped of surrounding
    whitespace and handed to ``add_word_to_sentence`` together with the
    sentence so far.  Growth stops once the token handed back by that helper
    is in ``END_SENTENCE_PUNCT``.

    Args:
        unigram_model: Non-empty sequence of string tokens to sample from.
        number: How many sentences to print.

    Note:
        The inner loop only terminates if sampling can eventually produce a
        token that ends up in ``END_SENTENCE_PUNCT``.
    """
    for _ in range(number):
        sentence = ""
        word = None
        while word not in END_SENTENCE_PUNCT:
            word = random.choice(unigram_model).strip()
            # The helper returns the updated sentence and the token it used.
            sentence, word = add_word_to_sentence(sentence, word)
        # Printing can fail when the sentence holds characters the output
        # stream cannot encode; report that instead of aborting the run.
        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and as the Python 3 function.
        try:
            print(sentence)
        except UnicodeError:
            print("Could not print sentence due to an unrecognized character.")
        print("\n")
def generate_unigram_sentences(unigram_model, number):
    """Print ``number`` sentences built from weighted unigram sampling.

    Each sentence is grown one token at a time: a token is obtained from
    ``weighted_random_pick(unigram_model)`` and handed to
    ``add_word_to_sentence`` together with the sentence so far.  Growth stops
    once the token handed back by that helper is in ``END_SENTENCE_PUNCT``.

    Args:
        unigram_model: The unigram model, passed unchanged to
            ``weighted_random_pick`` (presumably token -> weight; confirm
            against that helper).
        number: How many sentences to print.

    Note:
        The inner loop only terminates if sampling can eventually produce a
        token that ends up in ``END_SENTENCE_PUNCT``.
    """
    for _ in range(number):
        sentence = ""
        word = None
        while word not in END_SENTENCE_PUNCT:
            word = weighted_random_pick(unigram_model)
            # The helper returns the updated sentence and the token it used.
            sentence, word = add_word_to_sentence(sentence, word)
        # Only encoding problems are expected here; catching UnicodeError
        # rather than everything keeps Ctrl-C and real bugs visible.
        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and as the Python 3 function.
        try:
            print(sentence)
        except UnicodeError:
            print("Could not print sentence due to an unrecognized character.")
        print("\n")
def _has_non_punct_key(mapping):
    """Return True if at least one key of ``mapping`` is not in ``ALL_PUNCT``."""
    return any(key not in ALL_PUNCT for key in mapping)


def _pick_trigram_seed(trigram_model):
    """Randomly choose the first two tokens of a sentence.

    Candidates are rejected and redrawn until both tokens are outside
    ``ALL_PUNCT`` and the pair can be followed by at least one token that is
    outside ``ALL_PUNCT`` as well.

    Returns:
        A ``(first, second)`` tuple of tokens.

    Note:
        Loops until a valid pair is drawn, so the model must contain one.
    """
    first_tokens = list(trigram_model)  # built once; indexable on Python 2 and 3
    while True:
        one = random.choice(first_tokens)
        # Skip punctuation and words that are only ever followed by punctuation.
        if one in ALL_PUNCT or not _has_non_punct_key(trigram_model[one]):
            continue
        # Drawing uniformly from the non-punctuation followers is equivalent
        # to redrawing from all followers until a non-punctuation one appears.
        second_tokens = [tok for tok in trigram_model[one] if tok not in ALL_PUNCT]
        two = random.choice(second_tokens)
        # Don't start with a pair that is only ever followed by punctuation;
        # in that case start over with a new first word.
        if _has_non_punct_key(trigram_model[one][two]):
            return one, two


def generate_trigram_sentences(trigram_model, number):
    """Print ``number`` sentences generated from a trigram model.

    A sentence starts with a randomly chosen pair of non-punctuation tokens
    and is extended with tokens from
    ``word_from_trigram_model_and_previous_word`` until the token handed back
    by ``add_word_to_sentence`` is in ``END_SENTENCE_PUNCT``.

    Args:
        trigram_model: Nested mapping indexed as
            ``trigram_model[first][second]``, whose innermost value is a
            mapping keyed by the possible third tokens.
        number: How many sentences to print.
    """
    for _ in range(number):
        one, two = _pick_trigram_seed(trigram_model)

        sentence, _unused = add_word_to_sentence("", one)
        sentence, _unused = add_word_to_sentence(sentence, two)

        word = None
        while word not in END_SENTENCE_PUNCT:
            word = word_from_trigram_model_and_previous_word(trigram_model, one, two)
            sentence, word = add_word_to_sentence(sentence, word, override=True)
            if word:
                # Slide the two-token context forward only when the helper
                # handed back a truthy token.
                one, two = two, word

        # Only encoding problems are expected here; catching UnicodeError
        # rather than everything keeps Ctrl-C and real bugs visible.
        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and as the Python 3 function.
        try:
            print(sentence)
        except UnicodeError:
            print("Could not print sentence due to an unrecognized character.")
        print("\n")
def generate_bigram_sentences(bigram_model, number, sentence="", starting_word="."):
    """Print ``number`` sentences generated from a bigram model.

    Every sentence starts from the same caller-supplied ``sentence`` prefix
    and ``starting_word``.  When ``starting_word`` is in
    ``END_SENTENCE_PUNCT`` (the default "." is expected to be), a fresh
    random start word is drawn for each sentence.  Tokens from
    ``word_from_bigram_model_and_previous_word`` are then appended until the
    token handed back by ``add_word_to_sentence`` is in
    ``END_SENTENCE_PUNCT``.

    Args:
        bigram_model: Mapping keyed by token; ``bigram_model.get(token)`` must
            be truthy for a token to be usable as a starting word.
        number: How many sentences to print.
        sentence: Optional text each generated sentence begins with.  When
            empty, the title-cased start word is used instead.
        starting_word: Token generation continues from.  If it is not "." and
            has no truthy entry in ``bigram_model``, an error line is printed
            and both ``starting_word`` and ``sentence`` fall back to their
            defaults.
    """
    if starting_word != "." and not bigram_model.get(starting_word):
        # Report the offending word *before* discarding it.
        print("Error occured, starting word '" + starting_word + "' ")
        starting_word = "."
        sentence = ""

    candidates = list(bigram_model)  # built once; indexable on Python 2 and 3

    for _ in range(number):
        # Work on per-sentence copies so one sentence never leaks into the
        # next (neither its text nor its randomly chosen start word).
        first_word = starting_word
        current = sentence

        while first_word in END_SENTENCE_PUNCT:
            first_word = random.choice(candidates)
        if not current:
            current = first_word.title()

        base_word = first_word
        word = None
        while word not in END_SENTENCE_PUNCT:
            word = word_from_bigram_model_and_previous_word(bigram_model, base_word)
            current, word = add_word_to_sentence(current, word)
            if word:
                # Advance the context only when the helper handed back a
                # truthy token.
                base_word = word

        # Only encoding problems are expected here; catching UnicodeError
        # rather than everything keeps Ctrl-C and real bugs visible.
        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and as the Python 3 function.
        try:
            print(current)
        except UnicodeError:
            print("Could not print sentence due to an unrecognized character.")
        print("\n")