示例#1
0
def generate_unigram_sentences(unigram_model, number):
    """Print `number` randomly generated sentences from a unigram model.

    Args:
        unigram_model: Sequence of token strings; one element is drawn
            uniformly at random with ``random.choice`` for every step.
        number: How many sentences to print.

    Each sentence is grown token by token until the token handed back by
    ``add_word_to_sentence`` is a member of ``END_SENTENCE_PUNCT``.  The
    finished sentence is printed, followed by a line holding a newline.
    """
    for _ in range(number):
        built, token = "", None
        while token not in END_SENTENCE_PUNCT:
            candidate = random.choice(unigram_model)
            # The helper returns the grown sentence plus the token it used;
            # that returned token is what the loop condition inspects.
            built, token = add_word_to_sentence(built, candidate.strip())
        print(built)
        print("\n")
示例#2
0
def generate_unigram_sentences(unigram_model, number):
    """Print `number` sentences sampled from a weighted unigram model.

    Args:
        unigram_model: Model object passed unchanged to
            ``weighted_random_pick`` to draw each token.
        number: How many sentences to print.

    A sentence is extended one token at a time until the token returned by
    ``add_word_to_sentence`` is a member of ``END_SENTENCE_PUNCT``.  Each
    sentence is printed, followed by a line holding a newline.  If the
    sentence cannot be encoded for the output stream, a fixed notice is
    printed in its place.
    """
    for _ in range(number):
        sentence = ""
        word = None
        while word not in END_SENTENCE_PUNCT:
            word = weighted_random_pick(unigram_model)
            sentence, word = add_word_to_sentence(sentence, word)
        try:
            print(sentence)
        except UnicodeError:
            # Only encoding failures are reported this way; a bare except
            # would also swallow KeyboardInterrupt and SystemExit.
            print("Could not print sentence due to an unrecognized character.")
        print("\n")
示例#3
0
def generate_trigram_sentences(trigram_model, number):
    """Print `number` sentences generated from a trigram model.

    Args:
        trigram_model: Nested mapping indexed as
            ``trigram_model[first][second]``; the keys of the innermost
            mapping are the tokens seen after the pair ``(first, second)``.
        number: How many sentences to print.

    For every sentence an opening pair of words is chosen by rejection
    sampling: neither word may be in ``ALL_PUNCT``, and the pair must have
    at least one follower that is not in ``ALL_PUNCT``.  The sentence is
    then extended with ``word_from_trigram_model_and_previous_word`` until
    the token returned by ``add_word_to_sentence`` is a member of
    ``END_SENTENCE_PUNCT``.

    Note: if the model contains no acceptable opening pair, the sampling
    loop does not terminate.
    """
    for _ in range(number):
        # "." is the "nothing chosen yet" sentinel; it is expected to be a
        # member of ALL_PUNCT so that the loops below run at least once.
        one = "."
        two = "."
        while one in ALL_PUNCT:
            one = random.choice(list(trigram_model.keys()))
            if one in ALL_PUNCT:
                continue  # punctuation cannot open a sentence; draw again
            second_words = trigram_model[one]
            if all(key in ALL_PUNCT for key in second_words.keys()):
                # Every token following `one` is punctuation.  Restart right
                # away instead of falling through and indexing the model
                # with the "." sentinel (KeyError / endless inner loop).
                one = "."
                continue

            two = "."
            while two in ALL_PUNCT:
                two = random.choice(list(second_words.keys()))
                if all(key in ALL_PUNCT for key in second_words[two].keys()):
                    # Don't choose a pair of words that are only ever
                    # followed by punctuation: start over with a new `one`.
                    one = "."
                    break

        sentence, _ = add_word_to_sentence("", one)
        sentence, _ = add_word_to_sentence(sentence, two)
        word = None
        while word not in END_SENTENCE_PUNCT:
            word = word_from_trigram_model_and_previous_word(trigram_model, one, two)
            sentence, word = add_word_to_sentence(sentence, word, override=True)
            if word:
                # Slide the two-word context window forward.
                one = two
                two = word
        try:
            print(sentence)
        except UnicodeError:
            # Only encoding failures are reported this way; a bare except
            # would also swallow KeyboardInterrupt and SystemExit.
            print("Could not print sentence due to an unrecognized character.")
        print("\n")
示例#4
0
def generate_bigram_sentences(bigram_model, number, sentence="", starting_word="."):
    """Print `number` sentences generated from a bigram model.

    Args:
        bigram_model: Mapping whose keys are words; looked up with ``.get``
            and passed to ``word_from_bigram_model_and_previous_word``.
        number: How many sentences to print.
        sentence: Optional text every generated sentence starts from.
        starting_word: Word the generation continues from.  The default
            ``"."`` (or any member of ``END_SENTENCE_PUNCT``) means "pick a
            random key of the model for each sentence".

    If `starting_word` is given but has no truthy entry in the model, an
    error line naming it is printed and generation falls back to a random
    start with an empty sentence.  Each sentence is extended until the token
    returned by ``add_word_to_sentence`` is in ``END_SENTENCE_PUNCT``.
    """
    if starting_word != "." and not bigram_model.get(starting_word):
        # Report the offending word *before* resetting it; otherwise the
        # message would always name the "." sentinel.
        print("Error occured, starting word '" + starting_word + "' ")
        starting_word = "."
        sentence = ""

    for _ in range(number):
        # Work on per-sentence copies so that one sentence's text and random
        # start word do not leak into the next iteration.
        first_word = starting_word
        current = sentence
        while first_word in END_SENTENCE_PUNCT:
            first_word = random.choice(list(bigram_model.keys()))
        if not current:
            current = first_word.title()
        base_word = first_word
        word = None
        while word not in END_SENTENCE_PUNCT:
            word = word_from_bigram_model_and_previous_word(bigram_model, base_word)
            current, word = add_word_to_sentence(current, word)
            if word:
                base_word = word
        try:
            print(current)
        except UnicodeError:
            # Only encoding failures are reported this way; a bare except
            # would also swallow KeyboardInterrupt and SystemExit.
            print("Could not print sentence due to an unrecognized character.")
        print("\n")