def sentence_gen():
    """Return a random 7-word sentence sampled from siddhartha.txt.

    Relies on module-level helpers defined elsewhere in this project:
    get_words (tokenize a corpus file), histogram (word -> count),
    sample (weighted random draw), and sentence_maker (join words).
    """
    histo_text = get_words('siddhartha.txt')
    histo = histogram(histo_text)
    # Fix: the original drew one extra sample into an unused local
    # (`random_word`) before the loop; that dead draw is removed.
    # Draw 7 independent frequency-weighted samples from the histogram.
    random_words = [sample(histo) for _ in range(7)]
    return sentence_maker(random_words)
# NOTE(review): this chunk's whitespace was collapsed onto a single line.
# It contains three units:
#   1. The TAIL of a weighted-sampling helper whose `def` header lies
#      outside this view: it picks random indices into word_dict until one
#      whose count clears a random frequency threshold is found, then
#      returns that word (rejection sampling — TODO confirm intent against
#      the missing header).
#   2. make_sentence(chain, starting_words, sentence_len): seeds a sentence
#      with the two words of `starting_words`, then repeatedly asks
#      next_word(chain, previous_words) for a successor (skipping None),
#      sliding the 2-word state window forward, and joins the result with
#      spaces.
#   3. A __main__ driver: builds a markov_chain from 'life.txt', picks a
#      random starting state, and prints a 5-word sentence.
# Left byte-identical because the opening definition is not visible here
# and reformatting would require guessing the missing header.
max_freq = max(word_dict.values()) rand_frequency = random.uniform(0, max_freq) list_from_words = list(word_dict) while True: rand_index = random.randint(0, len(word_dict) - 1) selected_word = list_from_words[rand_index] if word_dict[selected_word] >= rand_frequency: return selected_word def make_sentence(chain, starting_words, sentence_len): previous_words = starting_words selected_list = [starting_words[0], starting_words[1]] for _ in range(sentence_len - 2): selected_word = next_word(chain, previous_words) if selected_word is not None: selected_list.append(selected_word) previous_words = (previous_words[1], selected_word) sentence = ' '.join(selected_list) return sentence if __name__ == '__main__': word_list = get_words('life.txt') chain = markov_chain(word_list) list_from_chain = list(chain) random_words = random.choice(list_from_chain) # print(random_words) sentence = make_sentence(chain, random_words, 5) print(sentence)
# NOTE(review): whitespace-collapsed chunk. It contains:
#   1. Imports (random, Dictogram, get_words, sampler) and a module-level
#      corpus load from "corpus/corpus-0.txt" that runs at import time.
#   2. first_order(corpus_of_words): builds a first-order Markov chain —
#      a dict mapping each word to a Dictogram (frequency histogram) of
#      the words that immediately follow it in the corpus; the
#      `corpus_length > (i + 1)` guard skips the final word, which has no
#      successor.
#   3. second_order(corpus_of_words): TRUNCATED — the chunk ends mid-body
#      at its first boundary check, so its definition is incomplete here.
# Left byte-identical because second_order is cut off at the chunk edge.
import random from dictogram import Dictogram from histogram import get_words from stochastics import sampler corpus = get_words("corpus/corpus-0.txt") def first_order(corpus_of_words): words = corpus_of_words chain = {} corpus_length = len(corpus_of_words) for i, key in enumerate(words): if corpus_length > (i + 1): word = words[i + 1] if key not in chain: chain[key] = Dictogram([word]) else: chain[key].add_count(word) return chain def second_order(corpus_of_words): words = corpus_of_words # list of strings chain = {} # dict to hold Markov states, key: word, value: histogram corpus_length = len(words) for i, word1 in enumerate(words): if i + 2 >= corpus_length:
# NOTE(review): whitespace-collapsed chunk, apparently the tail of a
# MarkovChain class (the class header and the enclosing `def` of the first
# statement are outside this view). It contains:
#   1. The TAIL of get_next_word: samples a successor from the histogram
#      stored under the current token tuple (`self[tokens].sample()`).
#   2. build_sentence(self, num_words, words_list): builds the state
#      histogram, seeds the sentence with a random starting key, then
#      repeatedly samples next words — stopping early on the '**STOP**'
#      sentinel — while sliding the token Queue window, and joins the
#      collected words with spaces.
#   3. A __main__ driver: order-4 MarkovChain over
#      'text/three_wishes.txt', printing a 6-word sentence.
# Left byte-identical because the enclosing class/method headers are not
# visible here.
return self[tokens].sample() def build_sentence(self, num_words, words_list): self.build_state_histogram(words_list) sentence = [] first_words = random.choice(list(self.keys())) tokens = Queue(first_words) sentence.extend(first_words) total_words = len(first_words) while total_words < num_words: next_word = self.get_next_word(tuple(tokens.items())) if next_word == '**STOP**': sentence.append(next_word) break sentence.append(next_word) tokens.dequeue() tokens.enqueue(next_word) total_words += 1 return ' '.join(sentence) if __name__ == '__main__': text = 'text/three_wishes.txt' words_list = histogram.get_words(text) markov = MarkovChain(4) # markov.build_state_histogram() # for key in markov: # print(key, markov[key]) print(markov.build_sentence(6, words_list))
# NOTE(review): whitespace-collapsed chunk. It contains:
#   1. The TAIL of a list-based sampling function whose `def` header is
#      outside this view: it accumulates total frequency over (word, count)
#      pairs, then walks the listogram summing normalized probabilities
#      until the running `chance` crosses a uniform random draw in [0, 1),
#      returning that word (cumulative-distribution sampling — TODO confirm
#      against the missing header).
#   2. A __main__ driver: builds a histogram from 'fish.txt', prints the
#      total frequency, draws and prints one sample, and keeps
#      commented-out calls for a sampling-distribution self-test.
# Left byte-identical because the opening definition is not visible here.
total_freq += index[1] print(total_freq) random_num = random.uniform(0, 1) #returns random sample words for index in listogram: chance += index[1] / total_freq if chance >= random_num: return index[0] if __name__ == '__main__': #using histogram functions to get corpus histo_text = get_words('fish.txt') histo = histogram(histo_text) test = total_freq(histo) print(test) #sampling using dictionary method sample_word = sample(histo) probability = print_probability(histo) print(sample_word) #testing that sampling function actually works # outcomes = test_probability(histo) # results = results_histogram(outcomes) # print(results) #list implementation of sampling
def random_word(source_text):
    """Return one word chosen uniformly at random from the file at source_text."""
    return random.choice(histogram.get_words(source_text))
def sample(words_list):
    """Return one word drawn from words_list, weighted by frequency.

    Uses the module-level helper get_word_distribution, which maps each
    word to the collection of draw numbers (1..len(words_list)) that
    select it; returns None if the draw matches no bucket.
    """
    distribution = get_word_distribution(words_list)
    draw = random.randint(1, len(words_list))
    for word, bucket in distribution.items():
        if draw in bucket:
            return word


def test(words_list):
    """Print observed vs. expected frequency of each word over 10,000 draws."""
    expected = histogram.histogram_dict(words_list)
    draws = [sample(words_list) for _ in range(10000)]
    observed = histogram.histogram_dict(draws)
    corpus_size = len(words_list)
    for word in observed:
        print(word, 'test', observed[word]/10000,
              'distribution', round((expected[word]/corpus_size), 4))
    return


# Demo: sample one word from the bundled corpus (runs at import time).
# words_list = histogram.get_words(sys.argv[1:][0])  # alt: take corpus from CLI
words_list = histogram.get_words('text/three_wishes.txt')
print(sample(words_list))
# test(words_list)
# NOTE(review): whitespace-collapsed chunk — the TAIL of a sampler function
# whose `def` header is outside this view, plus a __main__ driver. The tail
# converts a word list into a word->count dict (when given a list), then
# walks the cumulative counts until the running fraction crosses a uniform
# random draw, returning that word.
# BUG(review): `type(words) == list()` compares a type object to an EMPTY
# LIST instance and is always False — almost certainly meant
# `isinstance(words, list)`; confirm against the missing header.
# BUG(review): `dictionary = {}` sits INSIDE the for-loop, so (if the
# branch ever ran) the dict would be reset on every iteration and end up
# holding only the final word's count — it should be initialized once,
# before the loop.
# Left byte-identical because the opening definition is not visible here.
dictionary = words if type(words) == list(): for word in words: dictionary = {} dictionary[word] = words.count(word) random_value = random.random() total_value = sum(dictionary.values()) total = 0 # number_wins = {} for key, value in dictionary.items(): total += value if total / total_value >= random_value: return key # if key in number_wins: # number_wins[key] += 1 # else: # number_wins[key] = 1 # break if __name__ == '__main__': words = get_words('animals.txt') histogram_dict = get_words_counts_dict(words) # sampler(words) print(test_sampler(words, 10000))