def generate_words_list(src_text, chars_max_count=0, words_max_count=0):
    """Generate pseudo-text from *src_text* using three Markov chains.

    src_text: training text; lower-cased and cleaned before tokenization.
    chars_max_count: stop once this many characters are generated (0 = no limit).
    words_max_count: stop once this many words are generated (0 = no limit).

    Returns a list of Text.PunctedWord objects; the final word's punctuation
    is forced to '.'.
    Raises ValueError when neither limit is positive (the loop would never stop).
    """
    if chars_max_count <= 0 and words_max_count <= 0:
        raise ValueError('Either chars_max_count or words_max_count must be positive.')
    output_words = []
    src_text = src_text.lower()
    cleared_text = Text.clear_input_text(src_text)
    words = Text.split_text_alnum(cleared_text)
    punct_words = Text.split_text_punct(cleared_text)
    # word chain: previous word's root -> next Word object
    chain = MarkovChain()
    for i in range(1, len(words)):
        chain.add_pair(words[i - 1].root, words[i])
    # punctuation chain: word root -> the punctuation token that follows it,
    # or '' when the next token is a regular word
    punct_chain = MarkovChain()
    for i in range(len(punct_words) - 1):
        if not Text.is_punct(punct_words[i]):
            if Text.is_punct(punct_words[i + 1]):
                punct_chain.add_pair(punct_words[i].root, punct_words[i + 1].root)
            else:
                punct_chain.add_pair(punct_words[i].root, '')
    suffix_chain = MarkovChain()
    # (previous suffix, current root) -> current suffix
    for i in range(1, len(words)):
        suffix_chain.add_pair((words[i - 1].suffix, words[i].root), words[i].suffix)
    next_word, prev_word = Text.Word(''), Text.Word('')
    generated_chars = 0
    while True:
        prev_word = next_word
        if next_word.is_empty():
            # chain dead-ended (or first iteration): restart from a random word
            next_word = random.choice(words)
        else:
            next_word = chain.get_next_word(next_word.root, lambda: Text.Word(''))
        suffix = suffix_chain.get_next_word((prev_word.suffix, next_word.root), lambda: '')
        if len(suffix) == 0:
            # no learned suffix for this context: fall back to the word's own suffix
            suffix = next_word.suffix
        punct = punct_chain.get_next_word(next_word.root, lambda: '')
        # capitalize at the start of the output or after a sentence-ending word
        if len(output_words) == 0 or output_words[-1].is_ending_word():
            res_word = Text.PunctedWord(next_word.root.capitalize(), suffix, punct)
        else:
            res_word = Text.PunctedWord(next_word.root, suffix, punct)
        output_words += [res_word]
        generated_chars += len(res_word)
        if chars_max_count > 0 and generated_chars > chars_max_count:
            break
        if words_max_count > 0 and len(output_words) > words_max_count:
            break
    # always terminate the generated text with a period
    output_words[-1].punct = '.'
    return output_words
def render_page():
    """Render index.html with one 9-word Markov sentence per source poem."""
    my_list = read_file('tintern_abbey.txt')
    chain = MarkovChain(my_list)
    num_words = 9  # fix: was the redundant expression int(10) - 1
    my_sentence = chain.walk(num_words)
    my_list2 = read_file("the_rime.txt")
    chain2 = MarkovChain(my_list2)
    num_words2 = 9  # fix: was the redundant expression int(10) - 1
    my_sentence2 = chain2.walk(num_words2)
    return render_template('index.html', sentence=my_sentence,
                           sentence2=my_sentence2)
def make_the_words():
    """Build a word list from transient.txt and return a 10-word Markov sentence."""
    filename = "transient.txt"
    transient_txt_words = []
    # fix: the file handle was never closed; iterate the file inside `with`
    with open(filename, "r") as src:
        for line in src:
            for word in line.split(' '):
                # strip surrounding punctuation/whitespace from each token
                transient_txt_words.append(word.strip(' . , ;" \n _ ?'))
    # removed: an unused histogram(...) call and a dead commented-out
    # frequency-sampling loop superseded by the Markov chain below
    num_words = 10
    markovchain = MarkovChain(transient_txt_words)
    sentence = markovchain.walk(num_words)
    return sentence
def markov(num=0):
    """Return a Markov-generated sentence.

    num: desired word count; falls back to 10 when not positive.
         (fix: the parameter was previously accepted but ignored —
         walk(10) was hard-coded.)
    """
    list_of_words = words_list()
    markovChain = MarkovChain(list_of_words)
    sentence = markovChain.walk(num if num > 0 else 10)
    return sentence
def __init__(self, sources, order):
    """Build a Markov text model.

    sources: list of text sources fed to parse_sources.
    order: Markov chain order; higher orders keep more history.
    """
    parsed = self.parse_sources(sources)
    self.order = order
    self.sentences = parsed
    self.markov_chain = MarkovChain(order, parsed)
def __init__(self, filename):
    """Initialize from *filename*: look up its tempo, then parse it
    into a fresh Markov chain. `ticks` is filled in later (starts None).
    """
    self.filename = filename
    self.ticks = None
    self.chain = MarkovChain()
    self.tempo = get_tempo(filename)
    self.parse()
def run(self):
    """Load a MIDI source, feed it through an order-1 Markov chain,
    and write the regenerated note list back out as MIDI."""
    print("Loading input and generating...")
    loaded, resolution, fmt = loadMidi.load('midi/bach_simple.mid')
    chain = MarkovChain(1)
    chain.add_string(convert.listToString(loaded))
    generated = ' '.join(chain.generate_text(50))
    writeMidi.writeList(convert.stringToList(generated), resolution, fmt)
    print('Process complete, output is in ./rebuilt.mid')
def generate_text(input, output):
    """Generate Markov text from the file at *input* and append it to *output*.

    NOTE: `input` shadows the builtin but is kept for interface
    compatibility. Word count still comes from the module-level `args.n`.
    """
    # fix: the stale-output check used the global args.o instead of the
    # `output` parameter, so a caller-supplied path was never cleared
    if os.path.isfile(output):
        os.remove(output)
    num_words = args.n
    contents = read_file(input)
    wordlist = contents.split(' ')
    markov = MarkovChain(wordlist)
    with open(output, 'a+') as f:
        f.write(markov.generate_text(num_words))
def gen_word():
    """Return a 10-word Markov-generated sentence from ./words.txt."""
    # fix: the file handle was never closed
    with open("./words.txt", "r") as src:
        lines = src.readlines()
    # removed: an unused histogram(lines) call and the dead commented-out
    # sampling loop it served (histogram appears pure — confirm)
    num_words = 10
    markovchain = MarkovChain(lines)
    sentence = markovchain.walk(num_words)
    return sentence
def generate_words():
    """Return a 10-word Markov walk over a fixed 'one fish two fish' word list."""
    # removed: a dead triple-quoted block of superseded histogram-sampling code
    markovchain = MarkovChain(
        ["one", "fish", "two", "fish", "red", "fish", "blue", "fish"])
    return markovchain.walk(10)
def main():
    """Train uni-, bi- and trigram Markov models on the philosophy
    corpus, then hand the chain to the bot and run it."""
    print("Loading data...")
    text = preprocess(load_data("data/philosophy").lower())
    print("Building Markov model...")
    mc = MarkovChain()
    for order in (1, 2, 3):
        mc.train_ngram(order, text)
    Bot(mc).run()
def handle_data():
    """Predict the three most likely next words for the two-word prefix
    posted in the form, returned as JSON with a permissive CORS header."""
    prefix = (request.form["1"], request.form["2"])
    model = MarkovChain("Database/w3_.db", n = 3)
    predictions = model.query(*prefix).keys()
    print(predictions)
    payload = dict(zip(["predict1", "predict2", "predict3"],
                       list(predictions)[:3]))
    response = jsonify(payload)
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
def create_sentence(word_num):
    """Return a Markov-generated sentence of *word_num* words from nietsche.txt."""
    with open("nietsche.txt", "r") as file:
        og_text = file.read()
    # fix: str.split() already discards surrounding whitespace, so the old
    # per-word rstrip() pass was a no-op and has been removed, along with
    # the unused `sentence_words` list
    word_list = og_text.split()
    chain = MarkovChain(word_list)
    chain.print_chain()
    sentence = chain.walk(word_num)
    return sentence
def run_dir(base_path, authors_path):
    """Concatenate all .txt files under base_path/authors_path, strip
    curly-quoted spans, generate 100 units of Markov text, print its
    repr, and save it to '<authors_path>.txt'."""
    text = ''
    path = base_path + authors_path + '/'
    files = [name for name in os.listdir(path) if '.txt' in name]
    for name in files:
        # fix: the loop variable previously shadowed the open file handle
        # (`for f in files: with open(...) as f:`); use distinct names
        with open(path + name, 'r') as handle:
            text += handle.read()
    # special treatment for wittgenstein formulas
    text = re.sub(r'“(.+?)”', '', text)
    markov = MarkovChain(text=text)
    bipolar_discourse = markov.generate(100)
    # fix: `print repr(x)` is Python-2-only syntax; print(repr(x)) behaves
    # identically under Python 2 for a single argument and also runs on 3
    print(repr(bipolar_discourse))
    save(authors_path + '.txt', bipolar_discourse)
def main():
    """Fit a first-order Markov chain on Brown-corpus verb phrases and
    score each good/bad sentence pair from examples.json against it."""
    # training data: POS-tagged Brown corpus sentences, filtered to verbs
    tagged = nltk.pos_tag_sents(brown.sents())
    verb_phrases = verb_filter(tagged)
    chain = MarkovChain(order=1)
    chain.fit(verb_phrases)
    chain.normalize_transition_matrix()
    for ex in load_examples('examples.json'):
        compare_sentences(mc=chain, s1=ex['good'], s2=ex['bad'])
def generate_words():
    """Return a 10-word Markov-generated sentence from ./words.txt."""
    # fix: the file handle was never closed
    with open("./words.txt", "r") as src:
        lines = src.readlines()
    # removed: an unused histogram(lines) call and the dead commented-out
    # sampling loop it served (histogram appears pure — confirm)
    word_list = []
    for line in lines:
        word_list.extend(line.split())
    num_words = 10
    markovchain = MarkovChain(word_list)
    sentence = markovchain.walk(num_words)
    return sentence
def generate_words():
    """Render index.html with a 24-word Markov sentence built from ./EAP.text."""
    words_list = []
    with open('./EAP.text') as f:
        for line in f:
            words_list.extend(line.split())
    markovchain = MarkovChain(words_list)
    # removed: a dead triple-quoted block of superseded histogram-sampling code
    sentence = markovchain.walk(24)
    return render_template('index.html', sentence=sentence)
def hello():
    """Return a 20-word Markov-generated sentence from ./words.txt."""
    # fix: the file handle was never closed
    with open("./words.txt", "r") as src:
        lines = src.readlines()
    word_list = []
    for line in lines:
        word_list.extend(line.split())
    print(word_list)  # debug output retained from the original
    markovchain = MarkovChain(word_list)
    return (markovchain.walk(20))
def generate_words():
    """Return a 10-word Markov-generated sentence from ./text.txt."""
    # fix: the original did `my_file = histogram("./text.txt")` and then
    # called .readlines() on the result; readlines() exists on file
    # objects, not histograms, so this always raised. Open the file.
    with open("./text.txt") as src:
        lines = src.readlines()
    word_list = []
    for line in lines:
        word_list.extend(line.split())
    # removed: an unused second histogram(lines) call and the dead
    # commented-out sampling loop it served
    num_words = 10
    markovchain = MarkovChain(word_list)
    sentence = markovchain.walk(num_words)
    return sentence
def generate_words():
    """Return a 10-word Markov-generated sentence from words.txt."""
    # fix: the file handle was never closed
    with open("words.txt", "r") as src:
        lines = src.readlines()
    # removed: Histogram(lines) and a weighted_sample(...) call whose
    # results were discarded (both appear pure — confirm)
    word_list = []
    for line in lines:
        word_list.extend(line.split())
    num_words = 10
    markovChain = MarkovChain(word_list)
    sentence = markovChain.walk(num_words)
    print("sentence", sentence)
    return sentence
class MarkovTests(unittest.TestCase):
    """Tests for MarkovChain.predict trained on 'the_black_cat.txt'."""

    # one shared chain, built once at class-definition time
    source_file = "the_black_cat.txt"
    with open(source_file, encoding="utf-8") as f:
        text = f.read()
    m = MarkovChain(text)

    def test_start(self):
        """Check that the correct start is used"""
        start = "From"
        predicted = self.m.predict(20, start)
        self.assertTrue(predicted.startswith(start))

    def test_length(self):
        """Check that the chain outputs the correct number of words"""
        n = 100
        # fix: predict was called with the literal 100 instead of n, so
        # changing n would silently desynchronize expected vs. actual
        predicted = self.m.predict(n)
        tokens = findall(d_re_pattern, predicted, d_re_flags)
        self.assertEqual(len(tokens), n)
def __init__(self, bot_token, master=None):
    """Initialize chat-bot state (field names suggest a Discord-style
    gateway client — session/heartbeat/guilds; confirm against callers).

    bot_token: token used to authenticate with the chat service.
    master: optional username granted ACL level 1000.
    """
    self.bot_token = bot_token
    self.session_id = None
    # ACL maps upper-cased usernames to privilege levels
    self.acl = {master.upper(): 1000} if master is not None else {}
    self._default_handlers()
    self.mc = MarkovChain()
    self.nn_temp = 0.7  # presumably a sampling temperature — TODO confirm
    self.seq_num = None
    self.hb_every = -1  # heartbeat interval; -1 until set — TODO confirm
    self.hb_task = None
    self.ident = ('', '', '') #user,disc,nick
    self.ident_id = ''
    self.guilds = {}
    # matches ".command"-style message prefixes
    self.cmd_re = re.compile('\\.(\\S+)')
    self.approver = srl_approve.SRLApprove()
def generate_words():
    """Return a 10-word Markov walk over the fixed 'one fish ...' word list."""
    lines = "one fish two fish red fish blue fish"
    # fix: the original iterated the string itself, walking it
    # character-by-character, so word_list ended up as single letters;
    # split on whitespace to get actual words. Also removed the
    # histogram/sample calls whose results were discarded.
    word_list = lines.split()
    num_words = 10
    markovChain = MarkovChain(word_list)
    sentence = markovChain.walk(num_words)
    print("sentence", sentence)
    return sentence
def render_page():
    """Render index.html with five 9-word Markov sentences from plato.txt."""
    my_list = read_file('plato.txt')
    chain = MarkovChain(my_list)
    num_words = 9  # fix: was the redundant expression int(10) - 1
    # same call order as before: five sequential walks over one chain
    sentences = [chain.walk(num_words) for _ in range(5)]
    return render_template('index.html',
                           sentence=sentences[0],
                           sentence2=sentences[1],
                           sentence3=sentences[2],
                           sentence4=sentences[3],
                           sentence5=sentences[4])
def main():
    """Train a bigram Markov model on the user's tweets and print ~50
    generated tweets that clear the model's probability threshold."""
    # read file, return list of tweets
    list_of_tweets = read_file()
    # returns 2 lists
    # first is list of each word used in all of my tweets
    # second is list of unigrams and frequencies i.e. {(word, frequency)}
    lengths, list_of_words, word_freq = get_counts(list_of_tweets)
    # print(word_freq)
    # get top users I have replied to
    top_users = get_top_users(word_freq)
    # return bigrams where {(word1, word2): frequency}
    bigrams = get_bigrams(list_of_tweets)
    # print(bigrams)
    # print(bigrams.keys())
    # create model for calculating the probability of a given sentence
    model = bigram_model(word_freq, bigrams)
    # print(model)
    #create Markov chain
    m = MarkovChain()
    m.learn(bigrams.keys())
    # print(m.memory)
    # generate length probability dictionary {length of sentence: frequency of sentences of that length}
    length_prob = length_probability(lengths)
    # output generated sentences, must have probability > 0.5
    generated_tweets = generate_sentences(length_prob, m, model, word_freq, 50)
    for tweet in generated_tweets:
        print(tweet)
def __init__(self, title_bank):
    """Seed an order-3 Markov chain with every title in *title_bank*,
    normalizing em-dashes to plain hyphens first."""
    self.markov = MarkovChain(3)
    for entry in title_bank.values():
        normalized = entry['title'].replace('—', '-')
        self.markov.add(normalized)
def __init__(self, nick, log=False):
    """Initialize the chat bot and its MUC room handler; replies come
    from a Markov chain trained on the user's Pidgin jabber logs."""
    GenericChatBot.__init__(self, nick, log)
    MucRoomHandler.__init__(self, nick)
    logs = PidginLogs('~/.purple/logs/jabber/')
    self.responder = MarkovChain(logs)
def randomized_markov(word_list, num): markov_chain = MarkovChain(word_list) return f'''
from markov import MarkovChain

# Build a chain from Dan's corpus and print one upper-cased 12-word sentence.
dan = MarkovChain('dan.txt')
# baldur = MarkovChain('baldur.txt')
print(dan.get_new_sentence(12).upper())
# print(baldur.get_new_sentence(15))
app = FlaskAPI(__name__)
CORS(app)

# fix: the open() handle was never closed; ''.join(f.readlines()) is
# equivalent to f.read(), so read once inside a `with` block
with open('./text_files/words.txt', 'r') as _words_file:
    words = _words_file.read().split()

MC = MarkovChain(words)

@app.route("/", methods=['GET'])
def notes_list():
    """Return ten Markov-generated sentences."""
    # removed: a no-op `request.method == 'GET'` comparison statement
    sentences = MC.walk(10)
    return sentences