def generate_status(tweet_list):
    """(list) -> str, returns markov-generated status.

    Joins the tweets into one training text, builds a MarkovGenerator
    with it, and returns a lowercased generated status.  Returns None
    when generation fails with a ValueError (e.g. not enough text).
    """
    tweet_text = ' '.join(tweet_list)
    try:
        mc = MarkovGenerator(tweet_text, 90, tokenize_fun=twitter_tokenize)
        return mc.generate_words().lower()
    except ValueError as e:
        # Best-effort: log the failure and let the caller receive None.
        # (print(e) is valid on both Python 2 and 3 for a single argument.)
        print(e)
        return None
def test_train(self):
    """Training on test.txt must yield exactly the hand-built 2-gram model."""
    generator = MarkovGenerator(
        MarkovGenerator.TrainStrategy(database=self.db,
                                      text_path='test_texts/test.txt',
                                      window_size=2))
    # Expected model, written out by hand for the known test corpus.
    manual_model = {
        'end бои': { 'у': 1 },
        'бои у': { 'сопоцкина': 1 },
        'у сопоцкина': { 'и': 1 },
        'сопоцкина и': { 'друскеник': 1 },
        'и друскеник': { 'закончились': 1 },
        'друскеник закончились': { 'отступлением': 1 },
        'закончились отступлением': { 'германцев': 1 },
        'отступлением германцев': { '.': 1 },
        'германцев .': { 'end': 1 },
    }
    tokenizer = generator.strategy.tokenizer
    # Rebuild a word-keyed model from the stored index-keyed rows.
    rebuilt = {}
    for row in self.db['model'].find():
        words = ' '.join(tokenizer.idx2word(idx) for idx in row['key'].split())
        rebuilt[words] = {
            tokenizer.idx2word(k): v for k, v in row['value'].items()
        }
    self.assertEqual(manual_model, rebuilt)
def __init__(self, context, config):
    """Load every persisted Markov table for this plugin from the context DB.

    Unknown keys fall back to a fresh MarkovGenerator via defaultdict.
    """
    self.db = context.db.get('MarkovPlugin')
    self.tables = defaultdict(MarkovGenerator)
    for item in self.db.getAll():
        self.tables[item.key] = MarkovGenerator.from_dict(item.value)
    # NOTE(review): looks like a debug leftover; kept to preserve behavior.
    # (print(x) with one argument prints identically on Python 2 and 3.)
    print(self.tables)
class TextGrid(object):
    """Reads Praat .TextGrid files from `path`/textGrids/ and feeds each pair
    of consecutive annotations into a Markov generator.

    Attributes:
        generator:   MarkovGenerator trained on (char, annotation) pairs.
        path:        directory the .TextGrid files are read from.
        annotations: annotation-text -> (source filename sans extension,
                     annotation object); later duplicates overwrite earlier.
    """

    def __init__(self, path):
        self.generator = MarkovGenerator(n=1, max=3)
        self.path = path + '/textGrids/'
        self.annotations = dict()
        for tgFile in os.listdir(self.path):
            if not tgFile.endswith('.TextGrid'):
                continue
            tg = tgt.read_textgrid(self.path + tgFile)
            # Flatten all tiers into one annotation list.
            file_annotations = [i for t in tg.tiers for i in t]
            filename = tgFile[:-9]  # strip the '.TextGrid' suffix
            for i, a1 in enumerate(file_annotations):
                self.annotations[a1.text] = (filename, a1)
                # Feed consecutive pairs; the last annotation has no successor.
                if i + 1 < len(file_annotations):
                    self.feedMarkov(a1, file_annotations[i + 1])

    def generateText(self, numLines):
        """Return a list of `numLines` generated strings."""
        return [self.generator.generate() for _ in range(numLines)]

    def feedMarkov(self, a1, a2):
        """Strip both annotations' text IN PLACE, then feed the generator the
        pair ((last char of a1.text, a1), (first char of a2.text, a2)).

        NOTE(review): raises IndexError if either text is empty after
        stripping — same as the original; confirm inputs are non-empty.
        """
        a1.text = a1.text.strip()
        a2.text = a2.text.strip()
        first = (a1.text[-1], a1)
        second = (a2.text[0], a2)
        self.generator.feed(first, second)
def generate_random_content():
    """Populate the module-level `contents` and `titles` lists with random
    markov-generated text trained on Paradise Lost."""
    global contents
    global titles

    def random_content():
        # Four generated sentences joined as paragraphs.
        return ".\n\n".join([(mg.say().strip()) for _i in range(4)])

    start = time.time()
    print("Generating random content...", end="")
    sys.stdout.flush()
    mg = MarkovGenerator(2)
    # `file()` was removed in Python 3; open() + a context manager works on
    # both versions and guarantees the handle is closed.
    with open("/rinjani/var/data/milton-paradise.txt") as fh:
        txt = fh.read()
    txt = re.sub('["*]*-', "", txt)
    mg.learn(txt)
    contents = [random_content() for _i in range(200)]
    titles = [re.sub(r"[^\w\s]+", "", truncate_words(mg.say().strip(), 10))
              for _i in range(100)]
    print(" finished in %ds." % (time.time() - start))
    sys.stdout.flush()
def generate_random_content():
    """Populate the module-level `contents` and `titles` lists with random
    markov-generated text trained on Paradise Lost."""
    global contents
    global titles

    def random_content():
        # Four generated sentences joined as paragraphs.
        return ".\n\n".join([(mg.say().strip()) for _i in range(4)])

    start = time.time()
    print("Generating random content...", end="")
    sys.stdout.flush()
    mg = MarkovGenerator(2)
    # `file()` was removed in Python 3; open() + a context manager works on
    # both versions and guarantees the handle is closed.
    with open('/rinjani/var/data/milton-paradise.txt') as fh:
        txt = fh.read()
    txt = re.sub('["*]*-', '', txt)
    mg.learn(txt)
    contents = [random_content() for _i in range(200)]
    titles = [
        re.sub(r'[^\w\s]+', '', truncate_words(mg.say().strip(), 10))
        for _i in range(100)
    ]
    print(" finished in %ds." % (time.time() - start))
    sys.stdout.flush()
def __init__(self, path):
    """Read every .TextGrid under `path`/textGrids/, record each annotation
    in self.annotations (text -> (filename sans extension, annotation)),
    and feed consecutive annotation pairs into the Markov generator.
    """
    self.generator = MarkovGenerator(n=1, max=3)
    self.path = path + '/textGrids/'
    self.annotations = dict()
    for tgFile in os.listdir(self.path):
        if not tgFile.endswith('.TextGrid'):
            continue
        tg = tgt.read_textgrid(self.path + tgFile)
        # Flatten all tiers into one annotation list.
        file_annotations = [i for t in tg.tiers for i in t]
        filename = tgFile[:-9]  # strip the '.TextGrid' suffix
        for i, a1 in enumerate(file_annotations):
            # NOTE: later duplicates of the same text overwrite earlier ones.
            self.annotations[a1.text] = (filename, a1)
            # Feed consecutive pairs; the last annotation has no successor.
            if i + 1 < len(file_annotations):
                self.feedMarkov(a1, file_annotations[i + 1])
# Tail of a lookup table whose opening `name = {` sits above this chunk
# (not visible here).  Each key maps to its own 8-bit binary expansion
# (e.g. 24 == 0b00011000 -> [0,0,0,1,1,0,0,0]); presumably these are
# elementary cellular-automaton rule numbers — TODO confirm.
24 : [0,0,0,1,1,0,0,0],\
25 : [0,0,0,1,1,0,0,1],\
26 : [0,0,0,1,1,0,1,0],\
27 : [0,0,0,1,1,0,1,1],\
28 : [0,0,0,1,1,1,0,0],\
29 : [0,0,0,1,1,1,0,1],\
30 : [0,0,0,1,1,1,1,0],\
54 : [0,0,1,1,0,1,1,0],\
60 : [0,0,1,1,1,1,0,0],\
62 : [0,0,1,1,1,1,1,0],\
102 : [0,1,1,0,0,1,1,0],\
126 : [0,1,1,1,1,1,1,0],\
188 : [1,0,1,1,1,1,0,0]\
}

# Train the generator on stdin, one lowercased, stripped line at a time.
generator = MarkovGenerator(n=3,max=2) # initialize the generator.
for line in sys.stdin:
    line = line.lower()
    line = line.strip()
    generator.feed(line)

def wordFromRule(lines,j): # inputs list and gets an n-gram.
    """Return the last word of a generation seeded with `lines` of
    length `j`; empty string when j <= 0."""
    if j > 0:
        m = generator.generate(lines,j)
        # print m
        m = m.split(" ")
        return m[len(m)-1]
    else:
        return ""

# NOTE: the body of newgen continues past this chunk.
def newgen(x, y, rule):
import random
from markov import MarkovGenerator
from markov_by_char import CharacterMarkovGenerator

# word MarkovGenerator
generator = MarkovGenerator(n=2, max=500)

# character MarkovGenerator
#generator = CharacterMarkovGenerator(n=3, max=100)

# Context managers so the corpus files are always closed.
with open('white-album.txt') as f:
    for line in f:
        line = line.strip()
        # Fed twice — presumably deliberate weighting toward this corpus;
        # kept to preserve the original behavior.
        generator.feed(line)
        generator.feed(line)

with open('black-album.txt') as f:
    for line in f:
        generator.feed(line.strip())

for i in range(3):
    # print(x) with one argument behaves the same on Python 2 and 3.
    print(generator.generate())
import sys
# NOTE(review): Py2-only encoding hack; kept to preserve behavior on the
# interpreter this was written for — remove when porting to Python 3.
reload(sys)
sys.setdefaultencoding('utf8')
from markov import MarkovGenerator

# Renamed from `file` (shadowed the builtin) and wrapped in a context
# manager so the handle is closed.
with open("input/" + sys.argv[1]) as source:
    lines = source.readlines()

generator = MarkovGenerator(n=2, max=3000)
for line in lines:
    generator.feed(line)

text = generator.generate()
print(text)
# `file`, `text`, and `prefixes` are defined earlier in this script
# (outside this chunk).
lines = file.readlines()

# Penn Treebank noun tags.
nountypes = ["NN", "NNP", "NNS", "NNPS"]
punc = [".", ",", ";", "?", "-", ]
newlines = ""
for line in lines:
    tokens = nltk.word_tokenize(line)
    tagged = nltk.pos_tag(tokens)
    newline = line
    for idx, tag in enumerate(tagged):
        # BUG FIX: the original `any(tag[1] in n for n in nountypes)` was a
        # substring test; exact membership in the noun-tag list is the intent.
        if tag[1] in nountypes:
            newword = random.choice(prefixes).rstrip().lower() + tag[0]
            # NOTE(review): replaces EVERY occurrence of the word in the
            # line, not just this token — confirm that is intended.
            newline = newline.replace(tag[0], newword)
    newlines += newline
    newlines += "\n"

generator = MarkovGenerator(n=1, max=1000)
generator.feed(newlines)
genpoem = generator.generate()
print(newlines)

# Context managers so the output files are flushed and closed.
with open(text + "-pref.txt", "w") as f:
    f.write(newlines)
with open(text + "-markov.txt", "w") as p:
    p.write(genpoem)
import random
import sys
from textblob import TextBlob, Word
from nplc import NPLC #this object creates noun phrases from texts
from rhymebot import rhyme_set, rhyming_word

# #### add markov generator named underground and feed it all the source text ####
from markov import MarkovGenerator
underground = MarkovGenerator(5, 6)

# Context managers so every corpus file is closed after reading.
utext = []
with open('notesfromunderground.txt', 'r') as src:
    for line in src:
        utext.append(line + ' ')
utext_sentences = TextBlob(" ".join(utext).decode('ascii', errors='replace')).sentences
for sentence in utext_sentences:
    underground.feed(sentence)
# print(x) with one argument behaves the same on Python 2 and 3.
print('loaded 1/3')

sailortext = []
with open('sailorswordbook.txt', 'r') as src:
    for line in src:
        sailortext.append(line + ' ')
sailor_sentences = TextBlob(" ".join(sailortext).decode('ascii', errors='replace')).sentences
for sentence in sailor_sentences:
    underground.feed(sentence)
print('loaded 2/3')

# The third corpus; processing of `posttext` continues past this chunk.
posttext = []
with open('etiquette.txt', 'r') as src:
    for line in src:
        posttext.append(line + ' ')