Example #1
def generate_status(tweet_list):
    '''(list) -> str
    Return a Markov-generated status, or None on failure.'''
    tweet_text = ' '.join(tweet_list)
    try:
        mc = MarkovGenerator(tweet_text, 90, tokenize_fun=twitter_tokenize)
        status = mc.generate_words().lower()
        return status
    except ValueError as e:
        print(e)
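A hypothetical call to the helper above, with made-up tweet strings; MarkovGenerator and twitter_tokenize come from the surrounding project:

tweets = ["sample tweet one", "sample tweet two"]  # made-up input
status = generate_status(tweets)
if status is not None:  # generate_status returns None when a ValueError is caught
    print(status)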
Example #2
    def test_train(self):
        generator = MarkovGenerator(
            MarkovGenerator.TrainStrategy(database=self.db,
                                          text_path='test_texts/test.txt',
                                          window_size=2))

        manual_model = {
            'end бои': {
                'у': 1
            },
            'бои у': {
                'сопоцкина': 1
            },
            'у сопоцкина': {
                'и': 1
            },
            'сопоцкина и': {
                'друскеник': 1
            },
            'и друскеник': {
                'закончились': 1
            },
            'друскеник закончились': {
                'отступлением': 1
            },
            'закончились отступлением': {
                'германцев': 1
            },
            'отступлением германцев': {
                '.': 1
            },
            'германцев .': {
                'end': 1
            },
        }

        model = dict()
        tokenizer = generator.strategy.tokenizer
        for line in self.db['model'].find():
            line_key = ' '.join(
                [tokenizer.idx2word(idx) for idx in line['key'].split()])
            line_value = {
                tokenizer.idx2word(key): value
                for key, value in line['value'].items()
            }
            model[line_key] = line_value

        self.assertEqual(manual_model, model)
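For reference, the expected model above can be reproduced with a small stand-alone sketch. This assumes, as the test data suggests, that the token stream is padded with an 'end' sentinel on both sides; it is not the project's actual training code:

from collections import defaultdict

def build_model(tokens, window_size=2):
    # map each window_size-gram key to counts of the token that follows it
    model = defaultdict(lambda: defaultdict(int))
    padded = ['end'] * (window_size - 1) + tokens + ['end']
    for i in range(len(padded) - window_size):
        key = ' '.join(padded[i:i + window_size])
        model[key][padded[i + window_size]] += 1
    return {key: dict(counts) for key, counts in model.items()}

# build_model('бои у сопоцкина и друскеник закончились отступлением германцев .'.split())
# reproduces manual_model above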
Example #3
    def __init__(self, context, config):
        self.db = context.db.get('MarkovPlugin')
        self.tables = defaultdict(MarkovGenerator)

        for item in self.db.getAll():
            self.tables[item.key] = MarkovGenerator.from_dict(item.value)
        print(self.tables)
Example #4
class TextGrid(object):
 	
	def __init__(self, path):
		# pp = pprint.PrettyPrinter(indent=4)
		self.generator = MarkovGenerator(n=1, max=3)

		self.path = path + '/textGrids/'
		#let's get the text from the textGrids, save the annotations in a dict, key=annotation text, value=(filename, annotation)
		self.annotations = dict()

		for tgFile in os.listdir(self.path):
			if tgFile[-9:] == '.TextGrid':
				#print tgFile
				tg = tgt.read_textgrid(self.path + tgFile)
				file_annotations = [i for t in tg.tiers for i in t]
				for i in range(len(file_annotations)):
					a1 = file_annotations[i]
					filename = tgFile[:-9]
					self.annotations[a1.text] = (filename, a1)
					if i < len(file_annotations) - 1:
						a2 = file_annotations[i+1]
						self.feedMarkov(a1, a2)

		# pp.pprint(self.annotations)
		

	def generateText(self, numLines):
		output = list()
		for i in range(numLines):
			output.append(self.generator.generate())
		return output

	def feedMarkov(self, a1, a2):
		a1.text = a1.text.strip()
		a2.text = a2.text.strip()
		endFirst = a1.text[-1]
		startSecond = a2.text[0]
		first = (endFirst, a1)
		second = (startSecond, a2)
		self.generator.feed(first, second)
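The index loop in __init__ above is plain pairwise iteration; a behavior-equivalent sketch of the feeding step using zip (the annotations dict still has to be filled for every item, including the last):

for a1, a2 in zip(file_annotations, file_annotations[1:]):
    self.feedMarkov(a1, a2)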
Example #5
File: dummydata.py Project: apit/rinjani
def generate_random_content():
    global contents
    global titles

    def random_content():
        return ".\n\n".join([(mg.say().strip()) for _i in range(4)])

    start = time.time()
    print("Generating random content...", end="")
    sys.stdout.flush()

    mg = MarkovGenerator(2)
    txt = open("/rinjani/var/data/milton-paradise.txt").read()
    txt = re.sub('["*]*-', "", txt)
    mg.learn(txt)
    contents = [random_content() for _i in range(200)]
    titles = [re.sub(r"[^\w\s]+", "", truncate_words(mg.say().strip(), 10)) for _i in range(100)]

    print(" finished in %ds." % (time.time() - start))
    sys.stdout.flush()
Example #7
	def __init__(self, path):
		# pp = pprint.PrettyPrinter(indent=4)
		self.generator = MarkovGenerator(n=1, max=3)

		self.path = path + '/textGrids/'
		#let's get the text from the textGrids, save the annotations in a dict, key=annotation text, value=(filename, annotation)
		self.annotations = dict()

		for tgFile in os.listdir(self.path):
			if tgFile[-9:] == '.TextGrid':
				#print tgFile
				tg = tgt.read_textgrid(self.path + tgFile)
				file_annotations = [i for t in tg.tiers for i in t]
				for i in range(len(file_annotations)):
					a1 = file_annotations[i]
					filename = tgFile[:-9]
					self.annotations[a1.text] = (filename, a1)
					if i < len(file_annotations) - 1:
						a2 = file_annotations[i+1]
						self.feedMarkov(a1, a2)
Example #8
	24	: [0,0,0,1,1,0,0,0],
	25	: [0,0,0,1,1,0,0,1],
	26	: [0,0,0,1,1,0,1,0],
	27	: [0,0,0,1,1,0,1,1],
	28	: [0,0,0,1,1,1,0,0],
	29	: [0,0,0,1,1,1,0,1],
	30	: [0,0,0,1,1,1,1,0],
	54	: [0,0,1,1,0,1,1,0],
	60	: [0,0,1,1,1,1,0,0],
	62	: [0,0,1,1,1,1,1,0],
	102	: [0,1,1,0,0,1,1,0],
	126	: [0,1,1,1,1,1,1,0],
	188	: [1,0,1,1,1,1,0,0]
}

generator = MarkovGenerator(n=3, max=2)  # initialize the generator
for line in sys.stdin:
	line = line.lower()
	line = line.strip()
	generator.feed(line)

def wordFromRule(lines, j):  # takes a list, generates an n-gram, and returns its last word
	if j > 0:
		m = generator.generate(lines, j)
		m = m.split(" ")
		return m[-1]
	else:
		return ""

def newgen(x, y, rule):
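The truncated table above appears to map Wolfram-style rule numbers to their 8-bit binary expansions; a minimal sketch that reproduces those entries:

def rule_bits(n):
    # 8-bit binary expansion of a cellular-automaton rule number
    return [int(b) for b in format(n, '08b')]

# rule_bits(30) == [0,0,0,1,1,1,1,0] -- matches the 30 entry above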
Example #9
import random
from markov import MarkovGenerator
from markov_by_char import CharacterMarkovGenerator

# word MarkovGenerator
generator = MarkovGenerator(n=2, max=500)

# character MarkovGenerator
#generator = CharacterMarkovGenerator(n=3, max=100)

for line in open('white-album.txt'):
    line = line.strip()
    # feeding each line twice weights this corpus double in the model
    generator.feed(line)
    generator.feed(line)

for line in open('black-album.txt'):
    line = line.strip()
    generator.feed(line)

for i in range(3):
    print(generator.generate())
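Feeding a line twice, as above, is a simple way to weight one corpus over another. A hypothetical helper that generalizes the pattern, using only the feed method shown in these examples:

def feed_weighted(generator, path, weight=1):
    # feed every line of a file `weight` times to bias the model toward it
    for line in open(path):
        line = line.strip()
        for _ in range(weight):
            generator.feed(line)

# feed_weighted(generator, 'white-album.txt', weight=2)
# feed_weighted(generator, 'black-album.txt')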
Example #10
import sys
from markov import MarkovGenerator

infile = open("input/" + sys.argv[1], encoding="utf8")
lines = infile.readlines()

generator = MarkovGenerator(n=2, max=3000)
for line in lines:
    generator.feed(line)
text = generator.generate()
print(text)
Example #11
lines = file.readlines()

nountypes = ["NN", "NNP", "NNS", "NNPS"]
punc = [".", ",", ";", "?", "-"]

newlines = ""
for line in lines:
	tokens = nltk.word_tokenize(line)
	tagged = nltk.pos_tag(tokens)
	newline = line
	for idx, tag in enumerate(tagged):
		if tag[1] in nountypes:
			newword = random.choice(prefixes).rstrip().lower() + tag[0]
			newline = newline.replace(tag[0], newword)

	newlines += newline
	newlines += "\n"


generator = MarkovGenerator(n=1, max=1000)
generator.feed(newlines)
genpoem = generator.generate()

print(newlines)

f = open(text+"-pref.txt", "w")
f.write(newlines)
f.close()
p = open(text+"-markov.txt", "w")
p.write(genpoem)
p.close()
Example #12
import random
import sys
from textblob import TextBlob, Word
from nplc import NPLC  # this object creates noun phrases from texts
from rhymebot import rhyme_set, rhyming_word

#### add markov generator named underground and feed it all the source text ####
from markov import MarkovGenerator
underground = MarkovGenerator(5, 6)

utext = []
for line in open('notesfromunderground.txt', 'r', errors='replace'):
	utext.append(line + ' ')
utext_sentences = TextBlob(" ".join(utext)).sentences
for sentence in utext_sentences:
	underground.feed(sentence)

print('loaded 1/3')

sailortext = []
for line in open('sailorswordbook.txt', 'r', errors='replace'):
	sailortext.append(line + ' ')
sailor_sentences = TextBlob(" ".join(sailortext)).sentences
for sentence in sailor_sentences:
	underground.feed(sentence)

print('loaded 2/3')

posttext = []
for line in open('etiquette.txt', 'r', errors='replace'):
	posttext.append(line + ' ')