class MalayalamMarkov:
    def __init__(self, input_db=None, output_db=None):
        malayalam_scanner = RegExpScanner(expr=MALAYALAM_EXPR)
        malayalam_formatter = Formatter(replace=MALAYALAM_REPLACE)
        if input_db:
            storage = SqliteStorage(db=input_db)
        elif output_db:
            storage = SqliteStorage(db=output_db)
        else:
            # Fail early instead of hitting an UnboundLocalError below
            # when neither database filename is given.
            raise ValueError('either input_db or output_db is required')
        self.markov = MarkovText(scanner=malayalam_scanner,
                                 formatter=malayalam_formatter,
                                 storage=storage)

    def add_text(self, text):
        if text:
            self.markov.data(text)

    def predict(self, start, words, count):
        results = []
        for _ in range(count):
            results.append(self.markov(max_length=words,
                                        reply_to=start,
                                        reply_mode=ReplyMode.END))
        return results

    def save(self):
        self.markov.save()

    def from_db(self, db_filename):
        storage = SqliteStorage(db=db_filename)
        self.markov = MarkovText.from_storage(storage)
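# A minimal usage sketch for the class above, assuming MALAYALAM_EXPR and
# MALAYALAM_REPLACE are defined elsewhere in the module. The file names and
# the seed word below are placeholders, not part of the original code.
model = MalayalamMarkov(output_db='malayalam.sqlite')
with open('corpus_ml.txt', encoding='utf-8') as fp:
    model.add_text(fp.read())
model.save()
for sentence in model.predict(start='<seed word>', words=20, count=3):
    print(sentence)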
async def markov(ctx, id):
    # Download a story from Fimfiction by id and reply with Markov-generated text.
    url = "https://www.fimfiction.net/story/download/" + id + "/txt"
    markovgenerate = MarkovText()
    print('Getting story with id {0}...'.format(id))
    markovgenerate.data(requests.get(url).content.decode(encoding="UTF-8"))
    print('Story received.')
    response = markovgenerate(max_length=5000)
    print('Markov chain response generated.')
    await ctx.send(response)
async def markovgen(self, ctx):
    randomized_int = random.randint(1, 602)
    async with aiofiles.open(f"markov/markov ({randomized_int}).txt") as f:
        text = MarkovText()
        async for line in f:
            text.data(line, part=True)
    clean = await commands.clean_content(fix_channel_mentions=True).convert(ctx, text())
    await ctx.send(clean)
def test_markov_text_generate(mocker, ss, data, args, res):
    fmt = mocker.patch('markovchain.text.MarkovText.format', wraps=list)
    markov = MarkovText(parser=Parser(state_sizes=[ss]),
                        scanner=Scanner(lambda x: x),
                        storage=JsonStorage(backward=True))
    markov.data(data)
    if isinstance(res, type):
        with pytest.raises(res):
            markov(*args)
    else:
        assert markov(*args) == res
        assert fmt.call_count == 1
def markovchain_example():
    markov = MarkovText()
    with open('word_generation/definitions.txt') as fp:
        for line in fp:
            markov.data(line, part=True)
        markov.data('', part=False)
    print(markov(max_length=16) + '\n')
    print(markov(max_length=16, reply_to='sentence start', reply_mode=ReplyMode.END) + '\n')
def main():
    conf, kwargs = get_config()
    chat_logger = logging.getLogger('chat')
    media_logger = logging.getLogger('media')
    loop = asyncio.get_event_loop()
    configure_logger(chat_logger,
                     log_file=conf.get('chat_log_file', None),
                     log_format='%(message)s',
                     log_level=logging.INFO)
    configure_logger(media_logger,
                     log_file=conf.get('media_log_file', None),
                     log_format='[%(asctime).19s] %(message)s',
                     log_level=logging.INFO)
    markov = MarkovText.from_file(conf['markov'], storage=SqliteStorage)
    bot = MarkovBot(markov, chat_logger, media_logger,
                    order=conf.get('order', None),
                    learn=conf.get('learn', False),
                    trigger=conf.get('trigger', None),
                    loop=loop, **kwargs)
    shell = Shell(conf.get('shell', None), bot, loop=loop)
    try:
        task = loop.create_task(bot.run())
        if shell.task is not None:
            task_ = asyncio.gather(task, shell.task)
        else:
            task_ = task
        loop.run_until_complete(task_)
    except (CytubeError, SocketIOError) as ex:
        print(repr(ex), file=sys.stderr)
    except KeyboardInterrupt:
        return 0
    finally:
        task_.cancel()
        task.cancel()
        shell.close()
        loop.run_until_complete(task)
        if shell.task is not None:
            loop.run_until_complete(shell.task)
        markov.save()
        loop.close()
    return 1
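# Illustrative only: a configuration of the shape main() above expects from
# get_config(). The key names come from the conf.get(...) calls; the values
# and the actual config file format are assumptions, not part of the project.
EXAMPLE_CONF = {
    'markov': 'markov.sqlite',   # passed to MarkovText.from_file(..., storage=SqliteStorage)
    'chat_log_file': 'chat.log',
    'media_log_file': 'media.log',
    'order': None,               # chain order; None keeps the stored default
    'learn': False,              # whether the bot learns from chat
    'trigger': None,             # chat trigger for generating replies
    'shell': None,               # shell/console configuration
}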
import finmeter, random, lwvlib, json, re
from uralicNLP import semfi
from uralicNLP import uralicApi
from nltk.tokenize import RegexpTokenizer
from collections import Counter
import numpy as np
from numpy.random import choice
from markovchain import JsonStorage
from markovchain.text import MarkovText, ReplyMode

wv = lwvlib.load("fin-word2vec-lemma.bin", 10000, 500000)
vowels = ['a', 'e', 'i', 'o', 'u', 'y', 'ä', 'ö']
markov = MarkovText.from_file('kalevala_and_others_markov.json')
fallback_markov = MarkovText.from_file('fallback_markov.json')


def count_syllables(verse):
    '''Count syllables in a verse.'''
    tokenizer = RegexpTokenizer(r'\w+')
    n_syllables = 0
    for word in tokenizer.tokenize(verse):
        try:
            n_syllables += len(finmeter.hyphenate(word).split("-"))
        except Exception as e:
            # Hyphenation can fail for some tokens; skip them.
            pass
            # print(e)
            # print(verse)
            # print("Error here: count_syllables")
    return n_syllables
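# A small sketch of calling count_syllables() above; the verse is an
# arbitrary Kalevala line used only as an example, and finmeter's
# hyphenation data is assumed to be available locally.
print(count_syllables("Vaka vanha Väinämöinen"))  # prints the syllable count of the verse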
# Not my code; this was just to test the markovchain Python library.
# Source of the setup and code: http://dead-beef.tk/markovchain/
from markovchain import JsonStorage
from markovchain.text import MarkovText, ReplyMode

markov = MarkovText()

# with open('data_extracted_6.txt') as fp:
with open('NoPrefaceEmilyDickinsonBooks12242.txt') as fp:
    markov.data(fp.read())

with open('NoPrefaceEmilyDickinsonBooks12242.txt') as fp:
    for line in fp:
        markov.data(line, part=True)
    markov.data('', part=False)

print(markov())
print(markov(max_length=40, reply_to='sentence start', reply_mode=ReplyMode.END))

markov.save('markov.json')
markov = MarkovText.from_file('markov.json')

# NOTE: Fix the 'sentence start' shenanigan in the output - why is it even printing that out?
def generate_markov(input_path):
    markov = MarkovText()
    with open(input_path) as fp:
        markov.data(fp.read())
    return markov
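# Usage sketch for generate_markov() above; the corpus path is a placeholder.
model = generate_markov('corpus.txt')
print(model(max_length=30))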
def construct_model():
    return MarkovText(parser=Parser([3]))
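# Parser([3]) sets the parser's state size to 3, i.e. the chain conditions
# each next word on the previous three. A minimal usage sketch; the training
# text below is a placeholder.
model = construct_model()
model.data('some placeholder training text goes here')
print(model(max_length=20))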
def test_markov_text_data(mocker):
    mock = mocker.patch('markovchain.Markov.data', return_value=1)
    markov = MarkovText()
    assert markov.data([1, 2], True) == 1
    mock.assert_called_once_with([1, 2], True)
def test_markov_text_format(test, join_with):
    fmt = Mock(return_value=2)
    markov = MarkovText(formatter=fmt)
    markov.storage.state_separator = join_with
    assert markov.format(test) == 2
    fmt.assert_called_with(join_with.join(test))