Пример #1
0
class MalayalamMarkov:
    """Markov-chain text generator set up for Malayalam text.

    Wraps a ``MarkovText`` built with a ``RegExpScanner`` over
    ``MALAYALAM_EXPR`` and a ``Formatter`` using ``MALAYALAM_REPLACE``.
    """

    def __init__(self, input_db=None, output_db=None):
        """Create the underlying ``MarkovText``.

        Args:
            input_db: Database handed to ``SqliteStorage``; takes
                precedence over ``output_db`` when truthy.
            output_db: Database handed to ``SqliteStorage`` when
                ``input_db`` is falsy.

        When neither database is given, no ``storage`` argument is passed
        and ``MarkovText`` uses its own default (this case used to fail
        with ``UnboundLocalError``).
        """
        markov_kwargs = {
            'scanner': RegExpScanner(expr=MALAYALAM_EXPR),
            'formatter': Formatter(replace=MALAYALAM_REPLACE),
        }
        db = input_db or output_db
        if db:
            markov_kwargs['storage'] = SqliteStorage(db=db)
        self.markov = MarkovText(**markov_kwargs)

    def add_text(self, text):
        """Feed ``text`` to the chain; falsy input is ignored."""
        if text:
            self.markov.data(text)

    def predict(self, start, words, count):
        """Generate ``count`` texts.

        Args:
            start: Value passed as ``reply_to``.
            words: Value passed as ``max_length``.
            count: Number of texts to generate.

        Returns:
            A list with ``count`` generated results.
        """
        return [
            self.markov(max_length=words,
                        reply_to=start,
                        reply_mode=ReplyMode.END)
            for _ in range(count)
        ]

    def save(self):
        """Save the underlying chain via ``MarkovText.save()``."""
        self.markov.save()

    def from_db(self, db_filename):
        """Replace the current chain with one loaded from ``db_filename``."""
        storage = SqliteStorage(db=db_filename)
        self.markov = MarkovText.from_storage(storage)
Пример #2
0
async def markov(ctx, id):
    """Download a story as text, train a Markov chain on it and send a sample.

    Args:
        ctx: Object whose awaitable ``send`` method receives the text.
        id: Story identifier inserted into the download URL; converted
            with ``str`` so integer ids work as well.

    Raises:
        requests.HTTPError: If the download answers with an error status,
            so an error page is never used as training text.
    """
    import asyncio

    url = "https://www.fimfiction.net/story/download/" + str(id) + "/txt"
    markovgenerate = MarkovText()
    print('Getting story with id {0}...'.format(id))
    # requests is blocking: run it in the default executor so the event
    # loop keeps serving other coroutines during the download.
    loop = asyncio.get_event_loop()
    reply = await loop.run_in_executor(
        None, lambda: requests.get(url, timeout=30))
    reply.raise_for_status()
    markovgenerate.data(reply.content.decode(encoding="UTF-8"))
    print('Story received.')
    response = markovgenerate(max_length=5000)
    print('Markov chain response generated.')
    await ctx.send(response)
Пример #3
0
 def __init__(self, input_db=None, output_db=None):
     """Create the underlying ``MarkovText`` for Malayalam text.

     Args:
         input_db: Database handed to ``SqliteStorage``; takes
             precedence over ``output_db`` when truthy.
         output_db: Database handed to ``SqliteStorage`` when
             ``input_db`` is falsy.

     When neither database is given, no ``storage`` argument is passed
     and ``MarkovText`` uses its own default (this case used to fail
     with ``UnboundLocalError``).
     """
     markov_kwargs = {
         'scanner': RegExpScanner(expr=MALAYALAM_EXPR),
         'formatter': Formatter(replace=MALAYALAM_REPLACE),
     }
     db = input_db or output_db
     if db:
         markov_kwargs['storage'] = SqliteStorage(db=db)
     self.markov = MarkovText(**markov_kwargs)
Пример #4
0
    async def markovgen(self, ctx):
        """Send text generated from one randomly chosen corpus file.

        Picks ``markov/markov (N).txt`` with N drawn from 1..602, trains a
        fresh ``MarkovText`` on it line by line, passes the generated text
        through ``commands.clean_content`` and sends the result to ``ctx``.
        """
        randomized_int = random.randint(1, 602)
        async with aiofiles.open(f"markov/markov ({randomized_int}).txt") as f:
            text = MarkovText()
            async for line in f:
                text.data(line, part=True)
        # Every line was fed as partial data; signal the end of the input so
        # the trailing part is finalized before generating.
        text.data('', part=False)

        clean = await commands.clean_content(fix_channel_mentions=True
                                             ).convert(ctx, text())
        await ctx.send(clean)
Пример #5
0
def test_markov_text_generate(mocker, ss, data, args, res):
    """Check text generation for one parametrized case.

    ``res`` is either the expected output or an exception class that the
    call must raise. On success ``MarkovText.format`` must have been
    invoked exactly once.
    """
    format_spy = mocker.patch('markovchain.text.MarkovText.format',
                              wraps=list)
    chain = MarkovText(
        parser=Parser(state_sizes=[ss]),
        scanner=Scanner(lambda x: x),
        storage=JsonStorage(backward=True),
    )
    chain.data(data)

    expects_error = isinstance(res, type)
    if not expects_error:
        assert chain(*args) == res
        assert format_spy.call_count == 1
        return

    with pytest.raises(res):
        chain(*args)
Пример #6
0
def markovchain_example():
    """Train a chain on the definitions file and print two generated texts.

    The file is fed line by line as partial data and then closed off with an
    empty non-partial call. The first text is unconstrained; the second is
    generated with ``reply_to='sentence start'`` and ``ReplyMode.END``.
    """
    chain = MarkovText()

    with open('word_generation/definitions.txt') as source:
        for chunk in source:
            chain.data(chunk, part=True)
    chain.data('', part=False)

    free_text = chain(max_length=16)
    print(free_text + '\n')

    reply_text = chain(max_length=16,
                       reply_to='sentence start',
                       reply_mode=ReplyMode.END)
    print(reply_text + '\n')
Пример #7
0
def main():
    """Run the Markov chat bot (and optional shell) until it stops.

    Loads the configuration, sets up the chat and media loggers, restores the
    Markov chain from the configured file and runs the bot on the event loop.
    On the way out the tasks are cancelled, the chain is saved and the loop
    is closed.

    Returns:
        0 when interrupted with Ctrl-C, 1 otherwise.
    """
    conf, kwargs = get_config()
    chat_logger = logging.getLogger('chat')
    media_logger = logging.getLogger('media')
    loop = asyncio.get_event_loop()
    configure_logger(chat_logger,
                     log_file=conf.get('chat_log_file', None),
                     log_format='%(message)s',
                     log_level=logging.INFO)
    configure_logger(media_logger,
                     log_file=conf.get('media_log_file', None),
                     log_format='[%(asctime).19s] %(message)s',
                     log_level=logging.INFO)
    markov = MarkovText.from_file(conf['markov'], storage=SqliteStorage)
    bot = MarkovBot(markov,
                    chat_logger,
                    media_logger,
                    order=conf.get('order', None),
                    learn=conf.get('learn', False),
                    trigger=conf.get('trigger', None),
                    loop=loop,
                    **kwargs)
    shell = Shell(conf.get('shell', None), bot, loop=loop)

    def _wait_for(pending):
        """Let ``pending`` unwind after cancellation.

        Waiting on a cancelled task raises ``CancelledError`` and waiting on
        a task that already failed re-raises its error; both are expected
        during shutdown (the failure was reported by the handler below) and
        must not abort the cleanup.
        """
        try:
            loop.run_until_complete(pending)
        except (asyncio.CancelledError, CytubeError, SocketIOError):
            pass

    try:
        task = loop.create_task(bot.run())
        if shell.task is not None:
            task_ = asyncio.gather(task, shell.task)
        else:
            task_ = task
        loop.run_until_complete(task_)
    except (CytubeError, SocketIOError) as ex:
        print(repr(ex), file=sys.stderr)
    except KeyboardInterrupt:
        return 0
    finally:
        task_.cancel()
        task.cancel()
        shell.close()
        try:
            _wait_for(task)
            if shell.task is not None:
                _wait_for(shell.task)
        finally:
            # Always persist the chain and release the loop, even when
            # waiting for the tasks fails unexpectedly.
            markov.save()
            loop.close()

    return 1
Пример #8
0
 def from_db(self, db_filename):
     """Replace ``self.markov`` with a chain loaded from ``db_filename``."""
     self.markov = MarkovText.from_storage(SqliteStorage(db=db_filename))
Пример #9
0
import finmeter, random, lwvlib, json, re
from uralicNLP import semfi
from uralicNLP import uralicApi
from nltk.tokenize import RegexpTokenizer
from collections import Counter
import numpy as np
from numpy.random import choice
from markovchain import JsonStorage
from markovchain.text import MarkovText, ReplyMode

# Word-vector model loaded through lwvlib at import time.
# NOTE(review): the two numeric arguments are presumably vocabulary/size
# limits - confirm against lwvlib.load.
wv = lwvlib.load("fin-word2vec-lemma.bin", 10000, 500000)
# Vowel letters, including the umlauted ones.
vowels = ['a', 'e', 'i', 'o', 'u', 'y', 'ä', 'ö']
# Markov text generators restored from JSON files at import time: the main
# model and, judging by its name, a fallback model.
markov = MarkovText.from_file('kalevala_and_others_markov.json')
fallback_markov = MarkovText.from_file('fallback_markov.json')


def count_syllables(verse):
    ''' Count syllables in a verse.

    Splits ``verse`` into runs of word characters and, for each word, adds
    the number of "-"-separated pieces of ``finmeter.hyphenate(word)`` to a
    running total. Words for which hyphenation raises are skipped.

    NOTE(review): no ``return`` statement is visible in this excerpt, so as
    shown the total is never handed back - the function looks truncated;
    confirm against the full file.
    '''

    tokenizer = RegexpTokenizer(r'\w+')
    n_syllables = 0
    for word in tokenizer.tokenize(verse):

        try:
            n_syllables += len(finmeter.hyphenate(word).split("-"))
        except Exception as e:
            # Best effort: a word that cannot be hyphenated contributes
            # nothing to the total.
            pass
            # print(e)
            # print(verse)
            # print("Error here: count_syllables")
Пример #10
0
# Not my code; this was just to test the markovchain Python library. The setup and code come from: http://dead-beef.tk/markovchain/
from markovchain import JsonStorage
from markovchain.text import MarkovText, ReplyMode

markov = MarkovText()

# Alternative corpus tried earlier: 'data_extracted_6.txt'
# First pass: feed the whole corpus in a single call.
with open('NoPrefaceEmilyDickinsonBooks12242.txt') as corpus:
    whole_text = corpus.read()
    markov.data(whole_text)

# Second pass: feed the same file again, line by line as partial data,
# then close the input off with an empty non-partial call.
with open('NoPrefaceEmilyDickinsonBooks12242.txt') as corpus:
    for row in corpus:
        markov.data(row, part=True)
markov.data('', part=False)

# One unconstrained sample, then one generated as a reply.
print(markov())
reply = markov(max_length=40,
               reply_to='sentence start',
               reply_mode=ReplyMode.END)
print(reply)

# Save the chain to JSON and load it back from the same file.
markov.save('markov.json')

markov = MarkovText.from_file('markov.json')


# NOTE: Fix the "sentence start" shenanigan in the output - why is it even printed? (Presumably because the literal placeholder 'sentence start' is passed as reply_to - verify.)
Пример #11
0
def generate_markov(input_path):
    """Return a new ``MarkovText`` fed with the full contents of ``input_path``."""
    chain = MarkovText()
    with open(input_path) as source:
        corpus = source.read()
        chain.data(corpus)
    return chain
Пример #12
0
def construct_model():
    """Return a new ``MarkovText`` whose parser is ``Parser([3])``."""
    parser = Parser([3])
    return MarkovText(parser=parser)
Пример #13
0
def test_markov_text_data(mocker):
    """``MarkovText.data`` must forward its arguments to ``Markov.data``
    unchanged and hand back that method's return value."""
    base_data = mocker.patch('markovchain.Markov.data', return_value=1)
    text_chain = MarkovText()

    outcome = text_chain.data([1, 2], True)

    assert outcome == 1
    base_data.assert_called_once_with([1, 2], True)
Пример #14
0
def test_markov_text_format(test, join_with):
    """``MarkovText.format`` must join the parts with the storage's state
    separator, pass the joined string to the formatter and return the
    formatter's result."""
    formatter_mock = Mock(return_value=2)
    text_chain = MarkovText(formatter=formatter_mock)
    text_chain.storage.state_separator = join_with

    formatted = text_chain.format(test)

    assert formatted == 2
    expected_argument = join_with.join(test)
    formatter_mock.assert_called_with(expected_argument)