示例#1
0
def dev():
    """Dev harness: build sentences from a fixed thesaurus seed list and print them."""
    import core.markovchain as mc

    chain = mc.MarkovChain()
    maker = sentence.SentenceMaker(chain)
    # One sentence per seed word; repeated seeds ("cat") yield separate sentences.
    generated = sentences_from_thesaurus(["cat", "anger", "sorry", "cat"], chain)
    for raw in generated:
        polished = maker.polish_sentence(raw)
        print("   " + maker.to_string(polished))
示例#2
0
def seq_to_para(seq, mc):
    """Generate one sentence per seed phrase.

    Args:
        seq: iterable of seed phrases (whitespace-separated word strings).
        mc: a MarkovChain used to drive sentence generation.

    Returns:
        A list of token lists, one per seed phrase.
    """
    sm = sentence.SentenceMaker(mc)
    # Comprehension replaces the manual append loop (pure construction).
    return [sm.generate_sentence_tokens(seed.split()) for seed in seq]
示例#3
0
 def make_speech_bits(self, seeds):
     """Build one speech bit (speaker + polished sentence) per seed word."""
     total = len(seeds)
     who_speaks = self.speaker_sequence(0, total)
     acts = self.speech_sequence(total)
     maker = sentence.SentenceMaker(self.mc)
     bits = []
     for idx, seed in enumerate(seeds):
         spk = who_speaks[idx]
         toks = maker.generate_sentence_tokens([seed], self.target_len[spk])
         toks = maker.polish_sentence(toks)
         bits.append({
             'speaker_name': self.speakers[spk]["name"],
             'speech_act': acts[spk],
             'seq_id': spk,
             'speech': toks,
             'paragraph': True,
         })
     return bits
示例#4
0
def load_text():
    """Fetch a document, cache it locally, tokenize a slice, and print a sample sentence.

    Side effects: writes "107.txt" to the local dev cache and reads it back from
    database/resources/texts/.  Prints tokens, entities, and one generated sentence.
    """
    doc = bs.get_clean_text(107)
    cache = store.files(store.Storage_type.local_dev)
    cache.write_text(doc['text'], "107.txt")
    with open("database/resources/texts/107.txt") as file_in:
        text = file_in.read()

    # Only a small slice is tokenized here — this is a dev smoke test, not a full load.
    tokens = tokenize.tokenize(text[2000:3000])
    print(tokens['tokens'][0:50])
    print(tokens['entities'])
    #     mcW.train_words(tokens['tokens'])
    #     mcW.append_ner(tokens['entities'])

    # BUG FIX: the original passed the *module* `mc` to SentenceMaker; it needs a
    # MarkovChain instance (see the other dev helpers in this file).
    mcW = mc.MarkovChain()
    generator = sentence.SentenceMaker(mcW)
    s = generator.generate_sentence_tokens(["the", "man"])
    print(" ".join(s))
示例#5
0
def dev():
    """Dev harness: seed sentences from a word-vector path, then run a dialogue."""
    chain = mc.MarkovChain()
    vectors = w2v.WordVectors()
    seeds = vectors.path("swim", 20)
    print(seeds)

    maker = sentence.SentenceMaker(chain)
    for word in seeds:
        tokens = maker.generate_sentence_tokens([word])
        tokens = maker.polish_sentence(tokens)
        print("  " + sentence.SentenceMaker.to_string(tokens))
    print('')

    # Three random speakers drive a short generated dialogue over the same seeds.
    nm = names.NameMaker()
    speakers = [nm.random_person() for _ in range(1, 4)]
    speaker_names = [person['name'] for person in speakers]
    speaker_pronouns = [person['pronoun'] for person in speakers]
    dm = dialogue.dialogue_maker(speaker_names, speaker_pronouns, chain, seeds)
    for line in dm.make_dialogue():
        print("  " + sentence.SentenceMaker.to_string(line))

    story_grammar.make_story(maker)
示例#6
0
import argparse
import core.sentence as sentence
import core.markovchain as mc
import core.paragraphs as paras
import nlp.story_grammar as story_grammar
import knowledge.names as names
import core.dialogue as dialogue

# Module-level chain and sentence generator shared by the helpers below.
mcW = mc.MarkovChain()
generator = sentence.SentenceMaker(mcW)


def make_seeds(seeds, max=5):
    """Return `seeds` unchanged, or generate up to `max` random seed words.

    NOTE: `max` shadows the builtin of the same name; kept for backward
    compatibility with keyword callers.
    """
    if seeds is not None:
        return seeds
    # Make a random sentence of seeds, truncated to `max` words.
    random_tokens = generator.generate_sentence_tokens(["the"])
    return random_tokens[:max]


def make_sentences(n, seeds=None):
    """Print `n` generated, polished sentences, one per seed.

    Args:
        n: number of seeds (and hence sentences) to use.
        seeds: optional seed words — a list, or a whitespace-separated string;
            when None, random seeds are generated via make_seeds().
    """
    seeds = make_seeds(seeds)
    # Removed commented-out duplicate of make_seeds() logic (dead code).
    if isinstance(seeds, str):
        seeds = seeds.split()
    paragraph = paras.seq_to_para(seeds[0:n], mcW)
    for sent in paragraph:
        print(generator.to_string(generator.polish_sentence(sent)))


def make_story():