Example #1
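# Flask endpoint: reads the user's message, encodes it, runs the seq2seq model, and returns the decoded reply as JSON.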
def ask():
    message = str(request.form['messageText'])

    '''kernel = aiml.Kernel()

    if os.path.isfile("bot_brain.brn"):
        kernel.bootstrap(brainFile = "bot_brain.brn")
    else:
        kernel.bootstrap(learnFiles = os.path.abspath("aiml/std-startup.xml"), commands = "load aiml b")
        kernel.saveBrain("bot_brain.brn")'''

    # kernel now ready for use
    if message == "quit":
        exit()
    idx_q = handleQueryFunc(message)
    #print (idx_q)
    output = model.predict(sess, idx_q.T)
    #print (output)
    q = data_utils.decode(sequence=idx_q[0], lookup=metadata['idx2w'], separator=' ')
    bot_response = data_utils.decode(sequence=output[0], lookup=metadata['idx2w'], separator=' ').split(' ')
    #print (bot_response)
    response = ' '.join(bot_response)
    print(response)
    return make_response(jsonify({'status': 'OK', 'answer': response}))
Example #2
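# Tokenizes a Korean message with kor_data2, pads it into an index matrix, predicts, and strips 'unk' tokens from the decoded reply.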
def send(message):

    #print("client",message)

    #print("send실행됐지롱!")
    print(message)
    data_len =limit['maxq']
    print('1')

    q_refine_list = kor_data2.disintegration_kor(message)[0]

    print('2')
    print(q_refine_list)
    #print("q_refine_list",q_refine_list)
    idx_q = np.zeros([data_len, limit['maxq']], dtype=np.int32)
    print('3')
    message_tokenized = q_refine_list.split(' ')
    indices = kor_data2.pad_seq(message_tokenized, metadata['w2idx'], limit['maxq'])
    print("messagetokenized",message_tokenized)
    #print("indices",indices)
    #print("np.array",np.array(indices))
    #idx_q = idx_q + np.array(indices).reshape(1, limit['maxq'])  # build a 1 x maxq matrix
    idx_q = idx_q + np.array(indices).reshape(1, limit['maxq'])
    print("idx_q",idx_q)
    output = model.predict(sess, idx_q)
    print("output_origin",output)
    print("output",output[0])
    output_decoded = data_utils.decode(sequence=output[0], lookup=metadata['idx2w'], separator=' ').split(' ')
    reply = ' '.join(output_decoded).replace('unk', '')
    #print("출력되라")
    print(reply)
    return reply
Example #3
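    # Writes unique (question, predicted answer) pairs from one validation batch to logs/<batch_n>.txt.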
    def sample_replies(self, sess, valid_set, metadata, batch_n):
        test_x = valid_set.__next__()[0]
        test_y_pred = self.predict(sess, test_x)

        log_file = open('logs/%d.txt' % batch_n, 'w')

        replies = []
        for ii, oi in zip(test_x.T, test_y_pred):
            q = data_utils.decode(sequence=ii, lookup=metadata['idx2w'], separator=' ')
            decoded = data_utils.decode(sequence=oi, lookup=metadata['idx2w'], separator=' ')
            if decoded not in replies:
                log_file.write('q: "%s"; a: "%s"\n' % (q, decoded))
                replies.append(decoded)

        log_file.write('%d/%d\n' % (len(replies), test_x.shape[1]))

        log_file.close()
Example #4
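    # Encodes one input line as a (25, 1) index column, predicts, and decodes the result back into a word string.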
    def process_line(self, line):
        dic = self.metadata['w2idx']
        dic2 = self.metadata['idx2w']
        en = data.process_line(line, dic).reshape((25, 1))
        res = self.model.predict(self.sess, en)
        en2 = data_utils.decode(res[0], dic2)
        res2 = ""
        for word in en2:
            res2 = res2 + word + " "
        return res2
Example #5
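# Flask endpoint: lowercases and whitelist-filters the message, zero-pads it, predicts, and returns the decoded reply as JSON.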
def reply():
    input_msg_ = request.form['msg']
    msg = str(input_msg_).lower()
    msg = data.filter_line(msg, "0123456789abcdefghijklmnopqrstuvwxyz ")
    msg_arr = msg.split(' ')
    message = data.zero_pad_line(msg_arr, metadata['w2idx'])
    output = model.predict(sess, np.array(message.T))
    decoded = data_utils.decode(sequence=output[0],
                                lookup=metadata['idx2w'],
                                separator=' ').split(' ')
    return jsonify({'text': ' '.join(decoded)})
Example #6
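    # Saves the model's state dict as a checkpoint and, if requested, writes a generated sample to a MIDI file.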
    def save_checkpoint(self, global_step, generate_sample=False):
        '''
        Saves the model state dict, and will generate a sample if specified
        '''
        checkpoint_name = os.path.join(self.checkpoints_dir, "model_checkpoint_step_{}.pt".format(global_step))
        torch.save(self.state_dict(), checkpoint_name)

        if generate_sample:
            generation = self.generate(length=120)
            stream = decode(generation)
            stream.write('midi', os.path.join(self.train_sample_dir, 'train_sample_checkpoint_step_{}.mid'.format(global_step)))
Example #7
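# Encodes the question into a size-1 batch, predicts, and returns the first decoded output phrase.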
def get_output(question, sess, w2idx, model, metadata):

    import data_utils
    from datasets.facebook2 import data

    # get output for input phrase
    idx_q, idx_a = data.process_input(question, w2idx)
    gen = data_utils.rand_batch_gen(idx_q, idx_a, 1)
    input_ = gen.__next__()[0]
    output = model.predict(sess, input_)

    # return output phrase
    for ii, oi in zip(input_.T, output):
        q = data_utils.decode(sequence=ii,
                              lookup=metadata['idx2w'],
                              separator=' ')
        decoded = data_utils.decode(sequence=oi,
                                    lookup=metadata['idx2w'],
                                    separator=' ').split(' ')
        return ' '.join(decoded)
Example #8
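# Tags Chinese text line by line: splits each line on the sentence delimiter '。', runs the NER model per sentence, and merges the decoded tags back by character offset.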
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)

        content = load_file_content(FLAGS.predict_file)
        tag_result = []
        errors_map = {}
        num = 0
        for word_line in content:
            num = num + 1
            sentences = ''.join(word_line)
            sen_list = sentences.split('。')
            flag = False
            if sen_list[-1] == '':
                flag = True
                sen_list = sen_list[:-1]
            word_tag_map = {}
            errors = []
            offset = 0
            for i in range(len(sen_list)):
                line = sen_list[i] + '。'
                if (i == len(sen_list) - 1) and not flag:
                    line = line[:-1]
                result = model.evaluate_line(sess,
                                             input_from_line(line, char_to_id),
                                             id_to_tag)
                tag_, errors_ = decode(result, word_line, offset)

                word_tag_map.update(tag_)
                errors = errors + errors_

                offset = offset + len(line)

            errors_map[num] = errors

            tag_result_ = []
            for i in range(len(word_line)):
                if i in word_tag_map.keys():
                    tag_result_.append(word_tag_map[i])
                else:
                    tag_result_.append('O')
            tag_result.append(' '.join(tag_result_) + '\n')

        dump_to_file(tag_result,
                     os.path.join(FLAGS.result_path, 'predict_result.txt'),
                     'w')
Example #9
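# Converts the input sentence to indices, predicts, and prints each decoded reply.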
def ask2(text):
    input_ = sentence_to_indexes(text)

    #print(input_.shape)
    #print(input_)
    output = model.predict(sess, input_)
    #print(output)
    #test = model.advance_predict(sess, input_)

    for i in output:
        decoded = data_utils.decode(sequence=i,
                                    lookup=metadata['idx2w'],
                                    separator=' ').split(' ')
        print('>>> {}'.format(' '.join(decoded)))
Example #10
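    # Full preprocessing pipeline (filter, tokenize, zero-pad with a pickled w2idx) followed by prediction and decoding of the first reply.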
    def get_response(self, text, metadata, sess):
        questions = [text.lower()]
        questions = [filter_line(line, EN_WHITELIST) for line in questions]
        answers = questions
        qlines, alines = filter_data(questions, answers)
        qtokenized = [[w.strip() for w in wordlist.split(' ') if w] for wordlist in qlines]
        atokenized = [[w.strip() for w in wordlist.split(' ') if w] for wordlist in alines]
        w2idx = pickle.load(open("datasets/cornell_corpus/w2idx.pkl", "rb"))
        idx_q, idx_a = zero_pad(qtokenized, atokenized, w2idx)
        query = data_utils.rand_batch_gen(idx_q, idx_a, 1)
        input_q = query.__next__()[0]
        output = self.predict(sess, input_q)
        #print(input_q.shape)
        #replies = []
        for ii, oi in zip(input_q.T, output):
            q = data_utils.decode(sequence=ii, lookup=metadata['idx2w'], separator=' ')
            decoded = data_utils.decode(sequence=oi, lookup=metadata['idx2w'], separator=' ').split(' ')
            '''
            if decoded.count('unk') == 0:
                if decoded not in replies:
                    print('q : [{0}]; a : [{1}]'.format(q, ' '.join(decoded)))
                    replies.append(decoded)
            '''
            return ' '.join(decoded)
Example #11
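# For each input file, asks each question, selects the sentence with the highest memory-attention score, and emits a LaTeX enumeration of question/answer pairs.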
def run(args):
    files = [INPUT_DIRECTORY + '/' + f for f in listdir(INPUT_DIRECTORY) if isfile(join(INPUT_DIRECTORY, f))]
    files.sort()

    if not os.path.exists(OUTPUT_DIRECTORY) or not os.path.isdir(OUTPUT_DIRECTORY):
        os.mkdir(OUTPUT_DIRECTORY)

    latex_output = ''

    for filename in files:
        latex_part = "\\paragraph{"
        latex_part += filename.split('/')[-1].split('.pdf.txt.txt')[0]
        latex_part += "}\n\\begin{enumerate}\n"

        with open(filename, "r", encoding='utf-8', errors='ignore') as f:
            sentences = f.readlines()
        sentences = [sentence.replace('\n', '') for sentence in sentences]

        for question in QUESTIONS:
            latex_part += "\\item " + question + "\\\\\n"
            latex_part += "$\\longrightarrow$ "
            reset_dict()
            testS, testQ, testA = process_data(sentences, question)
            answer, answer_probability, mem_probs = get_pred(testS, testQ)
            memory_probabilities = np.round(mem_probs, 4)

            best_sentence_index = 0
            best_sentence_score = 0
            # print(len(memory_probabilities.tolist()))
            for index, mem in enumerate(memory_probabilities.tolist()):
                if mem[2] > best_sentence_score:
                    best_sentence_index = index
                    best_sentence_score = mem[2]

            words_l = []
            for idw in testS[0][best_sentence_index]:
                if idw == 0:
                    break
                words_l.append(decode(idw))
            sentence = ' '.join(words_l)
            sentence = sentence.replace('%', '\\%')
            sentence = sentence.replace('_', '\\_')

            latex_part += sentence + "\n"
        latex_part += "\\end{enumerate}"
        latex_output += "\n" + latex_part
    with open(join(OUTPUT_DIRECTORY, 'latex_out.txt'), 'w') as f:
        f.write(latex_output)
Example #12
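    # Decodes questions, reference answers, and model outputs from a test batch to compare model, user, and random-baseline replies.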
    # get last session
    sess = model.restore_last_session()
    # get string
    input_, answers = test_batch_gen.__next__()
    output = model.predict(sess, input_)

    modelsim = 0
    usersim = 0
    randomsim = 0
    simcount = 0
    lines = []
    # get questions, real answers and model answers
    for ii, ai, oi in zip(input_.T, answers.T, output):

        q = data_utils.decode(sequence=ii,
                              lookup=metadata['idx2w'],
                              separator=' ')
        a = data_utils.decode(sequence=ai,
                              lookup=metadata['idx2w'],
                              separator=' ')
        d = data_utils.decode(sequence=oi,
                              lookup=metadata['idx2w'],
                              separator=' ')
        d = custom_dict.translateSentence(d).strip()

        qarr = q.split(' ')
        aarr = a.split(' ')
        darr = d.split(' ')

        # random answer
        r = randomsentence(len(darr))
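Example #13
# Restores the checkpoint with the most training steps, generates token sequences, and decodes them into music21 streams (optionally combining two models into a two-part score).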
            sort_order = np.argsort(step_numbers)
            num_steps = step_numbers[sort_order[-1]]

            # gets the checkpoint path with the greatest number of steps
            last_checkpoint_path = checkpoints[sort_order[-1]]
            full_path = last_checkpoint_path

        print("Loading model weights from {}...".format(full_path))
        lstm2.load_state_dict(torch.load(full_path, map_location=device))

    for i in tqdm.tqdm(range(args.num_samples)):
        generation = lstm.generate(condition=args.condition,
                                   k=None,
                                   length=args.sample_len,
                                   temperature=args.temp)
        stream = decode(generation)

        if args.logdir2 != '':

            generation2 = lstm2.generate(condition=args.condition,
                                         k=None,
                                         length=args.sample_len2,
                                         temperature=args.temp)
            stream2 = decode(generation2)

            # COMBINE THE SAMPLES
            combined_stream = m21.stream.Stream()
            first_part = m21.stream.Part(id='1')
            first_part.append(stream)
            second_part = m21.stream.Part(id='2')
            second_part.append(stream2)
Example #14
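# Notebook cells: predicts forward and backward outputs with a genre auxiliary input and decodes each with the matching genre vocabulary.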
input_aux_ = train_batch_gen.__next__()[3]
output, output_bwd = model.predict(sess, input_, input_aux_)
print(output.shape)


# In[44]:


replies = []
for ii, ai, oi, oi_bwd in zip(input_.T, input_aux_.T, output, output_bwd):
    genre = ''
    
    if ai[0] > 0:
        genre = 'romance'
        lookup_ = rom_metadata
    else:
        genre = 'horror'
        lookup_ = hor_metadata
        
    q = data_utils.decode(sequence=ii, lookup=lookup_[genre+'idx2w'], separator=' ')
    decoded = data_utils.decode(sequence=oi, lookup=lookup_[genre+'idx2w'], separator=' ').split(' ')
Example #15
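# Builds a cornell_corpus seq2seq model, restores a saved checkpoint, and answers typed lines in a loop.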
                                yseq_len=yseq_len,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                ckpt_path='ckpt/cornell_corpus/',
                                emb_dim=emb_dim,
                                num_layers=3)

# In[8]:

#val_batch_gen = data_utils.rand_batch_gen(validX, validY, 32)
#train_batch_gen = data_utils.rand_batch_gen(trainX, trainY, batch_size)

# In[9]:
#sess = model.restore_last_session()
#sess = model.train(train_batch_gen, val_batch_gen)
with tf.Session() as sess:
    saver = tf.train.Saver()
    saver.restore(sess, "./seq2seq_model.ckpt-44000")
    dic = metadata['w2idx']
    dic2 = metadata['idx2w']
    while True:
        line = input("Please enter your line: ")
        en = data.process_line(line, dic).reshape((25, 1))
        res = model.predict(sess, en)
        print(res)
        en2 = data_utils.decode(res[0], dic2)
        res2 = ""
        for word in en2:
            res2 = res2 + word + " "
        print(res2)
Example #16
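# Restores two models (d_model and i_model) and prints each one's decoded answer for the same test sentence.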
        loss_path='',
        metadata=i_metadata,
        emb_dim=1024,
        num_layers=3
    )

    d_sess = d_model.restore_last_session()
    i_sess = i_model.restore_last_session()

    return d_model, i_model, d_sess, i_sess, d_metadata, i_metadata

if __name__ == '__main__':
    dm, im, ds, i_s, dmt, imt = get_model()

    txt = 'I like to reading'
    d_q = d_data.split_sentence(txt, dmt)
    input_ = d_q.T
    output_ = dm.predict(ds, input_)
    print(output_)
    answer = data_utils.decode(sequence=output_[0], lookup=dmt['idx2w'], separator=' ')
    print(answer)

    txt = 'I like to reading'
    i_q = IE_data.split_sentence(txt, imt)
    input_ = i_q.T
    output_ = im.predict(i_s, input_)
    print(output_)
    answer = data_utils.decode(sequence=output_[0], lookup=imt['idx2w'], separator=' ')
    print(answer)

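Example #17
# Builds an opensubtitle seq2seq model, restores the last checkpoint, and answers typed queries until 'quit' or 'exit'.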
yvocab_size = xvocab_size
emb_dim = 1024

import seq2seq_wrapper

import importlib
importlib.reload(seq2seq_wrapper)

model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                yseq_len=yseq_len,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                ckpt_path='ckpt/opensubtitle/',
                                emb_dim=emb_dim,
                                num_layers=3)

sess = model.restore_last_session()

while True:
    query = input('Input:\t')

    if query == 'quit' or query == 'exit':
        exit(0)

    ids = data_utils.encode(sequence=query, lookup=metadata['w2idx'])
    output = model.predict(sess, ids)
    reply = data_utils.decode(sequence=output[0],
                              lookup=metadata['idx2w'],
                              separator=' ')
    print('Output:' + reply)
Example #18
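# Trains the model or, in evaluation mode, decodes a validation batch and prints input/label/prediction triples that contain no 'UNK' tokens.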
                      model_name=model_name,
                      num_layers=num_layers)

sess = model.restore_last_session()
if train_model:
    sess = model.train(train_batch_gen, val_batch_gen)
else:
    input_, labels_ = val_batch_gen.__next__()
    output = model.predict(sess, input_)

    replies = []
    lbls = list()
    preds = list()

    for ii, il, oi in zip(input_.T, labels_.T, output):
        q = decode(sequence=ii, lookup=idx2block, separator=' ')
        l = decode(sequence=il, lookup=idx2block, separator=' ')
        o = decode(sequence=oi, lookup=idx2block, separator=' ')
        decoded = o.split(' ')

        if decoded.count('UNK') == 0:
            if decoded not in replies:
                if len(l) == len(o):
                    print('i: [{0}]\na: [{1}]\np: [{2}]\n'.format(
                        q, l, ' '.join(decoded)))
                    print("{}".format("".join(["-" for i in range(80)])))
                    lsplits = l.split()
                    osplits = o.split()
                    for lspl in lsplits:
                        match = re.match(r"(\d+)(\w)", lspl)
                        block, iotype = match.group(1), match.group(2)
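Example #19
# Minimal helper: encode the message, predict, and return the decoded first output.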
def respond(msg):
    encoded_msg = data_utils.encode(msg, w2idx, limit['maxq'])
    response = model.predict(sess, encoded_msg)[0]
    return data_utils.decode(response, idx2w)
Example #20
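    # Training loop: saves a checkpoint every epochs//10 iterations, reports validation loss, and logs decoded answers for the PRESET_DATA sentences.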
    def train(self, train_set, valid_set, sess=None):

        # we need to save the model periodically
        with self.g.as_default():
            saver = tf.train.Saver()

            # if no session is given
            if not sess:
                # create a session
                sess = tf.Session()
                # init all variables
                sess.run(tf.global_variables_initializer())

            sys.stdout.write('\n<log> Training started </log>\n')
            # run M epochs
            for i in range(self.epochs):
                try:
                    self.train_batch(sess, train_set)
                    print(i)
                    if i and i % (
                            self.epochs //
                            10) == 0:  # TODO : make this tunable by the user

                        # save model to disk
                        saver.save(sess,
                                   self.ckpt_path + self.model_name + '.ckpt',
                                   global_step=i)
                        # evaluate to get validation loss
                        val_loss, replies = self.eval_batches(
                            sess, valid_set, 16)  # TODO : and this
                        # print stats
                        print(
                            '\nModel saved to disk at iteration #{}'.format(i))
                        print('val   loss : {0:.6f}'.format(val_loss))
                        # print('val res:')
                        # print(replies)
                        sys.stdout.flush()

                        # try preset data and save
                        if self.loss_path != '':
                            with open(
                                    self.loss_path + 'preset' + str(i) +
                                    '.txt', 'w') as f:
                                for sentence in PRESET_DATA:
                                    question = data.split_sentence(
                                        sentence, self.meta_data)
                                    input_ = question.T
                                    output_ = self.predict(sess, input_)
                                    answer = data_utils.decode(
                                        sequence=output_[0],
                                        lookup=self.meta_data['idx2w'],
                                        separator=' ')
                                    f.write(sentence)
                                    f.write('\n')
                                    f.write(answer)
                                    f.write('\n')

                except KeyboardInterrupt:  # this will most definitely happen, so handle it
                    print('Interrupted by user at iteration {}'.format(i))
                    self.session = sess
                    return sess
Example #21
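# Notebook cells: train (or restore) the model, predict on a test batch, and print question/answer pairs containing no 'unk' tokens.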
val_batch_gen = data_utils.rand_batch_gen(validX, validY, 256)
test_batch_gen = data_utils.rand_batch_gen(testX, testY, 256)
train_batch_gen = data_utils.rand_batch_gen(trainX, trainY, batch_size)

# In[9]:

sess = model.train(train_batch_gen, val_batch_gen)

# In[7]:

sess = model.restore_last_session()

# In[10]:

input_ = test_batch_gen.__next__()[0]
output = model.predict(sess, input_)
print(output.shape)

# In[11]:

replies = []
for ii, oi in zip(input_.T, output):
    q = data_utils.decode(sequence=ii, lookup=metadata['idx2w'], separator=' ')
    decoded = data_utils.decode(sequence=oi,
                                lookup=metadata['idx2w'],
                                separator=' ').split(' ')
    if decoded.count('unk') == 0:
        if decoded not in replies:
            print('q : [{0}]; a : [{1}]'.format(q, ' '.join(decoded)))
            replies.append(decoded)
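Example #22
# CMUdict phoneme-to-spelling seq2seq: split the dataset, train, restore, and print decoded spellings for a validation batch.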
# load data from pickle and npy files
data_ctl, idx_words, idx_phonemes = data.load_data(PATH='datasets/cmudict/')
(trainX, trainY), (testX, testY), (validX, validY) = data_utils.split_dataset(idx_phonemes, idx_words)

# parameters
xseq_len = trainX.shape[-1]
yseq_len = trainY.shape[-1]
batch_size = 128
xvocab_size = len(data_ctl['idx2pho'].keys())
yvocab_size = len(data_ctl['idx2alpha'].keys())
emb_dim = 128

importlib.reload(seq2seq_wrapper)

model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len, yseq_len=yseq_len, xvocab_size=xvocab_size, yvocab_size=yvocab_size,
                                ckpt_path='ckpt/cmudict/', emb_dim=emb_dim, num_layers=3)

val_batch_gen = data_utils.rand_batch_gen(validX, validY, 16)
train_batch_gen = data_utils.rand_batch_gen(trainX, trainY, 128)

model.train(train_batch_gen, val_batch_gen)

sess = model.restore_last_session()

output = model.predict(sess, val_batch_gen.__next__()[0])
print(output.shape)

for oi in output:
    print(data_utils.decode(sequence=oi, lookup=data_ctl['idx2alpha'], separator=''))
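Example #23
# Loads the newest melody checkpoint, generates conditional bass and melody samples, and decodes each into parts of a combined music21 stream.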
        sort_order = np.argsort(step_numbers)
        num_steps = step_numbers[sort_order[-1]]

        # gets the checkpoint path with the greatest number of steps
        last_checkpoint_path = checkpoints[sort_order[-1]]
        full_path = last_checkpoint_path

    print("Loading MELODY model model weights from {}...".format(full_path))
    melody_lstm.load_state_dict(torch.load(full_path, map_location=device))

    for i in tqdm.tqdm(range(args.num_samples)):

        bass_out, melody_out = melody_lstm.generate(bassline_model=bassline_lstm, k=args.k, bass_temp=args.bass_temp,
                             bass_length=args.bass_sample_len, melody_temp=args.melody_temp, melody_length=args.melody_sample_len)

        bass_stream = decode(bass_out)
        melody_stream = decode(melody_out)

        combined_stream = m21.stream.Stream()
        bass_part = m21.stream.Part(id='bass')
        bass_part.append(bass_stream)
        melody_part = m21.stream.Part(id='melody')
        melody_part.append(melody_stream)

        combined_stream.insert(0, melody_part)
        combined_stream.insert(0, bass_part)

        # melody_stream.mergeElements(bass_stream)
        # melody_stream.show('midi')

        sample_dir = './generated_samples/conditional'