def error_classify(epoch, eval_type='valid', final_eval=False):
    nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop
    global n_earlystopping

    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    s1 = X_train_passage if eval_type == 'valid' else X_val_passage
    s2 = X_train_query if eval_type == 'valid' else X_val_query
    target = y_train if eval_type == 'valid' else y_val
    query_id = query_id_train if eval_type == 'valid' else query_id_val

    f_error = get_data_path() + eval_type + 'error.csv'
    f_right = get_data_path() + eval_type + 'right.csv'
    label_cnt = {}
    with open(f_error, 'wb') as wf1, open(f_right, 'wb') as wf2:
        for i in range(0, len(s1), params['bsize']):
            # prepare batch
            s1_batch, s1_len = get_batch(s1[i:i + params['bsize']],
                                         params['word_emb_dim'])
            s2_batch, s2_len = get_batch(s2[i:i + params['bsize']],
                                         params['word_emb_dim'])
            s1_batch, s2_batch = Variable(s1_batch), Variable(s2_batch)
            tgt_batch = Variable(
                torch.LongTensor(target[i:i + params['bsize']]))
            query_id_batch = query_id[i:i + params['bsize']]

            # model forward
            output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
            pred = output.data.max(1)[1]
            for q, t, p, _s1, _s2 in zip(query_id_batch, tgt_batch, pred,
                                         s1[i:i + params['bsize']],
                                         s2[i:i + params['bsize']]):
                if int(t) in label_cnt:
                    label_cnt[int(t)] += 1
                else:
                    label_cnt[int(t)] = 1
                if int(t) == int(p):
                    wf2.write('{}\t{}\t{}\t{}\t{}\n'.format(
                        q, t, p, ' '.join(_s1), ' '.join(_s2)))
                else:
                    wf1.write('{}\t{}\t{}\t{}\t{}\n'.format(
                        q, t, p, ' '.join(_s1), ' '.join(_s2)))
            correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()
    print(label_cnt)

    eval_acc = round(100 * correct / len(s1), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2}'.format(
            epoch, eval_type, eval_acc))

def evaluate(epoch, eval_type='dev', final_eval=False):
    nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop

    if eval_type == 'dev' and not final_eval:
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    s1 = dev['s1'] if eval_type == 'dev' else test['s1']
    s2 = dev['s2'] if eval_type == 'dev' else test['s2']
    target = dev['label'] if eval_type == 'dev' else test['label']

    for i in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_vec,
                                     params.word_emb_dim)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_vec,
                                     params.word_emb_dim)
        s1_batch, s2_batch = Variable(s1_batch.to(DEVICE)), Variable(
            s2_batch.to(DEVICE))
        tgt_batch = Variable(torch.LongTensor(
            target[i:i + params.batch_size])).to(DEVICE)

        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()

    # save model
    eval_acc = 100 * float(correct) / len(s1)
    if final_eval:
        print('finalgrep : accuracy {0} : {1:4.2f}%'.format(eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2:4.2f}'.
              format(epoch, eval_type, eval_acc))

    if eval_type == 'dev' and epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            if not os.path.exists(params.outputdir):
                os.makedirs(params.outputdir)
            torch.save(nli_net.state_dict(),
                       os.path.join(params.outputdir, params.outputmodelname))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0]['lr'] = \
                    optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'.format(
                    params.lrshrink, optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy)
                stop_training = adam_stop
                adam_stop = True
    assert isinstance(eval_acc, float)
    return eval_acc

def inference(infer_data):
    if torch.cuda.is_available():
        nli_net.cuda()
    nli_net.eval()
    prob_res_1 = []
    s1 = infer_data['s1']
    s2 = infer_data['s2']

    for i in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size].tolist(),
                                     word_vec)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size].tolist(),
                                     word_vec)
        if torch.cuda.is_available():
            s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(
                s2_batch.cuda())
        else:
            s1_batch, s2_batch = Variable(s1_batch), Variable(s2_batch)

        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))

        # get softmax probability
        sm = nn.Softmax(dim=1)
        res = sm(output).data[:, 1]
        prob_res_1 += res.tolist()

    return prob_res_1

def evaluate(args, nli_net, test_nlipath, n_classes, word_vec, split="test"):
    test = get_nli_split(test_nlipath, n_classes, split)
    # use `key` here so the `split` argument is not shadowed
    for key in ['s1', 's2']:
        test[key] = np.array(
            [['<s>'] + [word for word in sent.split() if word in word_vec] +
             ['</s>'] for sent in test[key]])

    # Evaluate on the test set.
    correct = 0.
    s1 = test['s1']
    s2 = test['s2']
    target = test['labels']
    outputs = []

    for i in range(0, len(s1), args.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + args.batch_size], word_vec,
                                     args.word_emb_dim)
        s2_batch, s2_len = get_batch(s2[i:i + args.batch_size], word_vec,
                                     args.word_emb_dim)
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(
            s2_batch.cuda())
        tgt_batch = Variable(torch.LongTensor(
            target[i:i + args.batch_size])).cuda()

        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
        outputs.extend(output.data.max(1)[1].cpu().numpy())
        correct += compute_score_with_logits(output, tgt_batch, n_classes)

    eval_acc = round(100 * correct.item() / len(s1), 2)
    print('evaluation accuracy is {0}'.format(eval_acc))
    return eval_acc, outputs

def evaluate(m, source, tc=False, td=False):
    """Compute perplexity on document completion.
    """
    m.eval()
    with torch.no_grad():
        if source == 'val':
            indices = torch.split(torch.tensor(range(args.num_docs_valid)),
                                  args.eval_batch_size)
            tokens = valid_tokens
            counts = valid_counts
        else:
            indices = torch.split(torch.tensor(range(args.num_docs_test)),
                                  args.eval_batch_size)
            tokens = test_tokens
            counts = test_counts

        ## get \beta here
        beta = m.get_beta()

        ### do dc and tc here
        acc_loss = 0
        cnt = 0
        indices_1 = torch.split(torch.tensor(range(args.num_docs_test_1)),
                                args.eval_batch_size)
        for idx, ind in enumerate(indices_1):
            ## get theta from first half of docs
            data_batch_1 = data.get_batch(test_1_tokens, test_1_counts, ind,
                                          args.vocab_size, device)
            sums_1 = data_batch_1.sum(1).unsqueeze(1)
            if args.bow_norm:
                normalized_data_batch_1 = data_batch_1 / sums_1
            else:
                normalized_data_batch_1 = data_batch_1
            theta, _ = m.get_theta(normalized_data_batch_1)

            ## get prediction loss using second half
            data_batch_2 = data.get_batch(test_2_tokens, test_2_counts, ind,
                                          args.vocab_size, device)
            sums_2 = data_batch_2.sum(1).unsqueeze(1)
            res = torch.mm(theta, beta)
            preds = torch.log(res)
            recon_loss = -(preds * data_batch_2).sum(1)

            loss = recon_loss / sums_2.squeeze()
            loss = loss.mean().item()
            acc_loss += loss
            cnt += 1

        cur_loss = acc_loss / cnt
        ppl_dc = round(math.exp(cur_loss), 1)
        print('*' * 100)
        print('{} Doc Completion PPL: {}'.format(source.upper(), ppl_dc))
        print('*' * 100)
        if tc or td:
            beta = beta.data.cpu().numpy()
            if tc:
                print('Computing topic coherence...')
                get_topic_coherence(beta, train_tokens, vocab)
            if td:
                print('Computing topic diversity...')
                get_topic_diversity(beta, 25)
        return ppl_dc

def evaluate(epoch, eval_type='valid', final_eval=False):
    fi = open('result_' + eval_type + str(epoch) + '.txt', 'w')
    nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop

    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    s1 = valid['s1'] if eval_type == 'valid' else test['s1']
    s2 = valid['s2'] if eval_type == 'valid' else test['s2']
    target = valid['label'] if eval_type == 'valid' else test['label']

    for i in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_vec)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_vec)
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(
            s2_batch.cuda())
        tgt_batch = Variable(torch.LongTensor(
            target[i:i + params.batch_size])).cuda()

        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
        pred = output.data.max(1)[1]
        for p in pred:
            fi.write(str(p) + '\n')
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()
    fi.close()

    eval_acc = round(100 * correct / len(s1), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2}'.format(
            epoch, eval_type, eval_acc))

    if eval_type == 'valid' and epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            if not os.path.exists(params.outputdir):
                os.makedirs(params.outputdir)
            torch.save(nli_net,
                       os.path.join(params.outputdir, params.outputmodelname))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0]['lr'] = \
                    optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'.format(
                    params.lrshrink, optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            # if 'adam' in params.optimizer:
            #     # early stopping (at 2nd decrease in accuracy)
            #     stop_training = adam_stop
            #     adam_stop = True
            # print('nothing')
    return eval_acc

def evaluate(epoch, eval_type='valid', final_eval=False):
    nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop

    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    s1 = valid['s1'] if eval_type == 'valid' else test['s1']
    s2 = valid['s2'] if eval_type == 'valid' else test['s2']
    target = valid['label'] if eval_type == 'valid' else test['label']

    for i in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_vec)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_vec)
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(
            s2_batch.cuda())
        tgt_batch = Variable(torch.LongTensor(
            target[i:i + params.batch_size])).cuda()

        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()

    # save model
    eval_acc = round(100 * correct / len(s1), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2}'.format(
            epoch, eval_type, eval_acc))

    if eval_type == 'valid' and epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            if not os.path.exists(params.outputdir):
                os.makedirs(params.outputdir)
            torch.save(nli_net.state_dict(),
                       os.path.join(params.outputdir, params.outputmodelname))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0]['lr'] = \
                    optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'.format(
                    params.lrshrink, optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy)
                stop_training = adam_stop
                adam_stop = True
    return eval_acc

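# Hedged sketch, not from any snippet in this collection: the outer training
# loop that the InferSent-style evaluate() variants here are written to drive.
# `trainepoch` appears further below; `params`, `stop_training`, and
# `val_acc_best` are the globals these snippets already share.
epoch = 1
while not stop_training and epoch <= params.n_epochs:
    train_acc = trainepoch(epoch)
    eval_acc = evaluate(epoch, 'valid')
    epoch += 1
print('best validation accuracy: {0}'.format(val_acc_best))
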
def evaluate(epoch, eval_type='valid', final_eval=False):
    classifier.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop

    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    s1 = valid['s1'] if eval_type == 'valid' else test['s1']
    s2 = valid['s2'] if eval_type == 'valid' else test['s2']
    target = valid['label'] if eval_type == 'valid' else test['label']

    for i in range(0, len(s1), params.batch_size):
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_embed,
                                     params.word_emb_dim)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_embed,
                                     params.word_emb_dim)
        s1_batch, s2_batch = Variable(s1_batch), Variable(s2_batch)
        tgt_batch = Variable(torch.LongTensor(
            target[i:i + params.batch_size]))

        output = classifier((s1_batch, s1_len), (s2_batch, s2_len))
        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()

    # save model
    eval_acc = round(100 * correct.item() / len(s1), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2}'.format(
            epoch, eval_type, eval_acc))

    if eval_type == 'valid' and epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            if not os.path.exists(params.outputdir):
                os.makedirs(params.outputdir)
            torch.save(classifier.state_dict(),
                       os.path.join(params.outputdir, params.outputmodelname))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0]['lr'] = \
                    optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'.format(
                    params.lrshrink, optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy)
                stop_training = adam_stop
                adam_stop = True
    return eval_acc

def main(argv=None):
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3],
                                  name="input")
    gt_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1],
                             name="input_gt")
    unet_output = unet.unet(name="UNET", input_data=input_images)
    loss = tf.reduce_mean(
        tf.keras.losses.sparse_categorical_crossentropy(gt_maps, unet_output))
    train_ops = tf.train.AdamOptimizer(
        learning_rate=FLAGS.learning_rate).minimize(loss)
    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())
    init = tf.global_variables_initializer()

    with tf.Session(graph=tf.get_default_graph()) as sess:
        sess.run(init)
        data_generator = data.get_batch(num_workers=FLAGS.number_reasers,
                                        batch_size=FLAGS.batch_size)
        for step in range(FLAGS.max_step):
            input_list = next(data_generator)
            step_loss, _ = sess.run([loss, train_ops],
                                    feed_dict={
                                        input_images: input_list[0],
                                        gt_maps: input_list[1]
                                    })
            print("step {}, model loss {}".format(step, step_loss))
            saver.save(sess=sess,
                       save_path=FLAGS.checkpoint_path + str(step) + ".ckpt",
                       global_step=step)

def __init__(self, EPOCH=100000, batch_size=32, embedding_size=300,
             num_units=300):
    """
    https://arxiv.org/abs/1706.04223
    With discrete outputs, gradients do not flow through the network
    parameters, so this paper demonstrates a method that maps discrete
    features to a continuous latent space with an autoencoder and a WGAN.
    :param EPOCH: Integer. number of epochs
    :param batch_size: Integer. batch size
    :param embedding_size: Integer. dimension of word embeddings
    :param num_units: Integer. number of RNN units
    """
    self.initializer = tf.truncated_normal_initializer(stddev=0.02)
    self.AE_learning_rate = 1.
    self.critic_lr = 0.00001
    self.gen_lr = 0.00005
    self.EPOCH = EPOCH
    self.batch_size = batch_size
    self.num_units = num_units
    self.data, self.sequence_length, self.dict = get_batch()
    self.num_batch = len(self.data) // self.batch_size
    self.reverse_dict = {v: k for k, v in self.dict.iteritems()}
    self.voca_size = len(self.dict)
    self.max_len = 30
    self.embedding_size = embedding_size
    with tf.variable_scope("embedding"):
        self.embedding = tf.get_variable(
            "embedding_table", [self.voca_size, self.embedding_size])
    self.build_graph()

def main():
    X = tf.placeholder('float', [None, N_NUMBERS, N_BITS])
    y = tf.placeholder('float', [None, N_NUMBERS, N_BITS])
    y_hat = build_model(X)
    loss = tf.losses.mean_squared_error(y, y_hat)
    optimizer = tf.train.AdamOptimizer()
    train_operation = optimizer.minimize(loss)
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        for epoch in xrange(N_EPOCHS):
            x_batch, y_batch = get_batch()
            _, pred, loss_val = sess.run([train_operation, y_hat, loss],
                                         feed_dict={X: x_batch, y: y_batch})
            if (epoch + 1) % 2000 == 0:
                print('epoch: {}, loss: {}'.format(epoch + 1, loss_val))
                print('Input: ')
                print(x_batch[0])
                print()
                print('Ground truth:')
                print(y_batch[0])
                print()
                print('Prediction:')
                print(convert_prediction(pred[0]))
                print()
                print()

def chat(line):
    """ In test mode, we don't need to create the backward path. """
    global enc_vocab, inv_dec_vocab, model, saver, sess, output_file
    line = line.decode().lower()
    start = time.time()
    if len(line) > 0 and line[-1] == '\n':
        line = line[:-1]
    if line == '':
        return 'Hmm...'
    output_file.write('HUMAN: ' + line + '\n')
    # Get token-ids for the input sentence.
    token_ids = data.sentence2id(enc_vocab, str(line))
    if len(token_ids) > max_length:
        return 'TL;DR'
    # Which bucket does it belong to?
    bucket_id = _find_right_bucket(len(token_ids))
    # Get a 1-element batch to feed the sentence to the model.
    encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
        [(token_ids, [])], bucket_id, batch_size=1)
    # Get output logits for the sentence.
    _, _, output_logits = run_step(sess, model, encoder_inputs,
                                   decoder_inputs, decoder_masks,
                                   bucket_id, True)
    response = _construct_response(output_logits, inv_dec_vocab)
    output_file.write('BOT: ' + response + '\n')
    print(time.time() - start)
    return response

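# Several chat/train snippets here call a _find_right_bucket helper that is
# not included in this collection. A minimal sketch, assuming config.BUCKETS
# is a list of (encoder_max_len, decoder_max_len) pairs sorted by increasing
# encoder length:
def _find_right_bucket(length):
    """Return the smallest bucket id whose encoder size fits `length`."""
    return min([b for b in range(len(config.BUCKETS))
                if config.BUCKETS[b][0] >= length])
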
def evaluate(model, criterion, corpus, data_source, eval_batch_size):
    model.eval()
    total_loss = 0.
    total_words = 0.
    total_entropy = 0.
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args.bptt)
            output, hidden = model(data, hidden, mean_field_inference=True)
            output_flat = output.view(-1, ntokens)
            num_words = output_flat.shape[0]
            pred_proba = nn.functional.softmax(output_flat, dim=-1)
            loss = len(data) * criterion(output_flat, targets).item() / num_words
            entropy = -(pred_proba * pred_proba.log()).sum(1).sum(0).item()
            total_words += num_words
            total_entropy += entropy
            total_loss += loss
            hidden = repackage_hidden(hidden)
    return total_loss / (len(data_source) - 1), total_entropy / total_words

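# Hedged usage sketch for the language-model evaluate() above. `val_data` and
# the batch size are assumptions standing in for the surrounding script;
# `math` is assumed imported. Perplexity is the exp of the average loss.
val_loss, val_entropy = evaluate(model, criterion, corpus, val_data,
                                 eval_batch_size=10)
print('valid loss {:5.2f} | valid ppl {:8.2f} | mean entropy {:5.2f}'.format(
    val_loss, math.exp(val_loss), val_entropy))
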
def chat(): """ in test mode, we don't to create the backward path """ test_data_buckets, train_data_buckets, train_buckets_scale, metadata = _get_buckets( ) model = ChatBotModel(True, batch_size=1) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) _check_restore_parameters(sess, saver) bucket_id = _get_random_bucket(train_buckets_scale) encoder_inputs, decoder_inputs, decoder_masks = data.get_batch( test_data_buckets[bucket_id], bucket_id, batch_size=20) # Get output logits for the sentence. _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) for logit in output_logits: response = _construct_response(logit, metadata) print(response)
def train_epoch(self):
    self.model.train()  # Turn on the train mode
    total_loss = 0.
    start_time = time.time()
    # ntokens = len(TEXT.vocab.stoi)
    for batch, i in enumerate(range(0, self.train_data.size(0) - 1,
                                    self.bptt)):
        data, targets = get_batch(self.train_data, i)
        self.optimizer.zero_grad()
        output = self.model(data)
        loss = self.criterion(output.view(-1, self.ntokens), targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
        self.optimizer.step()

        total_loss += loss.item()
        log_interval = 200
        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print('{:5d}/{:5d} batches | '
                  'lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      batch, len(self.train_data) // self.bptt,
                      self.scheduler.get_lr()[0],
                      elapsed * 1000 / log_interval,
                      cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

def train():
    model = ChatBotModel(False, config.BATCH_SIZE)
    model.build_graph()
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        print('Running Session')
        sess.run(init)
        _check_restore_parameters(sess, saver)
        iteration = model.global_step.eval()
        total_loss = 0
        while True:
            skip_step = _get_skip_step(iteration)
            bucket_id = _get_random_bucket(train_buckets_scale)
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
                data_buckets[bucket_id], bucket_id,
                batch_size=config.BATCH_SIZE)
            start = time.time()
            _, step_loss, _ = run_step(sess, model, encoder_inputs,
                                       decoder_inputs, decoder_masks,
                                       bucket_id, False)
            total_loss += step_loss
            iteration += 1

            if iteration % skip_step == 0:
                print('Iter {}: loss {}, time {}'.format(
                    iteration, total_loss / skip_step, time.time() - start))
                start = time.time()
                total_loss = 0
                saver.save(sess, os.path.join(config.CPT_PATH, 'chatbot'),
                           global_step=model.global_step)
                if iteration % (10 * skip_step) == 0:
                    _eval_test_set(sess, model, test_buckets)
                    start = time.time()
                sys.stdout.flush()

def evaluate(epoch, eval_type='valid', correct_count=correct_count,
             labels_count=labels_count):
    model.eval()
    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    sent1 = dev_fr['sent'] if eval_type == 'valid' else None
    sent2 = dev_ep['sent'] if eval_type == 'valid' else None
    target1 = dev_fr['label'] if eval_type == 'valid' else None
    target2 = dev_ep['label'] if eval_type == 'valid' else None

    eval_acc = []
    for sent, target, diag_lens in zip([sent1, sent2], [target1, target2],
                                       [dev_fr_lens, dev_ep_lens]):
        correct = 0
        stidx = 0
        for batch_size in diag_lens:
            sent_batch, len_batch = get_batch(sent[stidx:stidx + batch_size],
                                              word_vec,
                                              embed_size=params.embed_size)
            sent_batch = Variable(sent_batch.cuda()) if params.use_cuda \
                else Variable(sent_batch.cpu())
            label_batch = Variable(torch.LongTensor(
                target[stidx:stidx + batch_size])).cuda() if params.use_cuda \
                else Variable(torch.LongTensor(
                    target[stidx:stidx + batch_size])).cpu()
            stidx += batch_size
            output = model((sent_batch, len_batch))
            pred = output.data.max(1)[1]
            # counting
            correct_count, labels_count = compute_acc(
                pred=pred.long(), label=label_batch.data.long(),
                correct_count=correct_count, labels_count=labels_count)
            # correct += pred.long().eq(label_batch.data.long()).cuda().sum() \
            #     if params.use_cuda \
            #     else pred.long().eq(label_batch.data.long()).cpu().sum()

        correct = list(correct_count.values())
        total = list(labels_count.values())
        correct = np.array(correct)
        total = np.array(total)
        acc = np.round(100 * correct / total, 2)
        eval_wa = round(100 * sum(correct[:4]) / sum(total[:4]), 1)
        eval_uwa = round(sum(acc[:4]) / 4, 1)
        eval_acc.append([eval_wa, eval_uwa])
        print("accuracy for each category\n{}".format(acc))
        print("wa: {}".format(eval_wa))
        print("uwa: {}".format(eval_uwa))
    return eval_acc

def chat(): """ in test mode, we don't to create the backward path """ _, enc_vocab = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.enc')) inv_dec_vocab, _ = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.dec')) model = ChatBotModel(True, batch_size=1) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) _check_restore_parameters(sess, saver) output_file = open( os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+') # Decode from standard input. max_length = config.BUCKETS[-1][0] print( 'Welcome to TensorBro. Say something. Enter to exit. Max length is', max_length) # store a line history for 3 lines conversation_history = [] line_history = ['', '', ''] while True: line = _get_user_input() if len(line) > 0 and line[-1] == '\n': line = line[:-1] # update the line_history line_history.append(line) line_history.pop(0) # create line from the line history line = ''.join(line_history) if line == '': break output_file.write('HUMAN ++++ ' + line + '\n') # Get token-ids for the input sentence. token_ids = data.sentence2id(enc_vocab, str(line)) if (len(token_ids) > max_length): print('Max length I can handle is:', max_length) line = _get_user_input() continue # Which bucket does it belong to? bucket_id = _find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data.get_batch( [(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = _construct_response(output_logits, inv_dec_vocab) print(response) conversation_history.append((line, response)) output_file.write('BOT ++++ ' + response + '\n') output_file.write('=============================================\n') output_file.close()
def test(model, testFile):
    batch_size = 1000
    testGen = csv_generator(open(testFile))
    testGen, x_batch, y_batch = get_batch(batch_size, testGen, testFile)
    predicted = model.forward(x_batch)
    accuracy = score_accuracy(predicted, y_batch)
    print "Test Accuracy (batch_size=", batch_size, "):", accuracy

def train(): """ Train the bot """ test_buckets, data_buckets, train_buckets_scale = _get_buckets() # 버킷별로 샘플을 채워서 읽어온다!! # in train mode, we need to create the backward path, so forwrad_only is False model = ChatBotModel(False, config.BATCH_SIZE) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: print('Running session') sess.run(tf.global_variables_initializer()) _check_restore_parameters(sess, saver) # 세션을 리스토어 할 수 있으면 해오고 iteration = model.global_step.eval() # global step을 불러온다. total_loss = 0 while True: skip_step = _get_skip_step( iteration) # skip_step을 얻어온다. 100보다 적으면 30, 아니면 100 bucket_id = _get_random_bucket( train_buckets_scale) # 버킷 아이디를 랜덤으로 고른다 # 선택된 버킷으로부터 batch_size만큼의 배치를 받아온다 encoder_inputs, decoder_inputs, decoder_masks = data.get_batch( data_buckets[bucket_id], bucket_id, batch_size=config.BATCH_SIZE) start = time.time() # step run!! forward_only = False _, step_loss, _ = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, False) total_loss += step_loss iteration += 1 # skip_step마다 누적해두었던 loss와 걸린 시간을 보고한다 # 그리고 다시 초기화 # 세션 저장 if iteration % skip_step == 0: print('Iter {}: loss {}, time {}'.format( iteration, total_loss / skip_step, time.time() - start)) start = time.time() total_loss = 0 saver.save(sess, os.path.join(config.CPT_PATH, 'chatbot'), global_step=model.global_step) # skip_step의 10번을 돌았으면 test 버킷에서 버킷 id 별로 테스트를 한번씩 돈다 if iteration % (10 * skip_step) == 0: # Run evals on development set and print their loss _eval_test_set(sess, model, test_buckets) start = time.time() sys.stdout.flush()
def fill_feed_dict(data, img1_pl, img2_pl, flo_pl):
    img1_feed, img2_feed, flo_feed = data.get_batch(batch_size)
    # test
    # cv2.imshow('img1', img1_feed[3].astype(np.uint8))
    # cv2.imshow('img2', img2_feed[1].astype(np.uint8))
    # cv2.waitKey()
    feed_dict = {img1_pl: img1_feed, img2_pl: img2_feed, flo_pl: flo_feed}
    return feed_dict

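# Hedged usage sketch for fill_feed_dict() above: `data_set`, the placeholder
# tensors, `loss`, and `sess` are assumptions standing in for the surrounding
# training script.
feed_dict = fill_feed_dict(data_set, img1_pl, img2_pl, flo_pl)
loss_value = sess.run(loss, feed_dict=feed_dict)
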
def chat(use_attention, ckpt_path="./ckp-dir/checkpoints"):
    """ In test mode, we don't need to create the backward path. """
    _, enc_vocab = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.enc'))
    inv_dec_vocab, _ = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.dec'))
    if not use_attention:
        model = BasicChatBotModel(batch_size=1)
    else:
        model = AttentionChatBotModel(batch_size=1)
    model.build()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver, ckpt_path)
        output_file = open(
            os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+')
        # Decode from standard input.
        max_length = config.BUCKETS[-1][0]
        print('Welcome to TensorBro. Say something. Enter to exit. '
              'Max length is', max_length)
        while True:
            line = _get_user_input()
            if len(line) > 0 and line[-1] == b'\n':
                line = line[:-1]
            if line == b'':
                break
            output_file.write('HUMAN ++++ ' + line.decode('ascii') + '\n')
            # Get token-ids for the input sentence.
            token_ids = data.sentence2id(enc_vocab, line)
            if len(token_ids) > max_length:
                print('Max length I can handle is:', max_length)
                line = _get_user_input()
                continue
            # Which bucket does it belong to?
            # bucket_id = _find_right_bucket(len(token_ids))
            bucket_id = -1
            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
                [(token_ids, [])], bucket_id, batch_size=1)
            # Get output logits for the sentence.
            decoder_lens = np.sum(np.transpose(np.array(decoder_masks),
                                               (1, 0)), axis=1)
            output_logits = sess.run(
                [model.final_outputs],
                feed_dict={model.encoder_inputs_tensor: encoder_inputs,
                           model.decoder_inputs_tensor: decoder_inputs,
                           model.decoder_length_tensor: decoder_lens,
                           model.bucket_length: config.BUCKETS[bucket_id]})
            response = _construct_response(output_logits, inv_dec_vocab)
            print(response)
            output_file.write('BOT ++++ ' + response + '\n')
        output_file.write('=============================================\n')
        output_file.close()

def train(epoch):
    """Train DETM on data for one epoch.
    """
    model.train()
    acc_loss = 0
    acc_nll = 0
    acc_kl_theta_loss = 0
    acc_kl_eta_loss = 0
    acc_kl_alpha_loss = 0
    cnt = 0
    indices = torch.randperm(args.num_docs_train)
    indices = torch.split(indices, args.batch_size)
    for idx, ind in enumerate(indices):
        optimizer.zero_grad()
        model.zero_grad()
        data_batch, times_batch = data.get_batch(
            train_tokens, train_counts, ind, args.vocab_size, args.emb_size,
            temporal=True, times=train_times)
        sums = data_batch.sum(1).unsqueeze(1)
        if args.bow_norm:
            normalized_data_batch = data_batch / sums
        else:
            normalized_data_batch = data_batch
        loss, nll, kl_alpha, kl_eta, kl_theta = model(
            data_batch, normalized_data_batch, times_batch, train_rnn_inp,
            args.num_docs_train)
        loss.backward()
        if args.clip > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        acc_loss += torch.sum(loss).item()
        acc_nll += torch.sum(nll).item()
        acc_kl_theta_loss += torch.sum(kl_theta).item()
        acc_kl_eta_loss += torch.sum(kl_eta).item()
        acc_kl_alpha_loss += torch.sum(kl_alpha).item()
        cnt += 1

        if idx % args.log_interval == 0 and idx > 0:
            cur_loss = round(acc_loss / cnt, 2)
            cur_nll = round(acc_nll / cnt, 2)
            cur_kl_theta = round(acc_kl_theta_loss / cnt, 2)
            cur_kl_eta = round(acc_kl_eta_loss / cnt, 2)
            cur_kl_alpha = round(acc_kl_alpha_loss / cnt, 2)
            lr = optimizer.param_groups[0]['lr']
            print('Epoch: {} .. batch: {}/{} .. LR: {} .. KL_theta: {} .. '
                  'KL_eta: {} .. KL_alpha: {} .. Rec_loss: {} .. NELBO: {}'.format(
                      epoch, idx, len(indices), lr, cur_kl_theta, cur_kl_eta,
                      cur_kl_alpha, cur_nll, cur_loss))

    cur_loss = round(acc_loss / cnt, 2)
    cur_nll = round(acc_nll / cnt, 2)
    cur_kl_theta = round(acc_kl_theta_loss / cnt, 2)
    cur_kl_eta = round(acc_kl_eta_loss / cnt, 2)
    cur_kl_alpha = round(acc_kl_alpha_loss / cnt, 2)
    lr = optimizer.param_groups[0]['lr']
    print('*' * 100)
    print('Epoch----->{} .. LR: {} .. KL_theta: {} .. KL_eta: {} .. '
          'KL_alpha: {} .. Rec_loss: {} .. NELBO: {}'.format(
              epoch, lr, cur_kl_theta, cur_kl_eta, cur_kl_alpha, cur_nll,
              cur_loss))
    print('*' * 100)

def chat(): """ in test mode, we don't to create the backward path """ # index2word , word2index _, enc_vocab = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.enc')) inv_dec_vocab, _ = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.dec')) model = ChatBotModel(True, batch_size=1) # 배치 사이즈는 하나 (forward only) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) _check_restore_parameters(sess, saver) output_file = open( os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+') # Decode from standard input. max_length = config.BUCKETS[-1][0] # 유저가 타이핑할 수 있는 최대 길이는 버킷의 최대길이 print( 'Welcome to TensorBro. Say something. Enter to exit. Max length is', max_length) while True: line = _get_user_input() # 시스템 인풋을 받아온다 if len(line) > 0 and line[-1] == '\n': line = line[:-1] if line == '': # 아무것도 타이핑 안하면 브레이크 break output_file.write('HUMAN ++++ ' + line + '\n') # 아웃풋 파일에 한줄씩 기록 # Get token-ids for the input sentence. token_ids = data.sentence2id(enc_vocab, str(line)) # 문장 하나를 index로 if (len(token_ids) > max_length): # 만약 최대 길이보다 더 받았으면 다시 타이핑 받게 한다 print('Max length I can handle is:', max_length) line = _get_user_input() continue # Which bucket does it belong to? bucket_id = _find_right_bucket( len(token_ids)) # 입력 시퀀스의 길이에 맞는 버킷(최소) id 골라온다 # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data.get_batch( [(token_ids, [])], # 디코더 인풋은 x 전부 패딩되서 들어가는듯 bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) # forward_only == True response = _construct_response( output_logits, inv_dec_vocab) # id2word로 복구해서 다시 리스폰스로 print(response) output_file.write('BOT ++++ ' + response + '\n') output_file.write('=============================================\n') output_file.close()
def get_cluster_quality():
    """Returns cluster quality.
    """
    print('Getting vocabulary ...')
    data_file = os.path.join(args.data_path, 'min_df_{}'.format(args.min_df))
    vocab, cluster_valid = data.get_all_data(data_file, temporal=True)
    vocab_size = len(vocab)

    topics_distributions = []

    # get data
    print('Getting full data ...')
    tokens = train['tokens']
    counts = train['counts']
    times = train['times']
    num_times = len(np.unique(train_times))
    num_docs = len(tokens)
    rnn_inp = data.get_rnn_input(tokens, counts, times, num_times,
                                 vocab_size, num_docs)

    model.eval()
    with torch.no_grad():
        indices = torch.split(torch.tensor(range(num_docs)),
                              args.eval_batch_size)
        eta = get_eta(rnn_inp)

        acc_loss = 0
        cnt = 0
        for idx, ind in enumerate(indices):
            data_batch, times_batch = data.get_batch(
                tokens, counts, ind, vocab_size, args.emb_size,
                temporal=True, times=times)
            sums = data_batch.sum(1).unsqueeze(1)
            if args.bow_norm:
                normalized_data_batch = data_batch / sums
            else:
                normalized_data_batch = data_batch
            eta_td = eta[times_batch.type('torch.LongTensor')]
            theta = get_theta(eta_td, normalized_data_batch)

        print('\n')
        print('Get topic coherence...')
        print('train_tokens: ', train_tokens[0])
        TC_all = []
        cnt_all = []
        for tt in range(args.num_times):
            tc, cnt = get_topic_coherence(beta[:, tt, :].detach().numpy(),
                                          train_tokens, vocab)
            TC_all.append(tc)
            cnt_all.append(cnt)
        print('TC_all: ', TC_all)
        TC_all = torch.tensor(TC_all)
        print('TC_all: ', TC_all.size())
        print('\n')
        print('Get topic quality...')
        quality = tc * diversity
        print('Topic Quality is: {}'.format(quality))
        print('#' * 100)

def inference(infer_data):
    nli_net.eval()
    prob_res_1 = []
    s1 = infer_data['s1']
    s2 = infer_data['s2']

    for i in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size].tolist(),
                                     word_vec)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size].tolist(),
                                     word_vec)
        s1_batch, s2_batch = Variable(s1_batch), Variable(s2_batch)

        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))

        # get softmax probability
        sm = nn.Softmax(dim=1)
        res = sm(output.data)[:, 1]
        prob_res_1 += res.data.tolist()

    return prob_res_1

def write_mixed_video(sess, x, y_, lstm_init_state, y_net, mse, current_state):
    _current_state = np.zeros((LSTM_INFO[1], 2, BATCH_SIZE, LSTM_INFO[0]))
    accumulator = []
    pointer_v = 0
    print('Creating mixed video data')
    if DATASET_NAME == 'moving_mnist':
        get_batch('reset_test', 'moving_mnist', BATCH_SIZE, None, None, None)
    if DATASET_NAME == 'moving_mnist_sin':
        get_batch('reset_test', 'moving_mnist_sin', BATCH_SIZE, None, None,
                  None)

    for i in range(steps_mix):
        # obtain testing batch
        batch_i, batch_t, pointer_v = get_batch('testing', DATASET_NAME,
                                                BATCH_SIZE, pointer_v,
                                                INPUT_WIDTH, INPUT_HEIGHT)
        if pointer_v == 0:
            _current_state = np.zeros(
                (LSTM_INFO[1], 2, BATCH_SIZE, LSTM_INFO[0]))

        # evaluation step
        _y_net, _mse, _current_state = sess.run(
            [y_net, mse, current_state],
            feed_dict={x: batch_i, y_: batch_t,
                       lstm_init_state: _current_state})

        if DATASET_NAME == "moving_mnist" and (
                (i + 1) * BATCH_SIZE) % (2 * MIXED_SEQUENCE_LENGTH) == 0:
            get_batch('reset_test', 'moving_mnist', BATCH_SIZE, None, None,
                      None)
        if DATASET_NAME == "moving_mnist_sin" and (
                (i + 1) * BATCH_SIZE) % (2 * MIXED_SEQUENCE_LENGTH) == 0:
            get_batch('reset_test', 'moving_mnist_sin', BATCH_SIZE, None,
                      None, None)

        # len() works for both the initial list and the numpy accumulator
        if len(accumulator) == 0:
            accumulator = batch_t
        else:
            if floor(i * BATCH_SIZE / MIXED_SEQUENCE_LENGTH) % 2 == 1:
                accumulator = np.append(accumulator, _y_net, axis=0)
            else:
                accumulator = np.append(accumulator, batch_t, axis=0)
        print('step {} of {}, error: {}'.format(i, steps_mix, _mse))

    write_video('mix', accumulator)

def evaluate(epoch, valid, params, word_vec, shared_nli_net, eval_type,
             pred_file):
    shared_nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop

    print('\n{0} : Epoch {1}'.format(eval_type, epoch))
    hypoths = valid['hypoths']
    premises = valid['premises']
    target = valid['lbls']

    out_preds_f = open(pred_file, "w")
    for i in range(0, len(hypoths), params.batch_size):
        # prepare batch
        hypoths_batch, hypoths_len = get_batch(
            hypoths[i:i + params.batch_size], word_vec)
        premises_batch, premises_len = get_batch(
            premises[i:i + params.batch_size], word_vec)
        tgt_batch = None
        if params.gpu_id > -1:
            hypoths_batch = Variable(hypoths_batch.cuda())
            premises_batch = Variable(premises_batch.cuda())
            tgt_batch = Variable(torch.LongTensor(
                target[i:i + params.batch_size])).cuda()
        else:
            hypoths_batch = Variable(hypoths_batch)
            premises_batch = Variable(premises_batch)
            tgt_batch = Variable(torch.LongTensor(
                target[i:i + params.batch_size]))

        # model forward
        output = shared_nli_net((premises_batch, premises_len),
                                (hypoths_batch, hypoths_len))
        all_preds = output.data.max(1)[1]
        for pred in all_preds:
            out_preds_f.write(IDX2LBL[pred.item()] + "\n")
        correct += all_preds.long().eq(tgt_batch.data.long()).cpu().sum()
    out_preds_f.close()

    eval_acc = round(100.0 * correct / len(hypoths), 2)
    print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
    return eval_acc

def main(_):
    if not os.path.exists(video_path):
        os.makedirs(video_path)

    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(MODEL_PATH + MODEL_NAME + '.meta')
        saver.restore(sess, tf.train.latest_checkpoint(MODEL_PATH))
        graph = tf.get_default_graph()
        lstm_init_state = graph.get_tensor_by_name("lstm_state:0")
        x = graph.get_tensor_by_name("input:0")
        y_ = graph.get_tensor_by_name("target:0")
        y_net = tf.get_collection("y_net")[0]
        if ADDITIONAL_OUTPUT:
            mse = tf.get_collection("mse1")[0]
        else:
            mse = tf.get_collection("mse")[0]
        train_step = tf.get_collection("train_step")[0]
        current_state = get_collection_rnn_state("current_state")

        assert FRAMES_NUM % BATCH_SIZE == 0, \
            "Number of frames should be a multiple of the batch_size " \
            "used while training"

        if DATASET_NAME == 'moving_mnist':
            get_batch('reset_test', 'moving_mnist', BATCH_SIZE, None, None,
                      None)
        if DATASET_NAME == 'moving_mnist_sin':
            get_batch('reset_test', 'moving_mnist_sin', BATCH_SIZE, None,
                      None, None)

        # obtain original batch
        batch_i, _, _ = get_batch('testing', DATASET_NAME, FRAMES_NUM, -1,
                                  INPUT_WIDTH, INPUT_HEIGHT)
        write_video('original', batch_i)
        write_parallel_video(sess, x, y_, lstm_init_state, y_net, mse,
                             current_state)
        for n in INTERLACED_N:
            write_interlaced_video(sess, n[0], n[1], x, y_, lstm_init_state,
                                   y_net, mse, current_state)
        if MIXED_VIDEO:
            write_mixed_video(sess, x, y_, lstm_init_state, y_net, mse,
                              current_state)

def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    sent_lens = list(train_data)
    random.shuffle(sent_lens)
    num_seqs = 0
    for sent_len in sent_lens:
        for batch, i in enumerate(
                range(0, train_data[sent_len].size(1) - 1, args.batch_size)):
            # print(model.rnn.cell.w_f.weight)
            data, targets = get_batch(train_data[sent_len], i,
                                      args.batch_size,
                                      prefix_len=sent_len - 1)
            actual_batch_size = data.shape[1]
            if args.unk:
                data = add_unk(data, corpus)
            # For the last batch the batch size may be smaller:
            hidden = model.init_hidden(actual_batch_size)
            model.zero_grad()
            output, hidden = model(data, hidden)
            flat_dim = actual_batch_size * (sent_len - 1)
            loss = criterion(output.view(flat_dim, -1),
                             targets.contiguous().view(flat_dim))
            loss.backward()
            # Haven't seen any benefit but this would go here:
            # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
            optimizer.step()

            total_loss += loss.data
            num_seqs += data.shape[1]
            if batch % args.log_interval == 0 and batch > 0:
                cur_loss = total_loss.item() / args.log_interval
                elapsed = time.time() - start_time
                print('| epoch {:3d} | {:5d}/{:5d} batches | lr (ADAM) | '
                      'ms/batch {:5.2f} | loss {:5.2f} | {:5d} sequences | '
                      'ppl NA'.format(
                          epoch, batch, len(train_data) // args.prefix_len,
                          elapsed * 1000 / args.log_interval, cur_loss,
                          num_seqs))  # , math.exp(cur_loss)))
                model.update_callback(epoch, batch)
                total_loss = 0
                start_time = time.time()
    model.epoch_callback(epoch, args.epochs)
    return num_seqs

def evaluate(cv, epoch, dev, model, optimizer, final_eval=False):
    model.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop

    print('\nVALIDATION : Epoch {0}'.format(epoch))

    for i in range(0, len(dev), params.batch_size):
        # prepare batch
        label_batch, q1_batch, q1_len, q2_batch, q2_len = get_batch(
            questions_dict, dev[i:i + params.batch_size], word_vec,
            random_flip=False, feature=params.feature)
        q1_batch, q2_batch = Variable(q1_batch).cuda(), Variable(
            q2_batch).cuda()
        tgt_batch = Variable(torch.FloatTensor(label_batch)).cuda()

        # model forward
        output = model((q1_batch, q1_len), (q2_batch, q2_len))
        pred = output.data > 0
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum().numpy()

    # save model
    eval_acc = round(100 * correct / len(dev), 4)
    if final_eval:
        print('finalgrep : accuracy: {0}'.format(eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy: {1}'.format(
            epoch, eval_acc))

    if epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            torch.save(model, os.path.join(params.save_dir, "%d.pkl" % (cv)))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0]['lr'] = \
                    optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'.format(
                    params.lrshrink, optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy)
                stop_training = adam_stop
                adam_stop = True
    return eval_acc

def _eval_test_set(sess, model, test_buckets):
    """ Evaluate on the test set. """
    for bucket_id in xrange(len(config.BUCKETS)):
        if len(test_buckets[bucket_id]) == 0:
            print("  Test: empty bucket %d" % (bucket_id))
            continue
        start = time.time()
        encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
            test_buckets[bucket_id], bucket_id, batch_size=config.BATCH_SIZE)
        _, step_loss, _ = run_step(sess, model, encoder_inputs,
                                   decoder_inputs, decoder_masks,
                                   bucket_id, True)
        print('Test bucket {}: loss {}, time {}'.format(
            bucket_id, step_loss, time.time() - start))

def run(self):
    with tf.Graph().as_default():
        x = tf.placeholder(dtype=tf.float32, shape=[None, self.input_dim],
                           name='x')
        x_ = tf.placeholder(dtype=tf.float32, shape=[None, self.input_dim],
                            name='x_')
        encoded, decoded = self.forward(x)
        loss, train_op = self.train(x_, decoded)

        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(self.epoch):
                for j in range(50):
                    b_x, b_x_ = get_batch(self.data_x, self.data_x_,
                                          self.batch_size)
                    sess.run(train_op, feed_dict={x: b_x, x_: b_x_})
                if i % 100 == 0:
                    l = sess.run(loss, feed_dict={x: self.data_x,
                                                  x_: self.data_x_})
                    print('epoch {0}: global loss = {1}'.format(i, l))
            self.hidden_feature = sess.run(encoded,
                                           feed_dict={x: self.data_x_})

def chat(): """ in test mode, we don't to create the backward path """ _, enc_vocab = data.load_vocab(os.path.join(config.PROCESSED_PATH, 'vocab.enc')) inv_dec_vocab, _ = data.load_vocab(os.path.join(config.PROCESSED_PATH, 'vocab.dec')) model = ChatBotModel(True, batch_size=1) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) _check_restore_parameters(sess, saver) output_file = open(os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+') # Decode from standard input. max_length = config.BUCKETS[-1][0] print('Welcome to TensorBro. Say something. Enter to exit. Max length is', max_length) while True: line = _get_user_input() if len(line) > 0 and line[-1] == '\n': line = line[:-1] if line == '': break output_file.write('HUMAN ++++ ' + line + '\n') # Get token-ids for the input sentence. token_ids = data.sentence2id(enc_vocab, str(line)) if (len(token_ids) > max_length): print('Max length I can handle is:', max_length) line = _get_user_input() continue # Which bucket does it belong to? bucket_id = _find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data.get_batch([(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = _construct_response(output_logits, inv_dec_vocab) print(response) output_file.write('BOT ++++ ' + response + '\n') output_file.write('=============================================\n') output_file.close()
def train(): """ Train the bot """ test_buckets, data_buckets, train_buckets_scale = _get_buckets() # in train mode, we need to create the backward path, so forwrad_only is False model = ChatBotModel(False, config.BATCH_SIZE) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: print('Running session') sess.run(tf.global_variables_initializer()) _check_restore_parameters(sess, saver) iteration = model.global_step.eval() total_loss = 0 while True: skip_step = _get_skip_step(iteration) bucket_id = _get_random_bucket(train_buckets_scale) encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(data_buckets[bucket_id], bucket_id, batch_size=config.BATCH_SIZE) start = time.time() _, step_loss, _ = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, False) total_loss += step_loss iteration += 1 if iteration % skip_step == 0: print('Iter {}: loss {}, time {}'.format(iteration, total_loss/skip_step, time.time() - start)) start = time.time() total_loss = 0 saver.save(sess, os.path.join(config.CPT_PATH, 'chatbot'), global_step=model.global_step) if iteration % (10 * skip_step) == 0: # Run evals on development set and print their loss _eval_test_set(sess, model, test_buckets) start = time.time() sys.stdout.flush()
def trainepoch(epoch):
    print('\nTRAINING : Epoch ' + str(epoch))
    nli_net.train()
    all_costs = []
    logs = []
    words_count = 0

    last_time = time.time()
    correct = 0.
    # shuffle the data
    permutation = np.random.permutation(len(train['s1']))
    s1 = train['s1'][permutation]
    s2 = train['s2'][permutation]
    target = train['label'][permutation]

    optimizer.param_groups[0]['lr'] = (
        optimizer.param_groups[0]['lr'] * params.decay
        if epoch > 1 and 'sgd' in params.optimizer
        else optimizer.param_groups[0]['lr'])
    print('Learning rate : {0}'.format(optimizer.param_groups[0]['lr']))

    for stidx in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[stidx:stidx + params.batch_size],
                                     word_vec)
        s2_batch, s2_len = get_batch(s2[stidx:stidx + params.batch_size],
                                     word_vec)
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(
            s2_batch.cuda())
        tgt_batch = Variable(torch.LongTensor(
            target[stidx:stidx + params.batch_size])).cuda()
        k = s1_batch.size(1)  # actual batch size

        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()
        assert len(pred) == len(s1[stidx:stidx + params.batch_size])

        # loss
        loss = loss_fn(output, tgt_batch)
        all_costs.append(loss.data[0])
        words_count += (s1_batch.nelement() +
                        s2_batch.nelement()) / params.word_emb_dim

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient clipping (off by default)
        shrink_factor = 1
        total_norm = 0
        for p in nli_net.parameters():
            if p.requires_grad:
                p.grad.data.div_(k)  # divide by the actual batch size
                total_norm += p.grad.data.norm() ** 2
        total_norm = np.sqrt(total_norm)
        if total_norm > params.max_norm:
            shrink_factor = params.max_norm / total_norm
        current_lr = optimizer.param_groups[0]['lr']  # current lr (no external "lr", for adam)
        optimizer.param_groups[0]['lr'] = current_lr * shrink_factor  # just for update

        # optimizer step
        optimizer.step()
        optimizer.param_groups[0]['lr'] = current_lr

        if len(all_costs) == 100:
            logs.append('{0} ; loss {1} ; sentence/s {2} ; words/s {3} ; '
                        'accuracy train : {4}'.format(
                            stidx, round(np.mean(all_costs), 2),
                            int(len(all_costs) * params.batch_size /
                                (time.time() - last_time)),
                            int(words_count * 1.0 / (time.time() - last_time)),
                            round(100. * correct / (stidx + k), 2)))
            print(logs[-1])
            last_time = time.time()
            words_count = 0
            all_costs = []
    train_acc = round(100 * correct / len(s1), 2)
    print('results : epoch {0} ; mean accuracy train : {1}'.format(
        epoch, train_acc))
    return train_acc

def main():
    params = Params()
    model = RegressionModel(params)

    # Use functions of the model to build the graph
    out, states = model.inference(model.data_placeholder)
    loss = model.loss(out, model.labels_placeholder)
    train_op = model.train(loss, params.step_size)

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Run the Op to initialize the variables.
    init = tf.initialize_all_variables()
    sess.run(init)
    saver = tf.train.Saver(tf.all_variables())

    for i in range(params.train_steps + 1):
        data, labels = get_batch(params.batch_size, params.sequence_length,
                                 params.input_channels)
        feed_dict = {
            model.data_placeholder: data,
            model.labels_placeholder: labels
        }
        # Run one step of the model. The return values are the activations
        # from the `train_op` (which is discarded) and the `loss` Op. To
        # inspect the values of your Ops or variables, you may include them
        # in the list passed to sess.run() and the value tensors will be
        # returned in the tuple from the call.
        _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
        if i % params.print_every == 0:
            print i, loss_value
        if i % params.save_every == 0:
            name = "model_{0}.ckpt".format(params.get_id)
            checkpoint_path = os.path.join('./save', name)
            # TODO: If we restore a model for further training, we should
            # add the number of training steps it had completed to our
            # global step here
            saver.save(sess, checkpoint_path, global_step=i)
            print "model saved to {0}-{1}".format(checkpoint_path, i)
            with open('./save/{0}.model_param'.format(params.get_id), 'w') as f:
                pickle.dump(params, f,
                            protocol=2)  # pickle.HIGHEST_PROTOCOL as of writing

    data, labels = get_batch(params.batch_size, params.sequence_length,
                             params.input_channels)
    feed_dict = {
        model.data_placeholder: data,
        model.labels_placeholder: labels
    }
    vars = sess.run(out + states, feed_dict)
    out_ = vars[0:len(out)]
    states_ = vars[len(out) + 1:]

    d = data[0, 0, :]
    o = np.array(out_)[:, 0, 0]
    l = labels[0, 0, :]
    x1 = range(d.shape[0])
    x2 = range(1, d.shape[0] + 1)
    # TODO: output graph every 100 steps
    plt.scatter(x1, d, c='r')
    plt.scatter(x2, o, c='g')
    plt.scatter(x2, l, c='b', alpha=0.5)
    plt.show()
    print "data third dim", d
    print "out", o
    # print "states", np.array(states_)
    print "labels third dim", l