Example No. 1
def error_classify(epoch, eval_type='valid', final_eval=False):
    nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop
    global n_earlystopping

    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    # NOTE: as written, eval_type == 'valid' selects the *training* split;
    # any other eval_type selects the validation split.
    s1 = X_train_passage if eval_type == 'valid' else X_val_passage
    s2 = X_train_query if eval_type == 'valid' else X_val_query
    target = y_train if eval_type == 'valid' else y_val
    query_id = query_id_train if eval_type == 'valid' else query_id_val

    f_error = get_data_path() + eval_type + 'error.csv'
    f_right = get_data_path() + eval_type + 'right.csv'
    label_cnt = {}
    with open(f_error, 'w') as wf1, open(f_right, 'w') as wf2:
        for i in range(0, len(s1), params['bsize']):
            # prepare batch
            s1_batch, s1_len = get_batch(s1[i:i + params['bsize']],
                                         params['word_emb_dim'])
            s2_batch, s2_len = get_batch(s2[i:i + params['bsize']],
                                         params['word_emb_dim'])

            s1_batch, s2_batch = Variable(s1_batch), Variable(s2_batch)
            tgt_batch = Variable(
                torch.LongTensor(target[i:i + params['bsize']]))
            query_id_batch = query_id[i:i + params['bsize']]

            # model forward
            output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
            pred = output.data.max(1)[1]

            for q, t, p, _s1, _s2 in zip(query_id_batch, tgt_batch, pred,
                                         s1[i:i + params['bsize']],
                                         s2[i:i + params['bsize']]):
                if int(t) in label_cnt:
                    label_cnt[int(t)] += 1
                else:
                    label_cnt[int(t)] = 1

                if int(t) == int(p):
                    wf2.write('{}\t{}\t{}\t{}\t{}\n'.format(
                        q, t, p, ' '.join(_s1), ' '.join(_s2)))
                else:
                    wf1.write('{}\t{}\t{}\t{}\t{}\n'.format(
                        q, t, p, ' '.join(_s1), ' '.join(_s2)))

            correct += pred.long().eq(tgt_batch.data.long()).cpu().sum().item()

    print(label_cnt)
    # save model
    eval_acc = round(100 * correct / len(s1), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2}'.format(
            epoch, eval_type, eval_acc))
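
All of the NLI-style snippets in this collection pad variable-length sentence batches with a get_batch helper whose definition is not shown here. A minimal sketch in the style of the InferSent reference code, assuming word_vec maps each token to a NumPy embedding vector of size emb_dim:

import numpy as np
import torch

def get_batch(batch, word_vec, emb_dim=300):
    # batch: list of tokenized sentences (lists of words)
    lengths = np.array([len(x) for x in batch])
    max_len = np.max(lengths)
    embed = np.zeros((max_len, len(batch), emb_dim))
    for i in range(len(batch)):
        for j in range(len(batch[i])):
            embed[j, i, :] = word_vec[batch[i][j]]
    # (seq_len, batch, emb_dim) float tensor plus per-sentence lengths
    return torch.from_numpy(embed).float(), lengths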
Example No. 2
def evaluate(epoch, eval_type='dev', final_eval=False):
    nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop

    if eval_type == 'dev' and not final_eval:
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    s1 = dev['s1'] if eval_type == 'dev' else test['s1']
    s2 = dev['s2'] if eval_type == 'dev' else test['s2']
    target = dev['label'] if eval_type == 'dev' else test['label']

    for i in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_vec,
                                     params.word_emb_dim)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_vec,
                                     params.word_emb_dim)
        s1_batch, s2_batch = Variable(s1_batch.to(DEVICE)), Variable(
            s2_batch.to(DEVICE))
        tgt_batch = Variable(torch.LongTensor(
            target[i:i + params.batch_size])).to(DEVICE)

        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))

        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()

    # save model
    eval_acc = 100 * float(correct) / len(s1)
    if final_eval:
        print('finalgrep : accuracy {0} : {1:4.2f}%'.format(
            eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2:4.2f}'.
              format(epoch, eval_type, eval_acc))

    if eval_type == 'dev' and epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            if not os.path.exists(params.outputdir):
                os.makedirs(params.outputdir)
            torch.save(nli_net.state_dict(),
                       os.path.join(params.outputdir, params.outputmodelname))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0][
                    'lr'] = optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'.format(
                    params.lrshrink, optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy)
                stop_training = adam_stop
                adam_stop = True
    assert isinstance(eval_acc, float)
    return eval_acc
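
The else-branch above encodes early stopping for Adam with two flags: the first accuracy drop only arms adam_stop, and any later drop copies the armed flag into stop_training. A stripped-down sketch of the same pattern (names illustrative, not from the source):

adam_stop = False      # armed by the first accuracy drop
stop_training = False  # becomes True on a later drop

def on_validation_drop():
    global adam_stop, stop_training
    stop_training = adam_stop  # True only if a drop was already seen
    adam_stop = True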
Example No. 3
def inference(infer_data):
    if torch.cuda.is_available():
        nli_net.cuda()
    nli_net.eval()
    prob_res_1 = []
    s1 = infer_data['s1']
    s2 = infer_data['s2']

    for i in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size].tolist(),
                                     word_vec)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size].tolist(),
                                     word_vec)
        # s1_batch, s2_batch = Variable(s1_batch), Variable(s2_batch)

        if torch.cuda.is_available():
            s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(
                s2_batch.cuda())
        else:
            s1_batch, s2_batch = Variable(s1_batch), Variable(s2_batch)

        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
        # get softmax probability
        sm = nn.Softmax(dim=1)
        res = sm(output).data[:, 1]
        # print res
        prob_res_1 += res.tolist()
    return prob_res_1
Example No. 4
def evaluate(args, nli_net, test_nlipath, n_classes, word_vec, split="test"):
    test = get_nli_split(test_nlipath, n_classes, split)
    for key in ['s1', 's2']:  # avoid shadowing the `split` argument
        test[key] = np.array(
            [['<s>'] + [word for word in sent.split() if word in word_vec] +
             ['</s>'] for sent in test[key]])

    # Evaluates on the test set.
    correct = 0.
    s1 = test['s1']
    s2 = test['s2']
    target = test['labels']
    outputs = []
    for i in range(0, len(s1), args.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + args.batch_size], word_vec,
                                     args.word_emb_dim)

        s2_batch, s2_len = get_batch(s2[i:i + args.batch_size], word_vec,
                                     args.word_emb_dim)

        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(
            s2_batch.cuda())
        tgt_batch = Variable(torch.LongTensor(target[i:i +
                                                     args.batch_size])).cuda()
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
        outputs.extend(output.data.max(1)[1].cpu().numpy())
        correct += compute_score_with_logits(output, tgt_batch, n_classes)

    eval_acc = round(100 * correct.item() / len(s1), 2)
    print('evaluation accuracy is {0}'.format(eval_acc))
    return eval_acc, outputs
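
compute_score_with_logits is not defined in this snippet. For hard (single-label) targets, a sketch consistent with how correct is accumulated in the other examples on this page could be:

def compute_score_with_logits(logits, labels, n_classes):
    # n_classes is unused for hard labels; kept to match the call site
    preds = logits.data.max(1)[1]
    return preds.long().eq(labels.data.long()).cpu().sum()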
Example No. 5
def evaluate(m, source, tc=False, td=False):
    """Compute perplexity on document completion.
    """
    m.eval()
    with torch.no_grad():
        if source == 'val':
            indices = torch.split(torch.tensor(range(args.num_docs_valid)), args.eval_batch_size)
            tokens = valid_tokens
            counts = valid_counts
        else: 
            indices = torch.split(torch.tensor(range(args.num_docs_test)), args.eval_batch_size)
            tokens = test_tokens
            counts = test_counts

        ## get \beta here
        beta = m.get_beta()

        ### do dc and tc here
        acc_loss = 0
        cnt = 0
        indices_1 = torch.split(torch.tensor(range(args.num_docs_test_1)), args.eval_batch_size)
        for idx, ind in enumerate(indices_1):
            ## get theta from first half of docs
            data_batch_1 = data.get_batch(test_1_tokens, test_1_counts, ind, args.vocab_size, device)
            sums_1 = data_batch_1.sum(1).unsqueeze(1)
            if args.bow_norm:
                normalized_data_batch_1 = data_batch_1 / sums_1
            else:
                normalized_data_batch_1 = data_batch_1
            theta, _ = m.get_theta(normalized_data_batch_1)

            ## get prediction loss using second half
            data_batch_2 = data.get_batch(test_2_tokens, test_2_counts, ind, args.vocab_size, device)
            sums_2 = data_batch_2.sum(1).unsqueeze(1)
            res = torch.mm(theta, beta)
            preds = torch.log(res)
            recon_loss = -(preds * data_batch_2).sum(1)
            
            loss = recon_loss / sums_2.squeeze()
            loss = loss.mean().item()
            acc_loss += loss
            cnt += 1
        cur_loss = acc_loss / cnt
        ppl_dc = round(math.exp(cur_loss), 1)
        print('*'*100)
        print('{} Doc Completion PPL: {}'.format(source.upper(), ppl_dc))
        print('*'*100)
        if tc or td:
            beta = beta.data.cpu().numpy()
            if tc:
                print('Computing topic coherence...')
                get_topic_coherence(beta, train_tokens, vocab)
            if td:
                print('Computing topic diversity...')
                get_topic_diversity(beta, 25)
        return ppl_dc
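
In symbols, with \theta_d inferred from the first half of document d and x^{(2)}_{dw} the word counts of its second half, the loop above computes

\mathrm{PPL}_{dc} = \exp\left( \frac{1}{B} \sum_{b=1}^{B} \frac{1}{|b|} \sum_{d \in b} \frac{-\sum_{w} x^{(2)}_{dw} \log (\theta_d \beta)_w}{\sum_{w} x^{(2)}_{dw}} \right)

i.e. the length-normalized negative log-likelihood averaged first over the documents d in each batch b, then over the B batches, and finally exponentiated.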
Example No. 6
def evaluate(epoch, eval_type='valid', final_eval=False):
	fi = open('result_'+eval_type+str(epoch)+'.txt','w')
	nli_net.eval()
	correct = 0.
	global val_acc_best, lr, stop_training, adam_stop
	if eval_type == 'valid':
		print('\nVALIDATION : Epoch {0}'.format(epoch))
	

	s1 = valid['s1'] if eval_type == 'valid' else test['s1']
	s2 = valid['s2'] if eval_type == 'valid' else test['s2']
	target = valid['label'] if eval_type == 'valid' else test['label']
	
	for i in range(0, len(s1), params.batch_size):
        # prepare batch
		s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_vec)
		s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_vec)
		s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(s2_batch.cuda())
		tgt_batch = Variable(torch.LongTensor(target[i:i + params.batch_size])).cuda()

        # model forward
		output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
	
	
		pred = output.data.max(1)[1]
		for p in pred:
			fi.write(str(p)+'\n')
		correct += pred.long().eq(tgt_batch.data.long()).cpu().sum().item()
	
	eval_acc = round(100 * correct / len(s1), 2)
	if final_eval:
		print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
	else:
		print('togrep : results : epoch {0} ; mean accuracy {1} : {2}'.format(epoch, eval_type, eval_acc))
	
	if eval_type == 'valid' and epoch <= params.n_epochs:
		if eval_acc > val_acc_best:
			print('saving model at epoch {0}'.format(epoch))
			if not os.path.exists(params.outputdir):
				os.makedirs(params.outputdir)
			torch.save(nli_net, os.path.join(params.outputdir,params.outputmodelname))
			val_acc_best = eval_acc
		else:
			if 'sgd' in params.optimizer:
				optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] / params.lrshrink
				print('Shrinking lr by : {0}. New lr = {1}'.format(params.lrshrink,optimizer.param_groups[0]['lr']))
				if optimizer.param_groups[0]['lr'] < params.minlr:
					stop_training = True
			#if 'adam' in params.optimizer:
			#	# early stopping (at 2nd decrease in accuracy)
			#	stop_training = adam_stop
			#	adam_stop = True
			#print('nothing')
	fi.close()
	return eval_acc
Example No. 7
def evaluate(epoch, eval_type='valid', final_eval=False):
    nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop

    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    s1 = valid['s1'] if eval_type == 'valid' else test['s1']
    s2 = valid['s2'] if eval_type == 'valid' else test['s2']
    target = valid['label'] if eval_type == 'valid' else test['label']

    for i in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_vec)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_vec)
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(s2_batch.cuda())
        tgt_batch = Variable(torch.LongTensor(target[i:i + params.batch_size])).cuda()

        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))

        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum().item()

    # save model
    eval_acc = round(100 * correct / len(s1), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2}'.format(
            epoch, eval_type, eval_acc))

    if eval_type == 'valid' and epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            if not os.path.exists(params.outputdir):
                os.makedirs(params.outputdir)
            torch.save(nli_net.state_dict(), os.path.join(params.outputdir,
                       params.outputmodelname))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'
                      .format(params.lrshrink,
                              optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy)
                stop_training = adam_stop
                adam_stop = True
    return eval_acc
Example No. 8
def evaluate(epoch, eval_type='valid', final_eval=False):
    classifier.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop

    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    s1 = valid['s1'] if eval_type == 'valid' else test['s1']
    s2 = valid['s2'] if eval_type == 'valid' else test['s2']
    target = valid['label'] if eval_type == 'valid' else test['label']

    for i in range(0, len(s1), params.batch_size):
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_embed, params.word_emb_dim)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_embed, params.word_emb_dim)
        s1_batch, s2_batch = Variable(s1_batch), Variable(s2_batch)
        tgt_batch = Variable(torch.LongTensor(target[i:i + params.batch_size]))

        output = classifier((s1_batch, s1_len), (s2_batch, s2_len))

        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()

    # save model
    eval_acc = round(100 * correct.item() / len(s1), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2}'.format(
            epoch, eval_type, eval_acc))

    if eval_type == 'valid' and epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            if not os.path.exists(params.outputdir):
                os.makedirs(params.outputdir)
            torch.save(classifier.state_dict(), os.path.join(params.outputdir,
                       params.outputmodelname))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'
                      .format(params.lrshrink,
                              optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy)
                stop_training = adam_stop
                adam_stop = True
    return eval_acc
Example No. 9
def main(argv=None):
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    input_images = tf.placeholder(tf.float32,
                                  shape=[None, None, None, 3],
                                  name="input")
    gt_maps = tf.placeholder(tf.float32,
                             shape=[None, None, None, 1],
                             name="input_gt")
    unet_output = unet.unet(name="UNET", input_data=input_images)
    loss = tf.reduce_mean(
        tf.keras.losses.sparse_categorical_crossentropy(gt_maps, unet_output))
    train_ops = tf.train.AdamOptimizer(
        learning_rate=FLAGS.learning_rate).minimize(loss)
    saver = tf.train.Saver(tf.global_variables())

    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())
    init = tf.global_variables_initializer()

    with tf.Session(graph=tf.get_default_graph()) as sess:
        sess.run(init)
        data_generator = data.get_batch(num_workers=FLAGS.number_reasers,
                                        batch_size=FLAGS.batch_size)
        for step in range(FLAGS.max_step):
            input_list = next(data_generator)
            peer_loss, _ = sess.run([loss, train_ops],
                                    feed_dict={
                                        input_images: input_list[0],
                                        gt_maps: input_list[1]
                                    })
            print("step {}, model loss {}".format(step, peer_loss))
            saver.save(sess=sess,
                       save_path=FLAGS.checkpoint_path + str(step) + ".ckpt",
                       global_step=step)
Example No. 10
    def __init__(self, EPOCH=100000, batch_size=32, embedding_size=300,
                 num_units=300):
        """
        https://arxiv.org/abs/1706.04223
        While handling discrete outputs, gradients do not flow through the network parameters, so this paper
        demonstrates a method that maps discrete features to a continuous latent space via an AE and a WGAN.

        :param LATENT_DIM: Integer. dimension of latent space
        :param LEARNING_RATE: Float. learning rate for optimizing generator and autoencoder
        :param LEARNING_RATE_CRITIC: Float. learning rate for optimizing critic
        :param EPOCH: Integer. # of epochs
        :param BATCH_SIZE: Integer. batch size
        """
        self.initializer = tf.truncated_normal_initializer(stddev=0.02)
        self.AE_learning_rate = 1.
        self.critic_lr= 0.00001
        self.gen_lr = 0.00005
        self.EPOCH = EPOCH
        self.batch_size = batch_size
        self.num_units = num_units
        self.data, self.sequence_length, self.dict = get_batch()
        self.num_batch = len(self.data) // self.batch_size
        self.reverse_dict = {v: k for k, v in self.dict.items()}
        self.voca_size = len(self.dict)
        self.max_len = 30
        self.embedding_size = embedding_size
        with tf.variable_scope("embedding"):
            self.embedding = tf.get_variable("embedding_table", [self.voca_size, self.embedding_size])
        self.build_graph()
Example No. 11
def main():
	X = tf.placeholder('float', [None, N_NUMBERS, N_BITS])
	y = tf.placeholder('float', [None, N_NUMBERS, N_BITS])

	y_hat = build_model(X)

	loss = tf.losses.mean_squared_error(y, y_hat)

	optimizer = tf.train.AdamOptimizer()
	train_operation = optimizer.minimize(loss)

	init = tf.global_variables_initializer()
	with tf.Session() as sess:
		sess.run(init)

		for epoch in range(N_EPOCHS):
			x_batch, y_batch = get_batch()

			_, pred, loss_val = sess.run(
				[train_operation, y_hat, loss],
				feed_dict={X: x_batch, y: y_batch})

			if (epoch + 1) % 2000 == 0:
				print('epoch: {}, loss: {}'.format(epoch + 1, loss_val))
				print('Input: ')
				print(x_batch[0])
				print()
				print('Ground truth:')
				print(y_batch[0])
				print()
				print('Prediction:')
				print(convert_prediction(pred[0]))
				print()
				print()
Example No. 12
def chat(line):
    """ in test mode, we don't to create the backward path
    """
    global enc_vocab, inv_dec_vocab, model, saver, sess, output_file

    line = line.decode().lower()

    start = time.time()

    if len(line) > 0 and line[-1] == '\n':
        line = line[:-1]
    if line == '':
        return 'Hmm...'
    output_file.write('HUMAN: ' + line + '\n')
    # Get token-ids for the input sentence.
    token_ids = data.sentence2id(enc_vocab, str(line))
    if (len(token_ids) > max_length):
        return 'TL;DR'

    # Which bucket does it belong to?
    bucket_id = _find_right_bucket(len(token_ids))
    # Get a 1-element batch to feed the sentence to the model.
    encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
        [(token_ids, [])], bucket_id, batch_size=1)
    # Get output logits for the sentence.
    _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs,
                                   decoder_masks, bucket_id, True)
    response = _construct_response(output_logits, inv_dec_vocab)
    # print(response)
    output_file.write('BOT: ' + response + '\n')

    print(time.time() - start)
    return response
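
The chat examples on this page all call a _find_right_bucket helper whose body is not shown. Assuming config.BUCKETS is an ascending list of (encoder_len, decoder_len) pairs, a sketch of the usual implementation is:

def _find_right_bucket(length):
    # smallest bucket whose encoder side can hold the token sequence
    return min(b for b in range(len(config.BUCKETS))
               if config.BUCKETS[b][0] >= length)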
Example No. 13
def evaluate(model, criterion, corpus, data_source, eval_batch_size):

    model.eval()
    total_loss = 0.
    total_words = 0.
    total_entropy = 0.
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args.bptt)
            output, hidden = model(data, hidden, mean_field_inference=True)
            output_flat = output.view(-1, ntokens)

            num_words = output_flat.shape[0]
            pred_proba = nn.functional.softmax(output_flat, dim=-1)
            loss = len(data) * criterion(output_flat, targets).item() / num_words
            entropy = -(pred_proba * pred_proba.log()).sum(1).sum(0).item()

            total_words += num_words
            total_entropy += entropy
            total_loss += loss

            hidden = repackage_hidden(hidden)

    return total_loss / (len(data_source) - 1), total_entropy / total_words
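
This snippet (and the train_epoch example further down) uses the word-language-model style get_batch, which slices a pre-batched (num_steps, batch_size) token tensor into a bptt-long input chunk and its one-step-shifted targets. A sketch matching the PyTorch word_language_model example this code appears to follow:

def get_batch(source, i, bptt):
    # source: LongTensor of shape (num_steps, batch_size)
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]
    # targets are the same tokens shifted one step ahead, flattened
    target = source[i + 1:i + 1 + seq_len].view(-1)
    return data, target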
Example No. 14
def chat():
    """ in test mode, we don't to create the backward path
    """
    test_data_buckets, train_data_buckets, train_buckets_scale, metadata = \
        _get_buckets()

    model = ChatBotModel(True, batch_size=1)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)
        bucket_id = _get_random_bucket(train_buckets_scale)
        encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
            test_data_buckets[bucket_id], bucket_id, batch_size=20)

        # Get output logits for the sentence.
        _, _, output_logits = run_step(sess, model, encoder_inputs,
                                       decoder_inputs, decoder_masks,
                                       bucket_id, True)

        for logit in output_logits:

            response = _construct_response(logit, metadata)
            print(response)
Example No. 15
    def train_epoch(self):
        self.model.train() # Turn on the train mode
        total_loss = 0.
        start_time = time.time()
        # ntokens = len(TEXT.vocab.stoi)
        for batch, i in enumerate(range(0, self.train_data.size(0) - 1, self.bptt)):
            data, targets = get_batch(self.train_data, i)
            self.optimizer.zero_grad()
            output = self.model(data)
            loss = self.criterion(output.view(-1, self.ntokens), targets)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
            self.optimizer.step()

            total_loss += loss.item()
            log_interval = 200
            if batch % log_interval == 0 and batch > 0:
                cur_loss = total_loss / log_interval
                elapsed = time.time() - start_time
                print('{:5d}/{:5d} batches | '
                    'lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                        batch, len(self.train_data) // self.bptt, self.scheduler.get_lr()[0],
                        elapsed * 1000 / log_interval,
                        cur_loss, math.exp(cur_loss)))
                total_loss = 0
                start_time = time.time()
Example No. 16
def train():
    model = ChatBotModel(False,config.BATCH_SIZE)
    model.build_graph()
    
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        print('Running Session')
        sess.run(init)
        _check_restore_parameters(sess,saver)
        
        iteration = model.global_step.eval()
        total_loss = 0
        while True:
            skip_step = _get_skip_step(iteration)
            bucket_id = _get_random_bucket(train_buckets_scale)
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
                data_buckets[bucket_id], bucket_id, batch_size=config.BATCH_SIZE)
            start = time.time()
            
            _,step_loss,_ = run_step(sess,model,encoder_inputs,decoder_inputs,decoder_masks,bucket_id,False)
            total_loss += step_loss
            iteration += 1
            
            if iteration % skip_step == 0:
                print('Iter {}: loss {}, time {}'.format(iteration,total_loss/skip_step,time.time() - start))
                start = time.time()
                total_loss = 0
                saver.save(sess,os.path.join(config.CPT_PATH,'chatbot'),global_step=model.global_step)
                
            if iteration % (10 * skip_step) == 0:
                _eval_test_set(sess,model,test_buckets)
                start = time.time()
            sys.stdout.flush()
Example No. 17
def evaluate(epoch,
             eval_type='valid',
             correct_count=correct_count,
             labels_count=labels_count):
    model.eval()
    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    sent1 = dev_fr['sent'] if eval_type == 'valid' else None
    sent2 = dev_ep['sent'] if eval_type == 'valid' else None
    target1 = dev_fr['label'] if eval_type == 'valid' else None
    target2 = dev_ep['label'] if eval_type == 'valid' else None

    eval_acc = []

    for sent, target, diag_lens in zip([sent1, sent2], [target1, target2],
                                       [dev_fr_lens, dev_ep_lens]):
        correct = 0
        stidx = 0
        for batch_size in diag_lens:
            sent_batch, len_batch = get_batch(sent[stidx:stidx + batch_size],
                                              word_vec,
                                              embed_size=params.embed_size)

            sent_batch = Variable(
                sent_batch.cuda()) if params.use_cuda else Variable(
                    sent_batch.cpu())
            label_batch = Variable(torch.LongTensor(target[stidx:stidx + batch_size])).cuda() if params.use_cuda else \
                Variable(torch.LongTensor(target[stidx:stidx + batch_size])).cpu()

            stidx += batch_size

            output = model((sent_batch, len_batch))

            pred = output.data.max(1)[1]

            # counting
            correct_count, labels_count = compute_acc(
                pred=pred.long(),
                label=label_batch.data.long(),
                correct_count=correct_count,
                labels_count=labels_count)

            # correct += pred.long().eq(label_batch.data.long()).cuda().sum() if params.use_cuda else pred.long().eq(
            #     label_batch.data.long()).cpu().sum()

        correct = list(correct_count.values())
        total = list(labels_count.values())
        correct = np.array(correct)
        total = np.array(total)
        acc = np.round(100 * correct / total, 2)

        eval_wa = round(100 * sum(correct[:4]) / sum(total[:4]), 1)
        eval_uwa = round(sum(acc[:4]) / 4, 1)
        eval_acc.append([eval_wa, eval_uwa])
        print("accuracy for each category\n{}".format(acc))
        print("wa: {}".format(eval_wa))
        print("uwa: {}".format(eval_uwa))
    return eval_acc
Example No. 18
def chat():
    """ in test mode, we don't need to create the backward path
    """
    _, enc_vocab = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.enc'))
    inv_dec_vocab, _ = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.dec'))

    model = ChatBotModel(True, batch_size=1)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)
        output_file = open(
            os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+')
        # Decode from standard input.
        max_length = config.BUCKETS[-1][0]
        print(
            'Welcome to TensorBro. Say something. Enter to exit. Max length is',
            max_length)
        # store a line history for 3 lines
        conversation_history = []
        line_history = ['', '', '']
        while True:
            line = _get_user_input()
            if len(line) > 0 and line[-1] == '\n':
                line = line[:-1]
                # update the line_history
                line_history.append(line)
                line_history.pop(0)
                # create line from the line history
                line = ''.join(line_history)
            if line == '':
                break
            output_file.write('HUMAN ++++ ' + line + '\n')
            # Get token-ids for the input sentence.
            token_ids = data.sentence2id(enc_vocab, str(line))
            if (len(token_ids) > max_length):
                print('Max length I can handle is:', max_length)
                line = _get_user_input()
                continue
            # Which bucket does it belong to?
            bucket_id = _find_right_bucket(len(token_ids))
            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
                [(token_ids, [])], bucket_id, batch_size=1)
            # Get output logits for the sentence.
            _, _, output_logits = run_step(sess, model, encoder_inputs,
                                           decoder_inputs, decoder_masks,
                                           bucket_id, True)
            response = _construct_response(output_logits, inv_dec_vocab)
            print(response)
            conversation_history.append((line, response))
            output_file.write('BOT ++++ ' + response + '\n')
        output_file.write('=============================================\n')
        output_file.close()
Example No. 19
def test(model, testFile):
	batch_size = 1000
	testGen = csv_generator(open(testFile))

	testGen, x_batch, y_batch = get_batch(batch_size, testGen, testFile)
	predicted = model.forward(x_batch)
	accuracy = score_accuracy(predicted, y_batch)
	print "Test Accuracy (batch_size=", batch_size, "):", accuracy
Example No. 20
def train():
    """ Train the bot """
    test_buckets, data_buckets, train_buckets_scale = _get_buckets()
    # fill each bucket with samples and read them in!!

    # in train mode, we need to create the backward path, so forward_only is False
    model = ChatBotModel(False, config.BATCH_SIZE)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        print('Running session')
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)  # restore the session from a checkpoint if one exists

        iteration = model.global_step.eval()  # load the global step
        total_loss = 0
        while True:
            skip_step = _get_skip_step(
                iteration)  # get skip_step: 30 while iteration < 100, else 100
            bucket_id = _get_random_bucket(
                train_buckets_scale)  # pick a bucket id at random

            # fetch a batch of batch_size from the chosen bucket
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
                data_buckets[bucket_id],
                bucket_id,
                batch_size=config.BATCH_SIZE)
            start = time.time()

            # step run!! forward_only = False
            _, step_loss, _ = run_step(sess, model, encoder_inputs,
                                       decoder_inputs, decoder_masks,
                                       bucket_id, False)
            total_loss += step_loss
            iteration += 1

            # every skip_step iterations, report the accumulated loss and elapsed time,
            # then reset the counters
            # and save the session
            if iteration % skip_step == 0:
                print('Iter {}: loss {}, time {}'.format(
                    iteration, total_loss / skip_step,
                    time.time() - start))
                start = time.time()
                total_loss = 0
                saver.save(sess,
                           os.path.join(config.CPT_PATH, 'chatbot'),
                           global_step=model.global_step)

                # every 10 * skip_step iterations, run one test pass per bucket id on the test buckets
                if iteration % (10 * skip_step) == 0:
                    # Run evals on development set and print their loss
                    _eval_test_set(sess, model, test_buckets)
                    start = time.time()
                sys.stdout.flush()
Example No. 21
def fill_feed_dict(data, img1_pl, img2_pl, flo_pl):
    img1_feed, img2_feed, flo_feed = data.get_batch(batch_size)
    # test
    # cv2.imshow('img1', img1_feed[3].astype(np.uint8))
    # cv2.imshow('img2', img2_feed[1].astype(np.uint8))
    # cv2.waitKey()

    feed_dict = {img1_pl: img1_feed, img2_pl: img2_feed, flo_pl: flo_feed}
    return feed_dict
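
A typical training step would then hand the returned dictionary straight to sess.run; train_op and loss below stand in for whatever ops the surrounding script actually defines:

feed_dict = fill_feed_dict(dataset, img1_pl, img2_pl, flo_pl)
_, loss_val = sess.run([train_op, loss], feed_dict=feed_dict)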
Example No. 22
def chat(use_attention, ckpt_path="./ckp-dir/checkpoints"):
    """ in test mode, we don't to create the backward path
    """
    _, enc_vocab = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.enc'))
    inv_dec_vocab, _ = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.dec'))

    if not use_attention:
        model = BasicChatBotModel(batch_size=1)
    else:
        model = AttentionChatBotModel(batch_size=1)
    model.build()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver, ckpt_path)
        output_file = open(os.path.join(
            config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+')
        # Decode from standard input.
        max_length = config.BUCKETS[-1][0]
        print(
            'Welcome to TensorBro. Say something. Enter to exit. Max length is', max_length)
        while True:
            line = _get_user_input()
            if len(line) > 0 and line[-1] == b'\n':
                line = line[:-1]
            if line == b'':
                break
            output_file.write('HUMAN ++++ ' + line.decode('ascii') + '\n')
            # Get token-ids for the input sentence.
            token_ids = data.sentence2id(enc_vocab, line)
            if len(token_ids) > max_length:
                print('Max length I can handle is:', max_length)
                line = _get_user_input()
                continue
            # Which bucket does it belong to?
            # bucket_id = _find_right_bucket(len(token_ids))
            bucket_id = -1
            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch([(token_ids, [])],
                                                                           bucket_id,
                                                                           batch_size=1)
            # Get output logits for the sentence.
            decoder_lens = np.sum(np.transpose(np.array(decoder_masks), (1, 0)), axis=1)
            output_logits = sess.run([model.final_outputs],
                                     feed_dict={model.encoder_inputs_tensor: encoder_inputs,
                                                model.decoder_inputs_tensor: decoder_inputs,
                                                model.decoder_length_tensor: decoder_lens,
                                                model.bucket_length: config.BUCKETS[bucket_id]})
            response = _construct_response(output_logits, inv_dec_vocab)
            print(response)
            output_file.write('BOT ++++ ' + response + '\n')
        output_file.write('=============================================\n')
        output_file.close()
Example No. 23
def train(epoch):
    """Train DETM on data for one epoch.
    """
    model.train()
    acc_loss = 0
    acc_nll = 0
    acc_kl_theta_loss = 0
    acc_kl_eta_loss = 0
    acc_kl_alpha_loss = 0
    cnt = 0
    indices = torch.randperm(args.num_docs_train)
    indices = torch.split(indices, args.batch_size) 
    for idx, ind in enumerate(indices):
        optimizer.zero_grad()
        model.zero_grad()
        data_batch, times_batch = data.get_batch(
            train_tokens, train_counts, ind, args.vocab_size, args.emb_size, temporal=True, times=train_times)
        sums = data_batch.sum(1).unsqueeze(1)
        if args.bow_norm:
            normalized_data_batch = data_batch / sums
        else:
            normalized_data_batch = data_batch

        loss, nll, kl_alpha, kl_eta, kl_theta = model(data_batch, normalized_data_batch, times_batch, train_rnn_inp, args.num_docs_train)
        loss.backward()
        if args.clip > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        acc_loss += torch.sum(loss).item()
        acc_nll += torch.sum(nll).item()
        acc_kl_theta_loss += torch.sum(kl_theta).item()
        acc_kl_eta_loss += torch.sum(kl_eta).item()
        acc_kl_alpha_loss += torch.sum(kl_alpha).item()
        cnt += 1

        if idx % args.log_interval == 0 and idx > 0:
            cur_loss = round(acc_loss / cnt, 2) 
            cur_nll = round(acc_nll / cnt, 2) 
            cur_kl_theta = round(acc_kl_theta_loss / cnt, 2) 
            cur_kl_eta = round(acc_kl_eta_loss / cnt, 2) 
            cur_kl_alpha = round(acc_kl_alpha_loss / cnt, 2) 
            lr = optimizer.param_groups[0]['lr']
            print('Epoch: {} .. batch: {}/{} .. LR: {} .. KL_theta: {} .. KL_eta: {} .. KL_alpha: {} .. Rec_loss: {} .. NELBO: {}'.format(
                epoch, idx, len(indices), lr, cur_kl_theta, cur_kl_eta, cur_kl_alpha, cur_nll, cur_loss))
    
    cur_loss = round(acc_loss / cnt, 2) 
    cur_nll = round(acc_nll / cnt, 2) 
    cur_kl_theta = round(acc_kl_theta_loss / cnt, 2) 
    cur_kl_eta = round(acc_kl_eta_loss / cnt, 2) 
    cur_kl_alpha = round(acc_kl_alpha_loss / cnt, 2) 
    lr = optimizer.param_groups[0]['lr']
    print('*'*100)
    print('Epoch----->{} .. LR: {} .. KL_theta: {} .. KL_eta: {} .. KL_alpha: {} .. Rec_loss: {} .. NELBO: {}'.format(
            epoch, lr, cur_kl_theta, cur_kl_eta, cur_kl_alpha, cur_nll, cur_loss))
    print('*'*100)
Example No. 24
def chat():
    """ in test mode, we don't to create the backward path
    """
    # index2word , word2index
    _, enc_vocab = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.enc'))
    inv_dec_vocab, _ = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.dec'))

    model = ChatBotModel(True, batch_size=1)  # batch size is one (forward only)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)
        output_file = open(
            os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+')
        # Decode from standard input.
        max_length = config.BUCKETS[-1][0]  # the longest input the user can type is the largest bucket length
        print(
            'Welcome to TensorBro. Say something. Enter to exit. Max length is',
            max_length)
        while True:
            line = _get_user_input()  # read input from stdin
            if len(line) > 0 and line[-1] == '\n':
                line = line[:-1]
            if line == '':  # break if nothing was typed
                break
            output_file.write('HUMAN ++++ ' + line + '\n')  # log each line to the output file
            # Get token-ids for the input sentence.
            token_ids = data.sentence2id(enc_vocab, str(line))  # convert the sentence to token ids
            if (len(token_ids) > max_length):  # if the input exceeds the max length, prompt the user again
                print('Max length I can handle is:', max_length)
                line = _get_user_input()
                continue
            # Which bucket does it belong to?
            bucket_id = _find_right_bucket(
                len(token_ids))  # pick the smallest bucket id that fits the input length
            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
                [(token_ids, [])],  # the decoder input is empty and goes in fully padded
                bucket_id,
                batch_size=1)
            # Get output logits for the sentence.
            _, _, output_logits = run_step(sess, model, encoder_inputs,
                                           decoder_inputs, decoder_masks,
                                           bucket_id,
                                           True)  # forward_only == True
            response = _construct_response(
                output_logits, inv_dec_vocab)  # map ids back to words to build the response
            print(response)
            output_file.write('BOT ++++ ' + response + '\n')
        output_file.write('=============================================\n')
        output_file.close()
Example No. 25
def get_cluster_quality():
    """Returns cluster quality.
    """

    print('Getting vocabulary ...')
    data_file = os.path.join(args.data_path, 'min_df_{}'.format(args.min_df))
    vocab, cluster_valid = data.get_all_data(data_file, temporal=True)
    vocab_size = len(vocab)
    topics_distributions = []

    # get data
    print('Getting full data ...')
    tokens = train['tokens']
    counts = train['counts']
    times = train['times']
    num_times = len(np.unique(train_times))
    num_docs = len(tokens)
    rnn_inp = data.get_rnn_input(tokens, counts, times, num_times, vocab_size, num_docs)
    model.eval()
    with torch.no_grad():
        indices = torch.split(torch.tensor(range(num_docs)), args.eval_batch_size)

        eta = get_eta(rnn_inp)

        acc_loss = 0
        cnt = 0
        for idx, ind in enumerate(indices):
            data_batch, times_batch = data.get_batch(
                tokens, counts, ind, vocab_size, args.emb_size, temporal=True, times=times)
            sums = data_batch.sum(1).unsqueeze(1)
            if args.bow_norm:
                normalized_data_batch = data_batch / sums
            else:
                normalized_data_batch = data_batch

            eta_td = eta[times_batch.type('torch.LongTensor')]
            theta = get_theta(eta_td, normalized_data_batch)


        # NOTE: `beta`, `train_tokens`, `vocab`, and `diversity` below are
        # assumed to be defined elsewhere in the original script.
        print('\n')
        print('Get topic coherence...')
        print('train_tokens: ', train_tokens[0])
        TC_all = []
        cnt_all = []
        for tt in range(args.num_times):
            tc, cnt = get_topic_coherence(beta[:, tt, :].detach().numpy(), train_tokens, vocab)
            TC_all.append(tc)
            cnt_all.append(cnt)
        print('TC_all: ', TC_all)
        TC_all = torch.tensor(TC_all)
        print('TC_all: ', TC_all.size())
        print('\n')
        print('Get topic quality...')
        quality = tc * diversity
        print('Topic Quality is: {}'.format(quality))
        print('#'*100)
Example No. 26
def inference(infer_data):
    nli_net.eval()
    prob_res_1 = []
    s1 = infer_data['s1']
    s2 = infer_data['s2']

    for i in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size].tolist(), word_vec)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size].tolist(), word_vec)
        s1_batch, s2_batch = Variable(s1_batch), Variable(s2_batch)

        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
        # get softmax probability of class 1
        sm = nn.Softmax(dim=1)
        res = sm(output.data)[:, 1]
        prob_res_1 += res.tolist()
    return prob_res_1
Example No. 27
def write_mixed_video(sess, x, y_, lstm_init_state, y_net, mse, current_state):
    _current_state = np.zeros((LSTM_INFO[1], 2, BATCH_SIZE, LSTM_INFO[0]))
    accumulator = []
    pointer_v = 0
    print('Creating mixed video data')

    if DATASET_NAME == 'moving_mnist':
        get_batch('reset_test', 'moving_mnist', BATCH_SIZE, None, None, None)
    if DATASET_NAME == 'moving_mnist_sin':
        get_batch('reset_test', 'moving_mnist_sin', BATCH_SIZE, None, None,
                  None)

    for i in range(steps_mix):
        # obtain testing batch
        batch_i, batch_t, pointer_v = get_batch('testing', DATASET_NAME,
                                                BATCH_SIZE, pointer_v,
                                                INPUT_WIDTH, INPUT_HEIGHT)
        if pointer_v == 0:
            _current_state = np.zeros(
                (LSTM_INFO[1], 2, BATCH_SIZE, LSTM_INFO[0]))

        # evaluation step
        _y_net, _mse, _current_state = sess.run([y_net, mse, current_state],
                                                feed_dict={
                                                    x: batch_i,
                                                    y_: batch_t,
                                                    lstm_init_state:
                                                    _current_state
                                                })

        if DATASET_NAME == "moving_mnist" and (
            (i + 1) * BATCH_SIZE) % (2 * MIXED_SEQUENCE_LENGTH) == 0:
            get_batch('reset_test', 'moving_mnist', BATCH_SIZE, None, None,
                      None)
        if DATASET_NAME == "moving_mnist_sin" and (
            (i + 1) * BATCH_SIZE) % (2 * MIXED_SEQUENCE_LENGTH) == 0:
            get_batch('reset_test', 'moving_mnist_sin', BATCH_SIZE, None, None,
                      None)

        if accumulator == []:
            accumulator = batch_t
        else:
            if floor(i * BATCH_SIZE / MIXED_SEQUENCE_LENGTH) % 2 == 1:
                accumulator = np.append(accumulator, _y_net, axis=0)
            else:
                accumulator = np.append(accumulator, batch_t, axis=0)
        print('step {} of {}, error: {}'.format(i, steps_mix, _mse))
    write_video('mix', accumulator)
Example No. 28
def evaluate(epoch, valid, params, word_vec, shared_nli_net, eval_type, pred_file):
  shared_nli_net.eval()
  correct = 0.
  global val_acc_best, lr, stop_training, adam_stop

  #if eval_type == 'valid':
  print('\n{0} : Epoch {1}'.format(eval_type, epoch))

  hypoths = valid['hypoths'] #if eval_type == 'valid' else test['s1']
  premises = valid['premises'] #if eval_type == 'valid' else test['s2']
  target = valid['lbls']

  out_preds_f = open(pred_file, "wb")

  for i in range(0, len(hypoths), params.batch_size):
    # prepare batch
    hypoths_batch, hypoths_len = get_batch(hypoths[i:i + params.batch_size], word_vec)
    premises_batch, premises_len = get_batch(premises[i:i + params.batch_size], word_vec)
    tgt_batch = None
    if params.gpu_id > -1:
      hypoths_batch = Variable(hypoths_batch.cuda())
      premises_batch = Variable(premises_batch.cuda())
      tgt_batch = Variable(torch.LongTensor(target[i:i + params.batch_size])).cuda()
    else:
      hypoths_batch = Variable(hypoths_batch)
      premises_batch = Variable(premises_batch)
      tgt_batch = Variable(torch.LongTensor(target[i:i + params.batch_size]))

    # model forward
    output = shared_nli_net((premises_batch, premises_len), (hypoths_batch, hypoths_len))

    all_preds = output.data.max(1)[1]
    for pred in all_preds:
      out_preds_f.write(IDX2LBL[pred.item()] + "\n")
    correct += all_preds.long().eq(tgt_batch.data.long()).cpu().sum()

  out_preds_f.close()
  # save model
  eval_acc = round(100.0 * correct.item() / len(hypoths), 2)
  print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))

  return eval_acc
Example No. 29
def main(_):
    if not os.path.exists(video_path):
        os.makedirs(video_path)

    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(MODEL_PATH + MODEL_NAME + '.meta')
        saver.restore(sess, tf.train.latest_checkpoint(MODEL_PATH))

        graph = tf.get_default_graph()
        lstm_init_state = graph.get_tensor_by_name("lstm_state:0")
        x = graph.get_tensor_by_name("input:0")
        y_ = graph.get_tensor_by_name("target:0")
        y_net = tf.get_collection("y_net")[0]
        if ADDITIONAL_OUTPUT:
            mse = tf.get_collection("mse1")[0]
        else:
            mse = tf.get_collection("mse")[0]
        train_step = tf.get_collection("train_step")[0]
        current_state = get_collection_rnn_state("current_state")

        assert FRAMES_NUM % BATCH_SIZE == 0, "Number of frames should be a multiple " \
                                             "of the batch_size used while training"

        if DATASET_NAME == 'moving_mnist':
            get_batch('reset_test', 'moving_mnist', BATCH_SIZE, None, None,
                      None)
        if DATASET_NAME == 'moving_mnist_sin':
            get_batch('reset_test', 'moving_mnist_sin', BATCH_SIZE, None, None,
                      None)
        # obtain original batch
        batch_i, _, _ = get_batch('testing', DATASET_NAME, FRAMES_NUM, -1,
                                  INPUT_WIDTH, INPUT_HEIGHT)

        write_video('original', batch_i)
        write_parallel_video(sess, x, y_, lstm_init_state, y_net, mse,
                             current_state)
        for n in INTERLACED_N:
            write_interlaced_video(sess, n[0], n[1], x, y_, lstm_init_state,
                                   y_net, mse, current_state)
        if MIXED_VIDEO:
            write_mixed_video(sess, x, y_, lstm_init_state, y_net, mse,
                              current_state)
Example No. 30
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)

    sent_lens = list(train_data)
    random.shuffle(sent_lens)
    num_seqs = 0
    for sent_len in sent_lens:
        for batch, i in enumerate(
                range(0, train_data[sent_len].size(1) - 1, args.batch_size)):
            # print(model.rnn.cell.w_f.weight)
            data, targets = get_batch(train_data[sent_len],
                                      i,
                                      args.batch_size,
                                      prefix_len=sent_len - 1)
            actual_batch_size = data.shape[1]
            if args.unk:
                data = add_unk(data, corpus)

            # For the last batch the batch size may be smaller:
            hidden = model.init_hidden(actual_batch_size)
            model.zero_grad()
            output, hidden = model(data, hidden)
            flat_dim = actual_batch_size * (sent_len - 1)
            loss = criterion(output.view(flat_dim, -1),
                             targets.contiguous().view(flat_dim))
            loss.backward()

            # Haven't seen any benefit but this would go here:
            # torch.nn.utils.clip_grad_norm_(model.parameters(),0.1)
            optimizer.step()

            total_loss += loss.data
            num_seqs += data.shape[1]

            if batch % args.log_interval == 0 and batch > 0:
                cur_loss = total_loss.item() / args.log_interval
                elapsed = time.time() - start_time
                print(
                    '| epoch {:3d} | {:5d}/{:5d} batches | lr (ADAM) | ms/batch {:5.2f} | '
                    'loss {:5.2f} | {:5d} sequences | ppl NA'.format(
                        epoch, batch,
                        len(train_data) // args.prefix_len,
                        elapsed * 1000 / args.log_interval, cur_loss,
                        num_seqs))  # , math.exp(cur_loss)))
                model.update_callback(epoch, batch)
                total_loss = 0
                start_time = time.time()

    model.epoch_callback(epoch, args.epochs)
    return num_seqs
Example No. 31
def evaluate(cv, epoch, dev, model, optimizer, final_eval=False):
    model.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop

    print('\nVALIDATION : Epoch {0}'.format(epoch))

    for i in range(0, len(dev), params.batch_size):
        # prepare batch

        label_batch, q1_batch, q1_len, q2_batch, q2_len = get_batch(
            questions_dict,
            dev[i:i + params.batch_size],
            word_vec,
            random_flip=False,
            feature=params.feature)

        q1_batch, q2_batch = Variable(q1_batch).cuda(), Variable(
            q2_batch).cuda()
        tgt_batch = Variable(torch.FloatTensor(label_batch)).cuda()

        # model forward
        output = model((q1_batch, q1_len), (q2_batch, q2_len))

        pred = output.data > 0
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum().numpy()

    # save model
    eval_acc = round(100 * correct / len(dev), 4)
    if final_eval:
        print('finalgrep : accuracy: {0}'.format(eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy: {1}'.format(
            epoch, eval_acc))

    if epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            torch.save(model, os.path.join(params.save_dir, "%d.pkl" % (cv)))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0][
                    'lr'] = optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'.format(
                    params.lrshrink, optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy)
                stop_training = adam_stop
                adam_stop = True
    return eval_acc
Example No. 32
def _eval_test_set(sess, model, test_buckets):
    """ Evaluate on the test set. """
    for bucket_id in range(len(config.BUCKETS)):
        if len(test_buckets[bucket_id]) == 0:
            print("  Test: empty bucket %d" % (bucket_id))
            continue
        start = time.time()
        encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(test_buckets[bucket_id], 
                                                                        bucket_id,
                                                                        batch_size=config.BATCH_SIZE)
        _, step_loss, _ = run_step(sess, model, encoder_inputs, decoder_inputs, 
                                   decoder_masks, bucket_id, True)
        print('Test bucket {}: loss {}, time {}'.format(bucket_id, step_loss, time.time() - start))
Example No. 33
def run(self):
    with tf.Graph().as_default():
        x = tf.placeholder(dtype=tf.float32, shape=[None, self.input_dim], name='x')
        x_ = tf.placeholder(dtype=tf.float32, shape=[None, self.input_dim], name='x_')
        encoded, decoded = self.forward(x)
        loss, train_op = self.train(x_, decoded)
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(self.epoch):
                for j in range(50):
                    b_x, b_x_ = get_batch(self.data_x, self.data_x_, self.batch_size)
                    sess.run(train_op, feed_dict={x: b_x, x_: b_x_})
                if i % 100 == 0:
                    l = sess.run(loss, feed_dict={x: self.data_x, x_: self.data_x_})
                    print('epoch {0}: global loss = {1}'.format(i, l))
            self.hidden_feature = sess.run(encoded, feed_dict={x: self.data_x_})
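The training loop assumes a get_batch helper that draws paired rows from the input array and its reconstruction target. A plausible minimal version, hypothetical since the source does not show it, assuming both are NumPy arrays of equal length:

import numpy as np

def get_batch(data_x, data_x_, batch_size):
    # Sample the same random rows from both arrays so inputs and targets stay paired.
    idx = np.random.randint(0, len(data_x), size=batch_size)
    return data_x[idx], data_x_[idx]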
Example #34
def chat():
    """ in test mode, we don't to create the backward path
    """
    _, enc_vocab = data.load_vocab(os.path.join(config.PROCESSED_PATH, 'vocab.enc'))
    inv_dec_vocab, _ = data.load_vocab(os.path.join(config.PROCESSED_PATH, 'vocab.dec'))

    model = ChatBotModel(True, batch_size=1)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)
        output_file = open(os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+')
        # Decode from standard input.
        max_length = config.BUCKETS[-1][0]
        print('Welcome to TensorBro. Say something. Enter to exit. Max length is', max_length)
        while True:
            line = _get_user_input()
            if len(line) > 0 and line[-1] == '\n':
                line = line[:-1]
            if line == '':
                break
            output_file.write('HUMAN ++++ ' + line + '\n')
            # Get token-ids for the input sentence.
            token_ids = data.sentence2id(enc_vocab, str(line))
            if (len(token_ids) > max_length):
                print('Max length I can handle is:', max_length)
                line = _get_user_input()
                continue
            # Which bucket does it belong to?
            bucket_id = _find_right_bucket(len(token_ids))
            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch([(token_ids, [])], 
                                                                            bucket_id,
                                                                            batch_size=1)
            # Get output logits for the sentence.
            _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs,
                                           decoder_masks, bucket_id, True)
            response = _construct_response(output_logits, inv_dec_vocab)
            print(response)
            output_file.write('BOT ++++ ' + response + '\n')
        output_file.write('=============================================\n')
        output_file.close()
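chat() relies on a _find_right_bucket helper to pick the smallest bucket whose encoder side fits the tokenized input. A minimal sketch under the assumption that config.BUCKETS is sorted by encoder length (the helper itself is not shown in the source):

def _find_right_bucket(length):
    # First bucket whose encoder side is long enough for the input.
    return min(b for b in range(len(config.BUCKETS))
               if config.BUCKETS[b][0] >= length)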
Example #35
def train():
    """ Train the bot """
    test_buckets, data_buckets, train_buckets_scale = _get_buckets()
    # in train mode, we need to create the backward path, so forward_only is False
    model = ChatBotModel(False, config.BATCH_SIZE)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        print('Running session')
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)

        iteration = model.global_step.eval()
        total_loss = 0
        while True:
            skip_step = _get_skip_step(iteration)
            bucket_id = _get_random_bucket(train_buckets_scale)
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(data_buckets[bucket_id], 
                                                                           bucket_id,
                                                                           batch_size=config.BATCH_SIZE)
            start = time.time()
            _, step_loss, _ = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, False)
            total_loss += step_loss
            iteration += 1

            if iteration % skip_step == 0:
                print('Iter {}: loss {}, time {}'.format(iteration, total_loss/skip_step, time.time() - start))
                start = time.time()
                total_loss = 0
                saver.save(sess, os.path.join(config.CPT_PATH, 'chatbot'), global_step=model.global_step)
                if iteration % (10 * skip_step) == 0:
                    # Run evals on development set and print their loss
                    _eval_test_set(sess, model, test_buckets)
                    start = time.time()
                sys.stdout.flush()
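train() samples a bucket in proportion to how much data it holds: train_buckets_scale is the cumulative fraction of examples up to each bucket, so one uniform draw suffices. A minimal sketch of that sampling step, consistent with how the scale is used above but hypothetical in its details:

import random

def _get_random_bucket(train_buckets_scale):
    # train_buckets_scale[i] is the cumulative share of data in buckets 0..i,
    # so the first index whose share exceeds a uniform draw picks bucket i
    # with probability proportional to its size.
    rand = random.random()
    return min(i for i in range(len(train_buckets_scale))
               if train_buckets_scale[i] > rand)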
Example #36
def trainepoch(epoch):
    print('\nTRAINING : Epoch ' + str(epoch))
    nli_net.train()
    all_costs = []
    logs = []
    words_count = 0

    last_time = time.time()
    correct = 0.
    # shuffle the data
    permutation = np.random.permutation(len(train['s1']))

    s1 = train['s1'][permutation]
    s2 = train['s2'][permutation]
    target = train['label'][permutation]


    if epoch > 1 and 'sgd' in params.optimizer:
        optimizer.param_groups[0]['lr'] *= params.decay
    print('Learning rate : {0}'.format(optimizer.param_groups[0]['lr']))

    for stidx in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[stidx:stidx + params.batch_size],
                                     word_vec)
        s2_batch, s2_len = get_batch(s2[stidx:stidx + params.batch_size],
                                     word_vec)
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(s2_batch.cuda())
        tgt_batch = Variable(torch.LongTensor(target[stidx:stidx + params.batch_size])).cuda()
        k = s1_batch.size(1)  # actual batch size

        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))

        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()
        assert len(pred) == len(s1[stidx:stidx + params.batch_size])

        # loss
        loss = loss_fn(output, tgt_batch)
        all_costs.append(loss.data[0])
        words_count += (s1_batch.nelement() + s2_batch.nelement()) / params.word_emb_dim

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient clipping (off by default)
        shrink_factor = 1
        total_norm = 0

        for p in nli_net.parameters():
            if p.requires_grad:
                p.grad.data.div_(k)  # divide by the actual batch size
                total_norm += p.grad.data.norm() ** 2
        total_norm = np.sqrt(total_norm)

        if total_norm > params.max_norm:
            shrink_factor = params.max_norm / total_norm
        current_lr = optimizer.param_groups[0]['lr'] # current lr (no external "lr", for adam)
        optimizer.param_groups[0]['lr'] = current_lr * shrink_factor # just for update

        # optimizer step
        optimizer.step()
        optimizer.param_groups[0]['lr'] = current_lr

        if len(all_costs) == 100:
            logs.append('{0} ; loss {1} ; sentence/s {2} ; words/s {3} ; accuracy train : {4}'.format(
                            stidx, round(np.mean(all_costs), 2),
                            int(len(all_costs) * params.batch_size / (time.time() - last_time)),
                            int(words_count * 1.0 / (time.time() - last_time)),
                            round(100.*correct/(stidx+k), 2)))
            print(logs[-1])
            last_time = time.time()
            words_count = 0
            all_costs = []
    train_acc = round(100 * correct/len(s1), 2)
    print('results : epoch {0} ; mean accuracy train : {1}'
          .format(epoch, train_acc))
    return train_acc
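The manual norm accumulation and temporary learning-rate rescaling above implement gradient-norm clipping by hand; for plain SGD, scaling the lr for a single step has the same effect as scaling the gradients. In current PyTorch the same clipping, minus the division by the actual batch size, is a single call (a drop-in sketch using the snippet's names):

import torch

# Rescales gradients in place so their global L2 norm is at most max_norm.
torch.nn.utils.clip_grad_norm_(nli_net.parameters(), params.max_norm)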
Example #37
def main():

    params = Params()

    model = RegressionModel(params)

    # Use functions of the model to build the graph

    out, states = model.inference(model.data_placeholder)
    loss = model.loss(out, model.labels_placeholder)
    train_op = model.train(loss, params.step_size)

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Run the Op to initialize the variables.
    init = tf.initialize_all_variables()
    sess.run(init)
    saver = tf.train.Saver(tf.all_variables())

    for i in range(params.train_steps + 1):
        data, labels = get_batch(params.batch_size, params.sequence_length, params.input_channels)
        feed_dict = {
            model.data_placeholder: data,
            model.labels_placeholder: labels
        }

        # Run one step of the model.  The return values are the activations
        # from the `train_op` (which is discarded) and the `loss` Op.  To
        # inspect the values of your Ops or variables, you may include them
        # in the list passed to sess.run() and the value tensors will be
        # returned in the tuple from the call.
        _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
        if i % params.print_every == 0:
            print(i, loss_value)
        if i % params.save_every == 0:
            name = "model_{0}.ckpt".format(params.get_id)
            checkpoint_path = os.path.join('./save', name)
            # TODO: If we restore a model for further training, we should
            # add the number of training steps it had completed to our global step here
            saver.save(sess, checkpoint_path, global_step=i)
            print "model saved to {0}-{1}".format(checkpoint_path, i)
            with open('./save/{0}.model_param'.format(params.get_id), 'w') as f:
                pickle.dump(params,
                            f,
                            protocol=2 # pickle.HIGHEST_PROTOCOL as of writing
                            )

    data, labels = get_batch(params.batch_size, params.sequence_length, params.input_channels)
    feed_dict = {
            model.data_placeholder: data,
            model.labels_placeholder: labels
    }

    fetched = sess.run(out + states, feed_dict)
    out_ = fetched[:len(out)]
    states_ = fetched[len(out):]  # states start right after out (off-by-one fixed)

    d = data[0,0,:]
    o = np.array(out_)[:, 0, 0]
    l = labels[0,0,:]

    x1 = range(d.shape[0])
    x2 = range(1, d.shape[0] + 1)
    # TODO: output graph every 100 steps
    plt.scatter(x1, d, c='r')
    plt.scatter(x2, o, c='g')
    plt.scatter(x2, l, c='b', alpha=0.5)
    plt.show()

    print "data third dim", d

    print "out", o
    # print "states", np.array(states_)

    print "labels third dim", l