def _eval_test_set(sess, model, test_buckets):
    # Evaluate on the test set
    for bucket_id in range(len(settings.BUCKETS)):
        if len(test_buckets[bucket_id]) == 0:
            print("\nEmpty test bucket {}".format(settings.BUCKETS[bucket_id]))
            continue

        # Run forward only on test batch
        encoder_inputs, decoder_inputs, decoder_masks = util.get_batch(
            test_buckets[bucket_id], bucket_id,
            batch_size=settings.BATCH_SIZE)
        _, step_loss, logits = run_step(sess, model, encoder_inputs,
                                        decoder_inputs, decoder_masks,
                                        bucket_id, True)

        print("\nEVALUATING ON TEST SET")
        print("\n{} | test bucket {}; test loss {}\n".format(
            time.strftime("%c"), settings.BUCKETS[bucket_id], step_loss))

        # Print random example of Q/A
        example = random.choice(range(settings.BATCH_SIZE))
        question = [
            encoder_inputs[char][example]
            for char in range(len(encoder_inputs))
        ]

        print_encoder(question, model.encoder_to_words)
        print_decoder(logits, model.decoder_to_words, example)
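
# print_encoder/print_decoder are helpers defined elsewhere in this repo. As a
# rough, hypothetical sketch only (the real helpers may differ), a greedy
# decoder printer could look like this, assuming `logits` is a list of
# per-step [batch_size, vocab_size] arrays and `decoder_to_words` is a dict
# mapping token ids to strings:
import numpy as np

def _print_decoder_sketch(logits, decoder_to_words, example=0, eos="<eos>"):
    # Greedy argmax over the vocabulary at each decoder step for one example.
    token_ids = [int(np.argmax(step_logits[example])) for step_logits in logits]
    words = []
    for token_id in token_ids:
        word = decoder_to_words.get(token_id, "<unk>")
        if word == eos:
            break
        words.append(word)
    print("A >> " + " ".join(words))
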
def fetch_batch_and_train(sents, docs, tags, model, seq_len, i, p1, p2):
    (tm_costs, tm_words, lm_costs, lm_words) = p1
    (m_tm_cost, m_tm_train, m_lm_cost, m_lm_train) = p2
    x, y, m, d, t = get_batch(sents, docs, tags, i, cf.doc_len, seq_len, cf.tag_len, cf.batch_size, 0, \
        (True if isinstance(model, LM) else False))

    if isinstance(model, LM):
        if cf.topic_number > 0:
            tm_cost, _, lm_cost, _ = sess.run([m_tm_cost, m_tm_train, m_lm_cost, m_lm_train], \
                {model.x: x, model.y: y, model.lm_mask: m, model.doc: d, model.tag: t})
        else:
            #pure lstm
            tm_cost, _, lm_cost, _ = sess.run([m_tm_cost, m_tm_train, m_lm_cost, m_lm_train], \
                {model.x: x, model.y: y, model.lm_mask: m})
    else:
        tm_cost, _, lm_cost, _ = sess.run([m_tm_cost, m_tm_train, m_lm_cost, m_lm_train], \
            {model.y: y, model.tm_mask: m, model.doc: d, model.tag: t})

    if tm_cost is not None:
        tm_costs += tm_cost * cf.batch_size  # keep track of full batch loss (not per-example batch loss)
        tm_words += np.sum(m)
    if lm_cost is not None:
        lm_costs += lm_cost * cf.batch_size
        lm_words += np.sum(m)

    return tm_costs, tm_words, lm_costs, lm_words
Example #3
    def fit(self, x_train, num_epochs=1, print_every=0):
        """
        Method to train GAN.

        Parameters
        ----------
        print_every : int
            Print loss information every |print_every| batches. If 0,
            nothing is printed.
        """
        num_batches = x_train.shape[0] // self.model.batch_size
        print("num batches {}".format(num_batches))

        for epoch in range(num_epochs):
            print("\nEpoch {}".format(epoch + 1))

            for batch in range(num_batches):
                x_batch = get_batch(x_train, self.model.batch_size)
                self.train_discriminator()
                self.train_discriminator(x_batch)
                self.train_gan()
                if print_every and batch % print_every == 0:
                    print("GAN loss {} \t D loss {} \t Entropy {}".format(
                        self.g_loss_history[-1], self.d_loss_history[-1],
                        self.ent_loss_history[-1]))
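
# `get_batch(x_train, batch_size)` above is expected to return one random
# minibatch of rows from x_train. A minimal sketch of such a sampler (an
# assumption for illustration, not this repository's actual implementation):
import numpy as np

def get_batch_sketch(x_train, batch_size):
    """Sample a random minibatch of rows from x_train (with replacement)."""
    idx = np.random.randint(0, x_train.shape[0], size=batch_size)
    return x_train[idx]
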
Example #4
def log(config, data, patterns, word2idx_dict, model, sess, label="train", entropy=None):
    golds, preds, vals, sim_preds, sim_vals = [], [], [], [], []
    for batch in get_batch(config, data, word2idx_dict):
        gold, pred, val, sim_pred, sim_val = sess.run([model.gold, model.pred, model.max_val, model.sim_pred, model.sim_max_val],
                                                      feed_dict=get_feeddict(model, batch, patterns, is_train=False))
        golds += gold.tolist()
        preds += pred.tolist()
        vals += val.tolist()
        sim_preds += sim_pred.tolist()
        sim_vals += sim_val.tolist()

    threshold = [0.01 * i for i in range(1, 200)]
    acc, recall, f1 = 0., 0., 0.
    best_entro = 0.

    if entropy is None:
        for t in threshold:
            _preds = (np.asarray(vals, dtype=np.float32) <= t).astype(np.int32) * np.asarray(preds, dtype=np.int32)
            _preds = _preds.tolist()
            _acc, _recall, _f1 = evaluate(golds, _preds)
            if _f1 > f1:
                acc, recall, f1 = _acc, _recall, _f1
                best_entro = t
    else:
        preds = (np.asarray(vals, dtype=np.float32) <= entropy).astype(np.int32) * np.asarray(preds, dtype=np.int32)
        preds = preds.tolist()
        acc, recall, f1 = evaluate(golds, preds)
    return (acc, recall, f1), best_entro
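
# The threshold sweep above relies on a masking trick: any prediction whose
# confidence value `val` exceeds the threshold is zeroed out, i.e. mapped to
# the "no relation" class 0. A small self-contained illustration with toy
# numbers (not project data):
import numpy as np

vals_demo = np.asarray([0.10, 0.80, 0.30], dtype=np.float32)  # per-example values
preds_demo = np.asarray([3, 5, 2], dtype=np.int32)            # predicted relation ids
t_demo = 0.5                                                  # candidate threshold
masked = (vals_demo <= t_demo).astype(np.int32) * preds_demo
print(masked.tolist())  # [3, 0, 2] -- the high-value prediction falls back to class 0
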
Example #5
def train(config, data):
    word2idx_dict, word_emb, train_data, dev_data, test_data = data
    patterns = get_patterns(config, word2idx_dict)

    with tf.variable_scope("models"):
        if config.dataset == "tacred":
            import tacred_constant as constant
        else:
            import semeval_constant as constant
        regex = Pat_Match(config, constant.LABEL_TO_ID)
        match = Soft_Match(config, word_mat=word_emb, word2idx_dict=word2idx_dict)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    labeled_data = []
    unlabeled_data = []
    for x in train_data:
        batch = [x["tokens"]]
        res, pred = regex.match(batch)
        patterns["weights"] += res[0]
        if np.amax(res) > 0:
            x["rel"] = pred.tolist()[0]
            x["pat"] = np.argmax(res, axis=1).tolist()[0]
            labeled_data.append(x)
        else:
            x["rel"] = 0
            unlabeled_data.append(x)
    patterns["weights"] = patterns["weights"] / np.sum(patterns["weights"])
    random.shuffle(unlabeled_data)
    print("{} labeled data".format(len(labeled_data)))

    dev_history, test_history = [], []

    with tf.Session(config=sess_config) as sess:

        lr = float(config.init_lr)
        sess.run(tf.global_variables_initializer())

        for epoch in tqdm(range(1, config.num_epoch + 1), desc="Epoch"):
            for batch1, batch2 in zip(get_batch(config, labeled_data, word2idx_dict), get_batch(config, unlabeled_data, word2idx_dict, pseudo=True)):
                batch = merge_batch(batch1, batch2)
                loss, _ = sess.run([match.loss, match.train_op], feed_dict=get_feeddict(match, batch, patterns))

            (dev_acc, dev_rec, dev_f1), best_entro = log(config, dev_data, patterns, word2idx_dict, match, sess, "dev")
            (test_acc, test_rec, test_f1), _ = log(
                config, test_data, patterns, word2idx_dict, match, sess, "test", entropy=best_entro)

            dev_history.append((dev_acc, dev_rec, dev_f1))
            test_history.append((test_acc, test_rec, test_f1))
            if len(dev_history) > 1 and dev_f1 <= dev_history[-2][2]:
                lr *= config.lr_decay
                sess.run(tf.assign(match.lr, lr))

        max_idx = dev_history.index(max(dev_history, key=lambda x: x[2]))
        max_acc, max_rec, max_f1 = test_history[max_idx]
        print("acc: {}, rec: {}, f1: {}".format(max_acc, max_rec, max_f1))
        sys.stdout.flush()
    return max_acc, max_rec, max_f1
Example #6
def train_model(
    model,
    dataloader,
    place_cells,
    hd_cells,
    num_epochs=10,
    lr=1e-5,
    momentum=0.9,
    weight_decay=1e-5,
    clip=1e-5,
):
    """Train model using CrossEntropy and RMSProp as in paper"""

    hdloss = CrossEntropyLoss().spec("hdcell")
    placeloss = CrossEntropyLoss().spec("placecell")

    params = decay_params(model, ["head", "place", "g"], weight_decay)
    optimizer = torch.optim.RMSprop(params, lr=lr, momentum=momentum)

    losses = []

    tq = tqdm_notebook if in_ipynb() else tqdm

    for k in range(num_epochs):
        model.train()
        epoch_losses = []

        for i, traj in enumerate(tq(dataloader)):
            cs, hs, ego_vel, c0, h0 = get_batch(traj, place_cells, hd_cells)

            optimizer.zero_grad()

            zs, ys, _ = model(ego_vel, c0, h0)

            loss = hdloss(zs, hs) + placeloss(ys, cs)
            epoch_losses.append(loss.item())

            loss.backward()

            # torch.nn.utils.clip_grad_value_(model.head.parameters(), clip)
            # torch.nn.utils.clip_grad_value_(model.place.parameters(), clip)
            torch.nn.utils.clip_grad_value_(model.parameters(), clip)

            optimizer.step()

            if (i + 1) % 1000 == 0 or i + 1 == len(dataloader):
                # Output and visualize progress each epoch
                print(
                    f"epoch {k}, mean loss {np.mean(epoch_losses)}, std loss {np.std(epoch_losses)}"
                )
                visualize_g(model, dataloader, place_cells, hd_cells)
                model.train()
                break
            if i > 1000 * num_epochs:
                return epoch_losses
        losses += epoch_losses
    return losses
def run_epoch(sents, docs, tags, p1, pad_id, cf, idxvocab):
    (tm, lm) = p1
    #generate the batches
    tm_num_batches, lm_num_batches = int(math.ceil(float(len(sents[0]))/cf.batch_size)), \
        int(math.ceil(float(len(sents[1]))/cf.batch_size))

    #run an epoch to compute tm and lm perplexities
    if tm is not None:
        tm_costs, tm_words = 0.0, 0.0
        for bi in range(tm_num_batches):
            _, y, m, d, t = get_batch(sents[0], docs[0], tags, bi, cf.doc_len, cf.tm_sent_len, cf.tag_len, cf.batch_size, \
                pad_id, False)
            tm_cost = sess.run(tm.tm_cost, {
                tm.y: y,
                tm.tm_mask: m,
                tm.doc: d,
                tm.tag: t
            })
            tm_costs += tm_cost * cf.batch_size
            tm_words += np.sum(m)
        print("\ntest topic model perplexity = %.3f" %
              (np.exp(tm_costs / tm_words)))

    if lm is not None:
        lm_costs, lm_words = 0.0, 0.0
        for bi in range(lm_num_batches):
            x, y, m, d, t = get_batch(sents[1], docs[1], tags, bi, cf.doc_len, cf.lm_sent_len, cf.tag_len, cf.batch_size, \
                pad_id, True)
            lm_cost, tw = sess.run([lm.lm_cost, lm.tm_weights], {
                lm.x: x,
                lm.y: y,
                lm.lm_mask: m,
                lm.doc: d,
                lm.tag: t
            })
            lm_costs += lm_cost * cf.batch_size
            lm_words += np.sum(m)

        print("test language model perplexity = %.3f" %
              (np.exp(lm_costs / lm_words)))
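
# The perplexity printed above is exp(total accumulated cost / total predicted
# tokens): each batch's mean cost is scaled back up by cf.batch_size, and
# np.sum(m) counts the real (unmasked) tokens. A toy, self-contained
# illustration of that arithmetic (numbers are made up):
import numpy as np

batch_mean_costs = [82.0, 79.5, 80.5]     # per-example mean cost for 3 batches
batch_size_demo = 32
tokens_per_batch = [550.0, 540.0, 560.0]  # np.sum(m) for each batch

total_cost = sum(c * batch_size_demo for c in batch_mean_costs)
total_tokens = sum(tokens_per_batch)
print("perplexity = %.3f" % np.exp(total_cost / total_tokens))
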
Example #8
def _train_epoch(args, epoch, model, train_data, corpus, device, lr,
                 criterion):
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)

    hidden = model.init_hidden(args.batch_size)

    model.train()
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):

        data, targets = get_batch(args.bptt, train_data, i)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(p.grad.data, alpha=-lr)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch,
                    len(train_data) // args.bptt, lr,
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

            # Log to tensorboard
            info = {
                f'training/{epoch}/loss': cur_loss,
                f'training/{epoch}/loss_exp': math.exp(cur_loss),
                'training/lr': lr,
            }

            for tag, value in info.items():
                inject_summary(summary_writer, tag, value, i)

            summary_writer.flush()
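
# `repackage_hidden` is not shown in this snippet. Consistent with the comment
# above (detach the hidden state so backprop does not reach back to the start
# of the dataset), a common sketch of it looks like the following; the
# project's own version may differ:
import torch

def repackage_hidden_sketch(h):
    """Wrap hidden states in new tensors detached from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden_sketch(v) for v in h)
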
Example #9
def evaluate(args, data_source, model, corpus, criterion):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)

    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(args.bptt, data_source, i)
            output, hidden = model(data, hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).item()
            hidden = repackage_hidden(hidden)

    return total_loss / (len(data_source) - 1)
Example #10
def visualize_g(model, test_iter, place_cells, hd_cells, offset=0, limit=50):
    """Visualize 25 cells in G layer of model (applied to output of LSTM)"""
    model.eval()
    G, P = None, None
    c = 0

    # Get batches up to limit as samples
    for traj in test_iter:
        cs, hs, ego_vel, c0, h0, xs = get_batch(traj,
                                                place_cells,
                                                hd_cells,
                                                pos=True)
        if c > limit:
            break
        zs, gs, ys = model(ego_vel, c0, h0)
        if G is None:
            G = gs.cpu()
            P = xs.cpu()
        else:
            G = ntorch.cat((G, gs.cpu()), "batch")
            P = ntorch.cat((P, xs.cpu()), "batch")
        del ego_vel, cs, xs, hs, zs, ys, gs, h0, c0
        torch.cuda.empty_cache()
        c += 1

    pts = P.stack(("t", "batch"), "pts")
    G = G.stack(("t", "batch"), "pts")

    xs, ys = [pts.get("ax", i).values.detach().numpy() for i in [0, 1]]

    # Plot 5x5 grid of cell activations, starting at offset
    axs = plt.subplots(5, 5, figsize=(50, 50))[1]
    axs = axs.flatten()

    for i, ax in enumerate(axs):
        acts = G.get("placecell", offset + i).values.detach().numpy()
        res = stats.binned_statistic_2d(xs,
                                        ys,
                                        acts,
                                        bins=20,
                                        statistic="mean")[0]
        ax.imshow(res, cmap="jet")
        ax.axis("off")
    plt.show()
def simulator(args, emb_array, labels, max_compare_num, filepath, threshold):
    # Initialize
    fa = 0  # False accept
    wa = 0  # Wrong answer
    fr = 0  # False reject
    accept = 0
    reject = 0

    # Construct database
    database = Database(emb_array.shape[0], max_compare_num)

    # Simulating
    for indx, emb in enumerate(emb_array):
        test_array, test_label = util.get_batch(emb_array, labels, indx)

        if len(database) != 0:  # database is not empty
            max_id, max_similarity = database.get_most_similar(test_array)
            # Not intruder
            if threshold < max_similarity:
                accept += 1
                if not database.contains(test_label):
                    fa += 1  # False accept
                elif test_label != database.get_label_by_id(max_id):
                    wa += 1  # Recognition error
            # Intruder
            else:
                reject += 1
                if database.contains(test_label):
                    fr += 1  # False reject

        # Add to database
        database.insert(test_label, test_array)

    #database.print_database()

    # Calculate error
    result_file = util.show_and_save_v3(fa, fr, wa, accept, reject,
                                        max_compare_num, filepath)
    return result_file
idxvocab = []

#constants
pad_symbol = "<pad>"
start_symbol = "<go>"
end_symbol = "<eos>"
unk_symbol = "<unk>"
dummy_symbols = [pad_symbol, start_symbol, end_symbol, unk_symbol]

###########
#functions#
###########

def fetch_batch_and_train(sents, docs, tags, model, seq_len, i, p1, p2):
    (tm_costs, tm_words, lm_costs, lm_words) = p1
    (m_tm_cost, m_tm_train, m_lm_cost, m_lm_train) = p2
    x, y, m, d, t = get_batch(sents, docs, tags, i, cf.doc_len, seq_len, cf.tag_len, cf.batch_size, 0, \
        (True if isinstance(model, LM) else False))

    if isinstance(model, LM):
        if cf.topic_number > 0:
            tm_cost, _, lm_cost, _ = sess.run([m_tm_cost, m_tm_train, m_lm_cost, m_lm_train], \
                {model.x: x, model.y: y, model.lm_mask: m, model.doc: d, model.tag: t})
        else:
            #pure lstm
            tm_cost, _, lm_cost, _ = sess.run([m_tm_cost, m_tm_train, m_lm_cost, m_lm_train], \
                {model.x: x, model.y: y, model.lm_mask: m})
    else:
        tm_cost, _, lm_cost, _ = sess.run([m_tm_cost, m_tm_train, m_lm_cost, m_lm_train], \
            {model.y: y, model.tm_mask: m, model.doc: d, model.tag: t})

    if tm_cost is not None:
        tm_costs += tm_cost * cf.batch_size  # keep track of full batch loss (not per-example batch loss)
Example #13
def main(_):

    opts = Options(save_path=FLAGS.save_path,
                   train_biom=FLAGS.train_biom,
                   test_biom=FLAGS.test_biom,
                   train_metadata=FLAGS.train_metadata,
                   test_metadata=FLAGS.test_metadata,
                   formula=FLAGS.formula,
                   tree=FLAGS.tree,
                   learning_rate=FLAGS.learning_rate,
                   clipping_size=FLAGS.clipping_size,
                   beta_mean=FLAGS.beta_mean,
                   beta_scale=FLAGS.beta_scale,
                   gamma_mean=FLAGS.gamma_mean,
                   gamma_scale=FLAGS.gamma_scale,
                   epochs_to_train=FLAGS.epochs_to_train,
                   num_neg_samples=FLAGS.num_neg_samples,
                   batch_size=FLAGS.batch_size,
                   min_sample_count=FLAGS.min_sample_count,
                   min_feature_count=FLAGS.min_feature_count,
                   statistics_interval=FLAGS.statistics_interval,
                   summary_interval=FLAGS.summary_interval,
                   checkpoint_interval=FLAGS.checkpoint_interval)

    # preprocessing
    train_table, train_metadata = opts.train_table, opts.train_metadata
    train_metadata = train_metadata.loc[train_table.ids(axis='sample')]

    sample_filter = lambda val, id_, md: (
        (id_ in train_metadata.index) and np.sum(val) > opts.min_sample_count)
    read_filter = lambda val, id_, md: np.sum(val) > opts.min_feature_count
    metadata_filter = lambda val, id_, md: id_ in train_metadata.index

    train_table = train_table.filter(metadata_filter, axis='sample')
    train_table = train_table.filter(sample_filter, axis='sample')
    train_table = train_table.filter(read_filter, axis='observation')
    train_metadata = train_metadata.loc[train_table.ids(axis='sample')]
    sort_f = lambda xs: [xs[train_metadata.index.get_loc(x)] for x in xs]
    train_table = train_table.sort(sort_f=sort_f, axis='sample')
    train_metadata = dmatrix(opts.formula,
                             train_metadata,
                             return_type='dataframe')
    tree = opts.tree
    train_table, tree = match_tips(train_table, tree)
    basis, _ = sparse_balance_basis(tree)
    basis = basis.T

    # hold out data preprocessing
    test_table, test_metadata = opts.test_table, opts.test_metadata
    metadata_filter = lambda val, id_, md: id_ in test_metadata.index
    obs_lookup = set(train_table.ids(axis='observation'))
    feat_filter = lambda val, id_, md: id_ in obs_lookup
    test_table = test_table.filter(metadata_filter, axis='sample')
    test_table = test_table.filter(feat_filter, axis='observation')
    test_metadata = test_metadata.loc[test_table.ids(axis='sample')]
    sort_f = lambda xs: [xs[test_metadata.index.get_loc(x)] for x in xs]
    test_table = test_table.sort(sort_f=sort_f, axis='sample')
    test_metadata = dmatrix(opts.formula,
                            test_metadata,
                            return_type='dataframe')
    test_table, tree = match_tips(test_table, tree)

    p = train_metadata.shape[1]  # number of covariates
    G_data = train_metadata.values
    y_data = train_table.matrix_data.tocoo().T
    y_test = np.array(test_table.matrix_data.todense()).T
    N, D = y_data.shape
    save_path = opts.save_path
    learning_rate = opts.learning_rate
    batch_size = opts.batch_size
    gamma_mean, gamma_scale = opts.gamma_mean, opts.gamma_scale
    beta_mean, beta_scale = opts.beta_mean, opts.beta_scale
    num_neg = opts.num_neg_samples
    clipping_size = opts.clipping_size

    epoch = y_data.nnz // batch_size
    num_iter = int(opts.epochs_to_train * epoch)
    holdout_size = test_metadata.shape[0]
    checkpoint_interval = opts.checkpoint_interval

    # Model code
    with tf.Graph().as_default(), tf.Session() as session:
        with tf.device("/cpu:0"):
            # Place holder variables to accept input data
            Gpos_ph = tf.placeholder(tf.float32, [batch_size, p], name='G_pos')
            Gneg_ph = tf.placeholder(tf.float32, [num_neg, p], name='G_neg')
            G_holdout = tf.placeholder(tf.float32, [holdout_size, p],
                                       name='G_holdout')
            Y_holdout = tf.placeholder(tf.float32, [holdout_size, D],
                                       name='Y_holdout')
            Y_ph = tf.placeholder(tf.float32, [batch_size], name='Y_ph')

            pos_row = tf.placeholder(tf.int32,
                                     shape=[batch_size],
                                     name='pos_row')
            pos_col = tf.placeholder(tf.int32,
                                     shape=[batch_size],
                                     name='pos_col')
            neg_row = tf.placeholder(tf.int32, shape=[num_neg], name='neg_row')
            neg_col = tf.placeholder(tf.int32, shape=[num_neg], name='neg_col')
            neg_data = tf.zeros(shape=[num_neg],
                                name='neg_data',
                                dtype=tf.float32)
            total_zero = tf.constant(y_data.shape[0] * y_data.shape[1] -
                                     y_data.nnz,
                                     dtype=tf.float32)
            total_nonzero = tf.constant(y_data.nnz, dtype=tf.float32)

            # Define PointMass Variables first
            qgamma = tf.Variable(tf.random_normal([1, D - 1]), name='qgamma')
            qbeta = tf.Variable(tf.random_normal([p, D - 1]), name='qB')
            theta = tf.Variable(tf.random_normal([N, 1]), name='theta')

            # Distributions species bias
            gamma = Normal(loc=tf.zeros([1, D - 1]) + gamma_mean,
                           scale=tf.ones([1, D - 1]) * gamma_scale,
                           name='gamma')
            # regression coefficents distribution
            beta = Normal(loc=tf.zeros([p, D - 1]) + beta_mean,
                          scale=tf.ones([p, D - 1]) * beta_scale,
                          name='B')
            Bprime = tf.concat([qgamma, qbeta], axis=0)

            # Add bias terms for samples
            Gpos = tf.concat([tf.ones([batch_size, 1]), Gpos_ph], axis=1)
            Gneg = tf.concat([tf.ones([num_neg, 1]), Gneg_ph], axis=1)

            # Convert basis to SparseTensor
            psi = tf.SparseTensor(indices=np.mat([basis.row,
                                                  basis.col]).transpose(),
                                  values=basis.data,
                                  dense_shape=basis.shape)

            V = tf.transpose(
                tf.sparse_tensor_dense_matmul(psi, tf.transpose(Bprime)))

            # sparse matrix multiplication for positive samples
            pos_prime = tf.reduce_sum(tf.multiply(
                Gpos, tf.transpose(tf.gather(V, pos_col, axis=1))),
                                      axis=1)
            pos_phi = tf.reshape(tf.gather(theta, pos_row),
                                 shape=[batch_size]) + pos_prime
            Y = Poisson(log_rate=pos_phi, name='Y')

            # sparse matrix multiplication for negative samples
            neg_prime = tf.reduce_sum(tf.multiply(
                Gneg, tf.transpose(tf.gather(V, neg_col, axis=1))),
                                      axis=1)
            neg_phi = tf.reshape(tf.gather(theta, neg_row),
                                 shape=[num_neg]) + neg_prime
            neg_poisson = Poisson(log_rate=neg_phi, name='neg_counts')

            loss = -(
              tf.reduce_sum(gamma.log_prob(qgamma)) + \
              tf.reduce_sum(beta.log_prob(qbeta)) + \
              tf.reduce_sum(Y.log_prob(Y_ph)) * (total_nonzero / batch_size) + \
              tf.reduce_sum(neg_poisson.log_prob(neg_data)) * (total_zero / num_neg)
            )

            optimizer = tf.train.AdamOptimizer(learning_rate,
                                               beta1=0.9,
                                               beta2=0.9)
            gradients, variables = zip(*optimizer.compute_gradients(loss))
            gradients, _ = tf.clip_by_global_norm(gradients, clipping_size)
            train = optimizer.apply_gradients(zip(gradients, variables))

            with tf.name_scope('accuracy'):
                holdout_count = tf.reduce_sum(Y_holdout, axis=1)
                spred = tf.nn.softmax(
                    tf.transpose(
                        tf.sparse_tensor_dense_matmul(
                            psi,
                            tf.transpose(
                                (tf.matmul(G_holdout, qbeta) + qgamma)))))

                pred = tf.reshape(holdout_count, [-1, 1]) * spred
                mse = tf.reduce_mean(tf.squeeze(tf.abs(pred - Y_holdout)))
                tf.summary.scalar('mean_absolute_error', mse)

            tf.summary.scalar('loss', loss)
            tf.summary.histogram('qbeta', qbeta)
            tf.summary.histogram('qgamma', qgamma)
            tf.summary.histogram('theta', theta)
            merged = tf.summary.merge_all()

            tf.global_variables_initializer().run()

            writer = tf.summary.FileWriter(save_path, session.graph)
            losses = np.array([0.] * num_iter)
            idx = np.arange(train_metadata.shape[0])
            log_handle = open(os.path.join(save_path, 'run.log'), 'w')
            gen = get_batch(batch_size,
                            N,
                            D,
                            y_data.data,
                            y_data.row,
                            y_data.col,
                            num_neg=num_neg)
            start_time = time.time()
            last_checkpoint_time = 0
            saver = tf.train.Saver()
            for i in range(num_iter):
                batch_idx = np.random.choice(idx, size=batch_size)
                batch = next(gen)
                (positive_row, positive_col, positive_data, negative_row,
                 negative_col, negative_data) = batch
                feed_dict = {
                    Y_ph: positive_data,
                    Y_holdout: y_test.astype(np.float32),
                    G_holdout: test_metadata.values.astype(np.float32),
                    Gpos_ph: G_data[positive_row, :],
                    Gneg_ph: G_data[negative_row, :],
                    pos_row: positive_row,
                    pos_col: positive_col,
                    neg_row: negative_row,
                    neg_col: negative_col
                }
                if i % 5000 == 0:
                    _, err, summary, train_loss, grads = session.run(
                        [train, mse, merged, loss, gradients],
                        feed_dict=feed_dict)
                    writer.add_summary(summary, i)
                elif i % 1000 == 0:
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    _, summary, train_loss, grads = session.run(
                        [train, merged, loss, gradients],
                        feed_dict=feed_dict,
                        options=run_options,
                        run_metadata=run_metadata)
                    writer.add_run_metadata(run_metadata, 'step%d' % i)
                    writer.add_summary(summary, i)
                else:
                    _, summary, train_loss, grads = session.run(
                        [train, merged, loss, gradients], feed_dict=feed_dict)
                    writer.add_summary(summary, i)

                now = time.time()
                if now - last_checkpoint_time > checkpoint_interval:
                    saver.save(session,
                               os.path.join(opts.save_path, "model.ckpt"),
                               global_step=i)
                    last_checkpoint_time = now

                losses[i] = train_loss

            elapsed_time = time.time() - start_time
            print('Elapsed Time: %f seconds' % elapsed_time)

            # Cross validation
            pred_beta = qbeta.eval()
            pred_gamma = qgamma.eval()
            mse, mrc = cross_validation(test_metadata.values,
                                        pred_beta @ basis.T,
                                        pred_gamma @ basis.T, y_test)
            print("MSE: %f, MRC: %f" % (mse, mrc))
def chat(to_file=False):
    # Takes user input and responds with the trained model

    # Init model
    model = models.ChatbotModel(forward_only=True, batch_size=1)
    model.build_graph()
    # Init checkpoint saver
    saver = tf.train.Saver()

    sess = tf.InteractiveSession()  # More flexible with ipynb format
    print("Running session")
    sess.run(tf.global_variables_initializer())
    _check_restore_parameters(sess, saver)

    if to_file:
        output_file = open(
            os.path.join(
                settings.GENERATED_PATH.format(str(round(time.time())))), 'a+')
        output_file.write("=" * 120)
        output_file.write("{}".format(time.strftime("%c")))

    max_length = settings.BUCKETS[-1][0]

    print("=" * 120)
    print("""
 _____                                     _   _             
/  __ \                                   | | (_)            
| /  \/ ___  _ ____   _____ _ __ ___  __ _| |_ _  ___  _ __  
| |    / _ \| '_ \ \ / / _ \ '__/ __|/ _` | __| |/ _ \| '_ \ 
| \__/\ (_) | | | \ V /  __/ |  \__ \ (_| | |_| | (_) | | | |
 \____/\___/|_| |_|\_/ \___|_|  |___/\__,_|\__|_|\___/|_| |_|
        
    """)
    print("=" * 120)
    print('Welcome to Conversation.')
    print("Type up to {} chars to start, ENTER to exit.".format(max_length))

    while True:
        line = _get_user_input()

        if not line:
            break

        # Tokens for input sentence
        tokens = util.embed(model.words_to_encoder, line)
        if len(tokens) > max_length:
            print(
                "System message: Maximum input length for this model is {}, please try again."
                .format(max_length))
            continue

        bucket_id = _find_right_bucket_length(
            len(tokens))  # Which bucket for this input length?
        # Form the input sentence into a one element batch to feed the model
        encoder_inputs, decoder_inputs, decoder_masks = util.get_batch(
            [(tokens, [])], bucket_id, batch_size=1)
        # Get outputs of model
        _, _, logits = run_step(sess, model, encoder_inputs, decoder_inputs,
                                decoder_masks, bucket_id, True)

        # Note: assumes print_decoder returns the decoded reply as a string in
        # addition to printing it, so it can be written to the output file below.
        response = print_decoder(logits, model.decoder_to_words)

        if to_file:
            output_file.write("Q | " + line)
            output_file.write("A >> " + response)

    if to_file:
        output_file.write("=" * 120)
        output_file.close()
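
# `_find_right_bucket_length` is defined elsewhere in this repo. Judging from
# how it is used above, a minimal sketch would pick the smallest bucket whose
# encoder length can hold the tokenized input (an assumption about the actual
# helper, shown for illustration only):
def _find_right_bucket_length_sketch(token_length, buckets):
    """Return the index of the first bucket whose encoder size fits the input."""
    for bucket_id, (encoder_size, _) in enumerate(buckets):
        if token_length <= encoder_size:
            return bucket_id
    return len(buckets) - 1  # fall back to the largest bucket

# e.g. _find_right_bucket_length_sketch(12, [(8, 10), (16, 19), (25, 30)]) -> 1
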
def train(messages_only=False):
    # Trains the chatbot model defined above with the data processed above

    print("""

 _____         _       _             
|_   _|       (_)     (_)            
  | |_ __ __ _ _ _ __  _ _ __   __ _ 
  | | '__/ _` | | '_ \| | '_ \ / _` |
  | | | | (_| | | | | | | | | | (_| |
  \_/_|  \__,_|_|_| |_|_|_| |_|\__, |
                                __/ |
                               |___/ 

""")

    # Load data
    test_buckets, train_buckets, train_buckets_scale = _get_buckets(
        messages_only)
    # Init model
    model = models.ChatbotModel(forward_only=False,
                                batch_size=settings.BATCH_SIZE)
    model.build_graph()
    # Init checkpoint saver
    saver = tf.train.Saver(max_to_keep=100)

    sess = tf.InteractiveSession()  # More flexible with ipynb format
    print("\nRunning session")
    sess.run(tf.global_variables_initializer())
    _check_restore_parameters(sess, saver)

    iteration = model.global_step.eval()
    total_loss = 0

    print("\nStarting training at {}\n".format(time.strftime('%c')))

    for _ in range(settings.MAX_ITER):
        bucket_id = _get_random_bucket(train_buckets_scale)

        encoder_inputs, decoder_inputs, decoder_masks = util.get_batch(
            train_buckets[bucket_id],
            bucket_id,
            batch_size=settings.BATCH_SIZE)
        ######
        ###### Kept having errors with the line below, of the type:
        ###### InvalidArgumentError (see above for traceback): indices[61] = 42998 is not in [0, 42996)
        ###### So a try/except wrapper was added so training doesn't break, but this needs fixing.
        ###### TO-DO: fix the sizing error in the embedding call in the training-loop call to run_step (~ line 217).
        ###### It seems to come from decoding outputs back to embeddings that are too large, possibly due to
        ###### the new evaluation print-out parts.
        ######
        try:
            _, step_loss, _ = run_step(sess, model, encoder_inputs,
                                       decoder_inputs, decoder_masks,
                                       bucket_id, False)
            total_loss += step_loss
            iteration += 1
        except Exception as e:
            print(
                "Error in training step run_step(), continuing from next step: {}"
                .format(e))

        if iteration % settings.PRINT_EVERY == 0:  # Print over period of iterations to reduce noise by averaging
            print("{} | Iteration {}; Loss {};".format(
                time.strftime('%c'), iteration,
                float(total_loss) / settings.PRINT_EVERY))
            total_loss = 0

        if iteration % settings.SAVE_EVERY == 0:
            saved_path = saver.save(sess,
                                    os.path.join(
                                        settings.CKPT_PATH,
                                        'chatbot-ckpt-{}'.format(
                                            str(round(time.time())))),
                                    global_step=model.global_step)
            print("\nModel saved to {}".format(saved_path))

        if iteration % settings.EVAL_EVERY == 0:
            # run evaluation on development set and print their loss
            _eval_test_set(sess, model, test_buckets)

        sys.stdout.flush()
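
# `_get_random_bucket` samples a bucket id in proportion to how much training
# data each bucket holds, using the cumulative `train_buckets_scale` list
# returned by `_get_buckets`. A minimal sketch under that assumption (the
# repository's helper may differ in detail):
import random

def _get_random_bucket_sketch(train_buckets_scale):
    """Pick a bucket id with probability proportional to its share of the data."""
    r = random.random()  # uniform in [0, 1); the scale is assumed to end at 1.0
    return min(i for i in range(len(train_buckets_scale))
               if train_buckets_scale[i] > r)
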
Example #16
                    print "Topic", ti, "=", attention[si][ti]
                docid += 1

    np.save(open(output_file, "w"), dt_dist)


def run_epoch(sents, docs, tags, (tm, lm), pad_id, cf, idxvocab):
    #generate the batches
    tm_num_batches, lm_num_batches = int(math.ceil(float(len(sents[0]))/cf.batch_size)), \
        int(math.ceil(float(len(sents[1]))/cf.batch_size))

    #run an epoch to compute tm and lm perplexities
    if tm is not None:
        tm_costs, tm_words = 0.0, 0.0
        for bi in xrange(tm_num_batches):
            _, y, m, d, t = get_batch(sents[0], docs[0], tags, bi, cf.doc_len, cf.tm_sent_len, cf.tag_len, cf.batch_size, \
                pad_id, False)
            tm_cost = sess.run(tm.tm_cost, {
                tm.y: y,
                tm.tm_mask: m,
                tm.doc: d,
                tm.tag: t
            })
            tm_costs += tm_cost * cf.batch_size
            tm_words += np.sum(m)
        print "\ntest topic model perplexity = %.3f" % (np.exp(
            tm_costs / tm_words))

    if lm is not None:
        lm_costs, lm_words = 0.0, 0.0
        for bi in xrange(lm_num_batches):
            x, y, m, d, t = get_batch(sents[1], docs[1], tags, bi, cf.doc_len, cf.lm_sent_len, cf.tag_len, cf.batch_size, \
Example #17
def train(model,
          dataset,
          optimizer,
          dest_dir,
          batch_size=128,
          max_epoch=None,
          gpu=None,
          save_every=5,
          test_every=5,
          alpha_init=1.,
          alpha_delta=0.,
          l2_weight_gen=0.,
          l2_weight_con=0.):
    """Common training procedure.
    :param model: model to train
    :param dataset: training & validation data
    :param optimizer: chainer optimizer
    :param dest_dir: destination directory
    :param batch_size: number of sample in minibatch
    :param max_epoch: maximum number of epochs to train (None to train indefinitely)
    :param gpu: ID of GPU (None to use CPU)
    :param save_every: save every this number of epochs (first epoch and last epoch are always saved)
    :param test_every: run a validation batch every this number of epochs
    :param alpha_init: initial value of alpha
    :param alpha_delta: change of alpha at every batch
    :param l2_weight_gen: L2 regularization weight for the generator GRU
    :param l2_weight_con: L2 regularization weight for the controller GRU
    """
    if gpu is not None:
        # set up GPU
        cuda.get_device(gpu).use()
        model.to_gpu(gpu)

    logger = logging.getLogger()

    # set up optimizer
    opt_enc = util.list2optimizer(optimizer)
    opt_enc.setup(model.encoder)
    if hasattr(model, 'controller'):
        opt_con = util.list2optimizer(optimizer)
        opt_con.setup(model.controller)
    opt_gen = util.list2optimizer(optimizer)
    opt_gen.setup(model.generator)

    # training loop
    epoch = 0
    alpha = alpha_init
    test_losses = []
    train_losses = []
    train_data = dataset["train_data"]
    test_data = dataset["valid_data"]
    split = 'test'

    while True:
        if max_epoch is not None and epoch >= max_epoch:
            # terminate training
            break

        # Every test_every epochs, evaluate a batch from the validation set
        if split == 'train':
            x_data, _ = util.get_batch(train_data, batch_size=batch_size)
        else:
            x_data, _ = util.get_batch(test_data, batch_size=batch_size)

        # create batches
        x_data = x_data.astype(np.float32)

        # copy data to GPU
        if gpu is not None:
            x_data = cuda.to_gpu(x_data)

        # create variables
        xs = [Variable(x.astype(np.float32)) for x in x_data]

        # set new alpha
        alpha += alpha_delta
        alpha = min(alpha, 1.)
        alpha = max(alpha, 0.)

        time_start = time.time()

        # encoder
        _, h_bxtxd = model.encoder(xs)
        h_bxtxd = F.stack(h_bxtxd, 0)
        d_dims = h_bxtxd.data.shape[2]

        # generator
        g0_bxd, kl_g0 = model.generator.sample_g0(
            F.concat(
                [h_bxtxd[:, 0, -d_dims // 2:], h_bxtxd[:, -1, :d_dims // 2]],
                axis=1))
        f0_bxd = model.generator.l_f(g0_bxd)

        # main
        x_hat = []
        rec_loss_total = 0
        if hasattr(model, 'controller'):
            kl_u_total = 0

        for i in range(0, h_bxtxd[0].data.shape[0]):
            if i == 0:
                if hasattr(model, 'controller'):
                    con_i = model.controller(
                        F.concat((f0_bxd, h_bxtxd[:, i, :d_dims // 2],
                                  h_bxtxd[:, i, d_dims // 2:]),
                                 axis=1))
                    u_i_bxd, kl_u = model.generator.sample_u_1(con_i)
                    kl_u_total += kl_u
                    g_i_bxd = model.generator(u_i_bxd, hx=g0_bxd)
                else:
                    g_i_bxd = model.generator(F.concat(
                        (h_bxtxd[:, i, :d_dims // 2], h_bxtxd[:, i,
                                                              d_dims // 2:]),
                        axis=1),
                                              hx=g0_bxd)
            else:
                if hasattr(model, 'controller'):
                    con_i = model.controller(F.concat([
                        f_i, h_bxtxd[:, i, :d_dims // 2], h_bxtxd[:, i,
                                                                  d_dims // 2:]
                    ],
                                                      axis=1),
                                             hx=con_i)
                    u_i_bxd, kl_u = model.generator.sample_u_i(con_i, u_i_bxd)
                    kl_u_total += kl_u
                    g_i_bxd = model.generator(u_i_bxd, hx=g_i_bxd)
                else:
                    g_i_bxd = model.generator(F.concat([
                        h_bxtxd[:, i, :d_dims // 2], h_bxtxd[:, i, d_dims // 2:]
                    ],
                                                       axis=1),
                                              hx=g_i_bxd)

            f_i = model.generator.l_f(g_i_bxd)
            x_hat_i, rec_loss_i = model.generator.sample_x_hat(
                f_i, xs=Variable(x_data[:, i, :]), nrep=1)
            x_hat.append(x_hat_i)
            rec_loss_total += rec_loss_i

        # calculate loss
        if hasattr(model, 'controller'):
            loss = rec_loss_total + alpha * (kl_g0 + kl_u_total)
        else:
            loss = rec_loss_total + alpha * kl_g0

        l2_loss = 0
        if l2_weight_gen > 0:
            l2_W_gen = F.sum(F.square(model.generator.gru.W.W))
            l2_W_r_gen = F.sum(F.square(model.generator.gru.W_r.W))
            l2_W_z_gen = F.sum(F.square(model.generator.gru.W_z.W))
            l2_gen = l2_weight_gen * (l2_W_gen + l2_W_r_gen + l2_W_z_gen)
            l2_loss += l2_gen
        if hasattr(model, 'controller') and l2_weight_con > 0:
            l2_W_con = F.sum(F.square(model.controller.gru.W.W))
            l2_W_r_con = F.sum(F.square(model.controller.gru.W_r.W))
            l2_W_z_con = F.sum(F.square(model.controller.gru.W_z.W))
            l2_con = l2_weight_con * (l2_W_con + l2_W_r_con + l2_W_z_con)
            l2_loss += l2_con
        loss += l2_loss

        # update
        if split == 'train':
            model.cleargrads()
            model.encoder.cleargrads()
            if hasattr(model, 'controller'):
                model.controller.cleargrads()
            model.generator.cleargrads()
            loss.backward()
            opt_enc.update()
            if hasattr(model, 'controller'):
                opt_con.update()
            opt_gen.update()

        # report training status

        time_end = time.time()
        time_delta = time_end - time_start

        # report training status
        status = OrderedDict()
        status['epoch'] = epoch
        status['time'] = int(time_delta * 1000)  # time in msec
        status['alpha'] = alpha

        status[split + '_loss'] = '{:.4}'.format(float(
            loss.data))  # total training loss
        status[split + '_rec_loss'] = '{:.4}'.format(float(
            rec_loss_total.data))  # reconstruction loss
        status[split + '_kl_g0'] = '{:.4}'.format(float(
            kl_g0.data))  # KL-divergence loss for g0
        if hasattr(model, 'controller'):
            status[split + '_kl_u_total'] = '{:.4}'.format(
                float(kl_u_total.data))  # KL-divergence loss for us
            if l2_weight_con > 0:
                status[split + '_l2_loss_con'] = '{:.4}'.format(
                    float(l2_con.data))  # L2 loss for controller
        if l2_weight_gen > 0:
            status[split + '_l2_loss_gen'] = '{:.4}'.format(
                float(l2_gen.data))  # L2 loss for generator
        logger.info(_status_str(status))

        # # save model
        if ((epoch % save_every) == 0 or
            (max_epoch is not None
             and epoch == max_epoch - 1)) and split == 'train':
            model.save(dest_dir, epoch)

        if split == 'train' and epoch % test_every == 0:
            split = 'test'
        else:
            split = 'train'
            epoch += 1
Example #18
File: TB2.py  Project: Willardtm/LeNet5
#Y_test = Y_test[:1000]

batch_size = 16
D_out = 10

model = nn.LeNet5()
losses = []
optim = optimizer.SGD(model.get_params(), lr=0.00003)
#optim = optimizer.SGDMomentum(model.get_params(), lr=0.00003, momentum=0.80, reg=0.0003)
criterion = loss.SoftmaxLoss()

# Train
ITER = 30000
for i in range(ITER):
    # get batch, make onehot
    X_batch, Y_batch = util.get_batch(X_train, Y_train, batch_size)
    Y_batch = util.MakeOneHot(Y_batch, D_out)

    # forward, loss, backward, step
    Y_pred = model.forward(X_batch)
    loss, dout = criterion.get(Y_pred, Y_batch)
    model.backward(dout)
    optim.step()

    print("%s%% iter: %s, loss: %s" % (100 * i / ITER, i, loss))
    losses.append(loss)
    """
    if i % 100 == 0:
        print("%s%% iter: %s, loss: %s" % (100*i/ITER,i, loss))
        losses.append(loss)
    """
Example #19
# Merge all summaries into a single operator
merged_summary_op = tf.summary.merge_all()

sess = tf.Session()
sess.run(init)

# Set the logs writer to the folder /tmp/tensorflow_logs
summary_writer = tf.summary.FileWriter(BASE_DIR + '/logs',
                                       graph_def=sess.graph_def)

batch_size = 100
number_of_batches = number_of_samples // batch_size
for epoch in range(20):
    for i in range(number_of_batches):
        batch_x, batch_y = util.get_batch(train_set, batch_size, i)
        batch_x_blury, _ = util.get_batch(blurry_set, batch_size, i)
        batch_x_cropped, _ = util.get_batch(cropped_set, batch_size, i)
        train_data = {
            x: batch_x,
            label: batch_y,
            x_blury: batch_x_blury,
            x_cropped: batch_x_cropped
        }
        sess.run(train_step,
                 feed_dict={
                     x: batch_x,
                     label: batch_y,
                     x_blury: batch_x_blury,
                     x_cropped: batch_x_cropped,
                     pkeep: 0.75
Example #20
def train():
    batch_size = 10
    print "Starting ABC-CNN training"
    vqa = dl.load_questions_answers('data')

    # Create subset of data for over-fitting
    sub_vqa = {}
    sub_vqa['training'] = vqa['training'][:10]
    sub_vqa['validation'] = vqa['validation'][:10]
    sub_vqa['answer_vocab'] = vqa['answer_vocab']
    sub_vqa['question_vocab'] = vqa['question_vocab']
    sub_vqa['max_question_length'] = vqa['max_question_length']

    train_size = len(vqa['training'])
    max_itr = (train_size // batch_size) * 10

    with tf.Session() as sess:
        image, ques, ans, optimizer, loss, accuracy = abc.model(
            sess, batch_size)
        print "Defined ABC model"

        train_loader = util.get_batch(sess, vqa, batch_size, 'training')
        print "Created train dataset generator"

        valid_loader = util.get_batch(sess, vqa, batch_size, 'validation')
        print "Created validation dataset generator"

        writer = abc.write_tensorboard(sess)
        init = tf.global_variables_initializer()
        merged = tf.summary.merge_all()
        sess.run(init)
        print "Initialized Tensor variables"

        itr = 1

        while itr < max_itr:
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

            _, vgg_batch, ques_batch, answer_batch = train_loader.next()
            _, valid_vgg_batch, valid_ques_batch, valid_answer_batch = valid_loader.next(
            )
            sess.run(optimizer,
                     feed_dict={
                         image: vgg_batch,
                         ques: ques_batch,
                         ans: answer_batch
                     })
            [train_summary, train_loss,
             train_accuracy] = sess.run([merged, loss, accuracy],
                                        feed_dict={
                                            image: vgg_batch,
                                            ques: ques_batch,
                                            ans: answer_batch
                                        },
                                        options=run_options,
                                        run_metadata=run_metadata)
            [valid_loss, valid_accuracy] = sess.run(
                [loss, accuracy],
                feed_dict={
                    image: valid_vgg_batch,
                    ques: valid_ques_batch,
                    ans: valid_answer_batch
                })

            writer.add_run_metadata(run_metadata, 'step%03d' % itr)
            writer.add_summary(train_summary, itr)
            writer.flush()
            print "Iteration:%d\tTraining Loss:%f\tTraining Accuracy:%f\tValidation Loss:%f\tValidation Accuracy:%f" % (
                itr, train_loss, 100. * train_accuracy, valid_loss,
                100. * valid_accuracy)
            itr += 1
Example #21
def log(config,
        data,
        pretrain_data,
        word2idx_dict,
        model,
        sess,
        writer=None,
        label="train",
        entropy=None,
        bound=None):
    global_step = sess.run(model.global_step) + 1
    golds, preds, vals, sim_preds, sim_vals = [], [], [], [], []
    simss = []
    for batch, _ in zip(
            get_batch(config, data, word2idx_dict, shuffle=False),
            get_pretrain_batch(config,
                               pretrain_data,
                               word2idx_dict,
                               pretrain=False)):
        gold, pred, val, sim_pred, sim_val = sess.run([
            model.gold, model.pred, model.max_val, model.sim_pred,
            model.sim_max_val
        ],
                                                      feed_dict=get_feeddict(
                                                          model,
                                                          batch,
                                                          _,
                                                          is_train=False))
        prt_sim = sess.run(model.sim,
                           feed_dict=get_feeddict(model,
                                                  batch,
                                                  _,
                                                  is_train=False))

        batch_sents = batch['raw_sent']

        golds += gold.tolist()
        preds += pred.tolist()
        vals += val.tolist()
        sim_preds += sim_pred.tolist()
        sim_vals += sim_val.tolist()

    threshold = [0.01 * i for i in range(1, 200)]
    threshold2 = [0.05 * i for i in range(1, 20)]
    acc, recall, f1, jac = 0., 0., 0., 0.
    acc2, recall2, f12, jac2 = 0., 0., 0., 0.
    best_entro = 0.
    best_bound = 0.

    if entropy is None:
        for t in threshold:
            _preds = (np.asarray(vals, dtype=np.float32) <= t).astype(
                np.int32) * np.asarray(preds, dtype=np.int32)
            _preds = _preds.tolist()
            _acc, _recall, _f1, _jac = evaluate(golds, _preds)
            if _f1 > f1:
                acc, recall, f1, jac = _acc, _recall, _f1, _jac
                best_entro = t
    else:
        preds = (np.asarray(vals, dtype=np.float32) <= entropy).astype(
            np.int32) * np.asarray(preds, dtype=np.int32)
        preds = preds.tolist()
        acc, recall, f1, jac = evaluate(golds, preds)

    if bound is None:
        for t in threshold2:
            _sim_preds = (np.asarray(sim_vals, dtype=np.float32) >= t).astype(
                np.int32) * np.asarray(sim_preds, dtype=np.int32)
            _sim_preds = _sim_preds.tolist()
            _acc2, _recall2, _f12, _jac2 = evaluate(golds, _sim_preds)
            if _f12 > f12:
                acc2, recall2, f12, jac2 = _acc2, _recall2, _f12, _jac2
                best_bound = t
    else:
        sim_preds = (np.asarray(sim_vals, dtype=np.float32) >= bound).astype(
            np.int32) * np.asarray(sim_preds, dtype=np.int32)
        sim_preds = sim_preds.tolist()
        acc2, recall2, f12, jac2 = evaluate(golds, sim_preds)

    acc_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/acc".format(label), simple_value=acc),
    ])
    rec_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/rec".format(label), simple_value=recall),
    ])
    f1_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/f1".format(label), simple_value=f1),
    ])
    jac_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/jac".format(label), simple_value=jac),
    ])

    acc_sum2 = tf.Summary(value=[
        tf.Summary.Value(tag="{}/sim_acc".format(label), simple_value=acc2),
    ])
    rec_sum2 = tf.Summary(value=[
        tf.Summary.Value(tag="{}/sim_rec".format(label), simple_value=recall2),
    ])
    f1_sum2 = tf.Summary(value=[
        tf.Summary.Value(tag="{}/sim_f1".format(label), simple_value=f12),
    ])
    jac_sum2 = tf.Summary(value=[
        tf.Summary.Value(tag="{}/sim_jac".format(label), simple_value=jac2),
    ])

    entropy_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/entro".format(label),
                         simple_value=sum(vals) / len(vals)),
    ])
    if writer is not None:
        writer.add_summary(acc_sum, global_step)
        writer.add_summary(rec_sum, global_step)
        writer.add_summary(f1_sum, global_step)
        writer.add_summary(jac_sum, global_step)
        writer.add_summary(acc_sum2, global_step)
        writer.add_summary(rec_sum2, global_step)
        writer.add_summary(f1_sum2, global_step)
        writer.add_summary(jac_sum2, global_step)
        writer.add_summary(entropy_sum, global_step)
    res = [golds, preds]
    return (acc, recall, f1), (acc2, recall2, f12), (best_entro,
                                                     best_bound), res
Example #22
    start = global_step.eval()
    step = 0
    r = ReadData.Actionreader()
    ckpt_dir = "/home/cxr/BvhLstm1-2"
    filename = "/home/cxr/7-2"
    if Need_to_restore:
        if restore(ckpt_dir+"/"):
            print "restored successfully"
            if not Use_to_train:
                r.reset()
                v,timelist=utl.readData(filename)
                length = len(v)
                i=0
                step = 0
                batch_xs, batch_ys = utl.get_batch(v, i, length, classnumber=classnum, batchsize=batch_size,
                                                           n_sequence=n_sequence)
                print len(batch_xs)
                while batch_xs and step<=2000:
                    pre = sess.run([predic], feed_dict={
                        x: batch_xs,
                    })
                    r.out_data(pre[0],ckpt_dir)

                    #batch_xs = batch_xs[0]
                    #batch_xs = batch_xs[1:]
                    #batch_xs.append(utl.transform(pre,classnum))
                    #batch_xs = [batch_xs]
                    batch_xs, batch_ys = utl.get_batch(v, i, length, classnumber=classnum, batchsize=batch_size,
                                                           n_sequence=n_sequence)

                    #print batch_xs
Example #23
def train():
    t1 = time.time()
    tf.reset_default_graph()
    with tf.variable_scope(name_or_scope='train', reuse=tf.AUTO_REUSE):
        cls, (x, y), (w0, w1, w2, w3, w4) = gm.result()
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                              logits=cls,
                                                              name='loss')
        loss_mean = tf.reduce_mean(loss, name='loss_mean')
        global_step = tf.Variable(0, name='global_step')
        learning_rate = tf.train.exponential_decay(constant.LEARNING_RATE,
                                                   global_step,
                                                   1000,
                                                   0.96,
                                                   staircase=True,
                                                   name='learning_rate')
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           name='optimizer')
        train_op = optimizer.minimize(loss_mean,
                                      global_step=global_step,
                                      name='train_op')
    data_train = util.load_data(constant.DATA_TRAIN)
    data_test = util.load_data(constant.DATA_TEST)
    graph = tf.get_default_graph()
    #     var_list = [i for i in tf.global_variables() if i.name.split('/')[1] == 'result']
    #     saver = tf.train.Saver(var_list=var_list, max_to_keep=5)
    #     [print(i) for i in tf.global_variables()]
    #     [print(i.name) for i in graph.get_operations()]
    saver = tf.train.Saver(max_to_keep=5)
    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        idx_train = np.linspace(0,
                                constant.TRAIN_TOTAL_SIZE - 1,
                                constant.TRAIN_TOTAL_SIZE,
                                dtype=np.int32)
        step = 0
        accuracies_train = []
        accuracies_test = []
        losses = []
        ws = (w0, w1, w2, w3, w4)
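        # WeightAdjust (project helper): compares per-epoch train/test means and may reassign the w0..w4 weights or request an early stop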
        wa = WeightAdjust()
        wa.init(len(ws))
        for i in range(constant.EPOCH):
            np.random.shuffle(idx_train)
            for j in range(constant.TRAIN_TIMES_FOR_EPOCH):
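                # take the j-th contiguous block of the shuffled indices as this mini-batch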
                idx_j = np.linspace(j * constant.BATCH_SIZE,
                                    (j + 1) * constant.BATCH_SIZE - 1,
                                    constant.BATCH_SIZE,
                                    dtype=np.int32)
                idx_train_batch = idx_train[idx_j]
                _, labels_train, _, images_train = util.get_batch(
                    data_train, idx_train_batch)
                feed_dict_train = {x: images_train, y: labels_train}
                cls_train, _loss, _ = sess.run([cls, loss_mean, train_op],
                                               feed_dict=feed_dict_train)
                arg_idx_train = np.argmax(cls_train, axis=1)
                accuracy_train = sum(
                    labels_train == arg_idx_train) / constant.BATCH_SIZE
                # test
                idx_test_batch = np.random.randint(0, constant.TEST_TOTAL_SIZE,
                                                   [constant.BATCH_SIZE])
                _, labels_test, _, images_test = util.get_batch(
                    data_test, idx_test_batch)
                feed_dict_test = {x: images_test, y: labels_test}
                cls_test = sess.run(cls, feed_dict=feed_dict_test)
                arg_idx_test = np.argmax(cls_test, axis=1)
                accuracy_test = sum(
                    labels_test == arg_idx_test) / constant.BATCH_SIZE

                step += 1
                if step % constant.PRINT_EVERY_TIMES == 0:
                    print(
                        'time:{},step:{},loss:{},accuracy_train:{:.2%},accuracy_test:{:.2%}'
                        .format(util.cur_time(), step, _loss, accuracy_train,
                                accuracy_test))
                    accuracies_train.append(accuracy_train)
                    accuracies_test.append(accuracy_test)
                    losses.append(_loss)

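            # average the accuracies logged this epoch (one entry every PRINT_EVERY_TIMES steps)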
            times = int(constant.TRAIN_TIMES_FOR_EPOCH /
                        constant.PRINT_EVERY_TIMES)
            train_mean = util.mean(accuracies_train[-times:])
            test_mean = util.mean(accuracies_test[-times:])
            print('save model,step: {},train_mean:{},test_mean:{}'.format(
                step, train_mean, test_mean))
            saver.save(sess,
                       save_path='./model/resnet/cifar-resnet.ckpt',
                       global_step=step)
            wa.adjust(train_mean, test_mean, step)
            print(wa.action)
            if wa.action == 'adjust':
                print('weights adjusted this iteration: {}'.format(wa.weights))
                assigns = gm.assign_weight(wa, ws)
                sess.run(assigns)
            elif wa.action == 'stop':
                break
            else:
                pass
        accuracy_map = {
            'accuracies_train': accuracies_train,
            'accuracies_test': accuracies_test,
            'losses': losses,
            'weights': wa
        }
        util.dump_data(accuracy_map, './accuracy_map.pkl')

    t2 = time.time()
    print('elapsed time: {}'.format(util.str_time(t2 - t1)))
Example #24
0
File: sl.py  Project: angelotran05/NExT
def pseudo_labeling(config, data):
    word2idx_dict, fixed_emb, trainable_emb, train_data, dev_data, test_data, pretrain_data, pretrain_data2 = data

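    # hold out the first pretrain_test_size examples for evaluation; use the next pretrain_train_size for pretraining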
    pretrain_test_data = (pretrain_data[0][:config.pretrain_test_size],
                          pretrain_data[1][:config.pretrain_test_size],
                          pretrain_data[2][:config.pretrain_test_size, :])
    pretrain_data = (
        pretrain_data[0][config.pretrain_test_size:config.pretrain_test_size +
                         config.pretrain_train_size],
        pretrain_data[1][config.pretrain_test_size:config.pretrain_test_size +
                         config.pretrain_train_size],
        pretrain_data[2][config.pretrain_test_size:config.pretrain_test_size +
                         config.pretrain_train_size, :])

    lfs = get_lfs(config, word2idx_dict)
    identifier = "_{}".format(config.tag)

    with tf.variable_scope("models", reuse=tf.AUTO_REUSE):
        regex = Pat_Match(config)
        match = Soft_Match(config,
                           lfs['lfs'],
                           np.array(lfs['rels'], np.float32),
                           lfs['keywords'],
                           lfs['keywords_rels'],
                           lfs['raw_keywords'],
                           mat=(fixed_emb, trainable_emb),
                           word2idx_dict=word2idx_dict,
                           pseudo=True)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
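    # reuse cached pseudo-labels if they exist on disk; otherwise run the labeling functions over the training data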
    if os.path.exists('labeled_data.pkl'):
        with open('labeled_data.pkl', 'rb') as f:
            labeled_data = pickle.load(f)
        with open('unlabeled_data.pkl', 'rb') as f:
            unlabeled_data = pickle.load(f)
        with open('weights.pkl', 'rb') as f:
            lfs["weights"] = pickle.load(f)
    else:
        with open('exp2pat.json', 'r') as f:
            exp2pat = json.load(f)
        exp2pat = {int(key): val for key, val in exp2pat.items()}
        lab_d = []
        unlab_d = []

        tacred_labeled = []
        tacred_unlabeled = []
        labeled_data = []
        unlabeled_data = []
        idxx = -1

        idx2rel = {val: key for key, val in constant.LABEL_TO_ID.items()}

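        # match each training example against the regex labeling functions; hits become pseudo-labeled data, misses stay unlabeled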
        for x in tqdm(train_data):
            idxx += 1
            batch = [x["phrase"]]
            res, pred = regex.match(batch)
            lfs["weights"] += res[0]
            new_dict = {}
            if np.amax(res) > 0:

                x["rel"] = pred.tolist()[0]
                x["logic_form"] = np.argmax(res, axis=1).tolist()[0]
                new_dict['tokens'] = x['phrase'].token
                new_dict['start'] = min(x['phrase'].subj_posi,
                                        x['phrase'].obj_posi) + 1
                new_dict['end'] = max(x['phrase'].subj_posi,
                                      x['phrase'].obj_posi) - 1
                new_dict['rel'] = pred.tolist()[0]
                try:
                    new_dict['pat'] = exp2pat[np.argmax(res,
                                                        axis=1).tolist()[0]]
                    lab_d.append(new_dict)
                except KeyError:
                    new_dict['pat'] = -1
                    unlab_d.append(new_dict)
                tacred_labeled.append((idxx, idx2rel[x['rel']]))
                labeled_data.append(x)
            else:
                tacred_unlabeled.append(idxx)
                new_dict['tokens'] = x['phrase'].token
                new_dict['start'] = min(x['phrase'].subj_posi,
                                        x['phrase'].obj_posi) + 1
                new_dict['end'] = max(x['phrase'].subj_posi,
                                      x['phrase'].obj_posi) - 1
                new_dict['rel'] = pred.tolist()[0]
                new_dict['pat'] = -1
                x["rel"] = 0
                unlab_d.append(new_dict)
                unlabeled_data.append(x)

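        # keep only weights whose labeling function maps to a known pattern (exp2pat), then normalize both weight vectors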
        new_weight = np.array([
            elem for i, elem in enumerate(list(lfs['weights'])) if i in exp2pat
        ], np.float32)
        new_weight = new_weight / np.sum(new_weight)
        lfs["weights"] = lfs["weights"] / np.sum(lfs["weights"])

        with open('tacred_labeled.json', 'w') as f:
            json.dump(tacred_labeled, f)

        with open('tacred_unlabeled.json', 'w') as f:
            json.dump(tacred_unlabeled, f)

        with open('labeled_data.pkl', 'wb') as f:
            pickle.dump(labeled_data, f)
        with open('unlabeled_data.pkl', 'wb') as f:
            pickle.dump(unlabeled_data, f)
        with open('weights.pkl', 'wb') as f:
            pickle.dump(lfs["weights"], f)

        with open('lab_d.pkl', 'wb') as f:
            pickle.dump(lab_d, f)
        with open('unlab_d.pkl', 'wb') as f:
            pickle.dump(unlab_d, f)
        with open('weights_d.pkl', 'wb') as f:
            pickle.dump(new_weight, f)

    random.shuffle(unlabeled_data)

    print('unlabeled data:', str(len(unlabeled_data)), 'labeled data:',
          str(len(labeled_data)))

    dev_history, test_history = [], []
    dev_history2, test_history2 = [], []

    with tf.Session(config=sess_config) as sess:

        lr = float(config.init_lr)
        writer = tf.summary.FileWriter(config.log_dir + identifier)
        sess.run(tf.global_variables_initializer())

        print('---Pretrain-----')
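        # pretrain the soft matcher on (sentence, pattern, label) triples; stop once precision and recall both exceed 0.9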
        for epoch in range(config.pretrain_epoch):
            loss_list, pretrain_loss_lis, sim_loss_lis = [], [], []
            for batch in get_pretrain_batch(config, pretrain_data,
                                            word2idx_dict):
                pretrain_loss_prt, sim_loss_prt, loss, _ = sess.run(
                    [
                        match.pretrain_loss, match.sim_loss,
                        match.pretrain_loss_v2, match.pre_train_op
                    ],
                    feed_dict={
                        match.pretrain_sents: batch['sents'],
                        match.pretrain_pats: batch['pats'],
                        match.pretrain_labels: batch['labels'],
                        match.is_train: True
                    })
                loss_list.append(loss)
                pretrain_loss_lis.append(pretrain_loss_prt)
                sim_loss_lis.append(sim_loss_prt)
            print("{} epoch:".format(str(epoch)))
            print("loss:{} pretrain_loss:{} sim_loss:{}".format(
                str(np.mean(loss_list)), str(np.mean(pretrain_loss_lis)),
                str(np.mean(sim_loss_lis))))
            pred_labels = []
            goldens = []
            prt_id = 0
            for batch in get_pretrain_batch(config,
                                            pretrain_data2,
                                            word2idx_dict,
                                            shuffle=False):
                prt_id += 1
                pp, ppp, pred_label = sess.run(
                    [
                        match.prt_loss, match.prt_pred,
                        match.pretrain_pred_labels
                    ],
                    feed_dict={
                        match.pretrain_sents: batch['sents'],
                        match.pretrain_pats: batch['pats'],
                        match.is_train: False,
                        match.pretrain_labels: batch['labels']
                    })
                pred_label = list(pred_label)
                golden = list(np.reshape(batch['labels'], [-1]))
                assert len(golden) == len(pred_label)
                pred_labels.extend(pred_label)
                goldens.extend(golden)
            p, r, f = f_score(pred_labels, goldens)
            print('PRF:', (p, r, f))
            if p > 0.9 and r > 0.9:
                break
            print('\n')
        print('----Training----')
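        # joint training: each step combines a labeled batch, a pseudo-labeled batch, and a pretraining batch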
        for epoch in range(1, config.num_epoch + 1):
            pretrain_loss_lis, sim_loss_lis, labeled_loss_lis, unlabeled_loss_lis, hard_train_loss_lis, loss_lis = [], [], [], [], [], []
            for batch1, batch2, batch3 in zip(
                    get_batch(config, labeled_data, word2idx_dict),
                    get_batch(config,
                              unlabeled_data,
                              word2idx_dict,
                              pseudo=True),
                    get_pretrain_batch(config,
                                       pretrain_data,
                                       word2idx_dict,
                                       pretrain=False)):
                batch = merge_batch(batch1, batch2)
                global_step = sess.run(match.global_step) + 1
                pretrain_loss, sim_loss, labeled_loss, unlabeled_loss, hard_train_loss, loss, _ = sess.run(
                    [
                        match.pretrain_loss, match.sim_loss,
                        match.labeled_loss, match.unlabeled_loss,
                        match.hard_train_loss, match.loss, match.train_op
                    ],
                    feed_dict=get_feeddict(match, batch, batch3))

                pretrain_loss_lis.append(pretrain_loss)
                sim_loss_lis.append(sim_loss)
                labeled_loss_lis.append(labeled_loss)
                unlabeled_loss_lis.append(unlabeled_loss)
                hard_train_loss_lis.append(hard_train_loss)
                loss_lis.append(loss)

                if global_step % config.period == 0:
                    loss_sum = tf.Summary(value=[
                        tf.Summary.Value(tag="model/loss", simple_value=loss),
                    ])
                    writer.add_summary(loss_sum, global_step)
                    writer.flush()

            (dev_acc, dev_rec,
             dev_f1), (dev_acc2, dev_rec2,
                       dev_f12), (best_entro, best_bound), _ = log(
                           config, dev_data, pretrain_data, word2idx_dict,
                           match, sess, writer, "dev")
            (test_acc, test_rec,
             test_f1), (test_acc2, test_rec2,
                        test_f12), _, _ = log(config,
                                              test_data,
                                              pretrain_data,
                                              word2idx_dict,
                                              match,
                                              sess,
                                              writer,
                                              "test",
                                              entropy=best_entro,
                                              bound=best_bound)
            writer.flush()

            print('\n')
            print("{} epoch:".format(str(epoch)))
            print(
                "pretrain_loss:{} sim_loss:{} labeled_loss:{} unlabeled_loss:{} hard_train_loss:{} loss:{} best_bound:{}:"
                .format(str(np.mean(pretrain_loss_lis)),
                        str(np.mean(sim_loss_lis)),
                        str(np.mean(labeled_loss_lis)),
                        str(np.mean(unlabeled_loss_lis)),
                        str(np.mean(hard_train_loss_lis)),
                        str(np.mean(loss_lis)), str(best_bound)))
            print(
                "dev_acc:{} dev_rec:{} dev_f1:{} dev_acc_2:{} dev_rec_2:{} dev_f1_2:{}\ntest_acc:{} test_rec:{} test_f1:{} test_acc_2:{} test_rec_2:{} test_f1_2:{}"
                .format(str(dev_acc), str(dev_rec), str(dev_f1), str(dev_acc2),
                        str(dev_rec2), str(dev_f12), str(test_acc),
                        str(test_rec), str(test_f1), str(test_acc2),
                        str(test_rec2), str(test_f12)))

            dev_history.append((dev_acc, dev_rec, dev_f1))
            test_history.append((test_acc, test_rec, test_f1))
            dev_history2.append((dev_acc2, dev_rec2, dev_f12))
            test_history2.append((test_acc2, test_rec2, test_f12))
            # decay the learning rate when dev F1 does not improve on the previous epoch
            if len(dev_history) >= 2 and dev_f1 <= dev_history[-2][2]:
                lr *= config.lr_decay
                sess.run(tf.assign(match.lr, lr))

        max_idx = dev_history.index(max(dev_history, key=lambda x: x[2]))
        max_idx2 = dev_history2.index(max(dev_history2, key=lambda x: x[2]))
        max_acc, max_rec, max_f1 = test_history[max_idx]
        max_acc2, max_rec2, max_f12 = test_history2[max_idx2]
        print("acc: {}, rec: {}, f1: {}, acc2 {}, rec2 {}, f12 {}".format(
            max_acc, max_rec, max_f1, max_acc2, max_rec2, max_f12))
        sys.stdout.flush()
    return max_acc, max_rec, max_f1, max_acc2, max_rec2, max_f12
Example #25
0
    def run(self,
            data_x,
            data_x_,
            hidden_dim,
            activation,
            loss,
            lr,
            print_step,
            epoch,
            batch_size=100):
        tf.reset_default_graph()

        input_dim = len(data_x[0])

        with tf.Session() as sess:
            x = tf.placeholder(dtype=tf.float32,
                               shape=[None, input_dim],
                               name='x')
            x_ = tf.placeholder(dtype=tf.float32,
                                shape=[None, input_dim],
                                name='x_')

            encode = {
                'weights':
                tf.Variable(tf.truncated_normal([input_dim, hidden_dim],
                                                dtype=tf.float32),
                            name='weight'),
                'biases':
                tf.Variable(tf.truncated_normal([hidden_dim],
                                                dtype=tf.float32),
                            name='bias')
            }
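            # tied weights: the decoder reuses the transpose of the encoder weight matrix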
            decode = {
                'biases':
                tf.Variable(tf.truncated_normal([input_dim],
                                                dtype=tf.float32)),
                'weights':
                tf.transpose(encode['weights'])
            }

            encoded = self.activate(
                tf.matmul(x, encode['weights']) + encode['biases'], activation)
            decoded = tf.matmul(encoded, decode['weights']) + decode['biases']

            # reconstruction loss
            if loss == 'rmse':
                loss = tf.sqrt(
                    tf.reduce_mean(tf.square(tf.subtract(x_, decoded))))
            elif loss == 'cross-entropy':
                eps = 1e-10
                # loss = -tf.reduce_mean(x_ * tf.log(decoded + eps))
                loss = tf.reduce_mean(-1 * x_ * tf.log(decoded + eps) - 1 *
                                      (1 - x_) * tf.log(1 - decoded + eps))
            elif loss == 'l1':
                loss = tf.reduce_mean(tf.abs(tf.subtract(x_, decoded)))
            elif loss == 'l2':
                loss = tf.sqrt(
                    tf.reduce_mean(tf.square(tf.subtract(x_, decoded))))

            train_op = tf.train.AdamOptimizer(lr).minimize(loss)

            sess.run(tf.global_variables_initializer())

            for i in range(epoch):
                b_x, b_x_ = util.get_batch(data_x, data_x_, batch_size)

                sess.run(train_op, feed_dict={x: b_x, x_: b_x_})

                if (i + 1) % print_step == 0:
                    l = sess.run(loss, feed_dict={x: data_x, x_: data_x_})
                    print('epoch {0}: SAE pretraining loss = {1}'.format(i, l))

            self.weights.append(sess.run(encode['weights']))
            self.biases.append(sess.run(encode['biases']))

            return sess.run(encoded, feed_dict={x: data_x_})
epochs = 20000

with tf.Session(graph=train_graph) as sess:
    sess.run(tf.global_variables_initializer())
    start = global_step.eval()
    ckpt_dir = "/home/cxr/BvhLstm1-2"
    filename = "/home/cxr/7-2"
    for epoch in range(epochs):
        print "tarining Epochs = ", epoch
        r = ReadData.Actionreader()
        v, _ = utl.readData(filename)
        length = len(v)
        i = 0
        step = 0

        batch_xs, batch_ys = utl.get_batch(i, v, sequenceLength, batch_size)

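        # train while get_batch keeps returning data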
        while batch_xs is not None and batch_ys is not None:
            _, loss_val = sess.run(
                [train_op, loss],
                feed_dict={
                    encoder_inputs_raw: batch_xs,
                    decoder_targets_raw: batch_ys,
                    decoder_inputs_raw: batch_ys,
                })
            if step % 20 == 0:
                print loss_val
            if step % 200 == 0:
                if not os.path.exists(ckpt_dir):
                    os.makedirs(ckpt_dir)
                # record training progress in global_step every 200 steps
                global_step.assign(step).eval()