Example #1
def fitMLP(layers, data, img_size_flat, num_classes, parameters):
    x, y_true, y_true_cls = initialize_x_y(img_size_flat, num_classes)
    num_layers = len(layers)
    weights = {
        'h1': tf.Variable(tf.truncated_normal([img_size_flat, layers[0]])),
        'out': tf.Variable(tf.truncated_normal([layers[-1], num_classes]))
    }
    biases = {
        'b1': tf.Variable(tf.zeros([layers[0]])),
        'out': tf.Variable(tf.zeros([num_classes]))
    }
    for i in range(1, num_layers):
        weights['h' + str(i + 1)] = tf.Variable(
            tf.truncated_normal([layers[i - 1], layers[i]]))
        biases['b' + str(i + 1)] = tf.Variable(tf.zeros([layers[i]]))

    logits = neural_net(x, weights, biases, num_layers)
    y_pred = tf.nn.softmax(logits)
    y_pred_cls = tf.argmax(y_pred, axis=1)

    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_true))
    optimizer = tf.train.AdagradOptimizer(
        learning_rate=parameters['learning_rate']).minimize(loss)

    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y_true, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    saver = tf.train.Saver()
    tfObject = {
        'x': x,
        'y_true': y_true,
        'optimizer': optimizer,
        'y_pred': y_pred,
        'y_pred_cls': y_pred_cls,
        'accuracy': accuracy,
        'loss': loss,
        'saver': saver
    }

    train_data = convertToDataset(data, parameters['batch_size'])

    session = tf.Session()
    session.run(tf.global_variables_initializer())
    save_path, run_optimize = createCheckPoints(session, saver,
                                                parameters['name'] + '/MLP')
    parameters['save_path'] = save_path
    if run_optimize:
        optimize(parameters,
                 train_data,
                 data,
                 tfObjects=tfObject,
                 session=session)
    pred_labels, acc = print_test_accuracy(data, 250, tfObject, session)
    session.close()
    return pred_labels, acc
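
Example #1 references a neural_net helper that is not shown. A minimal sketch of what it could look like, assuming a ReLU activation after each hidden layer (the activation choice is an assumption; only the weight/bias naming scheme follows from fitMLP):

import tensorflow as tf

def neural_net(x, weights, biases, num_layers):
    # Hypothetical reconstruction: chain the 'h1'..'hN' / 'b1'..'bN' entries
    # built in fitMLP; the ReLU after each hidden layer is assumed.
    layer = x
    for i in range(1, num_layers + 1):
        layer = tf.nn.relu(
            tf.add(tf.matmul(layer, weights['h' + str(i)]),
                   biases['b' + str(i)]))
    # Final linear projection to class logits.
    return tf.add(tf.matmul(layer, weights['out']), biases['out'])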
Example #2
def model(X_train,
          Y_train,
          X_test,
          Y_test,
          num_iterations=2000,
          learning_rate=0.5,
          print_cost=False):
    """
    Builds the logistic regression model by calling the function you've implemented previously
    
    Arguments:
    X_train -- training set represented by a numpy array of shape (num_px * num_px * 3, m_train)
    Y_train -- training labels represented by a numpy array (vector) of shape (1, m_train)
    X_test -- test set represented by a numpy array of shape (num_px * num_px * 3, m_test)
    Y_test -- test labels represented by a numpy array (vector) of shape (1, m_test)
    num_iterations -- hyperparameter representing the number of iterations to optimize the parameters
    learning_rate -- hyperparameter representing the learning rate used in the update rule of optimize()
    print_cost -- Set to true to print the cost every 100 iterations
    
    Returns:
    d -- dictionary containing information about the model.
    """

    # initialize parameters with zeros (≈ 1 line of code)
    w, b = initialize_with_zeros(X_train.shape[0])

    # Gradient descent (≈ 1 line of code)
    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations,
                                        learning_rate, print_cost)

    # Retrieve parameters w and b from dictionary "parameters"
    w = parameters["w"]
    b = parameters["b"]

    # Predict test/train set examples (≈ 2 lines of code)
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    # Print train/test Errors
    print("train accuracy: {} %".format(
        100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(
        100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

    d = {
        "costs": costs,
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations
    }

    return d
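
Example #2 is the classic logistic-regression exercise; initialize_with_zeros, optimize, and predict are implemented elsewhere in that notebook. A minimal sketch of predict, assuming the usual sigmoid-plus-threshold formulation:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def predict(w, b, X):
    # Hypothetical sketch: compute P(y=1|x) for each column of X and
    # threshold at 0.5, returning labels of shape (1, m).
    w = w.reshape(X.shape[0], 1)
    A = sigmoid(np.dot(w.T, X) + b)
    return (A > 0.5).astype(float)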
Example #3
    def e_step(self, X_texts, old_mean, old_std):
        X = []
        for ref, hyp in X_texts:
            pairs, costs = optimize(*tokenizer_split(ref, hyp), self.cost)
            # alignment(pairs, costs)
            X.append(
                np.sum([self.vector(word1, word2)
                        for word1, word2 in pairs], 0) /
                max(len(ref), len(hyp)))

        X, new_mean, new_std, self.weights = standard(np.array(X),
                                                      self.weights, old_mean,
                                                      old_std)
        return X, new_mean, new_std
Example #4
    def __call__(self, ref, hyp, show=False, *args, **kwargs):
        ref_words, hyp_words = tokenizer_split(ref, hyp)
        pairs, costs = optimize(ref_words, hyp_words, self.cost)
        diff_ref, diff_hyp, diff_cost = alignment(pairs, costs)

        value = sum(costs) / max(len(ref_words), len(hyp_words))
        value = expit(value)

        if show:
            print(diff_ref)
            print(diff_hyp)
            print(diff_cost)

        return value, {
            "diff_ref": diff_ref,
            "diff_hyp": diff_hyp,
            "diff_cost": diff_cost
        }
Example #5
# Linear regression model.
net = nn.Sequential()
net.add(nn.Dense(1))

X, y = geneData()

net.initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {
    'learning_rate': 0.2,
    'momentum': 0.99
})
utils.optimize(batch_size=10,
               trainer=trainer,
               num_epochs=3,
               decay_epoch=2,
               log_interval=10,
               X=X,
               y=y,
               net=net)

net.initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {
    'learning_rate': 0.2,
    'momentum': 0.9
})
utils.optimize(batch_size=10,
               trainer=trainer,
               num_epochs=3,
               decay_epoch=2,
               log_interval=10,
               X=X,
               y=y,
               net=net)
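
geneData is not shown in Example #5; the two runs above differ only in the momentum value (0.99 vs. 0.9). A plausible sketch that produces the usual synthetic linear-regression data (the true weights, bias, and noise scale are assumptions):

from mxnet import nd

def geneData(num_examples=1000):
    # Hypothetical sketch: y = Xw + b + Gaussian noise, with assumed
    # ground-truth parameters.
    true_w = nd.array([2, -3.4]).reshape((2, 1))
    true_b = 4.2
    X = nd.random.normal(scale=1, shape=(num_examples, 2))
    y = nd.dot(X, true_w) + true_b
    y += 0.01 * nd.random.normal(scale=1, shape=y.shape)
    return X, y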
Example #6


# ce = tf.losses.sigmoid_cross_entropy(y_true, seg)
# kl = utils.kl_div(posterior_mvn, prior_mvn)
# beta = 1.0
# loss = tf.reduce_sum(ce + beta * kl)

loss = utils.elbo(y_true, seg, prior_mvn, posterior_mvn, 2)

# lambda_=1e-5
# l2_norms = [tf.nn.l2_loss(v) for v in tf.trainable_variables()]
# l2_norm = tf.reduce_sum(l2_norms)
# reg_loss = loss + lambda_*l2_norm

optimizer = utils.optimize(loss)

# collection for saving the variables during training
tf.add_to_collection('saved_variables', value=images)
tf.add_to_collection('saved_variables', value=seg)
tf.add_to_collection('saved_variables', value=loss)
tf.add_to_collection('saved_variables', value=optimizer)
tf.add_to_collection('saved_variables', value=unet_seg)
tf.add_to_collection('saved_variables', value=y_true)
tf.add_to_collection('saved_variables', value=posterior_mvn)
tf.add_to_collection('saved_variables', value=prior_mvn)
# tf.add_to_collection('saved_variables', value=seg_prior)
# tf.add_to_collection('saved_variables', value=ce)
# tf.add_to_collection('saved_variables', value=kl)

# template = 'Epoch {}, train_loss: {:.4f} - train_ce: {:.4f} - \
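
The commented-out lines in Example #6 suggest what utils.elbo computes: a sigmoid cross-entropy reconstruction term plus a beta-weighted KL divergence between posterior and prior. A hedged sketch under those assumptions, treating the final argument as the KL weight beta and substituting tf.distributions.kl_divergence for the unshown utils.kl_div helper:

import tensorflow as tf

def elbo(y_true, seg, prior_mvn, posterior_mvn, beta):
    # Hypothetical sketch mirroring the commented-out loss above:
    # reconstruction term plus beta-weighted KL(posterior || prior).
    ce = tf.losses.sigmoid_cross_entropy(y_true, seg)
    kl = tf.distributions.kl_divergence(posterior_mvn, prior_mvn)
    return tf.reduce_sum(ce + beta * kl)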
Example #7
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)

    # use model_path to indicate directory of clustering results
    result_dir = os.path.join(
        cfg.model_path,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # load data
    train_data = pickle.load(
        open(os.path.join(cfg.model_path, 'train_data.pkl'), 'rb'))
    val_data = pickle.load(
        open(os.path.join(cfg.model_path, 'val_data.pkl'), 'rb'))

    val_A_idx, val_B_idx = prepare_val(val_data['labels'])
    val_input = np.concatenate([
        np.expand_dims(val_data['feats'][val_A_idx], axis=1),
        np.expand_dims(val_data['feats'][val_B_idx], axis=1)
    ],
                               axis=1)
    val_label = (val_data['labels'][val_A_idx] == val_data['labels'][val_B_idx]
                 ).astype('int32')
    print("Shape of validation data: ".format(val_input.shape))

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        input_ph = tf.placeholder(tf.float32, shape=[None, 2, cfg.emb_dim])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model = networks.PairSim(n_input=cfg.emb_dim)
        model.forward(input_ph, dropout_ph)
        logits = model.logits
        prob = model.prob
        pred = tf.argmax(logits, -1)

        label_ph = tf.placeholder(tf.int32, shape=[None])
        CE_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label_ph,
                                                           logits=logits))
        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = CE_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # define phase to control the number of negative samples
                if epoch < cfg.static_epochs:
                    phase = 1
                else:
                    phase = 1 + (epoch - cfg.static_epochs) / float(
                        (cfg.max_epochs - cfg.static_epochs) / 5)

                # loop for batch
                # use cfg.batch_size to indicate num_pos chosen for a batch
                batch_count = 0
                for A_idx, B_idx in enumerate_batch(train_data['labels'],
                                                    cfg.batch_size, phase):
                    batch_input = np.concatenate([
                        np.expand_dims(train_data['feats'][A_idx], axis=1),
                        np.expand_dims(train_data['feats'][B_idx], axis=1)
                    ],
                                                 axis=1)
                    batch_label = (train_data['labels'][A_idx] ==
                                   train_data['labels'][B_idx]).astype('int32')

                    start_time_train = time.time()
                    err, y_pred, _, step, summ = sess.run(
                        [total_loss, pred, train_op, global_step, summary_op],
                        feed_dict={
                            input_ph: batch_input,
                            dropout_ph: cfg.keep_prob,
                            label_ph: np.squeeze(batch_label),
                            lr_ph: learning_rate
                        })

                    # calculate accuracy
                    acc = accuracy_score(batch_label, y_pred)

                    train_time = time.time() - start_time_train
                    print ("Epoch: [%d][%d/%d]\tTrain_time: %.3f\tLoss %.4f\tAcc: %.4f" % \
                                    (epoch+1, batch_label.sum(), batch_label.shape[0], train_time, err, acc))
                    batch_count += 1

                    summary = tf.Summary(value=[
                        tf.Summary.Value(tag="train_loss", simple_value=err),
                        tf.Summary.Value(tag="train_acc", simple_value=acc),
                        tf.Summary.Value(
                            tag="pos_ratio",
                            simple_value=float(batch_label.sum()) /
                            batch_label.shape[0])
                    ])
                    summary_writer.add_summary(summary, step)
                    summary_writer.add_summary(summ, step)

                print("Epoch %d done!" % (epoch + 1))
                epoch += 1

                # validation on val_set
                print("Evaluating on validation set...")
                val_err, val_pred, val_prob = sess.run(
                    [total_loss, pred, prob],
                    feed_dict={
                        input_ph: val_input,
                        dropout_ph: 1.0,
                        label_ph: np.squeeze(val_label)
                    })

                val_acc = accuracy_score(val_label, val_pred)
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Validation acc",
                                     simple_value=val_acc),
                    tf.Summary.Value(tag="Validation loss",
                                     simple_value=val_err)
                ])
                summary_writer.add_summary(summary, step)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)

        # print log for analysis
        with open(os.path.join(result_dir, 'val_results.txt'), 'w') as fout:
            fout.write("A_idx\tB_idx\tlabel\tprob_0\tprob_1\n")

            for i in range(val_prob.shape[0]):
                fout.write("%d\t%d\t%d\t%.4f\t%.4f\n" %
                           (val_A_idx[i], val_B_idx[i], val_label[i],
                            val_prob[i, 0], val_prob[i, 1]))
Example #8
def main():

    cfg = TrainConfig().parse()
    print (cfg.name)
    result_dir = os.path.join(cfg.result_root, 
            cfg.name+'_'+datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_multimodal_dataset(cfg.feature_root, train_session, cfg.feat, cfg.label_root)
    batch_per_epoch = len(train_set)//cfg.sess_per_batch
    labeled_session = train_session[:cfg.label_num]

    val_session = cfg.val_session
    val_set = prepare_multimodal_dataset(cfg.feature_root, val_session, cfg.feat, cfg.label_root)


    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        subtract_global_step_op = tf.assign(global_step, global_step-1)
        
        ####################### Load models here ########################

        with tf.variable_scope("modality_core"):
            # load backbone model
            if cfg.network == "convtsn":
                model_emb = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
            elif cfg.network == "convrtsn":
                model_emb = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
            else:
                raise NotImplementedError

            input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None, None, None])
            dropout_ph = tf.placeholder(tf.float32, shape=[])
            model_emb.forward(input_ph, dropout_ph)    # for lstm has variable scope

        with tf.variable_scope("modality_sensors"):
            sensors_emb_dim = 32
            model_emb_sensors = networks.RTSN(n_seg=cfg.num_seg, emb_dim=sensors_emb_dim)

            input_sensors_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, 8])
            model_emb_sensors.forward(input_sensors_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_sensors"):
                    var_list[v.op.name.replace("modality_sensors/","")] = v
            restore_saver_sensors = tf.train.Saver(var_list)

        with tf.variable_scope("hallucination_sensors"):
            # load backbone model
            if cfg.network == "convtsn":
                hal_emb_sensors = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=sensors_emb_dim)
            elif cfg.network == "convrtsn":
                hal_emb_sensors = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=sensors_emb_dim)
            else:
                raise NotImplementedError

            hal_emb_sensors.forward(input_ph, dropout_ph)    # for lstm has variable scope

        ############################# Forward Pass #############################


        # Core branch
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden, axis=-1, epsilon=1e-10)
            embedding_sensors = tf.nn.l2_normalize(model_emb_sensors.hidden, axis=-1, epsilon=1e-10)
            embedding_hal_sensors = tf.nn.l2_normalize(hal_emb_sensors.hidden, axis=-1, epsilon=1e-10)
        else:
            embedding = model_emb.hidden
            embedding_sensors = model_emb_sensors.hidden
            embedding_hal_sensors = hal_emb_sensors.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(tf.reshape(embedding, [-1,3,cfg.emb_dim]), 3, 1)
        anc_sensors, pos_sensors, neg_sensors = tf.unstack(tf.reshape(embedding_sensors, [-1,3,sensors_emb_dim]), 3, 1)
        anc_hal_sensors, pos_hal_sensors, neg_hal_sensors = tf.unstack(tf.reshape(embedding_hal_sensors, [-1,3,sensors_emb_dim]), 3, 1)

        # a fusion embedding
        anc_fused = tf.concat((anchor, anc_hal_sensors), axis=1)
        pos_fused = tf.concat((positive, pos_hal_sensors), axis=1)
        neg_fused = tf.concat((negative, neg_hal_sensors), axis=1)

        ############################# Calculate loss #############################

        # triplet loss
        metric_loss = networks.triplet_loss(anchor, positive, negative, cfg.alpha) + \
                      networks.triplet_loss(anc_sensors, pos_sensors, neg_sensors, cfg.alpha) + \
                      networks.triplet_loss(anc_hal_sensors, pos_hal_sensors, neg_hal_sensors, cfg.alpha) + \
                      networks.triplet_loss(anc_fused, pos_fused, neg_fused, cfg.alpha)

        # hallucination loss (regression loss)
        hal_loss = tf.nn.l2_loss(embedding_sensors - embedding_hal_sensors)

        regularization_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        # use lambda_multimodal for hal_loss
        lambda_metric_ph = tf.placeholder(tf.float32, shape=[])
        lambda_hal_ph = tf.placeholder(tf.float32, shape=[])
        total_loss = lambda_metric_ph * metric_loss + lambda_hal_ph * hal_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                           lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)
        summary_op = tf.summary.merge_all()    # not logging histogram of variables because it will cause problem when only unimodal_train_op is called

        #########################################################################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch])
        feat2_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch])
        train_data = multimodal_session_generator(feat_paths_ph, feat2_paths_ph, label_paths_ph, sess_per_batch=cfg.sess_per_batch, num_threads=2, shuffled=False, preprocess_func=[model_emb.prepare_input, model_emb_sensors.prepare_input])
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_feats2 = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(session[0], session[-1], model_emb.prepare_input_test)    # use prepare_input_test for testing time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id]*eve_batch.shape[0])
            val_boundaries.extend(boundary)

            eve2_batch, _,_ = load_data_and_label(session[1], session[-1], utils.mean_pool_input)
            val_feats2.append(eve2_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_feats2 = np.concatenate(val_feats2, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print ("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(i, val_labels[i,0], val_sess[i],
                                            val_boundaries[i][0], val_boundaries[i][1]))

        #########################################################################


        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.model_path:
                print ("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            print ("Restoring sensors model: %s" % cfg.sensors_path)
            restore_saver_sensors.restore(sess, cfg.sensors_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs-1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                paths = list(zip(*[iter(train_set)]*cfg.sess_per_batch))

                feat_paths = [[p[0] for p in path] for path in paths]
                feat2_paths = [[p[1] for p in path] for path in paths]
                label_paths = [[p[-1] for p in path] for path in paths]

                sess.run(train_sess_iterator.initializer, feed_dict={feat_paths_ph: feat_paths,
                  feat2_paths_ph: feat2_paths,
                  label_paths_ph: label_paths})

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        ##################### Data loading ########################
                        start_time = time.time()
                        eve, eve_sensors, lab, batch_sess = sess.run(next_train)
                        load_time = time.time() - start_time
    
                        ##################### Triplet selection #####################
                        start_time = time.time()
                        # for labeled sessions, use facenet sampling
                        eve_labeled = []
                        eve_sensors_labeled = []
                        lab_labeled = []
                        for i in range(eve.shape[0]):
                            if batch_sess[i,0] in labeled_session:
                                eve_labeled.append(eve[i])
                                eve_sensors_labeled.append(eve_sensors[i])
                                lab_labeled.append(lab[i])

                        if len(eve_labeled):    # if labeled sessions exist
                            eve_labeled = np.concatenate(eve_labeled, axis=0)
                            eve_sensors_labeled = np.concatenate(eve_sensors_labeled, axis=0)
                            lab_labeled = np.concatenate(lab_labeled, axis=0)

                            # Get the embeddings of all events
                            eve_embedding = np.zeros((eve_labeled.shape[0], cfg.emb_dim), dtype='float32')
                            for start, end in zip(range(0, eve_labeled.shape[0], cfg.batch_size),
                                                range(cfg.batch_size, eve_labeled.shape[0]+cfg.batch_size, cfg.batch_size)):
                                end = min(end, eve_labeled.shape[0])
                                emb = sess.run(embedding, feed_dict={input_ph: eve_labeled[start:end], dropout_ph: 1.0})
                                eve_embedding[start:end] = np.copy(emb)
        
                            # sample triplets within sampled sessions
                            triplet_input_idx, negative_count = utils.select_triplets_facenet(lab_labeled,eve_embedding,cfg.triplet_per_batch,cfg.alpha,num_negative=cfg.num_negative)
                            if triplet_input_idx is None:
                                continue
                            
                            triplet_input = eve_labeled[triplet_input_idx]
                            sensors_input = eve_sensors_labeled[triplet_input_idx]
                            if len(triplet_input.shape) > 5:    # debugging
                                pdb.set_trace()

                        # for all sessions
                        temp_num = (eve.shape[0] // 3) * 3    # for triplet shape
                        all_triplet_input = eve[:temp_num]
                        all_sensors_input = eve_sensors[:temp_num]

                        select_time = time.time() - start_time

    
                        ##################### Start training  ########################
    
                        # supervised initialization
                        if epoch < cfg.multimodal_epochs:
                            err, metric_err, hal_err, _, step, summ = sess.run(
                                    [total_loss, metric_loss, hal_loss, train_op, global_step, summary_op],
                                    feed_dict = {input_ph: triplet_input,
                                                input_sensors_ph: sensors_input,
                                                dropout_ph: cfg.keep_prob,
                                                lr_ph: learning_rate,
                                                lambda_metric_ph: 1.0,    # only metric learning
                                                lambda_hal_ph: 0.0})
                        
                        else:
                            # supervised training if labeled sessions available
                            if len(eve_labeled):
                                err, metric_err, hal_err, _, step, summ = sess.run(
                                        [total_loss, metric_loss, hal_loss, train_op, global_step, summary_op],
                                        feed_dict = {input_ph: triplet_input,
                                                    input_sensors_ph: sensors_input,
                                                    dropout_ph: cfg.keep_prob,
                                                    lr_ph: learning_rate,
                                                    lambda_metric_ph: 1.0,
                                                    lambda_hal_ph: cfg.lambda_multimodal})

                            # unsupervised learning on all sessions
                            if len(eve_labeled):
                                sess.run(subtract_global_step_op)
                            err, metric_err, hal_err, _, step, summ = sess.run(
                                    [total_loss, metric_loss, hal_loss, train_op, global_step, summary_op],
                                    feed_dict = {input_ph: all_triplet_input,
                                                input_sensors_ph: all_sensors_input,
                                                dropout_ph: cfg.keep_prob,
                                                lr_ph: learning_rate,
                                                lambda_metric_ph: 0.0,    # no metric learning
                                                lambda_hal_ph: 1.0})    # only hal loss

    
                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tLoad time: %.3f\tSelect time: %.3f\tMetric Loss %.4f\tHal Loss %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], triplet_input.shape[0]//3, load_time, select_time, metric_err, hal_err))
    
                        summary = tf.Summary(value=[tf.Summary.Value(tag="train_loss", simple_value=err),
                                    tf.Summary.Value(tag="negative_count", simple_value=negative_count),
                                    tf.Summary.Value(tag="metric_loss", simple_value=metric_err),
                                    tf.Summary.Value(tag="hallucination_loss", simple_value=hal_err)])
    
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1
                    
                    except tf.errors.OutOfRangeError:
                        print ("Epoch %d done!" % (epoch+1))
                        break

                # validation on val_set
                print ("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                                feed_dict = {input_ph: val_feats,
                                                             dropout_ph: 1.0})
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)

                summary = tf.Summary(value=[tf.Summary.Value(tag="Valiation mAP", simple_value=mAP),
                                            tf.Summary.Value(tag="Validation [email protected]", simple_value=mPrec)])
                summary_writer.add_summary(summary, step)
                print ("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" % (epoch+1,mAP,mPrec))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess, os.path.join(result_dir, cfg.name+'.ckpt'), global_step=step)
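
Example #8 sums four networks.triplet_loss terms, one per branch plus the fused embedding. The helper is not shown; a sketch of the standard FaceNet-style triplet loss with margin alpha (whether the reduction is a mean or a sum is an assumption):

import tensorflow as tf

def triplet_loss(anchor, positive, negative, alpha):
    # Hypothetical sketch: squared-distance margin loss,
    # max(0, d(a, p) - d(a, n) + alpha), averaged over the batch.
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), axis=1)
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), axis=1)
    return tf.reduce_mean(tf.maximum(pos_dist - neg_dist + alpha, 0.0))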
Example #9
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    feat_train = np.load('/mnt/work/CUB_200_2011/data/feat_train.npy')
    val_feats = np.load('/mnt/work/CUB_200_2011/data/feat_test.npy')
    label_train = np.load('/mnt/work/CUB_200_2011/data/label_train.npy')
    label_train -= 1  # make labels start from 0
    val_labels = np.load('/mnt/work/CUB_200_2011/data/label_test.npy')

    class_idx_dict = {}
    for i, l in enumerate(label_train):
        l = int(l)
        if l not in class_idx_dict:
            class_idx_dict[l] = [i]
        else:
            class_idx_dict[l].append(i)
    C = len(list(class_idx_dict.keys()))

    val_triplet_idx = select_triplets_random(val_labels, 1000)

    # generate metadata.tsv for visualize embedding
    with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
        for l in val_labels:
            fout.write('{}\n'.format(int(l)))

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        model_emb = networks.CUBLayer(n_input=1024, n_output=cfg.emb_dim)
        #model_emb = networks.OutputLayer(n_input=1024, n_output=cfg.emb_dim)

        # get the embedding
        input_ph = tf.placeholder(tf.float32, shape=[None, 1024])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.logits,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.logits

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        #        diffs = utils.all_diffs_tf(embedding, embedding)
        #        all_dist = utils.cdist_tf(diffs)
        #        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)
        metric_loss = networks.triplet_loss(anchor, positive, negative,
                                            cfg.alpha)

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = metric_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            ################## Training loop ##################
            for epoch in range(cfg.max_epochs):

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # sample images
                class_in_batch = set()
                idx_batch = np.array([], dtype=np.int32)
                while len(idx_batch) < cfg.batch_size:
                    sampled_class = np.random.choice(
                        list(class_idx_dict.keys()))
                    if sampled_class not in class_in_batch:
                        class_in_batch.add(sampled_class)
                        subsample_size = np.random.choice(range(5, 11))
                        subsample = np.random.permutation(
                            class_idx_dict[sampled_class])[:subsample_size]
                        idx_batch = np.append(idx_batch, subsample)
                idx_batch = idx_batch[:cfg.batch_size]

                feat_batch = feat_train[idx_batch]
                lab_batch = label_train[idx_batch]

                emb = sess.run(embedding,
                               feed_dict={
                                   input_ph: feat_batch,
                                   dropout_ph: 1.0
                               })

                # get distance for all pairs
                all_diff = utils.all_diffs(emb, emb)
                triplet_input_idx, active_count = select_triplets_facenet(
                    lab_batch,
                    utils.cdist(all_diff, metric=cfg.metric),
                    cfg.triplet_per_batch,
                    cfg.alpha,
                    num_negative=cfg.num_negative)

                if triplet_input_idx is not None:
                    triplet_input = feat_batch[triplet_input_idx]

                    # perform training on the selected triplets
                    err, _, step, summ = sess.run(
                        [total_loss, train_op, global_step, summary_op],
                        feed_dict={
                            input_ph: triplet_input,
                            dropout_ph: cfg.keep_prob,
                            lr_ph: learning_rate
                        })

                    print ("%s\tEpoch: %d\tImages num: %d\tTriplet num: %d\tLoss %.4f" % \
                            (cfg.name, epoch+1, feat_batch.shape[0], triplet_input.shape[0]//3, err))

                    summary = tf.Summary(value=[
                        tf.Summary.Value(tag="train_loss", simple_value=err),
                        tf.Summary.Value(tag="active_count",
                                         simple_value=active_count),
                        tf.Summary.Value(tag="images_num",
                                         simple_value=feat_batch.shape[0]),
                        tf.Summary.Value(tag="triplet_num",
                                         simple_value=triplet_input.shape[0] //
                                         3)
                    ])
                    summary_writer.add_summary(summary, step)
                    summary_writer.add_summary(summ, step)

                # validation on val_set
                if (epoch + 1) % 100 == 0:
                    print("Evaluating on validation set...")
                    val_err = sess.run(total_loss,
                                       feed_dict={
                                           input_ph:
                                           val_feats[val_triplet_idx],
                                           dropout_ph: 1.0
                                       })

                    summary = tf.Summary(value=[
                        tf.Summary.Value(tag="Valiation loss",
                                         simple_value=val_err),
                    ])
                    print("Epoch: [%d]\tloss: %.4f" % (epoch + 1, val_err))

                    if (epoch + 1) % 1000 == 0:
                        val_embeddings, _ = sess.run([embedding, set_emb],
                                                     feed_dict={
                                                         input_ph: val_feats,
                                                         dropout_ph: 1.0
                                                     })
                        mAP, mPrec, recall = utils.evaluate_simple(
                            val_embeddings, val_labels)
                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="Valiation mAP",
                                             simple_value=mAP),
                            tf.Summary.Value(tag="Validation Recall@1",
                                             simple_value=recall),
                            tf.Summary.Value(tag="Validation [email protected]",
                                             simple_value=mPrec)
                        ])
                        print("Epoch: [%d]\tmAP: %.4f\trecall: %.4f" %
                              (epoch + 1, mAP, recall))

                        # config for embedding visualization
                        config = projector.ProjectorConfig()
                        visual_embedding = config.embeddings.add()
                        visual_embedding.tensor_name = emb_var.name
                        visual_embedding.metadata_path = os.path.join(
                            result_dir, 'metadata_val.tsv')
                        projector.visualize_embeddings(summary_writer, config)

                    summary_writer.add_summary(summary, step)

                    # save model
                    saver.save(sess,
                               os.path.join(result_dir, cfg.name + '.ckpt'),
                               global_step=step)
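
Example #9 evaluates on val_feats[val_triplet_idx], so select_triplets_random(labels, n) presumably returns a flat index array whose gathered embeddings reshape to [-1, 3, emb_dim] as (anchor, positive, negative) triples. One possible sketch (the helper's exact behavior is an assumption):

import numpy as np

def select_triplets_random(labels, num_triplets):
    # Hypothetical sketch: sample (anchor, positive, negative) index triples
    # uniformly at random and return them as one flat index array.
    labels = np.asarray(labels).ravel()
    idx = []
    while len(idx) < 3 * num_triplets:
        a = np.random.randint(len(labels))
        pos = np.where(labels == labels[a])[0]
        neg = np.where(labels != labels[a])[0]
        if len(pos) < 2 or len(neg) == 0:
            continue  # no valid positive or negative for this anchor
        p = np.random.choice(pos[pos != a])
        n = np.random.choice(neg)
        idx.extend([a, p, n])
    return np.array(idx)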
Example #10
File: train.py  Project: SOULOF/easy_nn
    for i in range(epochs):
        loss = 0
        for j in range(steps_per_epoch):
            # Step 1
            # Randomly sample a batch of examples
            X_batch, y_batch = resample(X_, y_, n_samples=batch_size)

            # Reset value of X and y Inputs
            X.value = X_batch
            y.value = y_batch

            # Step 2
            _ = None
            forward_and_backward(_, graph)  # the choice of output node is not important here

            # Step 3
            rate = 1e-2

            optimize(trainables, rate)

            loss += graph[-1].value

        if i % 100 == 0:
            print("Epoch: {}, Loss: {:.3f}".format(i + 1,
                                                   loss / steps_per_epoch))
            losses.append(loss / steps_per_epoch)

    plt.plot(losses)
    plt.show()
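
In Example #10, optimize(trainables, rate) applies the gradient update after forward_and_backward has populated the gradients. A sketch in the usual miniflow style (the gradients-dictionary layout on each node is an assumption):

def optimize(trainables, learning_rate=1e-2):
    # Hypothetical sketch: vanilla SGD, assuming each trainable node stores
    # its own gradient under node.gradients[node] after the backward pass.
    for node in trainables:
        node.value -= learning_rate * node.gradients[node]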
Example #11
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_multimodal_dataset(cfg.feature_root, train_session,
                                           cfg.feat, cfg.label_root)
    if cfg.task == "supervised":  # fully supervised task
        train_set = train_set[:cfg.label_num]
    batch_per_epoch = len(train_set) // cfg.sess_per_batch
    labeled_session = train_session[:cfg.label_num]

    val_session = cfg.val_session
    val_set = prepare_multimodal_dataset(cfg.feature_root, val_session,
                                         cfg.feat, cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        ####################### Load models here ########################
        sensors_emb_dim = 32
        segment_emb_dim = 32

        with tf.variable_scope("modality_core"):
            # load backbone model
            if cfg.network == "convtsn":
                model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                             emb_dim=cfg.emb_dim)
            elif cfg.network == "convrtsn":
                model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                              emb_dim=cfg.emb_dim)
            elif cfg.network == "convbirtsn":
                model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg,
                                                emb_dim=cfg.emb_dim)
            else:
                raise NotImplementedError

            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
            dropout_ph = tf.placeholder(tf.float32, shape=[])
            model_emb.forward(input_ph,
                              dropout_ph)  # for lstm has variable scope

            with tf.variable_scope("sensors"):
                model_output_sensors = networks.OutputLayer(
                    n_input=cfg.emb_dim, n_output=sensors_emb_dim)
            with tf.variable_scope("segment"):
                model_output_segment = networks.OutputLayer(
                    n_input=cfg.emb_dim, n_output=segment_emb_dim)

        lambda_mul_ph = tf.placeholder(tf.float32, shape=[])
        with tf.variable_scope("modality_sensors"):
            model_emb_sensors = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=sensors_emb_dim)

            input_sensors_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 8])
            model_emb_sensors.forward(input_sensors_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_sensors"):
                    var_list[v.op.name.replace("modality_sensors/", "")] = v
            restore_saver_sensors = tf.train.Saver(var_list)

        with tf.variable_scope("modality_segment"):
            model_emb_segment = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=segment_emb_dim,
                                              n_input=357)

            input_segment_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 357])
            model_emb_segment.forward(input_segment_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_segment"):
                    var_list[v.op.name.replace("modality_segment/", "")] = v
            restore_saver_segment = tf.train.Saver(var_list)

        ############################# Forward Pass #############################

        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
            embedding_sensors = tf.nn.l2_normalize(model_emb_sensors.hidden,
                                                   axis=-1,
                                                   epsilon=1e-10)
            embedding_segment = tf.nn.l2_normalize(model_emb_segment.hidden,
                                                   axis=-1,
                                                   epsilon=1e-10)
        else:
            embedding = model_emb.hidden
            embedding_sensors = model_emb_sensors.hidden
            embedding_segment = model_emb_segment.hidden

        # get the number of unsupervised training
        unsup_num = tf.shape(input_sensors_ph)[0]

        # variable for visualizing the embeddings
        emb_var = tf.Variable(tf.zeros([1116, cfg.emb_dim], dtype=tf.float32),
                              name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding[:-unsup_num], [-1, 3, cfg.emb_dim]), 3, 1)
        metric_loss = networks.triplet_loss(anchor, positive, negative,
                                            cfg.alpha)

        model_output_sensors.forward(tf.nn.relu(embedding[-unsup_num:]),
                                     dropout_ph)
        logits_sensors = model_output_sensors.logits
        model_output_segment.forward(tf.nn.relu(embedding[-unsup_num:]),
                                     dropout_ph)
        logits_segment = model_output_segment.logits

        # MSE loss
        MSE_loss_sensors = tf.losses.mean_squared_error(
            embedding_sensors, logits_sensors) / sensors_emb_dim
        MSE_loss_segment = tf.losses.mean_squared_error(
            embedding_segment, logits_segment) / segment_emb_dim
        MSE_loss = MSE_loss_sensors + MSE_loss_segment
        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = tf.cond(
            tf.equal(unsup_num,
                     tf.shape(embedding)[0]), lambda: MSE_loss * lambda_mul_ph
            + regularization_loss * cfg.lambda_l2, lambda: metric_loss +
            MSE_loss * lambda_mul_ph + regularization_loss * cfg.lambda_l2)

        tf.summary.scalar('learning_rate', lr_ph)
        # only train the core branch
        train_var_list = [
            v for v in tf.global_variables()
            if v.op.name.startswith("modality_core")
        ]
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, train_var_list)

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        #########################################################################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        feat2_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        feat3_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = multimodal_session_generator(
            feat_paths_ph,
            feat2_paths_ph,
            feat3_paths_ph,
            label_paths_ph,
            sess_per_batch=cfg.sess_per_batch,
            num_threads=2,
            shuffled=False,
            preprocess_func=[
                model_emb.prepare_input, model_emb_sensors.prepare_input,
                model_emb_segment.prepare_input
            ])
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_feats2 = []
        val_feats3 = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb.prepare_input_test
            )  # use prepare_input_test for testing time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)

            eve2_batch, _, _ = load_data_and_label(
                session[1], session[-1], model_emb_sensors.prepare_input_test)
            val_feats2.append(eve2_batch)

            eve3_batch, _, _ = load_data_and_label(
                session[2], session[-1], model_emb_segment.prepare_input_test)
            val_feats3.append(eve3_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_feats2 = np.concatenate(val_feats2, axis=0)
        val_feats3 = np.concatenate(val_feats3, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        #########################################################################

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())
            print("Restoring sensors model: %s" % cfg.sensors_path)
            restore_saver_sensors.restore(sess, cfg.sensors_path)
            print("Restoring segment model: %s" % cfg.segment_path)
            restore_saver_segment.restore(sess, cfg.segment_path)

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.01**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                paths = list(zip(*[iter(train_set)] * cfg.sess_per_batch))

                feat_paths = [[p[0] for p in path] for path in paths]
                feat2_paths = [[p[1] for p in path] for path in paths]
                feat3_paths = [[p[2] for p in path] for path in paths]
                label_paths = [[p[-1] for p in path] for path in paths]

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             feat2_paths_ph: feat2_paths,
                             feat3_paths_ph: feat3_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        ##################### Data loading ########################
                        start_time = time.time()
                        eve, eve_sensors, eve_segment, lab, batch_sess = sess.run(
                            next_train)

                        # for memory concern, 1000 events are used in maximum
                        if eve.shape[0] > 1000:
                            idx = np.random.permutation(eve.shape[0])[:1000]
                            eve = eve[idx]
                            eve_sensors = eve_sensors[idx]
                            eve_segment = eve_segment[idx]
                            lab = lab[idx]
                            batch_sess = batch_sess[idx]
                        load_time = time.time() - start_time

                        ##################### Triplet selection #####################
                        start_time = time.time()
                        # for labeled sessions, use facenet sampling
                        eve_labeled = []
                        lab_labeled = []
                        for i in range(eve.shape[0]):
                            # FIXME: use decode again to get session_id str
                            if batch_sess[i, 0].decode() in labeled_session:
                                eve_labeled.append(eve[i])
                                lab_labeled.append(lab[i])

                        if len(eve_labeled):  # if labeled sessions exist
                            eve_labeled = np.stack(eve_labeled, axis=0)
                            lab_labeled = np.stack(lab_labeled, axis=0)

                            # Get the embeddings of all events
                            eve_embedding = np.zeros(
                                (eve_labeled.shape[0], cfg.emb_dim),
                                dtype='float32')
                            for start, end in zip(
                                    range(0, eve_labeled.shape[0],
                                          cfg.batch_size),
                                    range(
                                        cfg.batch_size,
                                        eve_labeled.shape[0] + cfg.batch_size,
                                        cfg.batch_size)):
                                end = min(end, eve_labeled.shape[0])
                                emb = sess.run(embedding,
                                               feed_dict={
                                                   input_ph:
                                                   eve_labeled[start:end],
                                                   dropout_ph: 1.0
                                               })
                                eve_embedding[start:end] = np.copy(emb)

                            # Second, sample triplets within sampled sessions
                            all_diff = utils.all_diffs(eve_embedding,
                                                       eve_embedding)
                            triplet_input_idx, active_count = utils.select_triplets_facenet(
                                lab_labeled,
                                utils.cdist(all_diff, metric=cfg.metric),
                                cfg.triplet_per_batch,
                                cfg.alpha,
                                num_negative=cfg.num_negative)

                            # gather the labeled triplet inputs (an empty
                            # selection simply yields an empty array)
                            if triplet_input_idx is not None:
                                triplet_input = eve_labeled[triplet_input_idx]

                        else:
                            active_count = -1

                        # for all sessions in the batch
                        perm_idx = np.random.permutation(eve.shape[0])
                        perm_idx = perm_idx[:min(3 * (len(perm_idx) // 3), 3 *
                                                 cfg.triplet_per_batch)]
                        mul_input = eve[perm_idx]

                        if len(eve_labeled) and triplet_input_idx is not None:
                            triplet_input = np.concatenate(
                                (triplet_input, mul_input), axis=0)
                        else:
                            triplet_input = mul_input
                        sensors_input = eve_sensors[perm_idx]
                        segment_input = eve_segment[perm_idx]

                        ##################### Start training  ########################

                        # supervised initialization
                        if epoch < cfg.multimodal_epochs:
                            # skip this batch if no labeled sessions exist
                            if not len(eve_labeled):
                                continue
                            err, mse_err, _, step, summ = sess.run(
                                [
                                    total_loss, MSE_loss, train_op,
                                    global_step, summary_op
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    input_sensors_ph: sensors_input,
                                    dropout_ph: cfg.keep_prob,
                                    lambda_mul_ph: 0.0,
                                    lr_ph: learning_rate
                                })
                            # keep the summary variables defined during
                            # supervised initialization
                            mse_err1, mse_err2 = mse_err, 0.0
                        else:
                            print(triplet_input.shape)
                            err, mse_err1, mse_err2, _, step, summ = sess.run(
                                [
                                    total_loss, MSE_loss_sensors,
                                    MSE_loss_segment, train_op, global_step,
                                    summary_op
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    input_sensors_ph: sensors_input,
                                    input_segment_ph: segment_input,
                                    dropout_ph: cfg.keep_prob,
                                    lambda_mul_ph: cfg.lambda_multimodal,
                                    lr_ph: learning_rate
                                })
                        train_time = time.time() - start_time

                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tLoad time: %.3f\tTrain_time: %.3f\tLoss %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], load_time, train_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="active_count",
                                             simple_value=active_count),
                            tf.Summary.Value(
                                tag="triplet_num",
                                simple_value=(triplet_input.shape[0] -
                                              sensors_input.shape[0]) // 3),
                            tf.Summary.Value(tag="MSE_loss_sensors",
                                             simple_value=mse_err1),
                            tf.Summary.Value(tag="MSE_loss_segment",
                                             simple_value=mse_err2)
                        ])

                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_err1, val_err2, val_embeddings, _ = sess.run(
                    [MSE_loss_sensors, MSE_loss_segment, embedding, set_emb],
                    feed_dict={
                        input_ph: val_feats,
                        input_sensors_ph: val_feats2,
                        input_segment_ph: val_feats3,
                        dropout_ph: 1.0
                    })
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec),
                    tf.Summary.Value(tag="Validation mse loss sensors",
                                     simple_value=val_err1),
                    tf.Summary.Value(tag="Validation mse loss segment",
                                     simple_value=val_err2)
                ])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" %
                      (epoch + 1, mAP, mPrec))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
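The training loops in these examples all share the same two-phase learning-rate schedule from "In Defense of the Triplet Loss": constant for the first static_epochs, then exponential decay; only the final decay factor differs (0.01 above, 0.001 in the later examples). A standalone sketch of that schedule, where the function name and the default final_factor are illustrative, not names from this codebase:

# Minimal sketch of the shared learning-rate schedule (illustrative names).
def lr_schedule(epoch, base_lr, static_epochs, max_epochs, final_factor=0.001):
    if epoch < static_epochs:
        return base_lr
    # decay exponentially from base_lr down to base_lr * final_factor
    return base_lr * final_factor ** (
        (epoch - static_epochs) / (max_epochs - static_epochs))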
Example #12
from Person import Person
import utils

persons = utils.loadPersons('data/persons.json')

names = list(map(lambda person: person.name, persons))
bill = utils.loadBill('data/bill.json', names)
print('Bill')
for b in bill:
    b.show()

utils.calculatePayMent(persons, bill)
utils.optimize(persons)

print('')
print('Payments')
for person in persons:
    person.show()

print('')
utils.check(persons)
utils.summary(bill, persons)
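The Person class and the utils helpers for this bill-splitting example are not shown. As a rough illustration of what a settlement step like utils.optimize commonly does (net each person's balance, then greedily match debtors with creditors), here is a self-contained sketch; the dict-based interface and the greedy matching are assumptions, not this example's actual code.

# Hypothetical greedy debt settlement, not the real utils.optimize:
# net balances in, list of (debtor, creditor, amount) transfers out.
def settle(balances):
    """balances: dict name -> net amount (positive = is owed money)."""
    transfers = []
    creditors = sorted((v, k) for k, v in balances.items() if v > 1e-9)
    debtors = sorted((-v, k) for k, v in balances.items() if v < -1e-9)
    while creditors and debtors:
        credit, cname = creditors.pop()  # largest creditor
        debt, dname = debtors.pop()      # largest debtor
        amount = min(credit, debt)
        transfers.append((dname, cname, round(amount, 2)))
        if credit - amount > 1e-9:
            creditors.append((credit - amount, cname))
            creditors.sort()
        if debt - amount > 1e-9:
            debtors.append((debt - amount, dname))
            debtors.sort()
    return transfers

print(settle({'alice': 30.0, 'bob': -10.0, 'carol': -20.0}))
# [('carol', 'alice', 20.0), ('bob', 'alice', 10.0)]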
Example #13
import mxnet as mx
from mxnet import gluon, nd
from mxnet.gluon import nn
import sys
sys.path.append('..')
import utils

num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)

net = nn.Sequential()
net.add(nn.Dense(1))

net.collect_params().initialize(mx.init.Normal(sigma=1), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': 0.2})
utils.optimize(batch_size=10,
               trainer=trainer,
               num_epochs=3,
               decay_epoch=None,
               log_interval=10,
               features=features,
               labels=labels,
               net=net)
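The utils.optimize called here comes from the accompanying tutorial helper module and is not shown. A minimal stand-in with the same spirit (mini-batch training on (features, labels) with an L2 loss); the signature and the logging are assumptions about what the helper does:

# Hypothetical stand-in for utils.optimize, not the tutorial's actual helper.
from mxnet import autograd, gluon

def optimize_sketch(batch_size, trainer, num_epochs, features, labels, net):
    data_iter = gluon.data.DataLoader(
        gluon.data.ArrayDataset(features, labels), batch_size, shuffle=True)
    loss_fn = gluon.loss.L2Loss()
    for epoch in range(1, num_epochs + 1):
        for X, y in data_iter:
            with autograd.record():
                loss = loss_fn(net(X), y)
            loss.backward()           # gradients of the summed batch loss
            trainer.step(batch_size)  # normalize the update by batch size
        print('epoch %d, loss %.6f' %
              (epoch, loss_fn(net(features), labels).mean().asscalar()))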
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat,
                                cfg.label_root)
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session[:3]
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)

        # op to decrement global_step by 1 (used so the extra hard-negative
        # update below leaves global_step unchanged)
        subtract_global_step_op = tf.assign(global_step, global_step - 1)

        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        if cfg.network == "tsn":
            model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model_emb = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                         emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                          emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError

        model_ver = networks.PairSim(n_input=cfg.emb_dim)

        # get the embedding
        if cfg.feat == "sensors":
            input_ph = tf.placeholder(tf.float32,
                                      shape=[None, cfg.num_seg, None])
        elif cfg.feat == "resnet":
            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        label_ph = tf.placeholder(tf.int32, shape=[None])
        model_emb.forward(input_ph, dropout_ph)
        embedding = model_emb.hidden

        # split embedding into A and B
        emb_A, emb_B = tf.unstack(tf.reshape(embedding, [-1, 2, cfg.emb_dim]),
                                  2, 1)
        pairs = tf.stack([emb_A, emb_B], axis=1)

        model_ver.forward(pairs, dropout_ph)
        logits = model_ver.logits
        prob = model_ver.prob
        pred = tf.argmax(logits, -1)

        ver_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label_ph,
                                                           logits=logits))

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = ver_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = session_generator(feat_paths_ph,
                                       label_paths_ph,
                                       sess_per_batch=cfg.sess_per_batch,
                                       num_threads=2,
                                       shuffled=False,
                                       preprocess_func=model_emb.prepare_input)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[1], model_emb.prepare_input_test
            )  # use prepare_input_test at test time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        val_idx, val_labels = random_pairs(val_labels, 1000000, test=True)
        val_feats = val_feats[val_idx]
        val_labels = np.asarray(val_labels, dtype='int32')
        print("Shape of val_feats: ", val_feats.shape)

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrained model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In Defense of the Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                feat_paths = [path[0] for path in train_set]
                label_paths = [path[1] for path in train_set]
                # reshape a flat list into a list of lists; trick from:
                # https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python
                feat_paths = list(zip(*[iter(feat_paths)] *
                                      cfg.sess_per_batch))
                label_paths = list(
                    zip(*[iter(label_paths)] * cfg.sess_per_batch))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        # Hierarchical sampling (same as Fast R-CNN)
                        start_time_select = time.time()

                        # First, sample sessions for a batch
                        eve, se, lab = sess.run(next_train)

                        select_time1 = time.time() - start_time_select

                        # select pairs for training
                        pair_idx, train_labels = random_pairs(
                            lab, cfg.batch_size, cfg.num_negative)

                        train_input = eve[pair_idx]
                        train_labels = np.asarray(train_labels, dtype='int32')
                        select_time2 = (time.time() - start_time_select -
                                        select_time1)

                        start_time_train = time.time()
                        # perform training on the selected pairs
                        err, y_pred, y_prob, _, step, summ = sess.run(
                            [
                                total_loss, pred, prob, train_op, global_step,
                                summary_op
                            ],
                            feed_dict={
                                input_ph: train_input,
                                label_ph: train_labels,
                                dropout_ph: cfg.keep_prob,
                                lr_ph: learning_rate
                            })
                        acc = accuracy_score(train_labels, y_pred)

                        negative_count = 0
                        if epoch >= cfg.negative_epochs:
                            hard_idx, hard_labels, negative_count = hard_pairs(
                                train_labels, y_prob, 0.5)
                            if negative_count > 0:
                                hard_input = train_input[hard_idx]
                                hard_labels = np.asarray(hard_labels,
                                                         dtype='int32')

                                step = sess.run(subtract_global_step_op)
                                hard_err, y_pred, _, step = sess.run(
                                    [total_loss, pred, train_op, global_step],
                                    feed_dict={
                                        input_ph: hard_input,
                                        label_ph: hard_labels,
                                        dropout_ph: cfg.keep_prob,
                                        lr_ph: learning_rate
                                    })

                        train_time = time.time() - start_time_train

                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tSelect_time1: %.3f\tSelect_time2: %.3f\tTrain_time: %.3f\tLoss: %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], select_time1, select_time2, train_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="acc", simple_value=acc),
                            tf.Summary.Value(tag="negative_count",
                                             simple_value=negative_count)
                        ])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_err, val_pred, val_prob = sess.run(
                    [total_loss, pred, prob],
                    feed_dict={
                        input_ph: val_feats,
                        label_ph: val_labels,
                        dropout_ph: 1.0
                    })
                val_acc = accuracy_score(val_labels, val_pred)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation acc",
                                     simple_value=val_acc),
                    tf.Summary.Value(tag="Validation loss",
                                     simple_value=val_err)
                ])
                summary_writer.add_summary(summary, step)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)

        # print log for analysis
        with open(os.path.join(result_dir, 'val_results.txt'), 'w') as fout:
            fout.write("acc = %.4f\n" % val_acc)
            fout.write("label\tprob_0\tprob_1\tA_idx\tB_idx\n")
            for i in range(val_prob.shape[0]):
                fout.write("%d\t%.4f\t%.4f\t%d\t%d\n" %
                           (val_labels[i], val_prob[i, 0], val_prob[i, 1],
                            val_idx[2 * i], val_idx[2 * i + 1]))
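The hard_pairs helper above presumably performs the hard-negative mining step: keep only the pairs the verifier still misclassifies or is unsure about, and train on them once more. A numpy sketch under the assumption that the paired inputs are stacked as 2N rows (A_i, B_i per pair) and prob is the (N, 2) softmax output; the real helper may differ in details:

import numpy as np

# Hypothetical sketch of hard_pairs: select pairs whose predicted probability
# of the true class falls below `threshold`.
def hard_pairs_sketch(labels, prob, threshold=0.5):
    labels = np.asarray(labels, dtype='int32')
    hard = np.where(prob[np.arange(len(labels)), labels] < threshold)[0]
    # row indices of both members of each hard pair: [2i, 2i+1, ...]
    hard_idx = np.stack([2 * hard, 2 * hard + 1], axis=1).reshape(-1)
    return hard_idx, labels[hard], len(hard)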
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_multimodal_dataset(cfg.feature_root, train_session,
                                           cfg.feat, cfg.label_root)
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_multimodal_dataset(cfg.feature_root, val_session,
                                         cfg.feat, cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        ####################### Load models here ########################

        with tf.variable_scope("modality_core"):
            # load backbone model
            if cfg.network == "convtsn":
                model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                             emb_dim=cfg.emb_dim)
            elif cfg.network == "convrtsn":
                model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                              emb_dim=cfg.emb_dim)
            elif cfg.network == "convbirtsn":
                model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg,
                                                emb_dim=cfg.emb_dim)
            else:
                raise NotImplementedError

            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
            dropout_ph = tf.placeholder(tf.float32, shape=[])
            model_emb.forward(input_ph,
                              dropout_ph)  # for lstm has variable scope

        with tf.variable_scope("modality_sensors"):
            sensors_emb_dim = 32
            model_emb_sensors = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=sensors_emb_dim)
            model_pairsim_sensors = networks.PairSim(n_input=sensors_emb_dim)

            input_sensors_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 8])
            model_emb_sensors.forward(input_sensors_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_sensors"):
                    var_list[v.op.name.replace("modality_sensors/", "")] = v
            restore_saver_sensors = tf.train.Saver(var_list)

        ############################# Forward Pass #############################

        # Core branch
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)

        # Sensors branch
        emb_sensors = model_emb_sensors.hidden
        A_sensors, B_sensors, C_sensors = tf.unstack(
            tf.reshape(emb_sensors, [-1, 3, sensors_emb_dim]), 3, 1)
        AB_pairs_sensors = tf.stack([A_sensors, B_sensors], axis=1)
        AC_pairs_sensors = tf.stack([A_sensors, C_sensors], axis=1)
        pairs_sensors = tf.concat([AB_pairs_sensors, AC_pairs_sensors], axis=0)
        model_pairsim_sensors.forward(pairs_sensors, dropout_ph)
        prob_sensors = model_pairsim_sensors.prob
        prob_sensors = tf.concat([
            prob_sensors[:tf.shape(A_sensors)[0]],
            prob_sensors[tf.shape(A_sensors)[0]:]
        ],
                                 axis=1)  # shape: [N, 4]

        # fuse prob from all modalities
        prob = prob_sensors

        ############################# Calculate loss #############################

        # triplet loss for labeled inputs
        metric_loss1 = networks.triplet_loss(anchor, positive, negative,
                                             cfg.alpha)

        # weighted triplet loss for multimodal inputs
        mul_num = tf.shape(prob)[0]
        metric_loss2 = networks.triplet_loss(anchor[:mul_num],
                                             positive[:mul_num],
                                             negative[:mul_num], cfg.alpha)
        weighted_metric_loss, weights = networks.weighted_triplet_loss(
            anchor[-mul_num:], positive[-mul_num:], negative[-mul_num:],
            prob[:, 1], prob[:, 3], cfg.alpha)

        unimodal_var_list = [
            v for v in tf.global_variables()
            if v.op.name.startswith("modality_core")
        ]

        # whether to apply joint optimization
        if cfg.no_joint:
            multimodal_var_list = unimodal_var_list
        else:
            multimodal_var_list = tf.global_variables()

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        unimodal_loss = metric_loss1 + regularization_loss * cfg.lambda_l2
        multimodal_loss = metric_loss2 + cfg.lambda_multimodal * weighted_metric_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        unimodal_train_op = utils.optimize(unimodal_loss, global_step,
                                           cfg.optimizer, lr_ph,
                                           unimodal_var_list)
        multimodal_train_op = utils.optimize(multimodal_loss, global_step,
                                             cfg.optimizer, lr_ph,
                                             multimodal_var_list)

        saver = tf.train.Saver(max_to_keep=10)
        summary_op = tf.summary.merge_all(
        )  # not logging histogram of variables because it will cause problem when only unimodal_train_op is called

        summ_prob = tf.summary.histogram('Prob_histogram', prob)
        summ_weights = tf.summary.histogram('Weights_histogram', weights)

        #########################################################################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        feat2_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = multimodal_session_generator(
            feat_paths_ph,
            feat2_paths_ph,
            label_paths_ph,
            sess_per_batch=cfg.sess_per_batch,
            num_threads=2,
            shuffled=False,
            preprocess_func=[
                model_emb.prepare_input, model_emb_sensors.prepare_input
            ])
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_feats2 = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb.prepare_input_test
            )  # use prepare_input_test for testing time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)

            eve2_batch, _, _ = load_data_and_label(session[1], session[-1],
                                                   utils.mean_pool_input)
            val_feats2.append(eve2_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_feats2 = np.concatenate(val_feats2, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        #########################################################################

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrained model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            #print ("Restoring sensors model: %s" % cfg.sensors_path)
            restore_saver_sensors.restore(sess, cfg.sensors_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In Defense of the Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                paths = list(zip(*[iter(train_set)] * cfg.sess_per_batch))

                feat_paths = [[p[0] for p in path] for path in paths]
                feat2_paths = [[p[1] for p in path] for path in paths]
                label_paths = [[p[-1] for p in path] for path in paths]

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             feat2_paths_ph: feat2_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        ##################### Data loading ########################
                        start_time = time.time()
                        eve, eve_sensors, lab = sess.run(next_train)
                        load_time = time.time() - start_time

                        ##################### Triplet selection #####################
                        start_time = time.time()
                        # Get the embeddings of all events
                        eve_embedding = np.zeros((eve.shape[0], cfg.emb_dim),
                                                 dtype='float32')
                        for start, end in zip(
                                range(0, eve.shape[0], cfg.batch_size),
                                range(cfg.batch_size,
                                      eve.shape[0] + cfg.batch_size,
                                      cfg.batch_size)):
                            end = min(end, eve.shape[0])
                            emb = sess.run(embedding,
                                           feed_dict={
                                               input_ph: eve[start:end],
                                               dropout_ph: 1.0
                                           })
                            eve_embedding[start:end] = np.copy(emb)

                        # sample triplets within sampled sessions
                        triplet_input_idx, negative_count = utils.select_triplets_facenet(
                            lab,
                            eve_embedding,
                            cfg.triplet_per_batch,
                            cfg.alpha,
                            num_negative=cfg.num_negative)
                        if triplet_input_idx is None:
                            continue

                        multimodal_count = 0
                        if epoch >= cfg.multimodal_epochs:
                            # Get the similarity prediction of all pos-neg pairs
                            pos_neg_idx = pos_neg_pairs(lab)
                            sim_prob = np.full((eve.shape[0], eve.shape[0]),
                                               np.nan, dtype='float32')
                            for start, end in zip(
                                    range(0, len(pos_neg_idx),
                                          3 * cfg.batch_size),
                                    range(
                                        3 * cfg.batch_size,
                                        len(pos_neg_idx) + 3 * cfg.batch_size,
                                        3 * cfg.batch_size)):
                                ####### for debugging
                                if pos_neg_idx is None:
                                    pdb.set_trace()
                                end = min(end, len(pos_neg_idx))
                                batch_idx = pos_neg_idx[start:end]
                                batch_prob, histo_prob = sess.run(
                                    [prob, summ_prob],
                                    feed_dict={
                                        input_sensors_ph:
                                        eve_sensors[batch_idx],
                                        dropout_ph: 1.0
                                    })
                                summary_writer.add_summary(histo_prob, step)

                                for i in range(batch_prob.shape[0]):
                                    sim_prob[batch_idx[i * 3],
                                             batch_idx[i * 3 + 1]] = np.copy(
                                                 batch_prob[i, 1])

                            # post-process the [N,N] similarity prediction matrix
                            # by averaging sim(A,B) and sim(B,A); left disabled
                            # because NaN entries for backgrounds would propagate:
                            #sim_prob = 0.5 * (sim_prob + sim_prob.T)

                            # sample triplets from similarity prediction
                            # maximum number not exceed the number of triplet_input from facenet selection
                            if cfg.multimodal_select == "confidence":
                                multimodal_input_idx, multimodal_count = select_triplets_multimodal(
                                    sim_prob,
                                    threshold=0.9,
                                    max_num=len(triplet_input_idx) // 3)
                            elif cfg.multimodal_select == "nopos":
                                multimodal_input_idx, multimodal_count = nopos_triplets_multimodal(
                                    sim_prob,
                                    max_num=len(triplet_input_idx) // 3)
                            elif cfg.multimodal_select == "random":
                                multimodal_input_idx, multimodal_count = random_triplets_multimodal(
                                    sim_prob,
                                    max_num=len(triplet_input_idx) // 3)
                            else:
                                raise NotImplementedError

                            print(len(triplet_input_idx),
                                  len(multimodal_input_idx), multimodal_count)
                            sensors_input = eve_sensors[multimodal_input_idx]
                            triplet_input_idx.extend(multimodal_input_idx)

                        triplet_input = eve[triplet_input_idx]

                        select_time = time.time() - start_time

                        if len(triplet_input.shape) > 5:  # debugging
                            pdb.set_trace()

                        ##################### Start training  ########################

                        # note: when multimodal_count == 0, only the unimodal part is optimized
                        if epoch < cfg.multimodal_epochs or multimodal_count == 0:
                            err, metric_err, _, step, summ = sess.run(
                                [
                                    unimodal_loss, metric_loss1,
                                    unimodal_train_op, global_step, summary_op
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    dropout_ph: cfg.keep_prob,
                                    lr_ph: learning_rate
                                })
                            mul_err = 0.0
                        else:
                            err, w, metric_err, mul_err, _, step, summ, histo_w = sess.run(
                                [
                                    multimodal_loss, weights, metric_loss2,
                                    weighted_metric_loss, multimodal_train_op,
                                    global_step, summary_op, summ_weights
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    input_sensors_ph: sensors_input,
                                    dropout_ph: cfg.keep_prob,
                                    lr_ph: learning_rate
                                })

                            # add summary of weights histogram
                            summary_writer.add_summary(histo_w, step)

                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tLoad time: %.3f\tSelect time: %.3f\tLoss %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], triplet_input.shape[0]//3, load_time, select_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="negative_count",
                                             simple_value=negative_count),
                            tf.Summary.Value(tag="multimodal_count",
                                             simple_value=multimodal_count),
                            tf.Summary.Value(tag="metric_loss",
                                             simple_value=metric_err),
                            tf.Summary.Value(tag="weghted_metric_loss",
                                             simple_value=mul_err)
                        ])

                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 dropout_ph: 1.0
                                             })
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" %
                      (epoch + 1, mAP, mPrec))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
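utils.select_triplets_facenet follows the FaceNet recipe: for each anchor-positive pair, sample a negative that violates the margin, i.e. ||a - n||^2 - ||a - p||^2 < alpha. A compact numpy sketch of that selection; the real helper's tie-breaking and num_negative handling may differ. The flat index list [a1, p1, n1, a2, p2, n2, ...] matches the [-1, 3, emb_dim] reshape used for the triplet loss above.

import numpy as np

# Sketch of FaceNet-style semi-hard triplet selection over one batch.
def select_triplets_sketch(labels, embeddings, max_triplets, alpha,
                           rng=np.random):
    labels = np.asarray(labels).reshape(-1)
    # squared Euclidean distance matrix, shape [N, N]
    dist = np.sum((embeddings[:, None] - embeddings[None, :]) ** 2, axis=-1)
    idx = []
    for a in range(len(labels)):
        for p in np.where(labels == labels[a])[0]:
            if p == a:
                continue
            # negatives violating the margin w.r.t. this (anchor, positive)
            viol = np.where((labels != labels[a]) &
                            (dist[a] - dist[a, p] < alpha))[0]
            if len(viol):
                idx.extend([a, int(p), int(rng.choice(viol))])
            if len(idx) // 3 >= max_triplets:
                return idx, len(idx) // 3
    return idx, len(idx) // 3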
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_multimodal_dataset(cfg.feature_root, train_session,
                                           cfg.feat, cfg.label_root)
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_multimodal_dataset(
        cfg.feature_root, val_session, cfg.feat,
        cfg.label_root)  # only one modality is available at test time

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        if cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                         emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                          emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError

        input_ph = tf.placeholder(tf.float32,
                                  shape=[None, cfg.num_seg, None, None, None])
        output_ph = tf.placeholder(tf.float32,
                                   shape=(None, ) + cfg.feat_dim[cfg.feat[1]])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)
        hidden = model_emb.hidden
        embedding = tf.nn.l2_normalize(model_emb.hidden,
                                       axis=-1,
                                       epsilon=1e-10)

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        model_output = networks.OutputLayer(
            n_input=cfg.emb_dim, n_output=cfg.feat_dim[cfg.feat[1]][0])
        model_output.forward(tf.nn.relu(hidden), dropout_ph)
        logits = model_output.logits

        # MSE loss
        MSE_loss = tf.losses.mean_squared_error(output_ph, logits)
        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = MSE_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        #########################################################################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        feat2_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = multimodal_session_generator(
            feat_paths_ph,
            feat2_paths_ph,
            label_paths_ph,
            sess_per_batch=cfg.sess_per_batch,
            num_threads=2,
            shuffled=False,
            preprocess_func=[model_emb.prepare_input, utils.mean_pool_input])
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_feats2 = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb.prepare_input_test
            )  # use prepare_input_test at test time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)

            eve2_batch, _, _ = load_data_and_label(session[1], session[-1],
                                                   utils.mean_pool_input)
            val_feats2.append(eve2_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_feats2 = np.concatenate(val_feats2, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        #########################################################################

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrained model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In Defense of the Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                paths = list(zip(*[iter(train_set)] * cfg.sess_per_batch))

                feat_paths = [[p[0] for p in path] for path in paths]
                feat2_paths = [[p[1] for p in path] for path in paths]
                label_paths = [[p[-1] for p in path] for path in paths]

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             feat2_paths_ph: feat2_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        ##################### Data loading ########################
                        start_time = time.time()
                        eve, eve2, lab = sess.run(next_train)
                        load_time = time.time() - start_time

                        ##################### Start training  ########################
                        start_time = time.time()

                        err, _, step, summ = sess.run(
                            [total_loss, train_op, global_step, summary_op],
                            feed_dict={
                                input_ph: eve,
                                output_ph: eve2,
                                dropout_ph: cfg.keep_prob,
                                lr_ph: learning_rate
                            })
                        train_time = time.time() - start_time

                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tLoad time: %.3f\tTrain_time: %.3f\tLoss %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], load_time, train_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                        ])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_err, val_embeddings, val_pred, _ = sess.run(
                    [total_loss, embedding, logits, set_emb],
                    feed_dict={
                        input_ph: val_feats,
                        output_ph: val_feats2,
                        dropout_ph: 1.0
                    })
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)
                mAP2, mPrec2 = utils.evaluate_simple(
                    val_pred, val_labels)  # use prediction for retrieval

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec),
                    tf.Summary.Value(tag="Validation mAP 2",
                                     simple_value=mAP2),
                    tf.Summary.Value(tag="Validation [email protected] 2",
                                     simple_value=mPrec2),
                    tf.Summary.Value(tag="Validation loss",
                                     simple_value=val_err)
                ])
                summary_writer.add_summary(summary, step)

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
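networks.triplet_loss used throughout these examples is presumably the standard margin-based triplet loss; a sketch of that form, not the repo's actual definition:

import tensorflow as tf

# max(||a-p||^2 - ||a-n||^2 + alpha, 0), averaged over the batch
def triplet_loss_sketch(anchor, positive, negative, alpha):
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), axis=-1)
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), axis=-1)
    return tf.reduce_mean(tf.maximum(pos_dist - neg_dist + alpha, 0.0))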
Example #17
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat,
                                cfg.label_root)
    train_set = train_set[:cfg.label_num]
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        if cfg.network == "tsn":
            model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model_emb = networks.RTSN(n_seg=cfg.num_seg,
                                      emb_dim=cfg.emb_dim,
                                      n_input=cfg.n_input)
        elif cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                         emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                          emb_dim=cfg.emb_dim,
                                          n_h=cfg.n_h,
                                          n_w=cfg.n_w,
                                          n_C=cfg.n_C,
                                          n_input=cfg.n_input)
        elif cfg.network == "convbirtsn":
            model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg,
                                            emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError
        model_ver = networks.PDDM(n_input=cfg.emb_dim)

        # get the embedding
        if cfg.feat == "sensors" or cfg.feat == "segment":
            input_ph = tf.placeholder(tf.float32,
                                      shape=[None, cfg.num_seg, None])
        elif cfg.feat == "resnet" or cfg.feat == "segment_down":
            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)
        metric_loss = networks.triplet_loss(anchor, positive, negative,
                                            cfg.alpha)

        model_ver.forward(tf.stack((anchor, positive), axis=1))
        pddm_ap = model_ver.prob[:, 0]
        model_ver.forward(tf.stack((anchor, negative), axis=1))
        pddm_an = model_ver.prob[:, 0]
        pddm_loss = tf.reduce_mean(
            tf.maximum(tf.add(tf.subtract(pddm_ap, pddm_an), 0.6), 0.0), 0)

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = pddm_loss + 0.5 * metric_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = session_generator(feat_paths_ph,
                                       label_paths_ph,
                                       sess_per_batch=cfg.sess_per_batch,
                                       num_threads=2,
                                       shuffled=False,
                                       preprocess_func=model_emb.prepare_input)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_feats = []
        val_labels = []
        for session in val_set:
            eve_batch, lab_batch, _ = load_data_and_label(
                session[0], session[1], model_emb.prepare_input_test
            )  # use prepare_input_test for testing time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            for v in val_labels:
                fout.write('%d\n' % int(v))

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                feat_paths = [path[0] for path in train_set]
                label_paths = [path[1] for path in train_set]
                # reshape a list to list of list
                # interesting hacky code from: https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python
                feat_paths = list(zip(*[iter(feat_paths)] *
                                      cfg.sess_per_batch))
                label_paths = list(
                    zip(*[iter(label_paths)] * cfg.sess_per_batch))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        # Hierarchical sampling (same as fast rcnn)
                        start_time_select = time.time()

                        # First, sample sessions for a batch
                        eve, se, lab = sess.run(next_train)

                        select_time1 = time.time() - start_time_select

                        # Get the similarity of all events
                        sim_prob = np.full((eve.shape[0], eve.shape[0]),
                                           np.nan, dtype='float32')
                        comb = list(
                            itertools.combinations(range(eve.shape[0]), 2))
                        for start, end in zip(
                                range(0, len(comb), cfg.batch_size),
                                range(cfg.batch_size,
                                      len(comb) + cfg.batch_size,
                                      cfg.batch_size)):
                            end = min(end, len(comb))
                            comb_idx = []
                            for c in comb[start:end]:
                                comb_idx.extend([c[0], c[1], c[1]])
                            emb = sess.run(pddm_ap,
                                           feed_dict={
                                               input_ph: eve[comb_idx],
                                               dropout_ph: 1.0
                                           })
                            for i in range(emb.shape[0]):
                                sim_prob[comb[start + i][0],
                                         comb[start + i][1]] = emb[i]
                                sim_prob[comb[start + i][1],
                                         comb[start + i][0]] = emb[i]

                        # Second, sample triplets within sampled sessions
                        triplet_selected, active_count = utils.select_triplets_facenet(
                            lab, sim_prob, cfg.triplet_per_batch, cfg.alpha)

                        select_time2 = time.time(
                        ) - start_time_select - select_time1

                        start_time_train = time.time()
                        triplet_input_idx = [
                            idx for triplet in triplet_selected
                            for idx in triplet
                        ]
                        triplet_input = eve[triplet_input_idx]
                        # perform training on the selected triplets
                        err, _, step, summ = sess.run(
                            [total_loss, train_op, global_step, summary_op],
                            feed_dict={
                                input_ph: triplet_input,
                                dropout_ph: cfg.keep_prob,
                                lr_ph: learning_rate
                            })

                        train_time = time.time() - start_time_train
                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tSelect_time1: %.3f\tSelect_time2: %.3f\tTrain_time: %.3f\tLoss %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], triplet_input.shape[0]//3, select_time1, select_time2, train_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="active_count",
                                             simple_value=active_count),
                            tf.Summary.Value(
                                tag="triplet_num",
                                simple_value=triplet_input.shape[0] // 3)
                        ])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 dropout_ph: 1.0
                                             })
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)

                val_sim_prob = np.full(
                    (val_feats.shape[0], val_feats.shape[0]),
                    np.nan, dtype='float32')
                val_comb = list(
                    itertools.combinations(range(val_feats.shape[0]), 2))
                for start, end in zip(
                        range(0, len(val_comb), cfg.batch_size),
                        range(cfg.batch_size,
                              len(val_comb) + cfg.batch_size, cfg.batch_size)):
                    end = min(end, len(val_comb))
                    comb_idx = []
                    for c in val_comb[start:end]:
                        comb_idx.extend([c[0], c[1], c[1]])
                    emb = sess.run(pddm_ap,
                                   feed_dict={
                                       input_ph: val_feats[comb_idx],
                                       dropout_ph: 1.0
                                   })
                    for i in range(emb.shape[0]):
                        val_sim_prob[val_comb[start + i][0],
                                     val_comb[start + i][1]] = emb[i]
                        val_sim_prob[val_comb[start + i][1],
                                     val_comb[start + i][0]] = emb[i]

                mAP_PDDM = 0.0
                count = 0
                for i in range(val_labels.shape[0]):
                    if val_labels[i] > 0:
                        temp_labels = np.delete(val_labels, i, 0)
                        temp = np.delete(val_sim_prob, i, 1)
                        mAP_PDDM += average_precision_score(
                            np.squeeze(temp_labels == val_labels[i, 0]),
                            np.squeeze(1 - temp[i]))
                        count += 1
                mAP_PDDM /= count

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Validation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation mAP_PDDM",
                                     simple_value=mAP_PDDM),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f\tmAP_PDDM: %.4f" %
                      (epoch + 1, mAP, mPrec, mAP_PDDM))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
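
The epoch setup above chunks the flat path lists with the zip(*[iter(lst)] * n)
idiom referenced in the inline comment. A tiny self-contained illustration;
note that any trailing remainder shorter than n is silently dropped:

paths = ['s1', 's2', 's3', 's4', 's5']
groups = list(zip(*[iter(paths)] * 2))
print(groups)  # [('s1', 's2'), ('s3', 's4')] -- 's5' is discarded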
Example #18
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    tfrecords_files = glob.glob(cfg.tfrecords_root + '*.tfrecords')
    tfrecords_files = sorted(tfrecords_files)
    train_set = [
        f for f in tfrecords_files
        if os.path.basename(f).split('_')[0] in train_session
    ]
    print("Number of training events: %d" % len(train_set))

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model and get the embedding
        if cfg.network == "tsn":
            model = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
            seqlen_ph = tf.placeholder(tf.int32,
                                       shape=[None])  # fake, for consistency
            model.forward(input_ph)

        elif cfg.network == "lstm":
            model = networks.ConvLSTM(max_time=cfg.MAX_LENGTH_FRAMES,
                                      emb_dim=cfg.emb_dim)
            input_ph = tf.placeholder(
                tf.float32,
                shape=[None, cfg.MAX_LENGTH_FRAMES, None, None, None])
            seqlen_ph = tf.placeholder(tf.int32, shape=[None])
            model.forward(input_ph, seqlen_ph)

        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)
        triplet_loss = networks.triplet_loss(anchor, positive, negative,
                                             cfg.alpha)

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = triplet_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # session iterator for session sampling
        tf_paths_ph = tf.placeholder(tf.string, shape=[None])
        feat_dict = {'resnet': 98304}
        context_dict = {'label': 'int', 'length': 'int'}
        train_data = event_generator(tf_paths_ph,
                                     feat_dict,
                                     context_dict,
                                     event_per_batch=cfg.event_per_batch,
                                     num_threads=4,
                                     shuffled=True,
                                     preprocess_func=model.prepare_input_tf)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_feats = []
        val_labels = []
        val_lengths = []
        for session in val_set:
            eve_batch, lab_batch, bou_batch = load_data_and_label(
                session[0], session[1], model.prepare_input)
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_lengths.extend([b[1] - b[0] for b in bou_batch])
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        val_lengths = np.asarray(val_lengths, dtype='int32')
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            for v in val_labels:
                fout.write('%d\n' % int(v))

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.pretrained_model:
                print("Restoring pretrained model: %s" % cfg.pretrained_model)
                saver.restore(sess, cfg.pretrained_model)

            ################## Training loop ##################
            epoch = 0
            while epoch < cfg.max_epochs:
                step = sess.run(global_step, feed_dict=None)

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={tf_paths_ph: train_set})

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        start_time_select = time.time()

                        context, feature_lists = sess.run(next_train)
                        select_time1 = time.time() - start_time_select

                        eve = feature_lists[cfg.feat].reshape(
                            (-1, cfg.num_seg) + cfg.feat_dim[cfg.feat])
                        lab = context['label']
                        seq_len = context['length']

                        # Get the embeddings of all events
                        eve_embedding = np.zeros((eve.shape[0], cfg.emb_dim),
                                                 dtype='float32')
                        for start, end in zip(
                                range(0, eve.shape[0], cfg.batch_size),
                                range(cfg.batch_size,
                                      eve.shape[0] + cfg.batch_size,
                                      cfg.batch_size)):
                            end = min(end, eve.shape[0])
                            emb = sess.run(embedding,
                                           feed_dict={
                                               input_ph: eve[start:end],
                                               seqlen_ph: seq_len[start:end]
                                           })
                            eve_embedding[start:end] = emb

                        # Second, sample triplets within sampled sessions
                        # return the triplet input indices
                        if cfg.triplet_select == 'random':
                            # random selection is also expected to return
                            # the triplet input indices consumed below
                            triplet_input_idx = select_triplets_random(
                                eve, lab, cfg.triplet_per_batch)
                            negative_count = 0
                        elif cfg.triplet_select == 'facenet':
                            if epoch < cfg.negative_epochs:
                                triplet_input_idx = select_triplets_random(
                                    eve, lab, cfg.triplet_per_batch)
                                negative_count = 0
                            else:
                                triplet_input_idx, negative_count = select_triplets_facenet(
                                    lab,
                                    eve_embedding,
                                    cfg.triplet_per_batch,
                                    cfg.alpha,
                                    metric=cfg.metric)
                        else:
                            raise NotImplementedError

                        select_time2 = time.time(
                        ) - start_time_select - select_time1

                        if triplet_input_idx is not None:

                            triplet_input = eve[triplet_input_idx]
                            triplet_length = seq_len[triplet_input_idx]

                            start_time_train = time.time()
                            # perform training on the selected triplets
                            err, _, step, summ = sess.run(
                                [
                                    total_loss, train_op, global_step,
                                    summary_op
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    seqlen_ph: triplet_length,
                                    lr_ph: learning_rate
                                })

                            train_time = time.time() - start_time_train
                            print ("Epoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tSelect_time1: %.3f\tSelect_time2: %.3f\tTrain_time: %.3f\tLoss %.4f" % \
                                    (epoch+1, batch_count, batch_per_epoch, eve.shape[0], triplet_input.shape[0], select_time1, select_time2, train_time, err))

                            summary = tf.Summary(value=[
                                tf.Summary.Value(tag="train_loss",
                                                 simple_value=err),
                                tf.Summary.Value(tag="negative_count",
                                                 simple_value=negative_count),
                                tf.Summary.Value(tag="select_time1",
                                                 simple_value=select_time1)
                            ])
                            summary_writer.add_summary(summary, step)
                            summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        epoch += 1
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 seqlen_ph: val_lengths
                                             })
                mAP, _ = utils.evaluate(val_embeddings, val_labels)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP)
                ])
                summary_writer.add_summary(summary, step)

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
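
Both training loops above use the same learning-rate schedule: the base rate is
held for static_epochs, then decayed exponentially so that it reaches
base * 0.001 at the final epoch (the schedule from "In Defense of the Triplet
Loss"). A standalone sketch with made-up hyperparameters:

def triplet_lr(epoch, base_lr=1e-3, static_epochs=10, max_epochs=50):
    if epoch < static_epochs:
        return base_lr
    frac = (epoch - static_epochs) / (max_epochs - static_epochs)
    return base_lr * 0.001 ** frac

for e in (0, 10, 30, 50):
    print(e, '%.6f' % triplet_lr(e))  # 0.001000, 0.001000, 0.000032, 0.000001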
Example #19
def retrieve_replay_update(args,
                           model,
                           opt,
                           input_x,
                           input_y,
                           buffer,
                           task,
                           loader=None,
                           rehearse=True):
    """ finds buffer samples with maxium interference """
    '''
    ER - MIR and regular ER
    '''

    updated_inds = None
    if args.imprint:

        imprint(model, input_x, input_y, args.imprint > 1)

    hid = model.return_hidden(input_x)

    logits = model.linear(hid)
    if args.multiple_heads:
        logits = logits.masked_fill(loader.dataset.mask == 0, -1e9)

    opt.zero_grad()

    if not rehearse:
        loss_a = F.cross_entropy(logits, input_y, reduction='none')
        loss = (loss_a).sum() / loss_a.size(0)
        loss.backward()

        if args.friction:
            get_weight_accumelated_gradient_norm(model)
        if args.kl_far == -1:  #it was a dummy parameter to try normalizing the last layer weights
            weight_loss = get_weight_norm_diff(model, task=task, cl=5)
            weight_loss.backward()
        opt.step()

        return model

#####################################################################################################
    if args.method == 'mir_replay':
        bx, by, bt, subsample = buffer.sample(args.subsample,
                                              exclude_task=task,
                                              ret_ind=True)
        if args.cuda:
            bx = bx.to(args.device)
            by = by.to(args.device)
            bt = bt.to(args.device)
        grad_dims = []
        for param in model.parameters():
            grad_dims.append(param.data.numel())
        grad_vector = get_grad_vector(args, model.parameters, grad_dims)
        model_temp = get_future_step_parameters(model,
                                                grad_vector,
                                                grad_dims,
                                                lr=args.lr)

        with torch.no_grad():
            logits_track_pre = model(bx)
            buffer_hid = model_temp.return_hidden(bx)
            logits_track_post = model_temp.linear(buffer_hid)

            if args.multiple_heads:
                mask = torch.zeros_like(logits_track_post)
                mask.scatter_(1, loader.dataset.task_ids[bt], 1)
                assert mask.nelement() // mask.sum() == args.n_tasks
                logits_track_post = logits_track_post.masked_fill(
                    mask == 0, -1e9)
                logits_track_pre = logits_track_pre.masked_fill(
                    mask == 0, -1e9)

            pre_loss = F.cross_entropy(logits_track_pre, by, reduction="none")
            post_loss = F.cross_entropy(logits_track_post,
                                        by,
                                        reduction="none")
            scores = post_loss - pre_loss
            EN_logits = entropy_fn(logits_track_pre)
            if args.compare_to_old_logits:
                old_loss = F.cross_entropy(buffer.logits[subsample],
                                           by,
                                           reduction="none")

                updated_mask = pre_loss < old_loss
                updated_inds = updated_mask.data.nonzero().squeeze(1)
                scores = post_loss - torch.min(pre_loss, old_loss)

            all_logits = scores
            big_ind = all_logits.sort(
                descending=True)[1][:args.buffer_batch_size]

            idx = subsample[big_ind]

        mem_x, mem_y, logits_y, b_task_ids = bx[big_ind], by[
            big_ind], buffer.logits[idx], bt[big_ind]

        logits_buffer = model(mem_x)

        if args.mask:
            yy = torch.cat((input_y, mem_y), dim=0)
            mask = get_mask_unused_memories(yy, logits.size(1))
            logits = logits.masked_fill(mask, -1e9)  # suppress unused classes
            logits_buffer = logits_buffer.masked_fill(mask, -1e9)

        F.cross_entropy(logits, input_y).backward()

        (args.multiplier * F.cross_entropy(logits_buffer, mem_y)).backward()
##############################Nearest Neighbours###############################################

#########################Nearest Neighbours and Making Updated Local#################################################
    if args.method == 'oth_cl_neib_replay_mid_fix':
        #subsampling
        bx, by, bt, subsample = buffer.sample(args.subsample,
                                              exclude_task=task,
                                              ret_ind=True)
        #get hidden representation
        model.eval()
        b_hidden = model.return_hidden(bx)
        input_hidden = model.return_hidden(input_x)
        model.train()
        # get nearby samples
        close_indices, all_dist = get_nearbysamples(args, input_x,
                                                    input_hidden, b_hidden, by)
        #Not neighbouring indices
        _, sorted_indices = torch.sort(all_dist / args.buffer_batch_size)
        not_neighbor_inds = [
            x for x in sorted_indices if x not in close_indices
        ]
        if args.far:  # fix the furthest points
            far_indices = torch.tensor(not_neighbor_inds[-len(close_indices):])
        else:  # fix the mid points
            far_indices = torch.tensor(
                not_neighbor_inds[int(len(not_neighbor_inds) / 2):min(
                    len(not_neighbor_inds),
                    int(len(not_neighbor_inds) / 2) + len(close_indices))])

        # get corresponding samples of neighbours and far samples
        close_indices = torch.tensor(close_indices)
        mem_x = bx[close_indices]
        mem_y = by[close_indices]
        logits_buffer = model(mem_x)
        far_mem_x = bx[far_indices]
        if args.untilconvergence:
            utils.optimize(args, model, opt, input_x, input_y, mem_x, mem_y,
                           far_mem_x, task)
            return model
        else:
            utils.compute_lossgrad(args, model, input_x, input_y, mem_x, mem_y,
                                   far_mem_x)


####################################RAND########################################

    if args.method == 'rand_replay':

        mem_x, mem_y, bt = buffer.sample(args.buffer_batch_size,
                                         exclude_task=task)
        if args.untilconvergence:
            utils.rand_optimize(args, model, opt, input_x, input_y, mem_x,
                                mem_y, task)
            return model

        logits_buffer = model(mem_x)

        if args.mask:
            yy = torch.cat((input_y, mem_y), dim=0)
            mask = get_mask_unused_memories(yy, logits.size(1))
            logits = logits.masked_fill(mask, -1e9)  # suppress unused classes
            logits_buffer = logits_buffer.masked_fill(mask, -1e9)

        F.cross_entropy(logits, input_y).backward()

        (args.multiplier * F.cross_entropy(logits_buffer, mem_y)).backward()

    if updated_inds is not None:
        buffer.logits[subsample[updated_inds]] = deepcopy(
            logits_track_pre[updated_inds])

    if args.friction:
        get_weight_accumelated_gradient_norm(model)
        weight_gradient_norm(model, args.friction)
    if args.kl_far == -1:
        weight_loss = get_weight_norm_diff(model, task=task, cl=5)
        weight_loss.backward()
    opt.step()

    return model
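
The mir_replay branch above scores buffer samples by how much their loss would
increase after a *virtual* SGD step on the incoming batch (the MIR criterion).
A minimal PyTorch sketch of that idea, assuming a plain classifier `model`;
the helper name mir_scores, and the use of copy.deepcopy in place of the
gradient-vector utilities, are simplifications for illustration:

import copy
import torch
import torch.nn.functional as F

def mir_scores(model, lr, incoming_x, incoming_y, buf_x, buf_y):
    # buffer loss before the virtual update
    with torch.no_grad():
        pre = F.cross_entropy(model(buf_x), buf_y, reduction='none')
    # take one virtual gradient step on the incoming data
    virtual = copy.deepcopy(model)
    loss = F.cross_entropy(virtual(incoming_x), incoming_y)
    grads = torch.autograd.grad(loss, virtual.parameters())
    with torch.no_grad():
        for p, g in zip(virtual.parameters(), grads):
            p -= lr * g
        post = F.cross_entropy(virtual(buf_x), buf_y, reduction='none')
    return post - pre  # larger score = more interference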
Example #20
def train():
    with tf.name_scope('input'):
        train_image_batch, train_label_batch = input_data.read_cifar10(
            data_dir=data_dir,
            is_train=True,
            batch_size=BATCH_SIZE,
            shuffle=True)
        test_image_batch, test_label_batch = input_data.read_cifar10(
            data_dir=data_dir,
            is_train=False,
            batch_size=BATCH_SIZE,
            shuffle=False)
    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    y_ = tf.placeholder(tf.int16, shape=[BATCH_SIZE, N_CLASSES])
    # build the graph on the placeholders so that the batches fed below
    # actually flow through the network for both the train and test streams
    logits = models.VGG16(x, N_CLASSES, IS_PRETRAIN)

    loss = utils.loss(logits, y_)
    accuracy = utils.accuracy(logits, y_)
    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = utils.optimize(loss, learning_rate, my_global_step)

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # load the parameter file, assign the parameters, skip the specific layers
    utils.load_with_skip(pre_trained_weights, sess, ['fc6', 'fc7', 'fc8'])

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    train_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break

            train_images, train_labels = sess.run(
                [train_image_batch, train_label_batch])
            _, train_loss, train_acc = sess.run([train_op, loss, accuracy],
                                                feed_dict={
                                                    x: train_images,
                                                    y_: train_labels
                                                })
            if step % 50 == 0 or (step + 1) == MAX_STEP:
                print('Step: %d, train_loss: %.4f, train_accuracy: %.4f%%' %
                      (step, train_loss, train_acc))
                summary_str = sess.run(summary_op,
                                       feed_dict={
                                           x: train_images,
                                           y_: train_labels
                                       })
                train_summary_writer.add_summary(summary_str, step)

            if step % 200 == 0 or (step + 1) == MAX_STEP:
                test_images, test_labels = sess.run(
                    [test_image_batch, test_label_batch])
                test_loss, test_acc = sess.run([loss, accuracy],
                                               feed_dict={
                                                   x: test_images,
                                                   y_: test_labels
                                               })
                print(
                    '**  Step: %d, test_loss: %.2f, test_accuracy: %.2f%%  **'
                    % (step, test_loss, test_acc))

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()
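
train() above pulls numpy batches out of the queue-based input pipeline with
sess.run and re-feeds them through placeholders, so a single logits/loss graph
serves both the train and test streams. A condensed sketch of that
share-one-graph pattern (TF 1.x; the toy tensors and shapes are illustrative):

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 4])
y_ = tf.placeholder(tf.float32, shape=[None, 2])
logits = tf.layers.dense(x, 2)
loss = tf.losses.softmax_cross_entropy(onehot_labels=y_, logits=logits)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(2):  # stand-ins for one train batch and one test batch
        xs = np.random.rand(8, 4).astype('float32')
        ys = np.eye(2, dtype='float32')[np.random.randint(0, 2, 8)]
        print(sess.run(loss, feed_dict={x: xs, y_: ys}))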
Example #21
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    tfrecords_files = glob.glob(cfg.tfrecords_root + '*.tfrecords')
    tfrecords_files = sorted(tfrecords_files)
    train_set = [
        f for f in tfrecords_files
        if os.path.basename(f).split('_')[0] in train_session
    ]
    print("Number of training events: %d" % len(train_set))

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        if cfg.network == "tsn":
            model = networks.ConvTSNClassifier(n_seg=cfg.num_seg,
                                               output_keep_prob=cfg.keep_prob)

        # get prediction
        input_ph = tf.placeholder(tf.float32,
                                  shape=[None, cfg.num_seg, None, None, None])
        output_ph = tf.placeholder(tf.int32, shape=[None])
        model.forward(input_ph)
        embedding = tf.nn.l2_normalize(model.feat,
                                       axis=1,
                                       epsilon=1e-10,
                                       name='embedding')
        logits = model.logits
        pred = tf.argmax(logits, 1)

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=output_ph,
                                                           logits=logits))

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # session iterator for session sampling
        tf_paths_ph = tf.placeholder(tf.string, shape=[None])
        train_data = event_generator(tf_paths_ph,
                                     cfg.feat_dict,
                                     cfg.context_dict,
                                     event_per_batch=cfg.event_per_batch,
                                     num_threads=1,
                                     shuffled=True,
                                     preprocess_func=model.prepare_input_tf)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_feats = []
        val_labels = []
        for session in val_set:
            eve_batch, lab_batch, _ = load_data_and_label(
                session[0], session[1], model.prepare_input_test)
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            for v in val_labels:
                fout.write('%d\n' % int(v))

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.pretrained_model:
                print("Restoring pretrained model: %s" % cfg.pretrained_model)
                saver.restore(sess, cfg.pretrained_model)

            ################## Training loop ##################
            epoch = 0
            while epoch < cfg.max_epochs:
                step = sess.run(global_step, feed_dict=None)

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={tf_paths_ph: train_set})

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        start_time_select = time.time()

                        context, feature_lists = sess.run(next_train)
                        select_time = time.time() - start_time_select

                        eve = feature_lists[cfg.feat].reshape(
                            (-1, cfg.num_seg) + cfg.feat_dim[cfg.feat])
                        lab = context['label']

                        # perform training on the batch
                        start_time_train = time.time()
                        err, y_pred, _, step, summ = sess.run(
                            [total_loss, pred, train_op, global_step,
                             summary_op],
                            feed_dict={
                                input_ph: eve,
                                output_ph: lab,
                                lr_ph: learning_rate
                            })

                        # classification accuracy on batch
                        acc = accuracy_score(lab, y_pred)

                        train_time = time.time() - start_time_train
                        print ("Epoch: [%d: %d]\tSelect_time: %.3f\tTrain_time: %.3f\tLoss: %.4f\tAcc: %.4f" % \
                                    (epoch+1, batch_count, select_time, train_time, err, acc))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="train_acc", simple_value=acc)
                        ])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        epoch += 1

                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, val_pred, _ = sess.run(
                    [embedding, pred, set_emb],
                    feed_dict={input_ph: val_feats})
                acc = accuracy_score(val_labels, val_pred)
                mAP, _ = utils.evaluate(val_embeddings, val_labels)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation ACC", simple_value=acc)
                ])
                summary_writer.add_summary(summary, step)

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # write summary and save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
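
The projector block above is rewritten every epoch, but the recipe itself is
small: a variable holding the embedding matrix, a metadata TSV with one label
per row, and a ProjectorConfig tying them together for TensorBoard. A
condensed sketch (TF 1.x; the log directory, sizes, and labels are
illustrative, and log_dir is assumed to exist):

import os
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

log_dir = '/tmp/emb_demo'
emb_var = tf.Variable(tf.random_normal([100, 32]), name='embeddings')

with open(os.path.join(log_dir, 'metadata.tsv'), 'w') as fout:
    for i in range(100):
        fout.write('%d\n' % (i % 10))  # fake labels, one per embedding row

writer = tf.summary.FileWriter(log_dir)
config = projector.ProjectorConfig()
visual_embedding = config.embeddings.add()
visual_embedding.tensor_name = emb_var.name
visual_embedding.metadata_path = 'metadata.tsv'
projector.visualize_embeddings(writer, config)
# a checkpoint containing emb_var must also be saved into log_dir for
# TensorBoard to pick up the values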
Example #22
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat,
                                cfg.label_root)
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        if cfg.network == "tsn":
            model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model_emb = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                         emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                          emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError

        # multitask loss (verification)
        model_ver = networks.PairSim2(n_input=cfg.emb_dim)
        #model_ver = networks.PairSim(n_input=cfg.emb_dim)

        # get the embedding
        if cfg.feat == "sensors":
            input_ph = tf.placeholder(tf.float32,
                                      shape=[None, cfg.num_seg, None])
        elif cfg.feat == "resnet":
            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)
        metric_loss = networks.triplet_loss(anchor, positive, negative,
                                            cfg.alpha)

        # verification loss
        pos_pairs = tf.concat(
            [tf.expand_dims(anchor, axis=1),
             tf.expand_dims(positive, axis=1)],
            axis=1)
        pos_label = tf.ones((tf.shape(pos_pairs)[0], ), tf.int32)
        neg_pairs = tf.concat(
            [tf.expand_dims(anchor, axis=1),
             tf.expand_dims(negative, axis=1)],
            axis=1)
        neg_label = tf.zeros((tf.shape(neg_pairs)[0], ), tf.int32)

        ver_pairs = tf.concat([pos_pairs, neg_pairs], axis=0)
        ver_label = tf.concat([pos_label, neg_label], axis=0)

        model_ver.forward(ver_pairs, dropout_ph)
        logits = model_ver.logits
        pred = tf.argmax(logits, -1)

        ver_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=ver_label,
                                                           logits=logits))

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = metric_loss + cfg.lambda_ver * ver_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = session_generator(feat_paths_ph,
                                       label_paths_ph,
                                       sess_per_batch=cfg.sess_per_batch,
                                       num_threads=2,
                                       shuffled=False,
                                       preprocess_func=model_emb.prepare_input)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[1], model_emb.prepare_input_test
            )  # use prepare_input_test for testing time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                feat_paths = [path[0] for path in train_set]
                label_paths = [path[1] for path in train_set]
                # reshape a list to list of list
                # interesting hacky code from: https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python
                feat_paths = list(zip(*[iter(feat_paths)] *
                                      cfg.sess_per_batch))
                label_paths = list(
                    zip(*[iter(label_paths)] * cfg.sess_per_batch))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        # Hierarchical sampling (same as fast rcnn)
                        start_time_select = time.time()

                        # First, sample sessions for a batch
                        eve, se, lab = sess.run(next_train)

                        select_time1 = time.time() - start_time_select

                        # Get the embeddings of all events
                        eve_embedding = np.zeros((eve.shape[0], cfg.emb_dim),
                                                 dtype='float32')
                        for start, end in zip(
                                range(0, eve.shape[0], cfg.batch_size),
                                range(cfg.batch_size,
                                      eve.shape[0] + cfg.batch_size,
                                      cfg.batch_size)):
                            end = min(end, eve.shape[0])
                            emb = sess.run(embedding,
                                           feed_dict={
                                               input_ph: eve[start:end],
                                               dropout_ph: 1.0
                                           })
                            eve_embedding[start:end] = emb

                        # Second, sample triplets within sampled sessions
                        triplet_input, negative_count = select_triplets_facenet(
                            eve,
                            lab,
                            eve_embedding,
                            cfg.triplet_per_batch,
                            cfg.alpha,
                            metric=cfg.metric)

                        select_time2 = time.time(
                        ) - start_time_select - select_time1

                        if triplet_input is not None:
                            start_time_train = time.time()
                            # perform training on the selected triplets
                            err, metric_err, ver_err, y_pred, _, step, summ = sess.run(
                                [
                                    total_loss, metric_loss, ver_loss, pred,
                                    train_op, global_step, summary_op
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    dropout_ph: cfg.keep_prob,
                                    lr_ph: learning_rate
                                })

                            train_time = time.time() - start_time_train

                            # calculate accuracy
                            batch_label = np.hstack(
                                (np.ones((triplet_input.shape[0] // 3, ),
                                         dtype='int32'),
                                 np.zeros((triplet_input.shape[0] // 3, ),
                                          dtype='int32')))
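                            # assuming pred scores anchor-positive pairs before
                            # anchor-negative ones, the first third of the rows
                            # maps to label 1 and the next third to label 0;
                            # e.g. 30 input rows (10 triplets) give 10 ones
                            # followed by 10 zeros.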
                            acc = accuracy_score(batch_label, y_pred)
                            print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tSelect_time1: %.3f\tSelect_time2: %.3f\tTrain_time: %.3f\tLoss %.4f" % \
                                    (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], triplet_input.shape[0], select_time1, select_time2, train_time, err))

                            summary = tf.Summary(value=[
                                tf.Summary.Value(tag="train_loss",
                                                 simple_value=err),
                                tf.Summary.Value(tag="metric_loss",
                                                 simple_value=metric_err),
                                tf.Summary.Value(tag="ver_loss",
                                                 simple_value=ver_err),
                                tf.Summary.Value(tag="acc", simple_value=acc),
                                tf.Summary.Value(tag="negative_count",
                                                 simple_value=negative_count)
                            ])
                            summary_writer.add_summary(summary, step)
                            summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 dropout_ph: 1.0
                                             })
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
Example #23
0
def main():

    cfg = TrainConfig().parse()
    print (cfg.name)
    result_dir = os.path.join(cfg.result_root, 
            cfg.name+'_'+datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    images_root = '/mnt/work/CUB_200_2011/images/'
    with open('/mnt/work/CUB_200_2011/images.txt', 'r') as fin:
        image_files = fin.read().strip().split('\n')
    with open('/mnt/work/CUB_200_2011/image_class_labels.txt', 'r') as fin:
        labels = fin.read().strip().split('\n')

    train_files = []
    train_labels = []
    val_files = []
    val_labels = []
    for i in range(len(image_files)):
        label = int(labels[i].split(' ')[1])
        if label <= 100:
            train_files.append(images_root+image_files[i].split(' ')[1])
            train_labels.append(label)
        else:
            val_files.append(images_root+image_files[i].split(' ')[1])
            val_labels.append(label)

    class_idx_dict = {}
    for i, l in enumerate(train_labels):
        l = int(l)
        if l not in class_idx_dict:
            class_idx_dict[l] = [i]
        else:
            class_idx_dict[l].append(i)
    C = len(list(class_idx_dict.keys()))

    val_images = np.zeros((len(val_files), 256, 256, 3), dtype=np.uint8)
    for i in range(len(val_files)):
        img = Image.open(val_files[i]).convert('RGB').resize((256,256))
        val_images[i] = np.array(img)

    # generate metadata.tsv for visualizing the embeddings
    with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
        for l in val_labels:
            fout.write('{}\n'.format(int(l)))


    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        model_emb = networks.CUBLayer(n_input=1024, n_output=cfg.emb_dim)

        # get the embedding
        input_ph = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])
        label_ph = tf.placeholder(tf.int32, shape=[None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])

        pool5 = networks.Inception_V2(input_ph)
        model_emb.forward(pool5, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.logits, axis=-1, epsilon=1e-10)
        else:
            embedding = model_emb.logits

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # use tensorflow implementation...
        if cfg.loss == 'triplet':
            metric_loss = metric_loss_ops.triplet_semihard_loss(
                          labels=label_ph,
                          embeddings=embedding,
                          margin=cfg.alpha)
        elif cfg.loss == 'lifted':
            metric_loss = metric_loss_ops.lifted_struct_loss(
                          labels=label_ph,
                          embeddings=embedding,
                          margin=cfg.alpha)
        elif cfg.loss == 'mylifted':
            metric_loss, num_active, diff, weights, fp, cn = networks.lifted_loss(all_dist, label_ph, cfg.alpha, weighted=False)

        else:
            raise NotImplementedError

        regularization_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = metric_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            ################## Training loop ##################
            for epoch in range(cfg.max_epochs):

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))


                # sample images
                class_in_batch = set()
                idx_batch = np.array([], dtype=np.int32)
                while len(idx_batch) < cfg.batch_size:
                    sampled_class = np.random.choice(list(class_idx_dict.keys()))
                    if sampled_class not in class_in_batch:
                        class_in_batch.add(sampled_class)
                        subsample_size = np.random.choice(range(5, 11))
                        subsample = np.random.permutation(class_idx_dict[sampled_class])[:subsample_size]
                        idx_batch = np.append(idx_batch, subsample)
                idx_batch = idx_batch[:cfg.batch_size]
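                # e.g. with cfg.batch_size == 64 (illustrative), distinct
                # classes are drawn one at a time, each contributing 5-10
                # random images, until at least 64 indices are collected; the
                # surplus from the last class is then truncated.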

                image_batch = np.zeros((len(idx_batch), 256, 256, 3), dtype=np.uint8)
                lab_batch = np.zeros((len(idx_batch), ), dtype=np.int32)
                for i, idx in enumerate(idx_batch):
                    # load image with random flipping
                    if np.random.rand() < 0.5:
                        img = Image.open(train_files[idx]).convert('RGB').resize((256,256)).transpose(Image.FLIP_LEFT_RIGHT)
                    else:
                        img = Image.open(train_files[idx]).convert('RGB').resize((256,256))
                    image_batch[i] = np.array(img)
                    lab_batch[i] = train_labels[idx]

                # perform training on the sampled image batch
                err, _, step, summ = sess.run([total_loss, train_op, global_step, summary_op],
                                feed_dict = {input_ph: image_batch,
                                            label_ph: lab_batch,
                                            dropout_ph: cfg.keep_prob,
                                            lr_ph: learning_rate})

                print ("%s\tEpoch: %d\tImages num: %d\tLoss %.4f" % \
                        (cfg.name, epoch+1, feat_batch.shape[0], err))

                summary = tf.Summary(value=[tf.Summary.Value(tag="train_loss", simple_value=err),
                        tf.Summary.Value(tag="images_num", simple_value=feat_batch.shape[0])])
                summary_writer.add_summary(summary, step)
                summary_writer.add_summary(summ, step)

                # validation on val_set
                if (epoch+1) % 1000 == 0:
                    val_embeddings, _ = sess.run([embedding,set_emb], feed_dict={input_ph: val_images, label_ph:val_labels, dropout_ph: 1.0})
                    mAP, mPrec, recall = utils.evaluate_simple(val_embeddings, val_labels)
                    summary = tf.Summary(value=[tf.Summary.Value(tag="Validation mAP", simple_value=mAP),
                                        tf.Summary.Value(tag="Validation Recall@1", simple_value=recall),
                                        tf.Summary.Value(tag="Validation mPrec@0.5", simple_value=mPrec)])
                    print ("Epoch: [%d]\tmAP: %.4f\trecall: %.4f" % (epoch+1,mAP,recall))

                    # config for embedding visualization
                    config = projector.ProjectorConfig()
                    visual_embedding = config.embeddings.add()
                    visual_embedding.tensor_name = emb_var.name
                    visual_embedding.metadata_path = os.path.join(result_dir, 'metadata_val.tsv')
                    projector.visualize_embeddings(summary_writer, config)

                    summary_writer.add_summary(summary, step)


                    # save model
                    saver.save(sess, os.path.join(result_dir, cfg.name+'.ckpt'), global_step=step)
Example #24
0
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_multimodal_dataset(cfg.feature_root, train_session,
                                           cfg.feat, cfg.label_root)
    train_set = train_set[:cfg.label_num]
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_multimodal_dataset(cfg.feature_root, val_session,
                                         cfg.feat, cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')
        label_ph = tf.placeholder(tf.int32, shape=[None], name="label")

        ####################### Load models here ########################

        with tf.variable_scope("modality_core"):
            # load backbone model
            if cfg.network == "convtsn":
                model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                             emb_dim=cfg.emb_dim)
            elif cfg.network == "convrtsn":
                model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                              emb_dim=cfg.emb_dim)
            else:
                raise NotImplementedError

            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
            dropout_ph = tf.placeholder(tf.float32, shape=[])
            model_emb.forward(input_ph, dropout_ph)  # called inside the scope since the LSTM creates variables

        with tf.variable_scope("modality_sensors"):
            sensors_emb_dim = 32
            model_emb_sensors = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=sensors_emb_dim)

            input_sensors_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 8])
            model_emb_sensors.forward(input_sensors_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_sensors"):
                    var_list[v.op.name.replace("modality_sensors/", "")] = v
            restore_saver_sensors = tf.train.Saver(var_list)

        with tf.variable_scope("hallucination_sensors"):
            # load backbone model
            if cfg.network == "convtsn":
                hal_emb_sensors = networks.ConvTSN(n_seg=cfg.num_seg,
                                                   emb_dim=sensors_emb_dim)
            elif cfg.network == "convrtsn":
                hal_emb_sensors = networks.ConvRTSN(n_seg=cfg.num_seg,
                                                    emb_dim=sensors_emb_dim)
            else:
                raise NotImplementedError

            hal_emb_sensors.forward(input_ph, dropout_ph)  # called inside the scope since the LSTM creates variables

        with tf.variable_scope("modality_segment"):
            segment_emb_dim = 32
            model_emb_segment = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=segment_emb_dim,
                                              n_input=357)

            input_segment_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 357])
            model_emb_segment.forward(input_segment_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_segment"):
                    var_list[v.op.name.replace("modality_segment/", "")] = v
            restore_saver_segment = tf.train.Saver(var_list)

        with tf.variable_scope("hallucination_segment"):
            # load backbone model
            if cfg.network == "convtsn":
                hal_emb_segment = networks.ConvTSN(n_seg=cfg.num_seg,
                                                   emb_dim=segment_emb_dim)
            elif cfg.network == "convrtsn":
                hal_emb_segment = networks.ConvRTSN(n_seg=cfg.num_seg,
                                                    emb_dim=segment_emb_dim)
            else:
                raise NotImplementedError

            hal_emb_segment.forward(input_ph, dropout_ph)  # called inside the scope since the LSTM creates variables

        ############################# Forward Pass #############################

        # Core branch
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
            embedding_sensors = tf.nn.l2_normalize(model_emb_sensors.hidden,
                                                   axis=-1,
                                                   epsilon=1e-10)
            embedding_hal_sensors = tf.nn.l2_normalize(hal_emb_sensors.hidden,
                                                       axis=-1,
                                                       epsilon=1e-10)
            embedding_segment = tf.nn.l2_normalize(model_emb_segment.hidden,
                                                   axis=-1,
                                                   epsilon=1e-10)
            embedding_hal_segment = tf.nn.l2_normalize(hal_emb_segment.hidden,
                                                       axis=-1,
                                                       epsilon=1e-10)
        else:
            embedding = model_emb.hidden
            embedding_sensors = model_emb_sensors.hidden
            embedding_hal_sensors = hal_emb_sensors.hidden
            embedding_segment = model_emb_segment.hidden
            embedding_hal_segment = hal_emb_segment.hidden

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # a fusion embedding
        embedding_fused = tf.concat(
            (embedding, embedding_hal_sensors, embedding_hal_segment), axis=1)

        ############################# Calculate loss #############################

        # Use tensorflow implementation for loss functions
        if cfg.loss == 'triplet':
            metric_loss1, active_count = loss_tf.triplet_semihard_loss(
                labels=label_ph, embeddings=embedding, margin=cfg.alpha)
            metric_loss2, _ = loss_tf.triplet_semihard_loss(
                labels=label_ph,
                embeddings=embedding_sensors,
                margin=cfg.alpha)
            metric_loss3, _ = loss_tf.triplet_semihard_loss(
                labels=label_ph,
                embeddings=embedding_hal_sensors,
                margin=cfg.alpha)
            metric_loss4, _ = loss_tf.triplet_semihard_loss(
                labels=label_ph,
                embeddings=embedding_segment,
                margin=cfg.alpha)
            metric_loss5, _ = loss_tf.triplet_semihard_loss(
                labels=label_ph,
                embeddings=embedding_hal_segment,
                margin=cfg.alpha)
            metric_loss6, _ = loss_tf.triplet_semihard_loss(
                labels=label_ph, embeddings=embedding_fused, margin=cfg.alpha)
            metric_loss = metric_loss1 + metric_loss2 + metric_loss3 + metric_loss4 + metric_loss5 + metric_loss6


        # elif cfg.loss == 'lifted':
        #     metric_loss, active_count = loss_tf.lifted_struct_loss(
        #         labels=label_ph,
        #         embeddings=embedding,
        #         margin=cfg.alpha)
        else:
            raise NotImplementedError

        # hallucination loss (regression loss)
        hal_loss_sensors = tf.nn.l2_loss(embedding_sensors -
                                         embedding_hal_sensors)
        hal_loss_segment = tf.nn.l2_loss(embedding_segment -
                                         embedding_hal_segment)
        hal_loss = hal_loss_sensors + hal_loss_segment
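        # tf.nn.l2_loss(x) computes sum(x ** 2) / 2, so each hallucination
        # branch regresses the embedding of its real modality; the usual
        # motivation is that the hallucinated embeddings can stand in when the
        # sensors/segment streams are unavailable at test time.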

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        # use lambda_multimodal for hal_loss
        total_loss = metric_loss + cfg.lambda_multimodal * hal_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        #########################################################################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        feat2_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        feat3_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = multimodal_session_generator(
            feat_paths_ph,
            feat2_paths_ph,
            feat3_paths_ph,
            label_paths_ph,
            sess_per_batch=cfg.sess_per_batch,
            num_threads=2,
            shuffled=False,
            preprocess_func=[
                model_emb.prepare_input, model_emb_sensors.prepare_input,
                model_emb_segment.prepare_input
            ])
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_feats2 = []
        val_feats3 = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb.prepare_input_test
            )  # use prepare_input_test at test time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)

            eve2_batch, _, _ = load_data_and_label(
                session[1], session[-1], model_emb_sensors.prepare_input_test)
            val_feats2.append(eve2_batch)

            eve3_batch, _, _ = load_data_and_label(
                session[2], session[-1], model_emb_segment.prepare_input_test)
            val_feats3.append(eve3_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_feats2 = np.concatenate(val_feats2, axis=0)
        val_feats3 = np.concatenate(val_feats3, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualizing the embeddings
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        # Variable for visualizing the embeddings
        emb_var = tf.Variable(tf.zeros([val_feats.shape[0], cfg.emb_dim]),
                              name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        summary_op = tf.summary.merge_all()
        saver = tf.train.Saver(max_to_keep=10)

        #########################################################################

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            print("Restoring sensors model: %s" % cfg.sensors_path)
            restore_saver_sensors.restore(sess, cfg.sensors_path)
            print("Restoring segment model: %s" % cfg.segment_path)
            restore_saver_segment.restore(sess, cfg.segment_path)

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.01**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                paths = list(zip(*[iter(train_set)] * cfg.sess_per_batch))

                feat_paths = [[p[0] for p in path] for path in paths]
                feat2_paths = [[p[1] for p in path] for path in paths]
                feat3_paths = [[p[2] for p in path] for path in paths]
                label_paths = [[p[-1] for p in path] for path in paths]

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             feat2_paths_ph: feat2_paths,
                             feat3_paths_ph: feat3_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        ##################### Data loading ########################
                        start_time = time.time()
                        eve, eve_sensors, eve_segment, lab, batch_sess = sess.run(
                            next_train)
                        # to bound memory use, keep at most cfg.event_per_batch events
                        if eve.shape[0] > cfg.event_per_batch:
                            idx = np.random.permutation(
                                eve.shape[0])[:cfg.event_per_batch]
                            eve = eve[idx]
                            eve_sensors = eve_sensors[idx]
                            eve_segment = eve_segment[idx]
                            batch_sess = batch_sess[idx]
                            lab = lab[idx]
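                        # e.g. if the sampled sessions yield 5000 events and
                        # cfg.event_per_batch is 3000 (illustrative values), a
                        # random 3000-event subset is kept, with all modalities
                        # and labels indexed by the same permutation.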
                        load_time = time.time() - start_time

                        ##################### Start training  ########################

                        err, metric_err, hal_err, _, step, summ = sess.run(
                            [
                                total_loss, metric_loss, hal_loss, train_op,
                                global_step, summary_op
                            ],
                            feed_dict={
                                input_ph: eve,
                                input_sensors_ph: eve_sensors,
                                input_segment_ph: eve_segment,
                                label_ph: np.squeeze(lab),
                                dropout_ph: cfg.keep_prob,
                                lr_ph: learning_rate
                            })

                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tLoad time: %.3f\tMetric Loss %.4f\tHal Loss %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], triplet_input.shape[0]//3, load_time, select_time, metric_err, hal_err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="active_count",
                                             simple_value=active_count),
                            tf.Summary.Value(tag="metric_loss",
                                             simple_value=metric_err),
                            tf.Summary.Value(tag="hallucination_loss",
                                             simple_value=hal_err)
                        ])

                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, hal_err, _ = sess.run(
                    [embedding, hal_loss, set_emb],
                    feed_dict={
                        input_ph: val_feats,
                        input_sensors_ph: val_feats2,
                        input_segment_ph: val_feats3,
                        dropout_ph: 1.0
                    })
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec),
                    tf.Summary.Value(tag="Validation hal loss",
                                     simple_value=hal_err)
                ])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" %
                      (epoch + 1, mAP, mPrec))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
Example #25
0
def main():

    # Load configurations and write to config.txt
    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat,
                                cfg.label_root)
    train_set = train_set[:cfg.label_num]
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        label_ph = tf.placeholder(tf.int32, shape=[None], name="label")
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        ####################### Define model here ########################

        # Load embedding model
        if cfg.network == "tsn":
            model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model_emb = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                         emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                          emb_dim=cfg.emb_dim,
                                          n_h=cfg.n_h,
                                          n_w=cfg.n_w,
                                          n_C=cfg.n_C,
                                          n_input=cfg.n_input)
        elif cfg.network == "convbirtsn":
            model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg,
                                            emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError

        # get the embedding
        if cfg.feat == "sensors" or cfg.feat == "segment":
            input_ph = tf.placeholder(tf.float32,
                                      shape=[None, cfg.num_seg, None])
        elif cfg.feat == "resnet" or cfg.feat == "segment_down":
            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # Use tensorflow implementation for loss functions
        if cfg.loss == 'triplet':
            metric_loss, active_count = loss_tf.triplet_semihard_loss(
                labels=label_ph, embeddings=embedding, margin=cfg.alpha)
        elif cfg.loss == 'lifted':
            metric_loss, active_count = loss_tf.lifted_struct_loss(
                labels=label_ph, embeddings=embedding, margin=cfg.alpha)
        else:
            raise NotImplementedError

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = metric_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        ####################### Define data loader ############################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = session_generator(feat_paths_ph,
                                       label_paths_ph,
                                       sess_per_batch=cfg.sess_per_batch,
                                       num_threads=2,
                                       shuffled=False,
                                       preprocess_func=model_emb.prepare_input)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # Prepare validation data
        val_sess = []
        val_feats = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb.prepare_input_test
            )  # use prepare_input_test at test time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualizing the embeddings
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        # Variable for visualizing the embeddings
        emb_var = tf.Variable(tf.zeros([val_feats.shape[0], cfg.emb_dim]),
                              name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        summary_op = tf.summary.merge_all()
        saver = tf.train.Saver(max_to_keep=10)

        #########################################################################

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.01**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                feat_paths = [path[0] for path in train_set]
                label_paths = [path[1] for path in train_set]
                # reshape a flat list into a list of lists
                # hacky idiom from: https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python
                feat_paths = list(zip(*[iter(feat_paths)] *
                                      cfg.sess_per_batch))
                label_paths = list(
                    zip(*[iter(label_paths)] * cfg.sess_per_batch))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        # Get a batch
                        start_time_select = time.time()

                        eve, se, lab = sess.run(next_train)
                        # to bound memory use, keep at most cfg.event_per_batch events
                        if eve.shape[0] > cfg.event_per_batch:
                            idx = np.random.permutation(
                                eve.shape[0])[:cfg.event_per_batch]
                            eve = eve[idx]
                            se = se[idx]
                            lab = lab[idx]

                        select_time = time.time() - start_time_select

                        start_time_train = time.time()

                        # perform training on the batch
                        err, _, step, summ = sess.run(
                            [total_loss, train_op, global_step, summary_op],
                            feed_dict={
                                input_ph: eve,
                                label_ph: np.squeeze(lab),
                                dropout_ph: cfg.keep_prob,
                                lr_ph: learning_rate
                            })

                        train_time = time.time() - start_time_train

                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tSelect_time: %.3f\tTrain_time: %.3f\tLoss %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], select_time, train_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                        ])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 dropout_ph: 1.0
                                             })
                mAP, mPrec, recall = utils.evaluate_simple(
                    val_embeddings, val_labels)
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation Recall@1",
                                     simple_value=recall),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" %
                      (epoch + 1, mAP, mPrec))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
Example #26
0
    is_verbose=is_verbose)

# lstm = LSTM(batch_size, embedding_size, vocab_size, hidden_size, max_size)

x = tf.placeholder(tf.int32, (batch_size, max_size - 1), name="x")
label = tf.placeholder(tf.int32, (batch_size, max_size - 1), name="label")
teacher_forcing = tf.placeholder(tf.bool, (), name="teacher_forcing")

output, softmax_output = lstm(x, label, vocab_size, hidden_size, max_size,
                              batch_size, embedding_size, teacher_forcing)

with tf.Session() as sess:
    onehot = tf.argmax(softmax_output, 1)

with tf.variable_scope("optimizer", reuse=tf.AUTO_REUSE):
    optimizer, loss = optimize(output, label, learning_rate)
    # perplexity = tf.pow(2, loss)
    tf.summary.scalar('loss', loss)
"""Now let's execute the graph in the session.

We ge a data batch with `dataloader.get_batch(batch_size)`. This fetches a batch of word sequences.

We then need to transform that into a batch of word index. We can achieve this with the helper function
`word_to_index_transform(word_to_index, word_batch)` defined before.

furthermore, we need to seperate the batch into the input batch and the target batch.
We will do that by separating the `max_size - 1` first index of the sequences into the input sequences and
assign the `max_size - 1` last tokens into the target sequences.
"""
nthreads_intra = args.nthreads // 2
nthreads_inter = args.nthreads - args.nthreads // 2
Example #27
0
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_multimodal_dataset(cfg.feature_root, train_session,
                                           cfg.feat, cfg.label_root)
    if cfg.task == "supervised":  # fully supervised task
        train_set = train_set[:cfg.label_num]
    batch_per_epoch = len(train_set) // cfg.sess_per_batch
    labeled_session = train_session[:cfg.label_num]

    val_session = cfg.val_session
    val_set = prepare_multimodal_dataset(cfg.feature_root, val_session,
                                         cfg.feat, cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        ####################### Load models here ########################
        sensors_emb_dim = 32
        segment_emb_dim = 32

        with tf.variable_scope("modality_core"):
            # load backbone model
            if cfg.network == "convtsn":
                model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                             emb_dim=cfg.emb_dim)
            elif cfg.network == "convrtsn":
                model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                              emb_dim=cfg.emb_dim)
            elif cfg.network == "convbirtsn":
                model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg,
                                                emb_dim=cfg.emb_dim)
            else:
                raise NotImplementedError

            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
            dropout_ph = tf.placeholder(tf.float32, shape=[])
            model_emb.forward(input_ph, dropout_ph)  # called inside the scope since the LSTM creates variables

        with tf.variable_scope("modality_sensors"):
            model_emb_sensors = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=sensors_emb_dim)
            model_pairsim_sensors = networks.PDDM(n_input=sensors_emb_dim)

            input_sensors_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 8])
            model_emb_sensors.forward(input_sensors_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_sensors"):
                    var_list[v.op.name.replace("modality_sensors/", "")] = v
            restore_saver_sensors = tf.train.Saver(var_list)

        with tf.variable_scope("modality_segment"):
            model_emb_segment = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=segment_emb_dim,
                                              n_input=357)
            model_pairsim_segment = networks.PDDM(n_input=segment_emb_dim)

            input_segment_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 357])
            model_emb_segment.forward(input_segment_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_segment"):
                    var_list[v.op.name.replace("modality_segment/", "")] = v
            restore_saver_segment = tf.train.Saver(var_list)

        ############################# Forward Pass #############################

        # Core branch
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # get the number of multimodal triplets (x3)
        mul_num_ph = tf.placeholder(tf.int32, shape=[])
        margins_ph = tf.placeholder(tf.float32, shape=[None])
        struct_num = tf.shape(margins_ph)[0] * 3
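        # each entry of margins_ph corresponds to one structure-mined triplet,
        # and each triplet occupies 3 rows (anchor, positive, negative) of the
        # input, hence the factor of 3.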

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding[:(tf.shape(embedding)[0] - mul_num_ph)],
                       [-1, 3, cfg.emb_dim]), 3, 1)
        anchor_hard, positive_hard, negative_hard = tf.unstack(
            tf.reshape(embedding[-mul_num_ph:-struct_num],
                       [-1, 3, cfg.emb_dim]), 3, 1)
        anchor_struct, positive_struct, negative_struct = tf.unstack(
            tf.reshape(embedding[-struct_num:], [-1, 3, cfg.emb_dim]), 3, 1)
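        # the batch is assumed to be laid out as consecutive (a, p, n) rows:
        # labeled triplets first, then hard multimodal triplets, then
        # structure-mined triplets; e.g. 4 labeled + 2 hard + 1 struct triplet
        # gives rows 0-11, 12-17 and 18-20 respectively
        # (mul_num_ph == 9, struct_num == 3).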

        # Sensors branch
        emb_sensors = model_emb_sensors.hidden
        A_sensors, B_sensors, C_sensors = tf.unstack(
            tf.reshape(emb_sensors, [-1, 3, sensors_emb_dim]), 3, 1)
        model_pairsim_sensors.forward(tf.stack([A_sensors, B_sensors], axis=1))
        pddm_AB_sensors = model_pairsim_sensors.prob[:, 1]
        model_pairsim_sensors.forward(tf.stack([A_sensors, C_sensors], axis=1))
        pddm_AC_sensors = model_pairsim_sensors.prob[:, 1]

        # Segment branch
        emb_segment = model_emb_segment.hidden
        A_segment, B_segment, C_segment = tf.unstack(
            tf.reshape(emb_segment, [-1, 3, segment_emb_dim]), 3, 1)
        model_pairsim_segment.forward(tf.stack([A_segment, B_segment], axis=1))
        pddm_AB_segment = model_pairsim_segment.prob[:, 1]
        model_pairsim_segment.forward(tf.stack([A_segment, C_segment], axis=1))
        pddm_AC_segment = model_pairsim_segment.prob[:, 1]

        # fuse prob from all modalities
        prob_AB = 0.5 * (pddm_AB_sensors + pddm_AB_segment)
        prob_AC = 0.5 * (pddm_AC_sensors + pddm_AC_segment)

        ############################# Calculate loss #############################

        # triplet loss for labeled inputs
        metric_loss1 = networks.triplet_loss(anchor, positive, negative,
                                             cfg.alpha)

        # weighted triplet loss for multimodal inputs (currently disabled):
        # if cfg.weighted:
        #     metric_loss2, _ = networks.weighted_triplet_loss(
        #         anchor_hard, positive_hard, negative_hard,
        #         prob_AB, prob_AC, cfg.alpha)
        # else:

        # triplet loss for hard examples from multimodal data
        metric_loss2 = networks.triplet_loss(anchor_hard, positive_hard,
                                             negative_hard, cfg.alpha)

        # margin-based triplet loss for structure mining from multimodal data
        metric_loss3 = networks.triplet_loss(anchor_struct, positive_struct,
                                             negative_struct, margins_ph)

        # whether to apply joint optimization
        if cfg.no_joint:
            unimodal_var_list = [
                v for v in tf.global_variables()
                if v.op.name.startswith("modality_core")
            ]
            train_var_list = unimodal_var_list
        else:
            multimodal_var_list = [
                v for v in tf.global_variables()
                if not (v.op.name.startswith("modality_sensors/RTSN")
                        or v.op.name.startswith("modality_segment/RTSN"))
            ]
            train_var_list = multimodal_var_list

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = tf.cond(
            tf.greater(mul_num_ph, 0),
            lambda: tf.cond(
                tf.equal(mul_num_ph, tf.shape(embedding)[0]),
                lambda: (metric_loss2 + metric_loss3 * 0.3) * cfg.lambda_multimodal
                        + regularization_loss * cfg.lambda_l2,
                lambda: metric_loss1
                        + (metric_loss2 + metric_loss3 * 0.3) * cfg.lambda_multimodal
                        + regularization_loss * cfg.lambda_l2),
            lambda: metric_loss1 + regularization_loss * cfg.lambda_l2)
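        # three regimes: no multimodal rows -> labeled triplet loss only; a
        # batch that is entirely multimodal -> hard + structure-mined losses
        # only; a mixed batch -> the sum of both, each case plus L2
        # regularization.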

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, train_var_list)

        saver = tf.train.Saver(max_to_keep=10)
        # variable histograms are not merged here because they cause problems
        # when only the unimodal train_op is run
        summary_op = tf.summary.merge_all()

        summ_prob_AB = tf.summary.histogram('Prob_AB_histogram', prob_AB)
        summ_prob_AC = tf.summary.histogram('Prob_AC_histogram', prob_AC)
        #        summ_weights = tf.summary.histogram('Weights_histogram', weights)

        #########################################################################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        feat2_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        feat3_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = multimodal_session_generator(
            feat_paths_ph,
            feat2_paths_ph,
            feat3_paths_ph,
            label_paths_ph,
            sess_per_batch=cfg.sess_per_batch,
            num_threads=2,
            shuffled=False,
            preprocess_func=[
                model_emb.prepare_input, model_emb_sensors.prepare_input,
                model_emb_segment.prepare_input
            ])
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_feats2 = []
        val_feats3 = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb.prepare_input_test
            )  # use prepare_input_test at test time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)

            eve2_batch, _, _ = load_data_and_label(
                session[1], session[-1], model_emb_sensors.prepare_input_test)
            val_feats2.append(eve2_batch)

            eve3_batch, _, _ = load_data_and_label(
                session[2], session[-1], model_emb_segment.prepare_input_test)
            val_feats3.append(eve3_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_feats2 = np.concatenate(val_feats2, axis=0)
        val_feats3 = np.concatenate(val_feats3, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for embedding visualization
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        #########################################################################

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            print("Restoring sensors model: %s" % cfg.sensors_path)
            restore_saver_sensors.restore(sess, cfg.sensors_path)
            print("Restoring segment model: %s" % cfg.segment_path)
            restore_saver_segment.restore(sess, cfg.segment_path)

            ################## Training loop ##################

            # Initialize the mean pairwise embedding distance for each class
            # on the validation set
            val_embeddings, _ = sess.run([embedding, set_emb],
                                         feed_dict={
                                             input_ph: val_feats,
                                             dropout_ph: 1.0
                                         })
            dist_dict = {}
            for i in range(np.max(val_labels) + 1):
                temp_emb = val_embeddings[np.where(val_labels == i)[0]]
                dist_dict[i] = [
                    np.mean(
                        utils.cdist(utils.all_diffs(temp_emb, temp_emb),
                                    metric=cfg.metric))
                ]
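            # dist_dict maps each class label to a running list of its mean
            # pairwise embedding distance on the validation set; it is updated
            # periodically during training and pickled to dist_dict.pkl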

            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In Defense of the
                # Triplet Loss for Person Re-Identification"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.01**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))
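                # e.g. with learning_rate=1e-3, static_epochs=100 and
                # max_epochs=600 (illustrative values only), the rate holds at
                # 1e-3 for the first 100 epochs, then decays exponentially
                # toward 1e-3 * 0.01 = 1e-5 at the end of training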

                # prepare data for this epoch
                random.shuffle(train_set)

                paths = list(zip(*[iter(train_set)] * cfg.sess_per_batch))
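                # zip(*[iter(seq)] * n) chunks a flat list into n-sized tuples
                # (dropping any remainder), e.g.:
                #   list(zip(*[iter([1, 2, 3, 4, 5])] * 2)) -> [(1, 2), (3, 4)]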

                feat_paths = [[p[0] for p in path] for path in paths]
                feat2_paths = [[p[1] for p in path] for path in paths]
                feat3_paths = [[p[2] for p in path] for path in paths]
                label_paths = [[p[-1] for p in path] for path in paths]

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             feat2_paths_ph: feat2_paths,
                             feat3_paths_ph: feat3_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        ##################### Data loading ########################
                        start_time = time.time()
                        eve, eve_sensors, eve_segment, lab, batch_sess = sess.run(
                            next_train)

                        # to limit memory use, keep at most
                        # cfg.event_per_batch (1000) events per batch
                        if eve.shape[0] > cfg.event_per_batch:
                            idx = np.random.permutation(
                                eve.shape[0])[:cfg.event_per_batch]
                            eve = eve[idx]
                            eve_sensors = eve_sensors[idx]
                            eve_segment = eve_segment[idx]
                            lab = lab[idx]
                            batch_sess = batch_sess[idx]
                        load_time = time.time() - start_time

                        ##################### Triplet selection #####################
                        start_time = time.time()
                        # Get the embeddings of all events
                        eve_embedding = np.zeros((eve.shape[0], cfg.emb_dim),
                                                 dtype='float32')
                        for start, end in zip(
                                range(0, eve.shape[0], cfg.batch_size),
                                range(cfg.batch_size,
                                      eve.shape[0] + cfg.batch_size,
                                      cfg.batch_size)):
                            end = min(end, eve.shape[0])
                            emb = sess.run(embedding,
                                           feed_dict={
                                               input_ph: eve[start:end],
                                               dropout_ph: 1.0
                                           })
                            eve_embedding[start:end] = np.copy(emb)

                        # sample triplets within sampled sessions
                        all_diff = utils.all_diffs(eve_embedding,
                                                   eve_embedding)
                        triplet_selected, active_count = utils.select_triplets_facenet(
                            lab, utils.cdist(all_diff, metric=cfg.metric),
                            cfg.triplet_per_batch, cfg.alpha)

                        hard_count = 0
                        struct_count = 0
                        multimodal_count = 0
                        # fall back to the unimodal triplets when multimodal
                        # training has not started yet; otherwise
                        # triplet_input_idx and triplet_count below are
                        # undefined (triplet_selected is assumed to be a list
                        # of (anchor, pos, neg) index triples)
                        triplet_input_idx = [
                            idx for tri in triplet_selected for idx in tri
                        ]
                        triplet_count = len(triplet_selected)
                        if epoch >= cfg.multimodal_epochs:
                            # Get the similarity of all events
                            sim_prob = np.full((eve.shape[0], eve.shape[0]),
                                               np.nan, dtype='float32')
                            comb = list(
                                itertools.combinations(range(eve.shape[0]), 2))
                            for start, end in zip(
                                    range(0, len(comb), cfg.batch_size),
                                    range(cfg.batch_size,
                                          len(comb) + cfg.batch_size,
                                          cfg.batch_size)):
                                end = min(end, len(comb))
                                comb_idx = []
                                for c in comb[start:end]:
                                    comb_idx.extend([c[0], c[1], c[1]])
                                sim = sess.run(prob_AB,
                                               feed_dict={
                                                   input_sensors_ph:
                                                   eve_sensors[comb_idx],
                                                   input_segment_ph:
                                                   eve_segment[comb_idx],
                                                   dropout_ph:
                                                   1.0
                                               })
                                for i in range(sim.shape[0]):
                                    sim_prob[comb[start + i][0],
                                             comb[start + i][1]] = sim[i]
                                    sim_prob[comb[start + i][1],
                                             comb[start + i][0]] = sim[i]
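                            # each unordered pair (A, B) is fed to the network
                            # as the triple (A, B, B), so prob_AB yields one
                            # pairwise similarity, written symmetrically above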

                            # sample triplets from the similarity predictions;
                            # the total never exceeds cfg.triplet_per_batch

                            triplet_input_idx, margins, triplet_count, hard_count, struct_count = select_triplets_mul(
                                triplet_selected, lab, sim_prob, dist_dict,
                                cfg.triplet_per_batch, 3, 0.8, 0.2)

                            # add up all multimodal triplets
                            multimodal_count = hard_count + struct_count

                            sensors_input = eve_sensors[
                                triplet_input_idx[-(3 * multimodal_count):]]
                            segment_input = eve_segment[
                                triplet_input_idx[-(3 * multimodal_count):]]

                        print(triplet_count, hard_count, struct_count)
                        triplet_input = eve[triplet_input_idx]

                        select_time = time.time() - start_time

                        if len(triplet_input.shape) > 5:  # debugging
                            pdb.set_trace()

                        ##################### Start training  ########################

                        # supervised initialization
                        if multimodal_count == 0:
                            if triplet_count == 0:
                                continue
                            err, metric_err1, _, step, summ = sess.run(
                                [
                                    total_loss, metric_loss1, train_op,
                                    global_step, summary_op
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    dropout_ph: cfg.keep_prob,
                                    mul_num_ph: 0,
                                    lr_ph: learning_rate
                                })
                            metric_err2 = 0
                            metric_err3 = 0
                        else:
                            err, metric_err1, metric_err2, metric_err3, _, step, summ, s_AB, s_AC = sess.run(
                                [
                                    total_loss, metric_loss1, metric_loss2,
                                    metric_loss3, train_op, global_step,
                                    summary_op, summ_prob_AB, summ_prob_AC
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    input_sensors_ph: sensors_input,
                                    input_segment_ph: segment_input,
                                    mul_num_ph: multimodal_count * 3,
                                    margins_ph: margins,
                                    dropout_ph: cfg.keep_prob,
                                    lr_ph: learning_rate
                                })
                            summary_writer.add_summary(s_AB, step)
                            summary_writer.add_summary(s_AC, step)


                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tLoad time: %.3f\tSelect time: %.3f\tLoss %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], triplet_count+multimodal_count, load_time, select_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="active_count",
                                             simple_value=active_count),
                            tf.Summary.Value(tag="triplet_count",
                                             simple_value=triplet_count),
                            tf.Summary.Value(tag="hard_count",
                                             simple_value=hard_count),
                            tf.Summary.Value(tag="struct_count",
                                             simple_value=struct_count),
                            tf.Summary.Value(tag="metric_loss1",
                                             simple_value=metric_err1),
                            tf.Summary.Value(tag="metric_loss3",
                                             simple_value=metric_err3),
                            tf.Summary.Value(tag="metric_loss2",
                                             simple_value=metric_err2)
                        ])

                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 dropout_ph: 1.0
                                             })
                mAP, mPrec, recall = utils.evaluate_simple(
                    val_embeddings, val_labels)
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Validation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation Recall@1",
                                     simple_value=recall),
                    tf.Summary.Value(tag="Validation mPrec",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" %
                      (epoch + 1, mAP, mPrec))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # update dist_dict
                if (epoch + 1) == 50 or (epoch + 1) % 200 == 0:
                    for i in dist_dict.keys():
                        temp_emb = val_embeddings[np.where(val_labels == i)[0]]
                        dist_dict[i].append(
                            np.mean(
                                utils.cdist(utils.all_diffs(
                                    temp_emb, temp_emb),
                                            metric=cfg.metric)))

                    pickle.dump(
                        dist_dict,
                        open(os.path.join(result_dir, 'dist_dict.pkl'), 'wb'))

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
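
# A minimal NumPy sketch of FaceNet-style semi-hard triplet selection,
# approximating what utils.select_triplets_facenet above is assumed to do
# (the repo's actual signature, tie-breaking and return values may differ):
import numpy as np

def select_semihard_triplets(labels, dist, alpha):
    """labels: (n,) int array; dist: (n, n) pairwise distances; alpha: margin."""
    triplets = []
    for a in range(len(labels)):
        pos = np.where((labels == labels[a]) & (np.arange(len(labels)) != a))[0]
        neg = np.where(labels != labels[a])[0]
        for p in pos:
            # semi-hard negatives: farther than the positive, yet still
            # violating the margin d(a, p) + alpha > d(a, n)
            ok = (dist[a, neg] > dist[a, p]) & (dist[a, neg] < dist[a, p] + alpha)
            if ok.any():
                n = neg[ok][np.argmin(dist[a, neg][ok])]
                triplets.append((a, int(p), int(n)))
    return triplets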
Example #28
0
z_posterior = posterior_mvn.sample()
seg = model.Fcomb(unet_seg, z_posterior, 'posterior')

# ================================================================
#                LOSS
# ================================================================
dice_coef = utils.dice_coef(y_true, unet_seg)

loss, kl, ce = utils.elbo(y_true, seg, prior_mvn, posterior_mvn, 2)

lambda_ = 1e-6
l2_norms = [tf.nn.l2_loss(v) for v in tf.trainable_variables()]
l2_norm = tf.reduce_sum(l2_norms)
reg_loss = loss + lambda_ * l2_norm

optimizer = utils.optimize(reg_loss)

# ================================================================
#               SAVER
# ================================================================

tf.add_to_collection('saved_variables', value=images)
tf.add_to_collection('saved_variables', value=y_true)

tf.add_to_collection('saved_variables', value=unet_seg)
tf.add_to_collection('saved_variables', value=posterior_mvn)
tf.add_to_collection('saved_variables', value=z_posterior)
tf.add_to_collection('saved_variables', value=seg)
tf.add_to_collection('saved_variables', value=prior_mvn)

tf.add_to_collection('saved_variables', value=reg_loss)
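
# A minimal sketch of the regularized ELBO objective assembled above; since
# utils.elbo is not shown, the decomposition below (pixel-wise cross-entropy
# plus a KL term between posterior and prior) is an illustrative assumption,
# and the role of its final argument is guessed to be the KL weight:
import tensorflow as tf

def elbo_sketch(y_true, logits, prior_mvn, posterior_mvn, beta):
    """y_true: one-hot labels; logits: decoder output; beta: KL weight."""
    ce = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true,
                                                   logits=logits))
    kl = tf.reduce_mean(tf.distributions.kl_divergence(posterior_mvn,
                                                       prior_mvn))
    return ce + beta * kl, kl, ce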
Example #29
0
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat,
                                cfg.label_root)
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        if cfg.network == "tsn":
            model = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)

        # get the embedding
        input_ph = tf.placeholder(tf.float32,
                                  shape=[None, cfg.num_seg, None, None, None])
        label_ph = tf.placeholder(tf.float32, shape=[None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)
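        # assigning with validate_shape=False lets emb_var take on the
        # batch-dependent shape of `embedding` at run time, so the projector
        # can visualize however many validation embeddings are produced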

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        metric_loss, num_active, diff, weights, fp, cn = networks.lifted_loss(
            all_dist, label_ph, cfg.alpha)

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = metric_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('active_ratio', num_active)
        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = session_generator(feat_paths_ph,
                                       label_paths_ph,
                                       sess_per_batch=cfg.sess_per_batch,
                                       num_threads=2,
                                       shuffled=False,
                                       preprocess_func=model.prepare_input)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_feats = []
        val_labels = []
        for session in val_set:
            eve_batch, lab_batch, _ = load_data_and_label(
                session[0], session[1], model.prepare_input_test
            )  # use prepare_input_test at test time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for embedding visualization
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            for v in val_labels:
                fout.write('%d\n' % int(v))

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In Defense of the
                # Triplet Loss for Person Re-Identification"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                feat_paths = [path[0] for path in train_set]
                label_paths = [path[1] for path in train_set]
                # reshape a list to list of list
                # interesting hacky code from: https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python
                feat_paths = list(zip(*[iter(feat_paths)] *
                                      cfg.sess_per_batch))
                label_paths = list(
                    zip(*[iter(label_paths)] * cfg.sess_per_batch))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        # First, sample sessions for a batch
                        start_time_select = time.time()
                        eve, se, lab = sess.run(next_train)
                        select_time1 = time.time() - start_time_select

                        # Second, select samples for a batch
                        batch_idx = select_batch(lab, cfg.batch_size)
                        eve = eve[batch_idx]
                        lab = lab[batch_idx]

                        # Third, perform training on a batch
                        start_time_train = time.time()
                        err, _, step, summ, diff_v, weights_v, fp_v, cn_v, dist_v = sess.run(
                            [
                                total_loss, train_op, global_step, summary_op,
                                diff, weights, fp, cn, all_dist
                            ],
                            feed_dict={
                                input_ph: eve,
                                dropout_ph: cfg.keep_prob,
                                label_ph: np.squeeze(lab),
                                lr_ph: learning_rate
                            })

                        train_time = time.time() - start_time_train
                        print ("Epoch: [%d][%d/%d]\tEvent num: %d\tSelect_time: %.3f\tTrain_time: %.3f\tLoss %.4f" % \
                                (epoch+1, batch_count, batch_per_epoch, eve.shape[0], select_time1, train_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="select_time1",
                                             simple_value=select_time1)
                        ])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 dropout_ph: 1.0
                                             })
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Validation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation mPrec",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
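
# A minimal NumPy sketch of the lifted structured loss referenced above via
# networks.lifted_loss (Oh Song et al., CVPR 2016); the extra diagnostics the
# repo's version returns (num_active, diff, weights, fp, cn) are omitted here:
import numpy as np

def lifted_loss_np(dist, labels, alpha):
    """dist: (n, n) pairwise distances; labels: (n,); alpha: margin."""
    same = labels[:, None] == labels[None, :]
    terms = []
    n = dist.shape[0]
    for i in range(n):
        for j in range(i + 1, n):
            if not same[i, j]:
                continue
            # log-sum-exp of margin violations over the negatives of i and j
            neg = np.concatenate([alpha - dist[i][~same[i]],
                                  alpha - dist[j][~same[j]]])
            j_ij = np.log(np.exp(neg).sum()) + dist[i, j]
            terms.append(max(j_ij, 0.0) ** 2)
    return np.sum(terms) / (2.0 * max(len(terms), 1))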
Example #30
0
y_pred_cls = tf.argmax(y_pred, axis=1)  # `dimension` is deprecated in favor of `axis`
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

session = tf.Session()
session.run(tf.global_variables_initializer())

utils.print_test_accuracy(session=session,
                          x=x,
                          y_true=y_true,
                          y_pred_cls=y_pred_cls,
                          show_example_errors=False,
                          show_confusion_matrix=False)
utils.optimize(session=session,
               x=x,
               y_true=y_true,
               num_iterations=1,
               accuracy=accuracy,
               optimizer=optimizer)
utils.print_test_accuracy(session=session,
                          x=x,
                          y_true=y_true,
                          y_pred_cls=y_pred_cls)
utils.optimize(session=session,
               x=x,
               y_true=y_true,
               num_iterations=99,
               accuracy=accuracy,
               optimizer=optimizer)  # We already performed 1 iteration above.
utils.print_test_accuracy(session=session,
                          x=x,
                          y_true=y_true,