Example #1
def main():
    """Entry point."""
    set_logging_verbosity(3)

    placeholders = {
        "image":
        tf.placeholder(tf.float32,
                       shape=(None, NUM_CHANNELS, *IMAGE_SHAPE),
                       name="image")
    }

    pretrain_autoencoder(placeholders)
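
The leading None lets any batch size be fed, and the * unpacks IMAGE_SHAPE into
channels-first (NCHW) dimensions. A quick sketch with hypothetical values for the
constants (the real ones live elsewhere in this project):

import numpy as np
import tensorflow as tf

NUM_CHANNELS = 1        # hypothetical values for illustration only
IMAGE_SHAPE = (28, 28)

image_ph = tf.placeholder(tf.float32,
                          shape=(None, NUM_CHANNELS, *IMAGE_SHAPE),
                          name="image")

batch = np.zeros((4, NUM_CHANNELS, *IMAGE_SHAPE), dtype=np.float32)
with tf.Session() as sess:
    # The placeholder accepts any leading batch dimension.
    print(sess.run(tf.shape(image_ph), feed_dict={image_ph: batch}))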
Example #2
                     ("Hidden dimensions for RNN hidden vectors."))
flags.DEFINE_integer(
    "output_hidden_dim", 128,
    ("Hidden dimensions for output hidden vectors before softmax layer."))
flags.DEFINE_float("word_dropout_keep_prob", 0.9,
                   ("Dropout keep rate for word embeddings."))
flags.DEFINE_float(
    "recurrent_dropout_keep_prob", 0.6,
    ("Dropout keep rate for recurrent input and output vectors."))
flags.DEFINE_float("output_dropout_keep_prob", 0.5,
                   ("Dropout keep rate for output vectors."))

FLAGS = flags.FLAGS
opts = FLAGS.__flags  # dict TODO: make class?

set_logging_verbosity(FLAGS.logging_verbosity)


def _get_n_batches(batch_size, corpus_size):
    return int(corpus_size // batch_size)


def set_initial_ops():
    local_init_op = tf.local_variables_initializer()
    global_init_op = tf.global_variables_initializer()
    init_op = tf.group(local_init_op, global_init_op)
    return init_op


def set_train_op(loss, tvars):
    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
    # The original snippet is truncated here; a typical completion (assumed)
    # computes gradients and applies them:
    grads = tf.gradients(loss, tvars)
    train_op = optimizer.apply_gradients(zip(grads, tvars))
    return train_op
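
A sketch of how these helpers might be wired together; loss, corpus_size, and
the batch-size flag are assumptions, not part of the original snippet:

# Hypothetical wiring of the helpers above.
tvars = tf.trainable_variables()
train_op = set_train_op(loss, tvars)
init_op = set_initial_ops()

with tf.Session() as sess:
    sess.run(init_op)
    for _ in range(_get_n_batches(FLAGS.batch_size, corpus_size)):
        sess.run(train_op)  # plus the appropriate feed_dict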
Example #3
def main():
    """."""
    set_logging_verbosity(3)

    net = PSPNet34(SEMSEG_SHAPE, FLAGS.logs_dir, SEMSEG_NUM_CLASSES, FLAGS.learning_rate)

    with net.graph.as_default():

        global_step = tf.Variable(0, name='global_step', trainable=False)
        best_loss = tf.Variable(1e7, name='best_loss', trainable=False, dtype=tf.float32)
        best_iou = tf.Variable(0, name='best_iou', trainable=False, dtype=tf.float32)
        best_px_acc = tf.Variable(0, name='best_px_acc', trainable=False, dtype=tf.float32)

        # Get shortcuts to all placeholders as well as the computation graph
        image = net.placeholders["image"]
        keep_prob = net.placeholders["keep_probability"]
        annotation = net.placeholders["annotation"]
        is_training = net.placeholders["is_training"]

        # Add summary ops for tensorboard
        rgb_image = tf.reverse(image, axis=[3])
        tf.summary.image("input_image", rgb_image, max_outputs=4)
        tf.summary.image("ground_truth", tf.cast(tf.multiply(annotation, 255), tf.uint8), max_outputs=4)

        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=net.logits,
                labels=tf.squeeze(annotation, axis=[3]),
                name="entropy"))

        #loss = utils.semseg_loss(annotation, logits)
        tf.summary.scalar("entropy", loss)

        # Create expression for running mean IoU and pixel accuracy
        pred_flatten = tf.reshape(net.predict, [-1,])
        label_flatten = tf.reshape(annotation, [-1,])

        mask = tf.not_equal(label_flatten, IGNORE_LABEL)
        indices = tf.squeeze(tf.where(mask), 1)
        gt = tf.cast(tf.gather(label_flatten, indices), tf.int32)
        pred = tf.gather(pred_flatten, indices)

        mIoU, miou_update_op = tf.metrics.mean_iou(
            predictions=pred, labels=gt, num_classes=SEMSEG_NUM_CLASSES, name="mIoU")
        px_acc, px_acc_update_op = tf.metrics.accuracy(predictions=pred, labels=gt, name="px_acc")

        # Group the update ops for both metrics for convenience
        metrics_update_op = tf.group(miou_update_op, px_acc_update_op)

        # Make an operation to reset running variables for mIoU and pixel accuracy so
        # we can reset mIoU for each validation
        running_miou_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="mIoU")
        running_px_acc_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="px_acc")

        running_metrics_init = tf.group(tf.variables_initializer(var_list=running_miou_vars),
                                        tf.variables_initializer(var_list=running_px_acc_vars))

        # Create the training operation, summary operation, and session
        trainable_var = tf.trainable_variables()
        train_op = net.add_train_ops(loss, trainable_var, global_step)

        logging.info("Setting up summary op...")
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, net.sess.graph)

        path_to_train_tfrecord = os.path.join(FLAGS.path_to_records, "{}_train.tfrecords".format(FLAGS.records_prefix))
        path_to_val_tfrecord = os.path.join(FLAGS.path_to_records, "{}_val.tfrecords".format(FLAGS.records_prefix))

        image_batch, label_batch = build_tfrecords_batch(
            path_to_train_tfrecord,
            image_shape=SEMSEG_SHAPE,
            batch_size=FLAGS.batch_size,
            max_iterations=MAX_ITERATIONS,
            min_after_dequeue=FLAGS.min_after_dequeue)

        valid_image_batch, valid_label_batch = build_tfrecords_batch(
            path_to_val_tfrecord,
            image_shape=SEMSEG_SHAPE,
            batch_size=FLAGS.batch_size,
            max_iterations=MAX_ITERATIONS,
            min_after_dequeue=FLAGS.min_after_dequeue)

        # Re-run global variable initializer for the training ops just added and tfrecords
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        net.sess.run(init_op)

        # We need to reload the model since we just wiped all variables by reiniting
        net.load_network_weights(net.network_weights_loc)

        if FLAGS.mode == "train":
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=net.sess, coord=coord)

            # Do the training
            for _ in range(MAX_ITERATIONS):
                itr = tf.train.global_step(net.sess, global_step)

                train_images, train_annotations = net.sess.run([image_batch, label_batch])

                feed_dict = {image: train_images, annotation: train_annotations,
                             keep_prob: 0.85, is_training: True}

                net.sess.run(train_op, feed_dict=feed_dict)

                if itr % 10 == 0 and itr > 0:
                    train_loss, summary_str = net.sess.run([loss, summary_op], feed_dict=feed_dict)
                    logging.info("Step: %d, Train_loss:%g" % (itr, train_loss))
                    summary_writer.add_summary(summary_str, itr)

                if itr % 500 == 0 and itr > 0:
                    num_batches_per_epoch = (FLAGS.num_samples_per_val // FLAGS.batch_size) + 1
                    val_losses = []

                    # Reset mIoU and pixel accuracy running vars so we get a fresh evaluation
                    # for each validation
                    net.sess.run(running_metrics_init)

                    for _ in range(num_batches_per_epoch):

                        val_images, val_annotations = net.sess.run([valid_image_batch, valid_label_batch])
                        val_feed_dict = {image: val_images,
                                         annotation: val_annotations,
                                         keep_prob: 1.0, is_training: False}

                        val_loss, _ = net.sess.run([loss, metrics_update_op], feed_dict=val_feed_dict)
                        val_losses.append(val_loss)

                    avg_val_loss = np.mean(val_losses)
                    best_loss_val = net.sess.run(best_loss)

                    # Get current and best mean IoUs and pixel accuracies
                    val_iou, best_iou_val = net.sess.run([mIoU, best_iou])
                    val_px_acc, best_px_acc_val = net.sess.run([px_acc, best_px_acc])

                    logging.info(
                        "{} ---> Validation_loss: {} (best: {}), Validation_IoU: {} (best: {}), "
                        "Validation pixel accuracy: {} (best: {})".format(
                                    datetime.datetime.now(),
                                    avg_val_loss, best_loss_val,
                                    val_iou, best_iou_val,
                                    val_px_acc, best_px_acc_val))

                    if avg_val_loss < best_loss_val:
                        net.sess.run(best_loss.assign(avg_val_loss))
                        net.sess.run(best_iou.assign(val_iou))
                        net.sess.run(best_px_acc.assign(val_px_acc))
                        logging.info(
                            "New best loss: {} with iou: {} and px acc: {} at step {}".format(
                                avg_val_loss, val_iou, val_px_acc, itr))

                        net.saver.save(
                            net.sess, os.path.join(FLAGS.logs_dir, "best_model.ckpt"),
                            global_step=itr)

                if itr >= MAX_ITERATIONS:
                    break

            coord.request_stop()
            coord.join(threads)
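
The reset pattern above works because tf.metrics.* store their accumulators as
local variables under the metric's name scope. A minimal, self-contained sketch
of the same trick (illustrative names, not from this project):

import numpy as np
import tensorflow as tf

labels = tf.placeholder(tf.int32, shape=(None,))
preds = tf.placeholder(tf.int32, shape=(None,))

# tf.metrics.* keep their running totals in LOCAL_VARIABLES under `name`,
# so collecting that scope and re-initializing it resets the metric.
miou, miou_update = tf.metrics.mean_iou(
    labels=labels, predictions=preds, num_classes=3, name="mIoU")
running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="mIoU")
reset_op = tf.variables_initializer(running_vars)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    for _ in range(2):      # e.g. one pass per validation round
        sess.run(reset_op)  # fresh accumulators each round
        sess.run(miou_update, {labels: np.array([0, 1, 2]),
                               preds: np.array([0, 1, 1])})
        print(sess.run(miou))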
Example #4
def main():
    """Entry point."""
    set_logging_verbosity(3)

    net = ICNet(SEMSEG_SHAPE, FLAGS.logs_dir, SEMSEG_NUM_CLASSES,
                FLAGS.learning_rate)

    with net.graph.as_default():

        global_step = tf.Variable(0, name='global_step', trainable=False)
        best_loss = tf.Variable(1e7,
                                name='best_loss',
                                trainable=False,
                                dtype=tf.float32)
        best_iou = tf.Variable(0,
                               name='best_iou',
                               trainable=False,
                               dtype=tf.float32)
        best_px_acc = tf.Variable(0,
                                  name='best_px_acc',
                                  trainable=False,
                                  dtype=tf.float32)

        # Get shortcuts to all placeholders as well as the computation graph
        image = net.placeholders["image"]
        keep_prob = net.placeholders["keep_probability"]
        annotation = net.placeholders["annotation"]
        is_training = net.placeholders["is_training"]

        # Add summary ops for tensorboard
        rgb_image = tf.reverse(image, axis=[3])
        tf.summary.image("input_image", rgb_image, max_outputs=4)
        tf.summary.image("ground_truth",
                         tf.cast(tf.multiply(annotation, 255), tf.uint8),
                         max_outputs=4)

        # Create the cascade label-guidance loss expression
        lambda1, lambda2, lambda3 = 0.16, 0.4, 1.0
        sub_4_loss = create_loss(net.low_res_logits, annotation,
                                 SEMSEG_NUM_CLASSES, IGNORE_LABEL)
        sub_24_loss = create_loss(net.med_res_logits, annotation,
                                  SEMSEG_NUM_CLASSES, IGNORE_LABEL)
        sub_124_loss = create_loss(net.high_res_logits, annotation,
                                   SEMSEG_NUM_CLASSES, IGNORE_LABEL)
        loss = lambda1 * sub_4_loss + lambda2 * sub_24_loss + lambda3 * sub_124_loss
        tf.summary.scalar("entropy", loss)

        # Create expression for running mean IoU
        pred_flatten = tf.reshape(net.predict, [-1])
        label_flatten = tf.reshape(annotation, [-1])

        mask = tf.not_equal(label_flatten, IGNORE_LABEL)
        indices = tf.squeeze(tf.where(mask), 1)
        gt = tf.cast(tf.gather(label_flatten, indices), tf.int32)
        pred = tf.gather(pred_flatten, indices)

        mIoU, miou_update_op = tf.metrics.mean_iou(
            predictions=pred,
            labels=gt,
            num_classes=SEMSEG_NUM_CLASSES,
            name="mIoU")
        px_acc, px_acc_update_op = tf.metrics.accuracy(predictions=pred,
                                                       labels=gt,
                                                       name="px_acc")

        # Group the update ops for both metrics for convenience
        metrics_update_op = tf.group(miou_update_op, px_acc_update_op)

        # Make an operation to reset running variables for mIoU and pixel accuracy so
        # we can reset mIoU for each validation
        running_miou_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                              scope="mIoU")
        running_px_acc_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                                scope="px_acc")

        running_metrics_init = tf.group(
            tf.variables_initializer(var_list=running_miou_vars),
            tf.variables_initializer(var_list=running_px_acc_vars))

        # Create the training and summary operations
        trainable_var = tf.trainable_variables()
        train_op = net.add_train_ops(loss, trainable_var, global_step)

        logging.info("Setting up summary op...")
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, net.sess.graph)

        # We assume a single record file each for train, val, and test, but
        # build_tfrecords_batch can accept arbitrary paths
        path_to_train_tfrecord = os.path.join(
            FLAGS.path_to_records,
            "{}_train.tfrecords".format(FLAGS.records_prefix))
        path_to_val_tfrecord = os.path.join(
            FLAGS.path_to_records,
            "{}_val.tfrecords".format(FLAGS.records_prefix))
        path_to_test_tfrecord = os.path.join(
            FLAGS.path_to_records,
            "{}_test.tfrecords".format(FLAGS.records_prefix))

        if FLAGS.mode in ["train", "visualize"]:
            # Create data generators for train and validation
            image_batch, label_batch = build_tfrecords_batch(
                path_to_train_tfrecord,
                input_shape=SEMSEG_SHAPE,
                batch_size=FLAGS.batch_size,
                min_after_dequeue=FLAGS.min_after_dequeue,
                max_iterations=MAX_ITERATIONS)
            val_image_batch, val_label_batch = build_tfrecords_batch(
                path_to_val_tfrecord,
                input_shape=SEMSEG_SHAPE,
                batch_size=FLAGS.batch_size,
                min_after_dequeue=FLAGS.min_after_dequeue,
                max_iterations=MAX_ITERATIONS)

        # Re-run global variable initializer for the training ops just added and tfrecords
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        net.sess.run(init_op)

        # We need to reload the model since we just wiped all variables by reiniting
        net.load_network_weights(net.network_weights_loc)

        if FLAGS.mode == "train":
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=net.sess, coord=coord)

            # Begin training
            for _ in range(MAX_ITERATIONS):
                itr = tf.train.global_step(net.sess, global_step)

                train_images, train_annotations = net.sess.run(
                    [image_batch, label_batch])

                feed_dict = {
                    image: train_images,
                    annotation: train_annotations,
                    keep_prob: 0.85,
                    is_training: True
                }

                net.sess.run(train_op, feed_dict=feed_dict)

                if itr % 10 == 0 and itr > 0:
                    train_loss, summary_str = net.sess.run([loss, summary_op],
                                                           feed_dict=feed_dict)

                    logging.info("Step: %d, Train_loss:%g" % (itr, train_loss))
                    summary_writer.add_summary(summary_str, itr)

                if itr % 500 == 0 and itr > 0:
                    num_batches_per_epoch = (FLAGS.num_samples_per_val //
                                             FLAGS.batch_size) + 1
                    val_losses = []

                    # Reset mIoU running vars so we get a fresh evaluation for each validation
                    net.sess.run(running_metrics_init)

                    for _ in range(num_batches_per_epoch):

                        val_images, val_annotations = net.sess.run(
                            [val_image_batch, val_label_batch])
                        val_feed_dict = {
                            image: val_images,
                            annotation: val_annotations,
                            keep_prob: 1.0,
                            is_training: False
                        }

                        val_loss, _ = net.sess.run([loss, metrics_update_op],
                                                   feed_dict=val_feed_dict)
                        val_losses.append(val_loss)

                    avg_val_loss = np.mean(val_losses)
                    best_loss_val = net.sess.run(best_loss)

                    # Get current and best mean IoUs and pixel accuracies
                    val_iou, best_iou_val = net.sess.run([mIoU, best_iou])
                    val_px_acc, best_px_acc_val = net.sess.run(
                        [px_acc, best_px_acc])

                    logging.info(
                        "{} ---> Validation_loss: {} (best: {}), Validation_IoU: {} (best: {}), "
                        "Validation pixel accuracy: {} (best: {})".format(
                            datetime.datetime.now(), avg_val_loss,
                            best_loss_val, val_iou, best_iou_val, val_px_acc,
                            best_px_acc_val))

                    # If this is the best we've done on validation, update checkpoint
                    if avg_val_loss < best_loss_val:
                        net.sess.run(best_loss.assign(avg_val_loss))
                        net.sess.run(best_iou.assign(val_iou))
                        net.sess.run(best_px_acc.assign(val_px_acc))
                        logging.info(
                            "New best loss: {} with iou: {} and px acc: {} at step {}"
                            .format(avg_val_loss, val_iou, val_px_acc, itr))

                        net.saver.save(net.sess,
                                       os.path.join(FLAGS.logs_dir,
                                                    "best_model.ckpt"),
                                       global_step=itr)

                if itr >= MAX_ITERATIONS:
                    break

            coord.request_stop()
            coord.join(threads)

        elif FLAGS.mode == "test":
            if FLAGS.dataset == "ade20k":
                # We don't have ade20k test data, so test on validation set
                test_generator = read_and_decode_semseg_tfrecord(
                    path_to_val_tfrecord)
            else:
                test_generator = read_and_decode_semseg_tfrecord(
                    path_to_test_tfrecord)

            all_losses = []
            for X, Y in tqdm(test_generator, desc="Testing"):
                X, Y = np.expand_dims(X, axis=0), np.expand_dims(Y, axis=0)

                feed_dict = {
                    image: X,
                    annotation: Y,
                    keep_prob: 1.0,
                    is_training: False
                }
                test_loss, _ = net.sess.run([loss, metrics_update_op],
                                            feed_dict=feed_dict)

                all_losses.append(test_loss)

            test_iou = net.sess.run(mIoU)
            logging.info("Test Loss: {}, mIoU: {}".format(
                np.mean(all_losses), test_iou))
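
create_loss is not shown in this example; for ICNet-style cascade training it
typically resizes the labels to each branch's logits resolution, drops the
ignore_label pixels, and averages the sparse softmax cross-entropy. A hedged
sketch of such a helper (an assumption, not necessarily this project's
implementation):

def create_loss(logits, labels, num_classes, ignore_label):
    """Sketch of a cascade-branch loss (assumed implementation)."""
    logits_size = tf.shape(logits)[1:3]
    # Nearest-neighbour keeps the labels integral after resizing.
    labels = tf.image.resize_nearest_neighbor(labels, logits_size)
    labels = tf.reshape(labels, [-1])
    logits = tf.reshape(logits, [-1, num_classes])

    # Drop pixels marked with the ignore label, as in the mIoU masking above.
    mask = tf.not_equal(labels, ignore_label)
    indices = tf.squeeze(tf.where(mask), 1)
    labels = tf.cast(tf.gather(labels, indices), tf.int32)
    logits = tf.gather(logits, indices)

    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    return tf.reduce_mean(loss)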
Example #5
def main():
    """Entry point."""
    set_logging_verbosity(3)

    # Make placeholder and get all hooks into the graph we'll need
    placeholders = {
        "image":
        tf.placeholder(tf.float32,
                       shape=(None, NUM_CHANNELS, *IMAGE_SHAPE),
                       name="image"),
        "target":
        tf.placeholder(tf.float32, shape=(None, k), name="target")
    }

    logging.info("Building autoencoder")
    # Define the autoencoder graph, and define a saver and session
    inp, encode_fn, decode_fn = build_autoencoder_graph(placeholders)
    saver, sess = tf.train.Saver(), tf.Session()

    # Get full X and y for MNIST dataset and encode X into latent space Z
    X, y = load_mnist()
    Z = encode_samples(X, placeholders["image"], encode_fn, sess, saver)

    # Perform KMeans clustering on Z to initialize the weights of centroids
    # used in the cluster layer (i.e. the layer which computes Student's T distribution)
    logging.info("Initializing cluster centers with k-means.")
    kmeans = KMeans(n_clusters=k, n_init=20)
    y_pred = kmeans.fit_predict(Z)
    y_pred_last = np.copy(y_pred)

    logging.info(
        "Adding clustering layer and initializing with kmeans centroids")
    cluster = cluster_layer(encode_fn, k, weights=kmeans.cluster_centers_)

    # Create loss and train ops for IDEC; set gamma=1 and comment out Lr to reduce the objective to DEC
    with tf.name_scope("loss"):
        cross_entropy = -tf.reduce_sum(
            placeholders["target"] * tf.log(cluster))
        entropy = -tf.reduce_sum(
            placeholders["target"] * tf.log(placeholders["target"] + 0.00001))
        Lc = kl_divergence = cross_entropy - entropy

        Lr = tf.losses.mean_squared_error(inp, decode_fn)

        loss = Lr + gamma * Lc

    # Add optimization ops to the graph, then reinitialize all trainable
    # variables and restore AE weights (since we reinitialized)
    trainable_var = tf.trainable_variables()
    train_op = add_train_ops(loss, trainable_var)
    load_autoencoder_weights(sess, saver)

    train_loss, index = 0, 0
    for itr in range(int(MAX_TRAIN_ITERATIONS)):
        if itr % UPDATE_P_EVERY_N_EPOCHS == 0:
            # Recompute q and p
            q = sess.run(cluster, feed_dict={placeholders["image"]: X})
            p = target_distribution(q)

            y_pred = q.argmax(1)
            if y is not None:
                acc = np.round(cluster_accuracy(y, y_pred), 5)
                nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred),
                               5)
                ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
                train_loss = np.round(train_loss, 5)
                logging.info("Itr={}, Acc={}, NMI={}, ARI={}, LOSS={}".format(
                    itr, acc, nmi, ari, train_loss / Nm))

            # check stop criterion
            delta_label = np.sum(y_pred != y_pred_last).astype(
                np.float32) / y_pred.shape[0]
            y_pred_last = y_pred

            if itr > 0 and delta_label < tol:
                logging.info(
                    "Reached tolerance threshold (delta: {} < tol: {}). "
                    "Stopping training.".format(delta_label, tol))
                break

        # Train on one batch at a time; using >= avoids feeding an empty final
        # batch when X.shape[0] is an exact multiple of Nm
        if (index + 1) * Nm >= X.shape[0]:
            feed_dict = {
                placeholders["image"]: X[index * Nm::],
                placeholders["target"]: p[index * Nm::]
            }

            sess.run(train_op, feed_dict=feed_dict)
            train_loss = sess.run(loss, feed_dict=feed_dict)
            index = 0
        else:
            feed_dict = {
                placeholders["image"]: X[index * Nm:(index + 1) * Nm],
                placeholders["target"]: p[index * Nm:(index + 1) * Nm]
            }

            sess.run(train_op, feed_dict=feed_dict)
            train_loss = sess.run(loss, feed_dict=feed_dict)
            index += 1

        if itr % TRAIN_SAVE_EVERY_N_EPOCHS == 0 and itr > 0:
            logging.info("Saving model at epoch: {}".format(itr))
            saver.save(sess, os.path.join(IDEC_LOGDIR, "model.ckpt"), itr)
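
target_distribution and cluster_layer are used above but not shown. Assuming
they follow the DEC/IDEC papers, they might look like the sketch below (not
this project's actual code): the clustering layer computes soft assignments q
with a Student's t kernel around the k centroids, and the target p sharpens q.

import tensorflow as tf


def target_distribution(q):
    """DEC's auxiliary target: sharpen q and renormalize per sample."""
    weight = q ** 2 / q.sum(axis=0)
    return (weight.T / weight.sum(axis=1)).T


def cluster_layer(z, k, weights):
    """Soft assignments of latent vectors z to k centroids.

    weights is the (k, latent_dim) array of k-means centroids used
    above for initialization.
    """
    assert weights.shape[0] == k
    centroids = tf.Variable(weights, dtype=tf.float32, name="centroids")
    # q_ij is proportional to (1 + ||z_i - mu_j||^2)^-1 (Student's t with
    # one degree of freedom), normalized over the k clusters.
    sq_dist = tf.reduce_sum(
        tf.square(tf.expand_dims(z, axis=1) - centroids), axis=2)
    q = 1.0 / (1.0 + sq_dist)
    return q / tf.reduce_sum(q, axis=1, keepdims=True)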