def _preproc_image_batch(self, batch_size, num_threads=1):
        '''
        Build the batched, pre-processed inputs of the queue input pipeline.

        One file name is taken from ``self._filename_queue``, the file is read
        as a whole and decoded as a 3-channel JPEG, the decoded image is passed
        through the pre-processing function selected by name (the explicitly
        configured name, or the network name when none was configured) and the
        result is grouped with ``tf.train.batch``.

        :param batch_size: int, batch size
        :param num_threads: int, number of input threads (default=1)
        :return: the value returned by ``tf.train.batch`` for the tensor list
                 ``[pre-processed image, image file name]``
        :raises ValueError: if the network name contains "resnet_v2" and no
                 pre-processing function name was configured
        '''

        # ResNet v2 must not silently fall back to a pre-processing function
        # derived from the network name.
        is_resnet_v2 = "resnet_v2" in self._network_name
        if is_resnet_v2 and self._preproc_func_name is None:
            message = ("When using ResNet, please perform the pre-processing "
                       "function manually. See here for details: "
                       "https://github.com/tensorflow/models/tree/master/slim")
            raise ValueError(message)

        # Pull one (file name, raw bytes) pair from the queue and decode it.
        file_reader = tf.WholeFileReader()
        image_filename, raw_bytes = file_reader.read(self._filename_queue)
        decoded = tf.image.decode_jpeg(raw_bytes, channels=3)

        # Resolve the pre-processing function: an explicit name wins, otherwise
        # the network name is used for the lookup.
        preproc_name = self._preproc_func_name
        if preproc_name is None:
            preproc_name = self._network_name
        preprocess = preprocessing_factory.get_preprocessing(
            preproc_name, is_training=False)
        processed = preprocess(decoded, self.image_size, self.image_size)

        # Batch image and file name together; the last batch may be smaller.
        return tf.train.batch(
            [processed, image_filename],
            batch_size,
            num_threads=num_threads,
            allow_smaller_final_batch=True)
Пример #2
0
def imagenet_input(is_training):
  """Builds batched, pre-processed ImageNet inputs.

  The 'train' split is read (and shuffled) when `is_training` is true,
  otherwise the 'validation' split is read in order. Every image is run
  through the 'mobilenet_v1' pre-processing function at
  `FLAGS.image_size` x `FLAGS.image_size` before batching.

  Args:
     is_training: bool specifying if train or validation dataset is needed.
  Returns:
     A tuple `(images, labels)` produced by `tf.train.batch`.
  """
  split_name = 'train' if is_training else 'validation'
  dataset = dataset_factory.get_dataset('imagenet', split_name,
                                        FLAGS.dataset_dir)

  # Single-example provider; shuffling is only enabled for training.
  data_provider = slim.dataset_data_provider.DatasetDataProvider(
      dataset,
      shuffle=is_training,
      common_queue_capacity=2 * FLAGS.batch_size,
      common_queue_min=FLAGS.batch_size)
  raw_image, label = data_provider.get(['image', 'label'])

  preprocess = preprocessing_factory.get_preprocessing(
      'mobilenet_v1', is_training=is_training)
  image = preprocess(raw_image, FLAGS.image_size, FLAGS.image_size)

  batched = tf.train.batch(
      tensors=[image, label],
      batch_size=FLAGS.batch_size,
      num_threads=4,
      capacity=5 * FLAGS.batch_size)
  images, labels = batched
  return images, labels
Пример #3
0
def main():
  """Builds the training input pipeline and evaluates a single batch from it.

  All configuration (`dataset_dir`, `dataset_name`, `dataset_split_name`,
  `preprocessing_name`, `batch_size`, `num_readers`,
  `num_preprocessing_threads`, `labels_offset`, `num_clones`, `clone_on_cpu`,
  `task`, `worker_replicas`, `num_ps_tasks`) is read from names defined
  outside this function.

  Returns:
    A tuple `(image_data, label_data, fp_data)` with the evaluated image
    batch, one-hot encoded label batch and file path batch.

  Raises:
    ValueError: If `dataset_dir` is empty.
  """
  with tf.Graph().as_default():
    if not dataset_dir:
      raise ValueError('You must supply the dataset directory with --dataset_dir')

    deploy_config = model_deploy.DeploymentConfig(
        num_clones=num_clones,
        clone_on_cpu=clone_on_cpu,
        replica_id=task,
        num_replicas=worker_replicas,
        num_ps_tasks=num_ps_tasks)

    dataset = dataset_factory.get_dataset(
        dataset_name, dataset_split_name, dataset_dir)

    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=True)

    with tf.device(deploy_config.inputs_device()):
      with tf.name_scope('inputs'):
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=num_readers,
            common_queue_capacity=20 * batch_size,
            common_queue_min=10 * batch_size)
        [image, label, fp] = provider.get(['image', 'label', 'filepath'])
        label -= labels_offset

    train_image_size = 224

    image = image_preprocessing_fn(image, train_image_size,
                                   train_image_size)

    images, labels, fps = tf.train.batch(
        [image, label, fp],
        batch_size=batch_size,
        num_threads=num_preprocessing_threads,
        capacity=5 * batch_size)
    # NOTE(review): tf.image_summary only exists in pre-1.0 TensorFlow; newer
    # releases spell this tf.summary.image(..., max_outputs=5). Left unchanged
    # because the targeted TensorFlow version is not visible from here.
    tf.image_summary('image', images, max_images=5)
    labels = slim.one_hot_encoding(
        labels, dataset.num_classes - labels_offset)
    batch_queue = slim.prefetch_queue.prefetch_queue(
        [images, labels, fps], capacity=2 * deploy_config.num_clones)

    images, labels, fps = batch_queue.dequeue()

    # The `with` block closes the session and the `finally` clause stops the
    # queue-runner threads even when fetching the batch raises.
    with tf.Session() as sess:
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(sess, coord)
      try:
        image_data, label_data, fp_data = sess.run([images, labels, fps])
      finally:
        coord.request_stop()
        coord.join(threads)

    return image_data, label_data, fp_data
Пример #4
0
def get_style_features(FLAGS):
    """
    Compute the style (Gram matrix) features of ``FLAGS.style_image``.

    The style image is decoded as RGB, run through the pre-processing function
    of ``FLAGS.loss_model`` at ``FLAGS.image_size`` and fed to the loss
    network. According to the original author the pre-processing step is:
    1. Resize the shorter side to FLAGS.image_size
    2. Apply central crop

    As a side effect the pre-processed style image is written to
    ``generated/target_style_<FLAGS.naming>.jpg``.

    :param FLAGS: flag object providing ``loss_model``, ``image_size``,
        ``style_image``, ``style_layers`` and ``naming`` (it is also handed to
        ``utils._get_init_fn``).
    :return: list with one evaluated Gram matrix per entry of
        ``FLAGS.style_layers``, in that order.
    """
    with tf.Graph().as_default():
        network_fn = nets_factory.get_network_fn(
            FLAGS.loss_model,
            num_classes=1,
            is_training=False)
        image_preprocessing_fn, image_unprocessing_fn = preprocessing_factory.get_preprocessing(
            FLAGS.loss_model,
            is_training=False)

        # Get the style image data
        size = FLAGS.image_size
        img_bytes = tf.read_file(FLAGS.style_image)
        # Always decode to 3 channels: without `channels=3` an RGBA PNG yields
        # 4 channels and a grayscale file yields 1, neither of which is an RGB
        # input for the loss network.
        if FLAGS.style_image.lower().endswith('png'):
            image = tf.image.decode_png(img_bytes, channels=3)
        else:
            image = tf.image.decode_jpeg(img_bytes, channels=3)

        # Add the batch dimension
        images = tf.expand_dims(image_preprocessing_fn(image, size, size), 0)

        _, endpoints_dict = network_fn(images, spatial_squeeze=False)
        features = []
        for layer in FLAGS.style_layers:
            feature = endpoints_dict[layer]
            feature = tf.squeeze(gram(feature), [0])  # remove the batch dimension
            features.append(feature)

        with tf.Session() as sess:
            # Restore variables for loss network.
            init_func = utils._get_init_fn(FLAGS)
            init_func(sess)

            # Make sure the 'generated' directory exists.
            if not os.path.exists('generated'):
                os.makedirs('generated')
            # Indicate cropped style image path
            save_file = 'generated/target_style_' + FLAGS.naming + '.jpg'

            # Encode the pre-processed style image first and only then open
            # the output file, so a failing graph run leaves no empty file.
            target_image = image_unprocessing_fn(images[0, :])
            value = tf.image.encode_jpeg(tf.cast(target_image, tf.uint8))
            encoded_style = sess.run(value)
            with open(save_file, 'wb') as f:
                f.write(encoded_style)
            tf.logging.info('Target style pattern is saved to: %s.' % save_file)

            # Return the features of the layers used for measuring style loss.
            return sess.run(features)
Пример #5
0
def main(_):
    """Stylize ``FLAGS.image_file`` and write the result to generated/res.jpg.

    The image is first decoded once just to learn its height and width. A
    fresh graph is then built around ``model.net``, the weights are restored
    from ``FLAGS.model_file`` and the generated image is JPEG-encoded to disk.

    Side effect: ``FLAGS.model_file`` is replaced by its absolute path.

    :param _: unused positional argument.
    """

    # Get image's height and width.
    with open(FLAGS.image_file, 'rb') as img:
        # `with tf.Session()` closes the session on exit;
        # `tf.Session().as_default()` would leave it open.
        with tf.Session() as sess:
            if FLAGS.image_file.lower().endswith('png'):
                image = sess.run(tf.image.decode_png(img.read()))
            else:
                image = sess.run(tf.image.decode_jpeg(img.read()))
            height = image.shape[0]
            width = image.shape[1]
    tf.logging.info('Image size: %dx%d' % (width, height))

    with tf.Graph().as_default():
        with tf.Session() as sess:

            # Read image data.
            image_preprocessing_fn, _ = preprocessing_factory.get_preprocessing(
                FLAGS.loss_model,
                is_training=False)
            image = reader.get_image(FLAGS.image_file, height, width, image_preprocessing_fn)

            # Add batch dimension
            image = tf.expand_dims(image, 0)

            generated = model.net(image, training=False)
            generated = tf.cast(generated, tf.uint8)

            # Remove batch dimension
            generated = tf.squeeze(generated, [0])

            # Restore model variables.
            saver = tf.train.Saver(tf.global_variables(), write_version=tf.train.SaverDef.V1)
            sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
            # Use absolute path
            FLAGS.model_file = os.path.abspath(FLAGS.model_file)
            saver.restore(sess, FLAGS.model_file)

            # Make sure 'generated' directory exists.
            generated_file = 'generated/res.jpg'
            if not os.path.exists('generated'):
                os.makedirs('generated')

            # Generate and write image data to file.
            with open(generated_file, 'wb') as img:
                start_time = time.time()
                img.write(sess.run(tf.image.encode_jpeg(generated)))
                end_time = time.time()
                tf.logging.info('Elapsed time: %fs' % (end_time - start_time))

                tf.logging.info('Done. Please check %s.' % generated_file)
Пример #6
0
def get_data(dataset,
             model_name,
             batch_size = 32,
             shuffle_config = None,
             shuffle=None,
             is_training=True,
             height=0,
             width=0):
    """Build the shuffled input batches for a model.

    Args:
        dataset: a slim Dataset object.
        model_name: name used to look up the pre-processing function.
        batch_size: number of examples per batch.
        shuffle_config: a namedtuple controlling the shuffle queue, with
          fields {queue_capacity, num_batching_threads, min_after_dequeue};
          DEFAULT_SHUFFLE_CONFIG is used when it is not given.
        shuffle: forwarded to the data provider to control its shuffling.
        is_training: forwarded to the pre-processing function.
        height: expected resized height.
        width: expected resized width.

    Returns:
        InputEndpoints with the batched `images`, `labels` and
        `labels_one_hot`.
    """
    queue_settings = shuffle_config if shuffle_config else DEFAULT_SHUFFLE_CONFIG

    data_provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=shuffle,
        common_queue_capacity=2 * batch_size,
        common_queue_min=batch_size)
    raw_image, label = data_provider.get(['image', 'label'])

    # Summaries of the sample as it comes out of the provider.
    tf.summary.image('image_org', tf.expand_dims(raw_image, 0))
    tf.summary.scalar('label_orig', label)

    # NOTE(review): the size arguments are passed as (width, height); confirm
    # this matches the signature of the project's pre-processing functions.
    preprocess = preprocessing_factory.get_preprocessing(model_name)
    image = preprocess(raw_image, width, height, is_training)
    one_hot_label = slim.one_hot_encoding(label, dataset.num_classes)

    # NOTE(review): batches are always drawn with shuffle_batch, whatever the
    # value of `shuffle` (which only reaches the data provider).
    batched = tf.train.shuffle_batch(
        tensors=[image, label, one_hot_label],
        batch_size=batch_size,
        capacity=queue_settings.queue_capacity,
        num_threads=queue_settings.num_batching_threads,
        min_after_dequeue=queue_settings.min_after_dequeue)
    images, labels, labels_one_hot = batched

    return InputEndpoints(
        images=images,
        labels=labels,
        labels_one_hot=labels_one_hot)
Пример #7
0
def main(_):
    """Stylize ``FLAGS.image_file`` and write the JPEG to ``FLAGS.target_file``.

    The image is first decoded once just to learn its height and width. A
    fresh graph is then built around ``model.transform_network``, the weights
    are restored from ``FLAGS.model_file`` and the generated image is written
    to ``FLAGS.target_file``.

    Side effect: ``FLAGS.model_file`` is replaced by its absolute path.

    :param _: unused positional argument.
    """
    with open(FLAGS.image_file, 'rb') as img:
        # `with tf.Session()` closes the session on exit;
        # `tf.Session().as_default()` would leave it open.
        with tf.Session() as sess:
            if FLAGS.image_file.lower().endswith('png'):
                image = sess.run(tf.image.decode_png(img.read()))
            else:
                image = sess.run(tf.image.decode_jpeg(img.read()))
            height = image.shape[0]
            width = image.shape[1]
    tf.logging.info('Image size: %dx%d' % (width, height))

    with tf.Graph().as_default():
        with tf.Session() as sess:
            image_preprocessing_fn, _ = preprocessing_factory.get_preprocessing(
                FLAGS.loss_model,
                is_training=False)
            # Get the pre-processed input image; it is the content source for
            # the transform network below.
            image = reader.get_image(FLAGS.image_file, height, width, image_preprocessing_fn)
            image = tf.expand_dims(image, 0)
            generated = model.transform_network(image, training=False)
            generated = tf.squeeze(generated, [0])
            saver = tf.train.Saver(tf.global_variables(), write_version=tf.train.SaverDef.V1)
            sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])

            # Load the already trained model.
            FLAGS.model_file = os.path.abspath(FLAGS.model_file)
            saver.restore(sess, FLAGS.model_file)

            # Generate the style-transferred image.
            start_time = time.time()
            generated = sess.run(generated)
            generated = tf.cast(generated, tf.uint8)
            end_time = time.time()
            tf.logging.info('Elapsed time: %fs' % (end_time - start_time))

            generated_file = FLAGS.target_file
            if not os.path.exists('static/img/generated'):
                os.makedirs('static/img/generated')
            # The target file is not necessarily inside the directory created
            # above, so make sure its own parent directory exists as well.
            target_dir = os.path.dirname(generated_file)
            if target_dir and not os.path.exists(target_dir):
                os.makedirs(target_dir)

            # Encode before opening the file so that a failing run does not
            # leave an empty output file behind.
            jpeg_bytes = sess.run(tf.image.encode_jpeg(generated))
            with open(generated_file, 'wb') as img:
                img.write(jpeg_bytes)
            tf.logging.info('Done. Please check %s.' % generated_file)
Пример #8
0
def get_style_features(FLAGS):
    """
    Compute the style (Gram matrix) features of ``FLAGS.style_image``.

    For the style image, the pre-processing steps are (per the original
    author):
    1. Resize the shorter side to FLAGS.image_size
    2. Apply central crop

    As a side effect the pre-processed style image is written to
    ``static/img/generated/target_style_<FLAGS.naming>.jpg``.

    :param FLAGS: flag object providing ``loss_model``, ``image_size``,
        ``style_image``, ``style_layers`` and ``naming`` (it is also handed to
        ``utils._get_init_fn``).
    :return: list with one evaluated Gram matrix per entry of
        ``FLAGS.style_layers``, in that order.
    """
    with tf.Graph().as_default():
        network_fn = nets_factory.get_network_fn(
            FLAGS.loss_model,
            num_classes=1,
            is_training=False)
        image_preprocessing_fn, image_unprocessing_fn = preprocessing_factory.get_preprocessing(
            FLAGS.loss_model,
            is_training=False)

        size = FLAGS.image_size
        img_bytes = tf.read_file(FLAGS.style_image)
        # Always decode to 3 channels: without `channels=3` an RGBA PNG yields
        # 4 channels and a grayscale file yields 1, neither of which is an RGB
        # input for the loss network.
        if FLAGS.style_image.lower().endswith('png'):
            image = tf.image.decode_png(img_bytes, channels=3)
        else:
            image = tf.image.decode_jpeg(img_bytes, channels=3)

        # Stack the single pre-processed image into a batch of one.
        images = tf.stack([image_preprocessing_fn(image, size, size)])
        _, endpoints_dict = network_fn(images, spatial_squeeze=False)
        features = []
        for layer in FLAGS.style_layers:
            feature = endpoints_dict[layer]
            feature = tf.squeeze(gram(feature), [0])  # remove the batch dimension
            features.append(feature)

        with tf.Session() as sess:
            # Restore the variables of the loss network.
            init_func = utils._get_init_fn(FLAGS)
            init_func(sess)
            if not os.path.exists('static/img/generated'):
                os.makedirs('static/img/generated')
            save_file = 'static/img/generated/target_style_' + FLAGS.naming + '.jpg'

            # Encode the pre-processed style image first and only then open
            # the output file, so a failing graph run leaves no empty file.
            target_image = image_unprocessing_fn(images[0, :])
            value = tf.image.encode_jpeg(tf.cast(target_image, tf.uint8))
            encoded_style = sess.run(value)
            with open(save_file, 'wb') as f:
                f.write(encoded_style)
            tf.logging.info('Target style pattern is saved to: %s.' % save_file)
            return sess.run(features)
Пример #9
0
def style_transform(style, model_file, img_file, result_file):
    """Stylize ``img_file`` with the model stored in ``model_file``.

    The image is first decoded once just to learn its height and width. A
    fresh graph is then built around ``model.transform_network``, the weights
    are restored from ``model_file`` and the generated JPEG is written to
    ``static/img/generated/<result_file>``.

    Side effect: ``FLAGS.model_file`` is set to the absolute path of
    ``model_file``.

    :param style: not used by this function.
    :param model_file: path of the checkpoint to restore.
    :param img_file: path of the input image (PNG if the name ends with
        'png', otherwise decoded as JPEG).
    :param result_file: file name of the output inside
        ``static/img/generated/``.
    """
    with open(img_file, 'rb') as img:
        # `with tf.Session()` closes the session on exit;
        # `tf.Session().as_default()` would leave it open.
        with tf.Session() as sess:
            if img_file.lower().endswith('png'):
                image = sess.run(tf.image.decode_png(img.read()))
            else:
                image = sess.run(tf.image.decode_jpeg(img.read()))
            height = image.shape[0]
            width = image.shape[1]
    print('Image size: %dx%d' % (width, height))

    with tf.Graph().as_default():
        with tf.Session() as sess:
            image_preprocessing_fn, _ = preprocessing_factory.get_preprocessing(
                FLAGS.loss_model,
                is_training=False)
            image = reader.get_image(img_file, height, width, image_preprocessing_fn)
            image = tf.expand_dims(image, 0)
            generated = model.transform_network(image, training=False)
            generated = tf.squeeze(generated, [0])
            saver = tf.train.Saver(tf.global_variables())
            sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
            FLAGS.model_file = os.path.abspath(model_file)
            saver.restore(sess, FLAGS.model_file)

            start_time = time.time()
            generated = sess.run(generated)
            generated = tf.cast(generated, tf.uint8)
            end_time = time.time()
            print('Elapsed time: %fs' % (end_time - start_time))
            generated_file = 'static/img/generated/' + result_file
            if not os.path.exists('static/img/generated'):
                os.makedirs('static/img/generated')

            # Encode before opening the file so that a failing run does not
            # leave an empty output file behind.
            jpeg_bytes = sess.run(tf.image.encode_jpeg(generated))
            with open(generated_file, 'wb') as img:
                img.write(jpeg_bytes)
            print('Done. Please check %s.' % generated_file)
Пример #10
0
def main(_):
    """Stylize ``FLAGS.image_file`` and write the result to generated/res.jpg.

    The image is first decoded once just to learn its height and width. A
    fresh graph is then built around ``model.net``, the weights are restored
    from ``FLAGS.model_file`` and the generated image is JPEG-encoded to disk.

    Side effect: ``FLAGS.model_file`` is replaced by its absolute path.

    :param _: unused positional argument.
    """
    with open(FLAGS.image_file, 'rb') as img:
        # `with tf.Session()` closes the session on exit;
        # `tf.Session().as_default()` would leave it open.
        with tf.Session() as sess:
            if FLAGS.image_file.lower().endswith('png'):
                image = sess.run(tf.image.decode_png(img.read()))
            else:
                image = sess.run(tf.image.decode_jpeg(img.read()))
            height = image.shape[0]
            width = image.shape[1]
    tf.logging.info('Image size: %dx%d' % (width, height))

    with tf.Graph().as_default():
        with tf.Session() as sess:
            image_preprocessing_fn, _ = preprocessing_factory.get_preprocessing(
                FLAGS.loss_model,
                is_training=False)
            image = reader.get_image(FLAGS.image_file, height, width, image_preprocessing_fn)
            image = tf.expand_dims(image, 0)
            generated = model.net(image, training=False)
            generated = tf.squeeze(generated, [0])
            # NOTE(review): tf.all_variables / tf.initialize_all_variables /
            # tf.initialize_local_variables are the deprecated spellings of
            # tf.global_variables / tf.global_variables_initializer /
            # tf.local_variables_initializer. Kept as-is because the targeted
            # TensorFlow version is not visible from here.
            saver = tf.train.Saver(tf.all_variables())
            sess.run([tf.initialize_all_variables(), tf.initialize_local_variables()])
            FLAGS.model_file = os.path.abspath(FLAGS.model_file)
            saver.restore(sess, FLAGS.model_file)

            start_time = time.time()
            generated = sess.run(generated)
            generated = tf.cast(generated, tf.uint8)
            end_time = time.time()
            tf.logging.info('Elapsed time: %fs' % (end_time - start_time))
            generated_file = 'generated/res.jpg'
            if not os.path.exists('generated'):
                os.makedirs('generated')

            # Encode before opening the file so that a failing run does not
            # leave an empty output file behind.
            jpeg_bytes = sess.run(tf.image.encode_jpeg(generated))
            with open(generated_file, 'wb') as img:
                img.write(jpeg_bytes)
            tf.logging.info('Done. Please check %s.' % generated_file)
Пример #11
0
def main(_):
    """Fine-tune the SSD network selected by ``FLAGS.model_name``.

    The function builds the input pipeline, the network, its losses and
    summaries, restores the backbone weights from
    ``./logs/mobilenet_v1_1.0_224.ckpt`` (everything except the
    "MobilenetV1/Logits", "MobilenetV1/Box" and "global_step" variables) and
    then runs ``FLAGS.max_number_of_steps`` training steps. Summaries are
    written to ``./logs/`` every 50 steps and a checkpoint is saved every 100
    steps.

    :param _: unused positional argument.
    :raises ValueError: if ``FLAGS.dataset_dir`` is not set.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():

        # Create global_step.
        # with tf.device('/gpu:0'):
        global_step = slim.create_global_step()
        # ckpt = tf.train.get_checkpoint_state(os.path.dirname('./logs/checkpoint'))
        #os.path.dirname('./logs/')
        ckpt_filename = os.path.dirname(
            './logs/') + '/mobilenet_v1_1.0_224.ckpt'

        # Build the session configuration BEFORE the session so that
        # --gpu_memory_fraction is applied; previously the config was created
        # after the session and never passed to it.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)
        sess = tf.InteractiveSession(config=config)

        # Select the dataset.
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        # NOTE(review): only referenced by the commented-out KITTI branch
        # further down.
        dataset_kitti = dataset_factory.get_dataset('kitti',
                                                    FLAGS.dataset_split_name,
                                                    FLAGS.dataset_dir)

        # Get the SSD network and its anchors.
        ssd_class = nets_factory.get_network(FLAGS.model_name)
        ssd_params = ssd_class.default_params._replace(
            num_classes=FLAGS.num_classes)
        ssd_net = ssd_class(ssd_params)
        ssd_shape = ssd_net.params.img_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)

        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size,
                shuffle=True)
        [image, shape, glabels, gbboxes
         ] = provider.get(['image', 'shape', 'object/label', 'object/bbox'])

        image, glabels, gbboxes = \
            image_preprocessing_fn(image, glabels, gbboxes, out_shape = ssd_shape, data_format = DATA_FORMAT)

        # Encode the ground truth against the anchors.
        gclasses, glocalisations, gscores = \
            ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors)
        # One image entry followed by three groups (classes, localisations,
        # scores) of len(ssd_anchors) entries each; used to regroup the
        # flattened tensor list after batching.
        batch_shape = [1] + [len(ssd_anchors)] * 3

        r = tf.train.batch(tf_utils.reshape_list(
            [image, gclasses, glocalisations, gscores]),
                           batch_size=FLAGS.batch_size,
                           num_threads=FLAGS.num_preprocessing_threads,
                           capacity=5 * FLAGS.batch_size)

        b_image, b_gclasses, b_glocalisations, b_gscores = \
            tf_utils.reshape_list(r, batch_shape)

        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
        summaries.add(tf.summary.image("imgs", tf.cast(b_image, tf.float32)))

        # One image summary per ground-truth score map.
        for f_i, gt_map in enumerate(b_gscores):
            gt_features = tf.reduce_max(gt_map, axis=3)
            gt_features = tf.expand_dims(gt_features, -1)
            summaries.add(
                tf.summary.image("gt_map_%d" % f_i,
                                 tf.cast(gt_features, tf.float32)))
            # for festures in gt_list:
            #     summaries.add(tf.summary.image("gt_map_%d" % f_i, tf.cast(festures, tf.float32)))
            #     f_i += 1

        arg_scope = ssd_net.arg_scope(weight_decay=FLAGS.weight_decay,
                                      data_format=DATA_FORMAT)
        with slim.arg_scope(arg_scope):
            predictions, localisations, logits, end_points = \
                ssd_net.net(b_image, is_training=True)

        # One image summary per prediction map: index 0 of the last axis is
        # dropped, then the maximum over the last two axes is taken.
        for f_i, predict_map in enumerate(predictions):
            predict_map = predict_map[:, :, :, :, 1:]
            predict_map = tf.reduce_max(predict_map, axis=4)
            predict_map = tf.reduce_max(predict_map, axis=3)
            predict_map = tf.expand_dims(predict_map, -1)
            summaries.add(
                tf.summary.image("predicte_map_%d" % f_i,
                                 tf.cast(predict_map, tf.float32)))

        ssd_net.losses(logits,
                       localisations,
                       b_gclasses,
                       b_glocalisations,
                       b_gscores,
                       0,
                       match_threshold=FLAGS.match_threshold,
                       negative_ratio=FLAGS.negative_ratio,
                       alpha=FLAGS.loss_alpha,
                       label_smoothing=FLAGS.label_smoothing)

        # with tf.name_scope('kitti' + '_data_provider'):
        #     provider_k = slim.dataset_data_provider.DatasetDataProvider(
        #         dataset_kitti,
        #         num_readers = FLAGS.num_readers,
        #         common_queue_capacity = 20 * FLAGS.batch_size,
        #         common_queue_min = 10 * FLAGS.batch_size,
        #         shuffle = True
        #     )
        # [image_k, shape_k, glabels_k, gbboxes_k] = provider_k.get(['image', 'shape', 'object/label', 'object/bbox'])
        #
        # image_preprocessing_fn_k = preprocessing_factory.get_preprocessing('kitti', is_training=True)
        # image_k, glabels_k, gbboxes_k = \
        #     image_preprocessing_fn_k(image_k, glabels_k, gbboxes_k, out_shape = ssd_shape, data_format = DATA_FORMAT)
        #
        # gclasses_k, glocalisations_k, gscores_k = \
        #     ssd_net.bboxes_encode(glabels_k, gbboxes_k, ssd_anchors)
        # #batch_shape = [1] + [len(ssd_anchors)] * 3
        #
        # r_k = tf.train.batch(
        #     tf_utils.reshape_list([image_k, gclasses_k, glocalisations_k, gscores_k]),
        #     batch_size=FLAGS.batch_size,
        #     num_threads=FLAGS.num_preprocessing_threads,
        #     capacity= 5 * FLAGS.batch_size
        # )
        #
        # b_image_k, b_gclasses_k, b_glocalisations_k, b_gscores_k = \
        #     tf_utils.reshape_list(r_k, batch_shape)
        #
        # summaries.add(tf.summary.image("k_imgs", tf.cast(b_image_k, tf.float32)))
        #
        # f_i = 0
        # for gt_map in b_gscores_k:
        #     gt_features = tf.reduce_max(gt_map, axis=3)
        #     gt_features = tf.expand_dims(gt_features, -1)
        #     summaries.add(tf.summary.image("k_gt_map_%d" % f_i, tf.cast(gt_features, tf.float32)))
        #     f_i += 1
        #     # for festures in gt_list:
        #     #     summaries.add(tf.summary.image("gt_map_%d" % f_i, tf.cast(festures, tf.float32)))
        #     #     f_i += 1
        #
        # arg_scope = ssd_net.arg_scope(weight_decay=FLAGS.weight_decay, data_format=DATA_FORMAT)
        # with slim.arg_scope(arg_scope):
        #     predictions_k, localisations_k, logits_k, end_points_k = \
        #         ssd_net.net(b_image_k, is_training=True, reuse=True)
        #
        # f_i = 0
        # for predict_map in predictions_k:
        #     predict_map = predict_map[:, :, :, :, 1:]
        #     predict_map = tf.reduce_max(predict_map, axis=4)
        #     predict_map = tf.reduce_max(predict_map, axis=3)
        #     predict_map = tf.expand_dims(predict_map, -1)
        #     summaries.add(tf.summary.image("k_predicte_map_%d" % f_i, tf.cast(predict_map, tf.float32)))
        #     f_i += 1
        #
        # ssd_net.losses(logits_k, localisations_k, b_gclasses_k, b_glocalisations_k, b_gscores_k, 2,
        #                match_threshold=FLAGS.match_threshold,
        #                negative_ratio=FLAGS.negative_ratio,
        #                alpha=FLAGS.loss_alpha,
        #                label_smoothing=FLAGS.label_smoothing)

        #total_loss = slim.losses.get_total_loss()
        total_loss = tf.losses.get_total_loss()
        summaries.add(tf.summary.scalar('loss', total_loss))

        for loss in tf.get_collection(tf.GraphKeys.LOSSES):
            summaries.add(tf.summary.scalar(loss.op.name, loss))

        # for variable in slim.get_model_variables():
        #     summaries.add(tf.summary.histogram(variable.op.name, variable))
        for variable in tf.trainable_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        learning_rate = tf_utils.configure_learning_rate(
            FLAGS, dataset.num_samples, global_step)
        optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
        # optimizer = tf.train.AdamOptimizer(learning_rate, beta1=FLAGS.adam_beta1,
        #                                    beta2=FLAGS.adam_beta2, epsilon=FLAGS.opt_epsilon)
        #optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        # Make the train op depend on the ops registered in UPDATE_OPS.
        extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(extra_update_ops):
            train_op = slim.learning.create_train_op(total_loss,
                                                     optimizer,
                                                     summarize_gradients=False)

        summary_op = tf.summary.merge(list(summaries), name='summary_op')
        train_writer = tf.summary.FileWriter('./logs/', sess.graph)

        # From here on the writer and the session are released in `finally`,
        # so buffered summaries are flushed even if training is interrupted.
        try:
            #variables_to_exclude = slim.get_variables_by_suffix("Adam")

            variables_to_restore = slim.get_variables_to_restore(
                exclude=["MobilenetV1/Logits", "MobilenetV1/Box", "global_step"])

            # `restorer` loads the pre-trained weights, `saver` writes the
            # training checkpoints.
            restorer = tf.train.Saver(variables_to_restore)

            saver = tf.train.Saver(max_to_keep=5,
                                   keep_checkpoint_every_n_hours=1.0,
                                   write_version=2,
                                   pad_step_number=False)
            sess.run(tf.global_variables_initializer())
            restorer.restore(sess, ckpt_filename)

            # if ckpt and ckpt.model_checkpoint_path:
            #     saver.restore(sess, ckpt.model_checkpoint_path)

            i = 0
            with slim.queues.QueueRunners(sess):

                while i < FLAGS.max_number_of_steps:
                    _, summary_str = sess.run([train_op, summary_op])
                    if i % 50 == 0:
                        global_step_str = global_step.eval()
                        print('%diteraton' % (global_step_str))
                        train_writer.add_summary(summary_str, global_step_str)
                    if i % 100 == 0:
                        global_step_str = global_step.eval()
                        saver.save(sess, "./logs/", global_step=global_step_str)

                    i += 1
        finally:
            train_writer.close()
            sess.close()
Пример #12
0
                    help="Directory containing the dataset")
# NOTE(review): this help text is identical to the one of the preceding
# argument and does not describe an "only_use_index_embedding" option; it
# looks copy-pasted - confirm and replace with the flag's real meaning.
parser.add_argument('--only_use_index_embedding', default="0",
                    help="Directory containing the dataset")
if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.INFO)

    # Parse the command-line arguments.
    args = parser.parse_args()
    # The flag is converted via int(): "0" -> False, any other integer -> True.
    args.use_attr = bool(int(args.use_attr))
    # Restrict TensorFlow to the GPU(s) named by --gpu_no, numbered in PCI bus
    # order.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_no
    print("CUDA Visible device", device_lib.list_local_devices())

    # Optional pre-processing function; stays None when no name is given.
    image_preprocessing_fn = None
    if args.preprocessing_name != "":
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(args.preprocessing_name, is_training=False)

    args.image_size = int(args.image_size)


    # Parses one serialized example and decodes its JPEG image.
    # NOTE(review): the rest of this function is not visible in this snippet.
    def train_pre_process(example_proto):
        # Feature spec: encoded image bytes, integer class label and the
        # image height/width; missing values fall back to the defaults below.
        features = {"image/encoded": tf.FixedLenFeature((), tf.string, default_value=""),
                    "image/class/label": tf.FixedLenFeature((), tf.int64, default_value=0),
                    'image/height': tf.FixedLenFeature((), tf.int64, default_value=0),
                    'image/width': tf.FixedLenFeature((), tf.int64, default_value=0)
                    }
        # Attributes are a variable-length int64 list, parsed only on request.
        if args.use_attr:
            features["image/attr"] = tf.VarLenFeature(dtype=tf.int64)

        parsed_features = tf.parse_single_example(example_proto, features)
        # Decode the image as 3-channel JPEG.
        image = tf.image.decode_jpeg(parsed_features["image/encoded"], 3)
Пример #13
0
def main(_):
  """Builds the slim training graph for an image classifier and trains it.

  All configuration is read from the module-level `FLAGS`. The function
  selects the dataset, network and preprocessing by name, builds a queue-based
  input pipeline, creates the model clones and their losses, wires up
  summaries, optional moving averages and quantization, configures the
  optimizer and finally hands the train tensor to `slim.learning.train`.

  Args:
    _: unused positional argument.

  Raises:
    ValueError: if `--dataset_dir` was not supplied.
  """
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    # --- Deployment layout -------------------------------------------------
    config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # The global step is placed on the variables device.
    with tf.device(config.variables_device()):
      global_step = slim.create_global_step()

    # --- Dataset, network and preprocessing, all looked up by name ---------
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

    # Number of output classes after removing the label offset.
    num_classes = dataset.num_classes - FLAGS.labels_offset

    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=num_classes,
        weight_decay=FLAGS.weight_decay,
        is_training=True)

    # Falls back to the model name when no preprocessing name is given.
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        FLAGS.preprocessing_name or FLAGS.model_name,
        is_training=True,
        use_grayscale=FLAGS.use_grayscale)

    # --- Input pipeline ----------------------------------------------------
    with tf.device(config.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=FLAGS.num_readers,
          common_queue_capacity=20 * FLAGS.batch_size,
          common_queue_min=10 * FLAGS.batch_size)
      image, label = provider.get(['image', 'label'])
      label = label - FLAGS.labels_offset

      # Square input; the network default is used when the flag is unset.
      side = FLAGS.train_image_size or network_fn.default_image_size
      image = image_preprocessing_fn(image, side, side)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size)
      labels = slim.one_hot_encoding(labels, num_classes)
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * config.num_clones)

    # --- Model -------------------------------------------------------------
    def clone_fn(batch_queue):
      """Builds one clone of the network and registers its losses."""
      clone_images, clone_labels = batch_queue.dequeue()
      logits, end_points = network_fn(clone_images)

      # The auxiliary head (when the network has one) gets a smaller weight.
      if 'AuxLogits' in end_points:
        slim.losses.softmax_cross_entropy(
            end_points['AuxLogits'], clone_labels,
            label_smoothing=FLAGS.label_smoothing, weights=0.4,
            scope='aux_loss')
      slim.losses.softmax_cross_entropy(
          logits, clone_labels,
          label_smoothing=FLAGS.label_smoothing, weights=1.0)
      return end_points

    # Summaries that exist before any clone is created.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(config, clone_fn, [batch_queue])
    first_clone_scope = config.clone_scope(0)
    # Update ops of the first clone only (e.g. batch_norm statistics created
    # by network_fn).
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Activation histogram and sparsity scalar for every end point.
    for name, activation in clones[0].outputs.items():
      summaries.add(tf.summary.histogram('activations/' + name, activation))
      sparsity = tf.nn.zero_fraction(activation)
      summaries.add(tf.summary.scalar('sparsity/' + name, sparsity))

    # One scalar per loss of the first clone.
    summaries.update(
        tf.summary.scalar('losses/%s' % loss.op.name, loss)
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope))

    # One histogram per model variable.
    summaries.update(
        tf.summary.histogram(variable.op.name, variable)
        for variable in slim.get_model_variables())

    # --- Moving averages (optional) ----------------------------------------
    moving_average_variables, variable_averages = None, None
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, global_step)

    # A negative delay disables the quantization rewrite.
    if FLAGS.quantize_delay >= 0:
      contrib_quantize.create_training_graph(quant_delay=FLAGS.quantize_delay)

    # --- Optimizer ---------------------------------------------------------
    with tf.device(config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    if FLAGS.sync_replicas:
      # With synchronized replicas the averaging is handed to the
      # SyncReplicasOptimizer (done in the chief queue runner).
      optimizer = tf.train.SyncReplicasOptimizer(
          opt=optimizer,
          replicas_to_aggregate=FLAGS.replicas_to_aggregate,
          total_num_replicas=FLAGS.worker_replicas,
          variable_averages=variable_averages,
          variables_to_average=moving_average_variables)
    elif FLAGS.moving_average_decay:
      # Otherwise the trainer applies the averages locally as an update op.
      update_ops.append(variable_averages.apply(moving_average_variables))

    # --- Gradients and train op --------------------------------------------
    variables_to_train = _get_variables_to_train()
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones,
        optimizer,
        var_list=variables_to_train)
    summaries.add(tf.summary.scalar('total_loss', total_loss))

    grad_updates = optimizer.apply_gradients(clones_gradients,
                                             global_step=global_step)
    update_ops.append(grad_updates)

    # Evaluating the train tensor forces every update op to run first.
    update_op = tf.group(*update_ops)
    with tf.control_dependencies([update_op]):
      train_tensor = tf.identity(total_loss, name='train_op')

    # Pick up the summaries registered inside the first clone's scope.
    summaries.update(
        tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))
    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    # --- Training ----------------------------------------------------------
    sync_optimizer = optimizer if FLAGS.sync_replicas else None
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_dir,
        master=FLAGS.master,
        is_chief=(FLAGS.task == 0),
        init_fn=_get_init_fn(),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=FLAGS.log_every_n_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        sync_optimizer=sync_optimizer)
Пример #14
0
            self.image_idx = 0
        image, bboxes = self.data[self.image_idx]
        labels = [1] * len(bboxes)
        labels = np.reshape(labels, [-1, 1])
        return image, [bboxes], labels

if __name__ == "__main__":
    # Manual test harness: builds the preprocessing graph on placeholders so
    # that samples from ICDARData can be fed through it.
    from preprocessing.preprocessing_factory import get_preprocessing
    data_provider = ICDARData()

    import numpy as np
    import tensorflow as tf
    import time
    import util
    util.proc.set_proc_name('proc-test')
    # NOTE(review): the positional True is presumably the is_training switch -
    # confirm against preprocessing_factory.get_preprocessing.
    fn = get_preprocessing(True)
    with tf.Graph().as_default():
        sess = tf.Session()
        # NOTE(review): the return value of as_default() is discarded (it is
        # not used in a `with` statement), so this call looks like a no-op -
        # confirm whether a default session was intended here.
        sess.as_default()
        # Target size handed to the preprocessing function.
        out_shape = [150, 150]
        # Placeholders: a single 3-channel image of arbitrary size, its boxes
        # (leading dimension fixed to 1) and one int32 label per row.
        images = tf.placeholder("float", name='images', shape=[None, None, 3])
        bboxes = tf.placeholder("float", name='bboxes', shape=[1, None, 4])
        labels = tf.placeholder('int32', name='labels', shape=[None, 1])

        sampled_image, sampled_labels, sampled_bboxes = fn(
            images, labels, bboxes, out_shape)
        step = 0
        data = []
        # Run ten iterations, timing each one.
        while step < 10:
            step += 1
            start = time.time()
def main(_):
    """Smoke-tests the queue-based detection input pipeline.

    Builds the preprocessing function and the anchor encoder, asks
    `dataset_factory.get_dataset` for a batch iterator and then:

    1. pulls six batches and saves the first image of each one to
       ``./debug/example_NNN.jpg`` while printing the batch shapes;
    2. keeps pulling batches, printing the average time per batch every ten
       batches, until the input queue is exhausted or the coordinator stops.

    The example dump runs *before* the timing loop. The original order ran it
    after the session had been closed (and after the input queue was drained),
    so it could never succeed.

    Args:
        _: unused positional argument.

    Raises:
        ValueError: if ``--data_dir`` was not supplied.
    """
    print(tf.gfile.Glob('./debug/example_01?.jpg'))
    if not FLAGS.data_dir:
        raise ValueError(
            'You must supply the dataset directory with --data_dir')

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():
        # Called for its side effect on the graph; the handle is not needed.
        slim.create_global_step()

        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name

        def image_preprocessing_fn(image_, shape_, glabels_, gbboxes_):
            """Adapts the factory preprocessing to the dataset's call shape."""
            # `shape_` is accepted for the caller's sake but not forwarded.
            return preprocessing_factory.get_preprocessing(
                preprocessing_name, is_training=True)(
                    image_,
                    glabels_,
                    gbboxes_,
                    out_shape=[FLAGS.train_image_size] * 2,
                    data_format=DATA_FORMAT)

        anchor_creator = anchor_manipulator.AnchorCreator(
            [FLAGS.train_image_size] * 2,
            layers_shapes=[(38, 38), (19, 19), (10, 10)],
            anchor_scales=[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]],
            extra_anchor_scales=[[0.15], [0.35], [0.55]],
            anchor_ratios=[[2, .5], [2, .5, 3, 1. / 3], [2, .5, 3, 1. / 3]],
            layer_steps=[8, 16, 32])

        all_anchors = anchor_creator.get_all_anchors()[0]

        anchor_operator = anchor_manipulator.AnchorEncoder(
            all_anchors,
            num_classes=FLAGS.num_classes,
            ignore_threshold=0.,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        next_iter, _ = dataset_factory.get_dataset(
            FLAGS.dataset_name,
            FLAGS.dataset_split_name,
            FLAGS.data_dir,
            image_preprocessing_fn,
            file_pattern=None,
            reader=None,
            batch_size=FLAGS.batch_size,
            num_readers=FLAGS.num_readers,
            num_preprocessing_threads=FLAGS.num_preprocessing_threads,
            anchor_encoder=anchor_operator.encode_all_anchors)

        sess = tf.Session()
        coord = tf.train.Coordinator()
        threads = []
        try:
            sess.run(
                tf.group(tf.global_variables_initializer(),
                         tf.local_variables_initializer(),
                         tf.tables_initializer()))
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            # Dump a few examples while the session and input queues are
            # still alive. The fetched list is sliced into three consecutive
            # groups of len(all_anchors) entries, followed by shape and image
            # as the last two entries.
            group_len = len(all_anchors)
            for i in range(6):
                list_from_batch = sess.run(next_iter)
                image = list_from_batch[-1]
                shape = list_from_batch[-2]
                glabels = list_from_batch[:group_len]
                gtargets = list_from_batch[group_len:2 * group_len]
                gscores = list_from_batch[2 * group_len:3 * group_len]

                imsave('./debug/example_%03d.jpg' % (i, ), image[0])

                print(image.shape, shape.shape, glabels[0].shape,
                      gtargets[0].shape, gscores[0].shape)

            # Throughput measurement: average seconds per batch, reported
            # every ten batches.
            count = 0
            start_time = time.time()
            while not coord.should_stop():
                count += 1
                sess.run([next_iter])
                if count % 10 == 0:
                    time_elapsed = time.time() - start_time
                    print('time: {}'.format(time_elapsed / 10.))
                    start_time = time.time()
        except tf.errors.OutOfRangeError:
            log.info('Queue Done!')
        finally:
            # Ask the queue runners to stop before joining them, otherwise
            # join() can wait on threads that were never told to exit. Close
            # the session only after every use of it is finished.
            coord.request_stop()
            coord.join(threads)
            sess.close()
def main(_):
    """Trains a slim image classifier and logs per-step training accuracy.

    Configuration comes from the module-level `FLAGS`. On top of the usual
    slim training graph (dataset, network, preprocessing, input queues, clones,
    summaries, optional moving averages, optimizer), each clone also computes a
    batch accuracy that is exported as the ``train_accuracy`` end point,
    summarized, and fetched by a custom train step so it can be logged next to
    the loss.

    Two flags are rewritten in place before training starts:
    ``max_number_of_steps`` is derived from ``num_epochs`` when only the latter
    is set, and ``train_dir`` gets an experiment sub-directory appended when
    ``experiment_name`` is set.

    Args:
        _: unused positional argument.

    Raises:
        ValueError: if ``--dataset_dir`` was not supplied.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # --- Deployment layout -------------------------------------------
        config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # The global step is placed on the variables device.
        with tf.device(config.variables_device()):
            global_step = slim.create_global_step()

        # --- Dataset, network and preprocessing, looked up by name --------
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        # Number of output classes after removing the label offset.
        num_classes = dataset.num_classes - FLAGS.labels_offset

        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=num_classes,
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        # Falls back to the model name when no preprocessing name is given.
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            FLAGS.preprocessing_name or FLAGS.model_name, is_training=True)

        # --- Input pipeline -------------------------------------------------
        with tf.device(config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            image, label = provider.get(['image', 'label'])
            label = label - FLAGS.labels_offset

            # Square input; the network default is used when the flag is
            # unset.
            side = FLAGS.train_image_size or network_fn.default_image_size
            image = image_preprocessing_fn(image, side, side)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(labels, num_classes)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * config.num_clones)

        # --- Model ----------------------------------------------------------
        def clone_fn(batch_queue):
            """Builds one clone: network, losses and a batch accuracy."""
            clone_images, clone_labels = batch_queue.dequeue()
            logits, end_points = network_fn(clone_images)

            # The auxiliary head (when present) gets a smaller weight.
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'],
                    clone_labels,
                    label_smoothing=FLAGS.label_smoothing,
                    weights=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits,
                clone_labels,
                label_smoothing=FLAGS.label_smoothing,
                weights=1.0)

            # Accuracy of this batch: arg-max of the logits against arg-max
            # of the one-hot labels.
            predicted = tf.to_int32(tf.argmax(logits, 1))
            expected = tf.to_int32(tf.argmax(clone_labels, 1))
            accuracy = slim.metrics.accuracy(predicted, expected)
            tf.add_to_collection('accuracy', accuracy)
            end_points['train_accuracy'] = accuracy
            return end_points

        # Summaries that exist before any clone is created.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(config, clone_fn, [batch_queue])
        first_clone_scope = config.clone_scope(0)
        # Update ops of the first clone only (e.g. batch_norm statistics
        # created by network_fn).
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Histogram + sparsity per end point. Accuracy end points are skipped
        # here and get a plain scalar summary instead.
        end_points = clones[0].outputs
        for name, activation in end_points.items():
            if 'accuracy' in name:
                continue
            summaries.add(
                tf.summary.histogram('activations/' + name, activation))
            sparsity = tf.nn.zero_fraction(activation)
            summaries.add(tf.summary.scalar('sparsity/' + name, sparsity))
        train_acc = end_points['train_accuracy']
        summaries.add(tf.summary.scalar('train_accuracy', train_acc))

        # One scalar per loss of the first clone.
        summaries.update(
            tf.summary.scalar('losses/%s' % loss.op.name, loss)
            for loss in tf.get_collection(tf.GraphKeys.LOSSES,
                                          first_clone_scope))

        # TODO(philkuz): add streaming train/val accuracy metrics, evaluated
        # every n iterations.
        # TODO: find a way to keep some of these out of the tensorboard
        # scalars.
        # One histogram per model variable.
        summaries.update(
            tf.summary.histogram(variable.op.name, variable)
            for variable in slim.get_model_variables())

        # --- Moving averages (optional) --------------------------------------
        moving_average_variables, variable_averages = None, None
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)

        # --- Optimizer ---------------------------------------------------------
        with tf.device(config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.sync_replicas:
            # With synchronized replicas the averaging is handed to the
            # SyncReplicasOptimizer (done in the chief queue runner).
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                total_num_replicas=FLAGS.worker_replicas,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables)
        elif FLAGS.moving_average_decay:
            # Otherwise the trainer applies the averages locally.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # --- Gradients and train op ------------------------------------------
        variables_to_train = _get_variables_to_train()
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        # Evaluating the train tensor forces every update op to run first.
        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Pick up the summaries registered inside the first clone's scope.
        summaries.update(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # --- Derived flags -----------------------------------------------------
        # An epoch budget is turned into a step budget only when no explicit
        # step budget was given.
        print('FLAGS.num_epochs', FLAGS.num_epochs)
        steps_unset = FLAGS.max_number_of_steps is None
        if FLAGS.num_epochs is not None and steps_unset:
            FLAGS.max_number_of_steps = int(
                FLAGS.num_epochs * dataset.num_samples / FLAGS.batch_size)

        # A named experiment gets its own sub-directory of train_dir, keyed by
        # the main hyper-parameters.
        if FLAGS.experiment_name is not None:
            experiment_dir = 'bs={},lr={},epochs={}/{}'.format(
                FLAGS.batch_size, FLAGS.learning_rate, FLAGS.num_epochs,
                FLAGS.experiment_name)
            print(experiment_dir)
            FLAGS.train_dir = os.path.join(FLAGS.train_dir, experiment_dir)
            print(FLAGS.train_dir)

        def train_step(sess, train_op, global_step, train_step_kwargs):
            """Takes one gradient step and reports whether to stop.

            Differs from a plain loss-only step in that it also fetches the
            first clone's training accuracy and includes it in the log line.

            Args:
                sess: The current session.
                train_op: An `Operation` that evaluates the gradients and
                    returns the total loss.
                global_step: A `Tensor` representing the global training step.
                train_step_kwargs: A dictionary of keyword arguments.

            Returns:
                The total loss and a boolean indicating whether or not to stop
                training.

            Raises:
                ValueError: if 'should_trace' is in `train_step_kwargs` but
                    `logdir` is not.
            """
            start_time = time.time()

            trace_run_options = None
            run_metadata = None
            should_acc = True  # TODO(philkuz): make this not hardcoded.

            # Full tracing is enabled only when the caller asks for it.
            if 'should_trace' in train_step_kwargs:
                if 'logdir' not in train_step_kwargs:
                    raise ValueError(
                        'logdir must be present in train_step_kwargs when '
                        'should_trace is present')
                if sess.run(train_step_kwargs['should_trace']):
                    trace_run_options = config_pb2.RunOptions(
                        trace_level=config_pb2.RunOptions.FULL_TRACE)
                    run_metadata = config_pb2.RunMetadata()

            # Loss first, global step last; accuracy goes in between when
            # requested.
            fetches = [train_op, global_step]
            if should_acc:
                fetches.insert(1, train_acc)
            fetched = sess.run(
                fetches,
                options=trace_run_options,
                run_metadata=run_metadata)
            total_loss, np_global_step = fetched[0], fetched[-1]
            acc = fetched[1] if should_acc else None
            time_elapsed = time.time() - start_time

            # Persist the trace (and attach it to the summary writer, if any).
            if run_metadata is not None:
                trace = timeline.Timeline(
                    run_metadata.step_stats).generate_chrome_trace_format()
                trace_filename = os.path.join(
                    train_step_kwargs['logdir'],
                    'tf_trace-%d.json' % np_global_step)
                tf.logging.info('Writing trace to %s', trace_filename)
                file_io.write_string_to_file(trace_filename, trace)
                if 'summary_writer' in train_step_kwargs:
                    train_step_kwargs['summary_writer'].add_run_metadata(
                        run_metadata, 'run_metadata-%d' % np_global_step)

            if ('should_log' in train_step_kwargs
                    and sess.run(train_step_kwargs['should_log'])):
                if should_acc:
                    tf.logging.info(
                        'global step %d: loss = %.4f train_acc = %.4f (%.3f sec/step)',
                        np_global_step, total_loss, acc, time_elapsed)
                else:
                    tf.logging.info(
                        'global step %d: loss = %.4f (%.3f sec/step)',
                        np_global_step, total_loss, time_elapsed)

            should_stop = False
            if 'should_stop' in train_step_kwargs:
                should_stop = sess.run(train_step_kwargs['should_stop'])

            return total_loss, should_stop

        # --- Training ------------------------------------------------------------
        sync_optimizer = optimizer if FLAGS.sync_replicas else None
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            train_step_fn=train_step,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=sync_optimizer)
# Grab one frame up front to learn the frame size.
# `capture.read()` returns a success flag next to the frame; a failed read
# must not reach `img.shape`.
ret, img = capture.read()
if not ret:
    raise RuntimeError('Could not read a frame from the capture device')
shape = img.shape
height = shape[0]
width = shape[1]

with tf.Graph().as_default():
    # Load the frozen graph from disk and import it without a name prefix so
    # that its tensors can be looked up as "input:0" / "output:0".
    output_graph_path = './models/wave.pb'
    with tf.gfile.FastGFile(output_graph_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        _ = tf.import_graph_def(graph_def, name='')

    with tf.Session() as sess:
        # NOTE(review): tf.initialize_all_variables is deprecated in favour of
        # tf.global_variables_initializer - switch once the minimum supported
        # TensorFlow version is confirmed.
        tf.initialize_all_variables().run()
        # NOTE(review): neither returned function is used below - confirm
        # whether this lookup is still needed.
        image_preprocessing_fn, _ = preprocessing_factory.get_preprocessing(
            'vgg_16', is_training=False)
        input_x = sess.graph.get_tensor_by_name("input:0")
        print(input_x)
        output = sess.graph.get_tensor_by_name("output:0")
        print(output)
        # Convert the network output to uint8 and drop dimension 0 so the
        # result can be shown as a single image.
        generated = tf.cast(output, tf.uint8)
        generated = tf.squeeze(generated, [0])

        while True:
            start_time = time.time()
            ret, img = capture.read()
            if not ret:
                # No frame was delivered: stop instead of feeding None to the
                # graph.
                break
            image_transfer = sess.run(generated, feed_dict={input_x: img})
            # Show the raw frame and the stylized frame side by side.
            cv2.imshow('camera', img)
            cv2.imshow('transfer', image_transfer)
def main_fun(argv, ctx):
    """Evaluates a slim classification model once on a dataset split.

    Defines the evaluation flags, starts the cluster server for this node,
    builds the evaluation graph (dataset provider, preprocessing, batching,
    network, streaming accuracy and recall@5) and runs
    `slim.evaluation.evaluate_once` on the selected checkpoint.

    Args:
        argv: argument list that replaces `sys.argv` before the flags are
            read.
        ctx: node context handed to `TFNode.start_cluster_server`.

    Raises:
        ValueError: if `--dataset_dir` was not supplied.
    """
    import math
    import tensorflow as tf

    from datasets import dataset_factory
    from nets import nets_factory
    from preprocessing import preprocessing_factory

    # The flags below are read from sys.argv, so install the caller's
    # arguments first.
    sys.argv = argv

    slim = tf.contrib.slim

    tf.app.flags.DEFINE_integer('batch_size', 100,
                                'The number of samples in each batch.')

    tf.app.flags.DEFINE_integer(
        'max_num_batches', None,
        'Max number of batches to evaluate by default use all.')

    tf.app.flags.DEFINE_string('master', '',
                               'The address of the TensorFlow master to use.')

    tf.app.flags.DEFINE_string(
        'checkpoint_path', '/tmp/tfmodel/',
        'The directory where the model was written to or an absolute path to a '
        'checkpoint file.')

    tf.app.flags.DEFINE_string('eval_dir', '/tmp/tfmodel/',
                               'Directory where the results are saved to.')

    tf.app.flags.DEFINE_integer(
        'num_preprocessing_threads', 4,
        'The number of threads used to create the batches.')

    tf.app.flags.DEFINE_string('dataset_name', 'imagenet',
                               'The name of the dataset to load.')

    tf.app.flags.DEFINE_string('dataset_split_name', 'test',
                               'The name of the train/test split.')

    tf.app.flags.DEFINE_string(
        'dataset_dir', None,
        'The directory where the dataset files are stored.')

    tf.app.flags.DEFINE_integer(
        'labels_offset', 0,
        'An offset for the labels in the dataset. This flag is primarily used to '
        'evaluate the VGG and ResNet architectures which do not use a background '
        'class for the ImageNet dataset.')

    tf.app.flags.DEFINE_string('model_name', 'inception_v3',
                               'The name of the architecture to evaluate.')

    tf.app.flags.DEFINE_string(
        'preprocessing_name', None,
        'The name of the preprocessing to use. If left '
        'as `None`, then the model_name flag is used.')

    tf.app.flags.DEFINE_float(
        'moving_average_decay', None,
        'The decay to use for the moving average.'
        'If left as None, then moving averages are not used.')

    tf.app.flags.DEFINE_integer('eval_image_size', None, 'Eval image size')

    FLAGS = tf.app.flags.FLAGS

    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    # The returned handles are not used below; the call is kept for starting
    # the server.
    cluster_spec, server = TFNode.start_cluster_server(ctx)

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        [image, label] = provider.get(['image', 'label'])
        label -= FLAGS.labels_offset

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        # Square input; the network default is used when the flag is unset.
        eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        # With moving averages enabled, restore the averaged values of the
        # model variables (plus the global step) instead of the raw ones.
        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
            slim.metrics.streaming_accuracy(predictions, labels),
            'Recall@5':
            slim.metrics.streaming_recall_at_k(logits, labels, 5),
        })

        # Print the summaries to screen.
        # `.items()` instead of the Python-2-only `.iteritems()` so the loop
        # runs on both Python 2 and Python 3.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(dataset.num_samples /
                                    float(FLAGS.batch_size))

        # Accept either a checkpoint directory (use its latest checkpoint) or
        # a path to one checkpoint file.
        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s' % checkpoint_path)

        # Materialize the update ops as a list: on Python 3 `dict.values()`
        # is a view object, not a list of ops.
        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore)
Пример #19
0
def main(_):
  """Train 'mobilenet_v1' on the 'train' split of the 'flowers' dataset.

  Builds a single-clone model_deploy graph: reads the dataset from
  FLAGS.dataset_dir, preprocesses and batches the images, attaches a softmax
  cross-entropy loss, optimizes with RMSProp under a staircase
  exponentially-decaying learning rate, and hands the resulting train op to
  slim.learning.train.

  Args:
    _: Unused positional argument (presumably argv passed by the app runner).

  Raises:
    ValueError: If FLAGS.dataset_dir is not set.
  """
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    #######################
    # Config model_deploy #
    #######################
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=1,
        clone_on_cpu=False,
        replica_id=0,
        num_replicas=1,
        num_ps_tasks=0)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_factory.get_dataset(
        'flowers', 'train', FLAGS.dataset_dir)

    # Class count after removing the label offset. Computed once so the
    # network head and the one-hot label depth below always agree.
    num_classes = dataset.num_classes - FLAGS.labels_offset

    ######################
    # Select the network #
    ######################
    network_fn = nets_factory.get_network_fn(
        'mobilenet_v1',
        num_classes=num_classes,
        weight_decay=FLAGS.weight_decay,
        is_training=True)

    #####################################
    # Select the preprocessing function #
    #####################################
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        'mobilenet_v1',
        is_training=True)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    with tf.device(deploy_config.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=4,
          common_queue_capacity=20 * FLAGS.batch_size,
          common_queue_min=10 * FLAGS.batch_size)
      [image, label] = provider.get(['image', 'label'])
      label -= FLAGS.labels_offset

      train_image_size = network_fn.default_image_size

      image = image_preprocessing_fn(image, train_image_size, train_image_size)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=4,
          capacity=5 * FLAGS.batch_size)
      labels = slim.one_hot_encoding(labels, num_classes)
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * deploy_config.num_clones)

    ####################
    # Define the model #
    ####################
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      images, labels = batch_queue.dequeue()
      logits, end_points = network_fn(images)

      #############################
      # Specify the loss function #
      #############################
      slim.losses.softmax_cross_entropy(
          logits, labels, label_smoothing=FLAGS.label_smoothing, weights=1.0)
      return end_points

    # Gather initial summaries. This snapshot is taken before the clones are
    # created, so it only holds summaries that already exist at this point.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Add summaries for end_points.
    end_points = clones[0].outputs
    for end_point in end_points:
      x = end_points[end_point]
      summaries.add(tf.summary.histogram('activations/' + end_point, x))
      summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                      tf.nn.zero_fraction(x)))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
      # The learning rate is multiplied by _LEARNING_RATE_DECAY_FACTOR once
      # every `num_epochs_per_decay` epochs (staircase schedule).
      num_epochs_per_decay = 2.5
      decay_steps = int(dataset.num_samples / FLAGS.batch_size *
                        num_epochs_per_decay)
      learning_rate = tf.train.exponential_decay(
          FLAGS.learning_rate,
          global_step,
          decay_steps,
          _LEARNING_RATE_DECAY_FACTOR,
          staircase=True,
          name='exponential_decay_learning_rate')

      optimizer = tf.train.RMSPropOptimizer(
          learning_rate,
          decay=FLAGS.rmsprop_decay,
          momentum=FLAGS.rmsprop_momentum,
          epsilon=FLAGS.opt_epsilon)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    # Variables to train.
    variables_to_train = _get_variables_to_train()

    # Compute the total loss and the per-variable gradients over the clones.
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones,
        optimizer,
        var_list=variables_to_train)
    # Add total_loss to summary.
    summaries.add(tf.summary.scalar('total_loss', total_loss))

    # Create gradient updates.
    grad_updates = optimizer.apply_gradients(clones_gradients,
                                             global_step=global_step)
    update_ops.append(grad_updates)

    # Evaluating train_tensor forces every update op (first-clone update ops
    # plus the gradient step) to run first, then yields the total loss.
    update_op = tf.group(*update_ops)
    with tf.control_dependencies([update_op]):
      train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                       first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)

    ###########################
    # Kicks off the training. #
    ###########################
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_dir,
        master=FLAGS.master,
        is_chief=True,
        session_config=session_config,
        init_fn=_get_init_fn(),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=10,
        save_summaries_secs=300,
        save_interval_secs=300,
        # Synchronous replica training is not used by this script.
        sync_optimizer=None)
def main(_):
    """Train a classifier on synthetically noised, Bayer-masked images.

    Builds the dataset, network and preprocessing function selected through
    FLAGS. Each dequeued batch is corrupted with
    sensor_model.sensor_noise_rand_light_level and multiplied by a Bayer mask
    before it is fed to network_fn. Training runs through slim.learning.train
    with a softmax cross-entropy loss, and exponential moving averages of the
    model variables are updated alongside the gradient step.

    Args:
        _: Unused positional argument (presumably argv passed by the app
            runner).

    Raises:
        ValueError: If FLAGS.dataset_dir is not set.
    """
    # Restrict the CUDA devices visible to this process to FLAGS.device.
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.device

    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=False,
            replica_id=0,
            num_replicas=1,
            num_ps_tasks=0)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=dataset.num_classes,
            weight_decay=FLAGS.weight_decay,
            batch_norm_decay=None,
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        # Fall back to the model name when no explicit preprocessing is given.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])

            train_image_size = FLAGS.train_image_size or network_fn.default_image_size

            # NOTE(review): `image` is passed twice here. The other snippets in
            # this file call the preprocessing function as
            # (image, height, width); confirm that this project's
            # preprocessing function really takes a second image argument.
            image = image_preprocessing_fn(image, image, train_image_size,
                                           train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(labels, dataset.num_classes)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            images, labels = batch_queue.dequeue()
            # Noise up the images - don't do that for models where we are preprocessing the images with an existing ISP.
            with tf.device("/cpu:0"):
                noisy_batch, a, gauss_std = sensor_model.sensor_noise_rand_light_level(
                    images, [FLAGS.ll_low, FLAGS.ll_high], scale=1.0)
            bayer_mask = sensor_model.get_bayer_mask(train_image_size,
                                                     train_image_size)
            # The network input is the noisy batch multiplied by the Bayer mask.
            inputs = noisy_batch * bayer_mask

            # These parameters are only relevant for our special ISP functions. Mobilenet for instance will just eat them and not act upon them.
            # network_fn returns three values here; the third one is discarded.
            logits, end_points, _ = network_fn(
                images=inputs,
                num_classes=dataset.num_classes,
                alpha=a,
                sigma=gauss_std,
                bayer_mask=bayer_mask,
                use_anscombe=FLAGS.use_anscombe,
                noise_channel=FLAGS.noise_channel,
                num_iters=FLAGS.num_iters,
                num_layers=FLAGS.num_layers,
                isp_model_name=FLAGS.isp_model_name,
                is_real_data=False)

            # Expose the clean (pre-noise) batch so it gets the image and
            # bounds summaries added further down.
            end_points['ground_truth'] = images
            # end_points['noisy'] = noisy_batch

            #############################
            # Specify the loss function #
            #############################
            tf.losses.softmax_cross_entropy(
                logits=logits,
                onehot_labels=labels,
                label_smoothing=FLAGS.label_smoothing,
                weights=1.0)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)

        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(
                tf.summary.scalar('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # Add image summary for denoised image
        for end_point in end_points:
            # These end points only get an image summary.
            if end_point in ['outputs', 'post_anscombe', 'pre_inv_anscombe']:
                summaries.add(
                    tf.summary.image(end_point, end_points[end_point]))
            # These end points get an image summary plus min/max scalars.
            if end_point in [
                    'mobilenet_input', 'noisy', 'inputs', 'ground_truth', 'R',
                    'G1', 'G2', 'B'
            ]:
                clean_image = end_points[end_point]
                summaries.add(tf.summary.image(end_point, clean_image))
                summaries.add(
                    tf.summary.scalar('bounds/%s_min' % end_point,
                                      tf.reduce_min(clean_image)))
                summaries.add(
                    tf.summary.scalar('bounds/%s_max' % end_point,
                                      tf.reduce_max(clean_image)))

        #################################
        # Configure the moving averages #
        #################################
        # NOTE(review): the moving averages are set up unconditionally (there
        # is no check on FLAGS.moving_average_decay) - confirm the flag is
        # always set when this script is run.
        moving_average_variables = slim.get_model_variables()
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay, global_step)

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        # Update ops executed locally by trainer.
        update_ops.append(variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        #  and returns a train_tensor and summary_op
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)

        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        # train_tensor yields total_loss, with every op in update_ops
        # (first-clone update ops, moving-average update, gradient step)
        # attached as a dependency.
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op],
                                                          total_loss,
                                                          name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # Saver configured with keep_checkpoint_every_n_hours=2.
        saver = tf.train.Saver(keep_checkpoint_every_n_hours=2)

        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(train_tensor,
                            saver=saver,
                            logdir=FLAGS.train_dir,
                            master='',
                            is_chief=True,
                            init_fn=_get_init_fn(),
                            summary_op=summary_op,
                            number_of_steps=FLAGS.max_number_of_steps,
                            log_every_n_steps=FLAGS.log_every_n_steps,
                            save_summaries_secs=FLAGS.save_summaries_secs,
                            save_interval_secs=FLAGS.save_interval_secs,
                            sync_optimizer=None)
Пример #21
0
def train():
  """Train VGG-16 with model pruning on the ImageNet 'train' split.

  Builds the input pipeline with a TF-Slim DatasetDataProvider, builds the
  model through vgg.inference / vgg.loss / vgg.train, and adds the pruning
  mask-update op. The listed vgg_16 variables are restored from
  "trained_weights/vgg_16.ckpt" once the session is created; the loop then
  alternates the train op and the mask-update op until the
  MonitoredTrainingSession asks to stop.
  """
  # Single source of truth for the batch size: it sizes the input queues,
  # the training batch, and the throughput reported by the logger hook.
  batch_size = 32

  with tf.Graph().as_default():
    global_step = tf.contrib.framework.get_or_create_global_step()

    # images, labels = vgg.distorted_inputs()
    dataset = imagenet.get_split('train', '/data/ramyadML/TF-slim-data/imageNet/processed')

    # Creates a TF-Slim DataProvider which reads the dataset in the background
    # during both training and testing.
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        num_readers=4,
        common_queue_capacity=20 * batch_size,
        common_queue_min=10 * batch_size,
        shuffle=True)

    preprocessing_name = 'vgg_16'
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=True)

    [image, label] = provider.get(['image', 'label'])
    image = image_preprocessing_fn(image, 224, 224)
    # Shift labels down by one (presumably the dataset labels are 1-based
    # while the model expects 0-based classes - TODO confirm).
    label -= 1

    # batch up some training data
    images, labels = tf.train.batch([image, label],
                                    batch_size=batch_size,
                                    num_threads=4,
                                    capacity=5 * batch_size)

    print(images.shape)

    images = tf.cast(images, tf.float32)

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = vgg.inference(images)

    print("logits shape:", logits.shape)
    print("label shape", labels.shape)
    # Calculate loss.
    loss = vgg.loss(logits, labels)

    # Names of the variables restored from the pre-trained checkpoint.
    list_var_names = [
        'vgg_16/conv1/conv1_1/biases',
        'vgg_16/conv1/conv1_1/weights',
        'vgg_16/conv1/conv1_2/biases',
        'vgg_16/conv1/conv1_2/weights',
        'vgg_16/conv2/conv2_1/biases',
        'vgg_16/conv2/conv2_1/weights',
        'vgg_16/conv2/conv2_2/biases',
        'vgg_16/conv2/conv2_2/weights',
        'vgg_16/conv3/conv3_1/biases',
        'vgg_16/conv3/conv3_1/weights',
        'vgg_16/conv3/conv3_2/biases',
        'vgg_16/conv3/conv3_2/weights',
        'vgg_16/conv3/conv3_3/biases',
        'vgg_16/conv3/conv3_3/weights',
        'vgg_16/conv4/conv4_1/biases',
        'vgg_16/conv4/conv4_1/weights',
        'vgg_16/conv4/conv4_2/biases',
        'vgg_16/conv4/conv4_2/weights',
        'vgg_16/conv4/conv4_3/biases',
        'vgg_16/conv4/conv4_3/weights',
        'vgg_16/conv5/conv5_1/biases',
        'vgg_16/conv5/conv5_1/weights',
        'vgg_16/conv5/conv5_2/biases',
        'vgg_16/conv5/conv5_2/weights',
        'vgg_16/conv5/conv5_3/biases',
        'vgg_16/conv5/conv5_3/weights',
        'vgg_16/fc6/biases',
        'vgg_16/fc6/weights',
        'vgg_16/fc7/biases',
        'vgg_16/fc7/weights',
        'vgg_16/fc8/biases',
        'vgg_16/fc8/weights',
    ]

    # Collect the variables to restore *before* the train op is built, so
    # that only variables already present in the graph at this point are
    # picked up by the scope filter.
    var_list_to_restore = []
    for name in list_var_names:
      var_list_to_restore.extend(
          tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, name))

    saver = tf.train.Saver(var_list_to_restore)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = vgg.train(loss, global_step)

    # Parse pruning hyperparameters
    pruning_hparams = pruning.get_pruning_hparams().parse(FLAGS.pruning_hparams)

    # Create a pruning object using the pruning hyperparameters
    pruning_obj = pruning.Pruning(pruning_hparams, global_step=global_step)

    # Use the pruning_obj to add ops to the training graph to update the masks
    # The conditional_mask_update_op will update the masks only when the
    # training step is in [begin_pruning_step, end_pruning_step] specified in
    # the pruning spec proto
    mask_update_op = pruning_obj.conditional_mask_update_op()

    # Use the pruning_obj to add summaries to the graph to track the sparsity
    # of each of the layers
    pruning_obj.add_pruning_summaries()

    class _LoggerHook(tf.train.SessionRunHook):
      """Logs loss and runtime."""

      def begin(self):
        self._step = -1

      def before_run(self, run_context):
        self._step += 1
        self._start_time = time.time()
        return tf.train.SessionRunArgs(loss)  # Asks for loss value.

      def after_run(self, run_context, run_values):
        duration = time.time() - self._start_time
        loss_value = run_values.results
        if self._step % 10 == 0:
          # Throughput is based on the batch size actually fed to the model.
          examples_per_sec = batch_size / duration
          sec_per_batch = float(duration)

          format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
          print(format_str % (datetime.datetime.now(), self._step, loss_value,
                              examples_per_sec, sec_per_batch))

    with tf.train.MonitoredTrainingSession(
        checkpoint_dir=FLAGS.train_dir,
        hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
               tf.train.NanTensorHook(loss),
               _LoggerHook()],
        config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)) as mon_sess:

      # Load the pre-trained VGG-16 weights into the freshly created session.
      saver.restore(mon_sess, "trained_weights/vgg_16.ckpt")
      # NOTE(review): session hooks run on every mon_sess.run call, so the
      # mask-update run below also passes through _LoggerHook - confirm that
      # advancing the logged step counter twice per iteration is intended.
      while not mon_sess.should_stop():
        mon_sess.run(train_op)
        # Update the masks
        mon_sess.run(mask_update_op)
Пример #22
0
def main(_):
    if ((not FLAGS.dataset_dir_iris) or (not FLAGS.dataset_dir_face)):
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        ######################
        # Config model_deploy#
        ######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset_iris = dataset_factory.get_dataset(FLAGS.dataset_name_iris,
                                                   FLAGS.dataset_split_name,
                                                   FLAGS.dataset_dir_iris)

        dataset_face = dataset_factory.get_dataset(FLAGS.dataset_name_face,
                                                   FLAGS.dataset_split_name,
                                                   FLAGS.dataset_dir_face)

        ####################
        # Select the network #
        ####################

        #  network_fn_iris = nets_factory.get_network_fn(
        #     FLAGS.model_name_iris,
        #    num_classes=(dataset.num_classes - FLAGS.labels_offset),
        #    weight_decay=FLAGS.weight_decay,
        #   is_training=True)

        network_fn_joint = nets_factory.get_network_fn_joint(
            FLAGS.model_name_joint,
            num_classes=(dataset_face.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name_iris = FLAGS.preprocessing_name_iris or FLAGS.model_name_iris
        image_preprocessing_fn_iris = preprocessing_factory.get_preprocessing(
            preprocessing_name_iris, is_training=True)

        preprocessing_name_face = FLAGS.preprocessing_name_face or FLAGS.model_name_face
        image_preprocessing_fn_face = preprocessing_factory.get_preprocessing(
            preprocessing_name_face, is_training=True)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider_iris = slim.dataset_data_provider.DatasetDataProvider(
                dataset_iris,
                shuffle=False,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image_iris, label_iris] = provider_iris.get(['image', 'label'])
            label_iris -= FLAGS.labels_offset

            #	train_image_size_iris = FLAGS.train_image_size_iris or network_fn_iris.default_image_size
            new_height_iris = FLAGS.New_Height_Of_Image_iris or network_fn_joint.default_image_size
            new_width_iris = FLAGS.New_Width_Of_Image_iris or network_fn_joint.default_image_size

            #         image = image_preprocessing_fn(image, train_image_size, train_image_size)
            image_iris = image_preprocessing_fn_iris(image_iris,
                                                     new_height_iris,
                                                     new_width_iris)

            #  io.imshow(image)
            #  io.show()
            images_iris, labels_iris = tf.train.batch(
                [image_iris, label_iris],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            #      tf.image_summary('images', images)
            labels_iris = slim.one_hot_encoding(
                labels_iris, dataset_iris.num_classes - FLAGS.labels_offset)
            batch_queue_iris = slim.prefetch_queue.prefetch_queue(
                [images_iris, labels_iris],
                capacity=2 * deploy_config.num_clones)

        with tf.device(deploy_config.inputs_device()):
            provider_face = slim.dataset_data_provider.DatasetDataProvider(
                dataset_face,
                shuffle=False,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image_face, label_face] = provider_face.get(['image', 'label'])
            label_face -= FLAGS.labels_offset

            #	train_image_size_face = FLAGS.train_image_size_face or network_fn_face.default_image_size
            new_height_face = FLAGS.New_Height_Of_Image_face or network_fn_joint.default_image_size
            new_width_face = FLAGS.New_Width_Of_Image_face or network_fn_joint.default_image_size

            #         image = image_preprocessing_fn(image, train_image_size, train_image_size)
            image_face = image_preprocessing_fn_face(image_face,
                                                     new_height_face,
                                                     new_width_face)

            #  io.imshow(image)
            #  io.show()
            images_face, labels_face = tf.train.batch(
                [image_face, label_face],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            #      tf.image_summary('images', images)
            labels_face = slim.one_hot_encoding(
                labels_face, dataset_face.num_classes - FLAGS.labels_offset)
            batch_queue_face = slim.prefetch_queue.prefetch_queue(
                [images_face, labels_face],
                capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################

        def clone_fn(batch_queue_iris, batch_queue_face):
            """Allows data parallelism by creating multiple clones of network_fn."""
            images_iris, labels_iris = batch_queue_iris.dequeue()
            images_face, labels_face = batch_queue_face.dequeue()
            logits, end_points = network_fn_joint(images_face, images_iris)

            #  def clone_fn_face(batch_queue_face):
            #      """Allows data parallelism by creating multiple clones of network_fn."""
            #    images_face, labels_face = batch_queue_face.dequeue()
            #    logits_face, end_points_face, features_face,model_var_face = network_fn_face(images_face)

            #############################
            # Specify the loss function #
            #############################
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'],
                    labels_face,
                    label_smoothing=FLAGS.label_smoothing,
                    weight=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits,
                labels_face,
                label_smoothing=FLAGS.label_smoothing,
                weight=1.0)

            # Adding the accuracy metric
            with tf.name_scope('accuracy'):
                predictions = tf.argmax(logits, 1)
                labels_face = tf.argmax(labels_face, 1)
                accuracy = tf.reduce_mean(
                    tf.to_float(tf.equal(predictions, labels_face)))
                tf.add_to_collection('accuracy', accuracy)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(
            deploy_config, clone_fn, [batch_queue_iris, batch_queue_face])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.histogram_summary('activations/' + end_point, x))
            summaries.add(
                tf.scalar_summary('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.scalar_summary('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.histogram_summary(variable.op.name, variable))

        # Add summaries for the input images.
        summaries.add(
            tf.image_summary('face',
                             images_face,
                             max_images=15,
                             name='Face_images'))
        summaries.add(
            tf.image_summary('iris',
                             images_iris,
                             max_images=15,
                             name='Iris_images'))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset_face.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(
                tf.scalar_summary('learning_rate',
                                  learning_rate,
                                  name='learning_rate'))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables,
                replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
                total_num_replicas=FLAGS.worker_replicas)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        #  and returns a train_tensor and summary_op
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)

        # # Add total_loss to summary.
        # summaries.add(tf.scalar_summary('total_loss', total_loss,
        #                                 name='total_loss'))

        # Add total_loss and accuacy to summary.
        summaries.add(
            tf.scalar_summary('eval/Total_Loss', total_loss,
                              name='total_loss'))
        accuracy = tf.get_collection('accuracy', first_clone_scope)[0]
        summaries.add(
            tf.scalar_summary('eval/Accuracy', accuracy, name='accuracy'))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op],
                                                          total_loss,
                                                          name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.merge_summary(list(summaries), name='summary_op')

        init_iris, init_feed = _get_init_op()

        #	var_2=[v for v in tf.all_variables() if v.name == "vgg_19/conv3/conv3_3/weights:0"][0]

        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=init_iris,
            init_feed_dict=init_feed,
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None)
def eval_model(model_name):
    """Evaluate a trained CIFAR-10 candidate network and time the evaluation.

    Reads the job file ``jobs/job<model_name>.txt`` under the local hacone
    checkout, collects the per-block candidate parameters from it, builds
    the ``cifarnet`` network in inference mode and runs
    ``slim.evaluation.evaluate_once`` on the checkpoint stored under
    ``models_trained/<model_name>``.

    Args:
        model_name: identifier used to format the job file name and to
            locate the directory that holds the trained checkpoints.

    Returns:
        Wall-clock duration, in seconds, of the ``evaluate_once`` call.
    """
    slim = tf.contrib.slim

    print("eval model")
    PATH_TO_HACONE_LOCAL = '/home/lile/Projects/git_repo/hacone'

    with open(PATH_TO_HACONE_LOCAL +
              '/jobs/job{}.txt'.format(model_name)) as fp:
        data = json.load(fp)

    # The 'params' entry is itself a JSON-encoded string.
    params = json.loads(data['params'])

    # Flatten the five blocks into [i1, i2, o1, o2, i1, i2, ...].
    # `range` (not `xrange`) keeps this working on both Python 2 and 3.
    candidate = []
    for block in range(5):
        for slot in ('i1', 'i2', 'o1', 'o2'):
            candidate.append(params['b{}_{}'.format(block, slot)])

    N = 2
    F = 24

    dataset_dir = '/home/lile/dataset/cifar10_val'
    batch_size = 100
    output_dir = os.path.join(PATH_TO_HACONE_LOCAL, 'models_trained',
                              model_name)
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # Called for its side effect only: the graph gets a global step
        # before the variables to restore are collected below.
        slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset('cifar10', 'val', dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            'cifarnet',
            candidate,
            N,
            F,
            num_classes=(dataset.num_classes - 0),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * batch_size,
            common_queue_min=batch_size)
        [image, label] = provider.get(['image', 'label'])

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = 'cifarnet'
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = network_fn.default_image_size

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        images, labels = tf.train.batch([image, label],
                                        batch_size=batch_size,
                                        num_threads=4,
                                        capacity=5 * batch_size)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
            slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5':
            slim.metrics.streaming_recall_at_k(logits, labels, 5),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1

        # Enough batches for a single pass over the whole split.
        num_batches = math.ceil(dataset.num_samples / float(batch_size))

        # A directory is resolved to its most recent checkpoint; anything
        # else is passed through unchanged.
        checkpoint_path = output_dir
        if tf.gfile.IsDirectory(checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)

        tf.logging.info('Evaluating %s' % checkpoint_path)

        final_op = [names_to_values['Accuracy']]  # top-1 accuracy
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        start_time = time.time()
        final_values = slim.evaluation.evaluate_once(
            master='',
            checkpoint_path=checkpoint_path,
            logdir=output_dir,
            session_config=config,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            final_op=final_op,
            variables_to_restore=variables_to_restore)
        duration = time.time() - start_time
        print('________________________________')
        print('duration :' + str(duration))
        print('________________________________')

        print(final_values)
        return duration
def eval(checkpoint_path,
         eval_dir,
         dataset_dir,
         logo_name,
         model_name="inception_v4",
         batch_size=100):
    """Run one evaluation pass for a single logo head and print its accuracy.

    Builds ``model_name`` in inference mode for ``logo_name``, streams the
    'validation' split through it and prints the accumulated top-1 accuracy
    once all batches have been evaluated.

    Args:
        checkpoint_path: checkpoint file, or a directory whose latest
            checkpoint is used.
        eval_dir: directory handed to ``evaluate_once`` as ``logdir``.
        dataset_dir: directory the 'validation' split is read from.
        logo_name: key selecting the entry of the logits mapping returned
            by the network that is evaluated.
        model_name: network (and preprocessing) name passed to the
            factories.
        batch_size: number of examples per evaluation batch.

    Raises:
        ValueError: if ``dataset_dir`` is empty.
    """
    if not dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # Called for its side effect only: the graph gets a global step
        # before the variables to restore are collected below.
        slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = my_dataset.get_split('validation', dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            model_name,
            is_training=False,
            logo_names=[logo_name])

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * batch_size,
            common_queue_min=batch_size)
        [image, label] = provider.get(['image', 'label'])

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = network_fn.default_image_size

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        images, labels = tf.train.batch([image, label],
                                        batch_size=batch_size,
                                        num_threads=4,
                                        capacity=5 * batch_size)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images, logo_names=[logo_name])
        variables_to_restore = slim.get_variables_to_restore()

        logo_logits = logits[logo_name]
        predictions = tf.argmax(logo_logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
            slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5':
            slim.metrics.streaming_recall_at_k(logo_logits, labels, 5),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # This ensures that we make a single pass over all of the data.
        num_batches = math.ceil(dataset.num_samples / float(batch_size))

        if tf.gfile.IsDirectory(checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)

        # evaluate_once only returns a value when final_op is given; without
        # it the result is None, so fetch the accumulated accuracy explicitly.
        accuracy = slim.evaluation.evaluate_once(
            master="",
            checkpoint_path=checkpoint_path,
            logdir=eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            final_op=names_to_values['Accuracy'],
            variables_to_restore=variables_to_restore)

        print(accuracy)
def main(_):
    """Evaluate a checkpoint and record the misclassified examples.

    Builds the evaluation graph for ``FLAGS.model_name`` on the dataset
    split selected by the flags and accumulates streaming metrics over the
    evaluation batches. Afterwards it prints the confusion matrix and
    pickles one ``(filename, true class, predicted class, top probability)``
    tuple per misclassified image to ``misclassified_images.p``. The same
    list is printed when ``FLAGS.print_misclassified_images`` is set.

    Args:
        _: unused positional argument.

    Raises:
        ValueError: if ``FLAGS.dataset_dir`` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        num_classes = dataset.num_classes - FLAGS.labels_offset
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=num_classes,
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        [image, label, filename] = provider.get(['image', 'label', 'filename'])
        label -= FLAGS.labels_offset

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        images, labels, filenames = tf.train.batch(
            [image, label, filename],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        ####################
        # Define the model #
        ####################
        logits, end_points = network_fn(images)
        preprobs = end_points['Predictions']
        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)
        mislabeled = tf.not_equal(predictions, labels)
        mislabeled_filenames = tf.boolean_mask(filenames, mislabeled)
        original_classes = tf.boolean_mask(labels, mislabeled)
        predicted_classes = tf.boolean_mask(predictions, mislabeled)
        # The top probability must go through the same mask as the other
        # three tensors; otherwise the concatenated probabilities cover every
        # example and no longer line up with the misclassified entries when
        # they are zipped together below.
        probabilities = tf.boolean_mask(
            tf.reduce_max(preprobs, 1), mislabeled)

        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
            slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5':
            slim.metrics.streaming_recall_at_k(logits, labels, 5),
            'Mean_absolute':
            tf.metrics.mean_absolute_error(labels, predictions),
            'Confusion_matrix':
            _get_streaming_metrics(predictions, labels, num_classes),
            'mislabeled_filenames':
            tf.contrib.metrics.streaming_concat(mislabeled_filenames),
            'original_classes':
            tf.contrib.metrics.streaming_concat(original_classes),
            'predicted_classes':
            tf.contrib.metrics.streaming_concat(predicted_classes),
            'probabilities':
            tf.contrib.metrics.streaming_concat(probabilities),
        })

        # Print the summaries to screen. Only the scalar metrics get a
        # summary; the entries listed here are skipped.
        unnames = [
            'Confusion_matrix', 'mislabeled_filenames', 'original_classes',
            'predicted_classes', 'probabilities'
        ]
        for name, value in names_to_values.items():
            if name not in unnames:
                summary_name = 'eval/%s' % name
                op = tf.summary.scalar(summary_name, value, collections=[])
                op = tf.Print(op, [value], summary_name)
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(dataset.num_samples /
                                    float(FLAGS.batch_size))

        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s' % checkpoint_path)
        eval_op = list(names_to_updates.values())
        # NOTE(review): the confusion matrix is fetched through its *update*
        # op while the other entries use value ops, so the final fetch looks
        # like it runs one more update - confirm against
        # _get_streaming_metrics.
        [
            confusion_matrix,
            wrong_filenames,
            wrong_true_classes,
            wrong_predicted_classes,
            wrong_probabilities,
        ] = slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=eval_op,
            variables_to_restore=variables_to_restore,
            final_op=[
                names_to_updates['Confusion_matrix'],
                names_to_values['mislabeled_filenames'],
                names_to_values['original_classes'],
                names_to_values['predicted_classes'],
                names_to_values['probabilities']
            ])
        print(confusion_matrix)

        # One tuple per misclassified image, built once and reused for both
        # the pickle dump and the optional print.
        misclassified = list(
            zip(wrong_filenames, wrong_true_classes, wrong_predicted_classes,
                wrong_probabilities))
        with open('misclassified_images.p', 'wb') as f:
            pickle.dump(misclassified, f)
        if FLAGS.print_misclassified_images:
            print(misclassified)
Пример #26
0
def main(_):
    """Evaluate a 14-output model and report one streaming AUC per output.

    Reads an image together with the fields 'label1' .. 'label14' from the
    dataset, runs the network in inference mode, rescales every logit column
    to the range spanned by its values in the current batch and feeds each
    rescaled column with its matching label column to
    ``slim.metrics.streaming_auc`` (reported as 'AUC1' .. 'AUC14').

    Args:
        _: unused positional argument.

    Raises:
        ValueError: if ``FLAGS.dataset_dir`` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    num_outputs = 14
    output_ids = range(1, num_outputs + 1)

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        # Dataset selected through the command-line flags.
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        # Network in inference mode with a fixed number of outputs.
        network_fn = nets_factory.get_network_fn(FLAGS.model_name,
                                                 num_classes=14,
                                                 is_training=False)

        # Provider that reads single examples from the dataset.
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        fetched = list(
            provider.get(['image'] + ['label%d' % k for k in output_ids]))
        image = fetched[0]
        single_labels = fetched[1:]
        print(image.shape)

        # Preprocessing.
        preprocessing_name = 'nihxray'
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        # Batch the image together with its 14 label tensors.
        batched = list(
            tf.train.batch(
                [image] + single_labels,
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size))
        images = batched[0]
        # Give every label batch a trailing axis so the 14 of them can be
        # concatenated side by side into a single tensor.
        label_columns = [tf.expand_dims(column, 1) for column in batched[1:]]
        labels = tf.concat(label_columns, 1)
        print(labels.shape)

        # Model.
        logits, _ = network_fn(images)

        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        def _min_max_rescale(column):
            # (x - min) / (max - min), with min and max taken over the
            # values of `column` in the current batch.
            # NOTE(review): the denominator is zero when all values in the
            # batch are equal - confirm this cannot happen in practice.
            shifted = tf.subtract(column, tf.reduce_min(column))
            spread = tf.subtract(tf.reduce_max(column), tf.reduce_min(column))
            return tf.div(shifted, spread)

        predictions = logits
        raw_scores = [predictions[:, k] for k in range(num_outputs)]
        scores = [_min_max_rescale(column) for column in raw_scores]
        targets = [labels[:, k] for k in range(num_outputs)]
        print(scores[0].shape)
        print(targets[0].shape)

        # One streaming AUC per output, keyed 'AUC1' .. 'AUC14'.
        metric_map = {}
        for k, score, target in zip(output_ids, scores, targets):
            metric_map['AUC%d' % k] = slim.metrics.streaming_auc(score, target)
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(
            metric_map)

        # Print the summaries to screen.
        for metric_name, metric_value in names_to_values.items():
            summary_name = 'eval/%s' % metric_name
            summary = tf.summary.scalar(
                summary_name, metric_value, collections=[])
            summary = tf.Print(summary, [metric_value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, summary)

        # TODO(sguada) use num_epochs=1
        # An explicit batch limit wins; otherwise make a single pass over
        # all of the data.
        num_batches = FLAGS.max_num_batches or math.ceil(
            dataset.num_samples / float(FLAGS.batch_size))

        # A directory is resolved to its latest checkpoint.
        checkpoint_path = (
            tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            if tf.gfile.IsDirectory(FLAGS.checkpoint_path)
            else FLAGS.checkpoint_path)

        tf.logging.info('Evaluating %s' % checkpoint_path)

        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore)
Пример #27
0
def main(_):
    """Evaluate every checkpoint in --checkpoint_dir and record the metrics.

    Builds the evaluation graph once, then runs `slim.evaluation.evaluate_once`
    for each path in the checkpoint state's `all_model_checkpoint_paths`. Each
    run writes into a throw-away temp directory; the 'eval/Accuracy' and
    'eval/Recall_5' summary values are read back from it and re-emitted into
    FLAGS.eval_dir, stamped with the wall time and step parsed from the
    checkpoint file name (`model.ckpt-<wall_time>-<step>`).

    Checkpoints whose name does not follow that pattern, or whose evaluation
    produced no Accuracy/Recall_5 values, are skipped with a warning.

    Args:
      _: unused (argv placeholder).

    Raises:
      ValueError: if --dataset_dir is missing, --checkpoint_dir is not a
        directory, --eval_dir already exists, or no checkpoint state can be
        read from --checkpoint_dir.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')
    if not tf.gfile.IsDirectory(FLAGS.checkpoint_dir):
        raise ValueError(
            'You must supply the checkpoint directory with --checkpoint_dir')
    if os.path.exists(FLAGS.eval_dir):
        raise ValueError('eval_dir exists')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        [image, label] = provider.get(['image', 'label'])
        label -= FLAGS.labels_offset

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
            slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5':
            slim.metrics.streaming_recall_at_k(logits, labels, 5),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(dataset.num_samples /
                                    float(FLAGS.batch_size))

        # Resolve the checkpoint list *before* creating the writer: the writer
        # creates eval_dir, and a later rerun would then be rejected by the
        # 'eval_dir exists' guard above.
        checkpoint_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if checkpoint_state is None:
            raise ValueError(
                'No checkpoint state found in %s' % FLAGS.checkpoint_dir)

        # Checkpoint files are expected to be named
        # model.ckpt-<wall_time>-<step>; the fractional part of wall_time is
        # optional so integer timestamps keep matching.
        prog = re.compile(
            r".*model\.ckpt-(?P<wall_time>\d+(?:\.\d+)?)-(?P<step>\d+)")

        writer = tf.summary.FileWriter(FLAGS.eval_dir)
        try:
            writer.add_event(
                event_pb2.Event(wall_time=0, file_version="brain.Event:2"))

            for checkpoint_path in checkpoint_state.all_model_checkpoint_paths:
                # Parse the name first so an unparseable checkpoint is skipped
                # before paying for a full evaluation pass.
                m = prog.match(checkpoint_path)
                if m is None:
                    tf.logging.warning(
                        'Skipping %s: name does not match '
                        'model.ckpt-<wall_time>-<step>' % checkpoint_path)
                    continue
                wall_time = float(m.group('wall_time'))
                step = int(m.group('step'))

                # Reset per checkpoint so values from a previous iteration are
                # never re-emitted for this one.
                accuracy = None
                recall = None

                temp_eval_dir = tempfile.mkdtemp()
                try:
                    tf.logging.info('Evaluating %s' % checkpoint_path)
                    slim.evaluation.evaluate_once(
                        master=FLAGS.master,
                        checkpoint_path=checkpoint_path,
                        logdir=temp_eval_dir,
                        num_evals=num_batches,
                        eval_op=list(names_to_updates.values()),
                        variables_to_restore=variables_to_restore)

                    event_files = glob.glob(
                        os.path.join(temp_eval_dir, 'events.out*'))
                    if event_files:
                        for event in tf.train.summary_iterator(event_files[0]):
                            for value in event.summary.value:
                                if value.tag == 'eval/Accuracy':
                                    accuracy = value
                                if value.tag == 'eval/Recall_5':
                                    recall = value
                finally:
                    try:
                        shutil.rmtree(temp_eval_dir)
                    except OSError as e:
                        # A directory that is already gone is fine; anything
                        # else is a real cleanup failure.
                        if e.errno != errno.ENOENT:
                            raise

                if accuracy is None or recall is None:
                    tf.logging.warning(
                        'Skipping %s: evaluation produced no eval/Accuracy '
                        'or eval/Recall_5 summary' % checkpoint_path)
                    continue

                summary = summary_pb2.Summary(value=[accuracy, recall])
                writer.add_event(
                    event_pb2.Event(wall_time=wall_time,
                                    step=step,
                                    summary=summary))
        finally:
            # Flush queued events to disk and release the file handle.
            writer.close()
def main_fun(argv, ctx):
  """Per-node entry point: define flags, start the cluster server, and train.

  Every node defines the training flags, overrides a few of them from `ctx`,
  and starts a server via `TFNode.start_cluster_server`. Nodes whose job name
  is 'ps' then block in `server.join()`; all other nodes build the slim
  training graph and run `slim.learning.train`.

  Args:
    argv: command-line argument list; installed as `sys.argv` so the flag
      parser sees it.
    ctx: node context providing `job_name`, `task_index`, `cluster_spec` and
      `worker_num` (presumably the TensorFlowOnSpark node context -- confirm
      against the caller).

  Raises:
    ValueError: on a worker, if --dataset_dir is not supplied, or if the
      optimizer / learning-rate decay type flag is not recognized.
    AssertionError: if --num_ps_tasks disagrees with the number of 'ps'
      entries in `ctx.cluster_spec`.
  """
  import tensorflow as tf
  from tensorflow.python.ops import control_flow_ops
  from datasets import dataset_factory
  from deployment import model_deploy
  from nets import nets_factory
  from preprocessing import preprocessing_factory

  sys.argv = argv

  slim = tf.contrib.slim

  tf.app.flags.DEFINE_integer(
      'num_gpus', 1, 'The number of GPUs to use per node')

  tf.app.flags.DEFINE_boolean('rdma', False, 'Whether to use rdma.')

  tf.app.flags.DEFINE_string(
      'master', '', 'The address of the TensorFlow master to use.')

  tf.app.flags.DEFINE_string(
      'train_dir', '/tmp/tfmodel/',
      'Directory where checkpoints and event logs are written to.')

  tf.app.flags.DEFINE_integer('num_clones', 1,
                              'Number of model clones to deploy.')

  tf.app.flags.DEFINE_boolean('clone_on_cpu', False,
                              'Use CPUs to deploy clones.')

  tf.app.flags.DEFINE_integer('worker_replicas', 1, 'Number of worker replicas.')

  tf.app.flags.DEFINE_integer(
      'num_ps_tasks', 0,
      'The number of parameter servers. If the value is 0, then the parameters '
      'are handled locally by the worker.')

  tf.app.flags.DEFINE_integer(
      'num_readers', 4,
      'The number of parallel readers that read data from the dataset.')

  tf.app.flags.DEFINE_integer(
      'num_preprocessing_threads', 4,
      'The number of threads used to create the batches.')

  tf.app.flags.DEFINE_integer(
      'log_every_n_steps', 10,
      'The frequency with which logs are print.')

  tf.app.flags.DEFINE_integer(
      'save_summaries_secs', 600,
      'The frequency with which summaries are saved, in seconds.')

  tf.app.flags.DEFINE_integer(
      'save_interval_secs', 600,
      'The frequency with which the model is saved, in seconds.')

  tf.app.flags.DEFINE_integer(
      'task', 0, 'Task id of the replica running the training.')

  ######################
  # Optimization Flags #
  ######################

  tf.app.flags.DEFINE_float(
      'weight_decay', 0.00004, 'The weight decay on the model weights.')

  tf.app.flags.DEFINE_string(
      'optimizer', 'rmsprop',
      'The name of the optimizer, one of "adadelta", "adagrad", "adam",'
      '"ftrl", "momentum", "sgd" or "rmsprop".')

  tf.app.flags.DEFINE_float(
      'adadelta_rho', 0.95,
      'The decay rate for adadelta.')

  tf.app.flags.DEFINE_float(
      'adagrad_initial_accumulator_value', 0.1,
      'Starting value for the AdaGrad accumulators.')

  tf.app.flags.DEFINE_float(
      'adam_beta1', 0.9,
      'The exponential decay rate for the 1st moment estimates.')

  tf.app.flags.DEFINE_float(
      'adam_beta2', 0.999,
      'The exponential decay rate for the 2nd moment estimates.')

  tf.app.flags.DEFINE_float('opt_epsilon', 1.0, 'Epsilon term for the optimizer.')

  tf.app.flags.DEFINE_float('ftrl_learning_rate_power', -0.5,
                            'The learning rate power.')

  tf.app.flags.DEFINE_float(
      'ftrl_initial_accumulator_value', 0.1,
      'Starting value for the FTRL accumulators.')

  tf.app.flags.DEFINE_float(
      'ftrl_l1', 0.0, 'The FTRL l1 regularization strength.')

  tf.app.flags.DEFINE_float(
      'ftrl_l2', 0.0, 'The FTRL l2 regularization strength.')

  tf.app.flags.DEFINE_float(
      'momentum', 0.9,
      'The momentum for the MomentumOptimizer and RMSPropOptimizer.')

  tf.app.flags.DEFINE_float('rmsprop_decay', 0.9, 'Decay term for RMSProp.')

  #######################
  # Learning Rate Flags #
  #######################

  tf.app.flags.DEFINE_string(
      'learning_rate_decay_type',
      'exponential',
      'Specifies how the learning rate is decayed. One of "fixed", "exponential",'
      ' or "polynomial"')

  tf.app.flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')

  tf.app.flags.DEFINE_float(
      'end_learning_rate', 0.0001,
      'The minimal end learning rate used by a polynomial decay learning rate.')

  tf.app.flags.DEFINE_float(
      'label_smoothing', 0.0, 'The amount of label smoothing.')

  tf.app.flags.DEFINE_float(
      'learning_rate_decay_factor', 0.94, 'Learning rate decay factor.')

  tf.app.flags.DEFINE_float(
      'num_epochs_per_decay', 2.0,
      'Number of epochs after which learning rate decays.')

  tf.app.flags.DEFINE_bool(
      'sync_replicas', False,
      'Whether or not to synchronize the replicas during training.')

  tf.app.flags.DEFINE_integer(
      'replicas_to_aggregate', 1,
      'The Number of gradients to collect before updating params.')

  tf.app.flags.DEFINE_float(
      'moving_average_decay', None,
      'The decay to use for the moving average.'
      'If left as None, then moving averages are not used.')

  #######################
  # Dataset Flags #
  #######################

  tf.app.flags.DEFINE_string(
      'dataset_name', 'imagenet', 'The name of the dataset to load.')

  tf.app.flags.DEFINE_string(
      'dataset_split_name', 'train', 'The name of the train/test split.')

  tf.app.flags.DEFINE_string(
      'dataset_dir', None, 'The directory where the dataset files are stored.')

  tf.app.flags.DEFINE_integer(
      'labels_offset', 0,
      'An offset for the labels in the dataset. This flag is primarily used to '
      'evaluate the VGG and ResNet architectures which do not use a background '
      'class for the ImageNet dataset.')

  tf.app.flags.DEFINE_string(
      'model_name', 'inception_v3', 'The name of the architecture to train.')

  tf.app.flags.DEFINE_string(
      'preprocessing_name', None, 'The name of the preprocessing to use. If left '
      'as `None`, then the model_name flag is used.')

  tf.app.flags.DEFINE_integer(
      'batch_size', 32, 'The number of samples in each batch.')

  tf.app.flags.DEFINE_integer(
      'train_image_size', None, 'Train image size')

  tf.app.flags.DEFINE_integer('max_number_of_steps', None,
                              'The maximum number of training steps.')

  #####################
  # Fine-Tuning Flags #
  #####################

  tf.app.flags.DEFINE_string(
      'checkpoint_path', None,
      'The path to a checkpoint from which to fine-tune.')

  tf.app.flags.DEFINE_string(
      'checkpoint_exclude_scopes', None,
      'Comma-separated list of scopes of variables to exclude when restoring '
      'from a checkpoint.')

  tf.app.flags.DEFINE_string(
      'trainable_scopes', None,
      'Comma-separated list of scopes to filter the set of variables to train.'
      'By default, None would train all the variables.')

  tf.app.flags.DEFINE_boolean(
      'ignore_missing_vars', False,
      'When restoring a checkpoint would ignore missing variables.')

  # Override the topology-related flags with what the node context reports,
  # so the command line cannot disagree with the actual cluster layout.
  FLAGS = tf.app.flags.FLAGS
  FLAGS.job_name = ctx.job_name
  FLAGS.task = ctx.task_index
  # One model clone per GPU on this node.
  FLAGS.num_clones = FLAGS.num_gpus
  FLAGS.worker_replicas = len(ctx.cluster_spec['worker'])
  assert(FLAGS.num_ps_tasks == (len(ctx.cluster_spec['ps']) if 'ps' in ctx.cluster_spec else 0))

  def _configure_learning_rate(num_samples_per_epoch, global_step):
    """Configures the learning rate.

    Args:
      num_samples_per_epoch: The number of samples in each epoch of training.
      global_step: The global_step tensor.

    Returns:
      A `Tensor` representing the learning rate.

    Raises:
      ValueError: if FLAGS.learning_rate_decay_type is not recognized.
    """
    decay_steps = int(num_samples_per_epoch / FLAGS.batch_size *
                      FLAGS.num_epochs_per_decay)
    if FLAGS.sync_replicas:
      decay_steps /= FLAGS.replicas_to_aggregate

    if FLAGS.learning_rate_decay_type == 'exponential':
      return tf.train.exponential_decay(FLAGS.learning_rate,
                                        global_step,
                                        decay_steps,
                                        FLAGS.learning_rate_decay_factor,
                                        staircase=True,
                                        name='exponential_decay_learning_rate')
    elif FLAGS.learning_rate_decay_type == 'fixed':
      return tf.constant(FLAGS.learning_rate, name='fixed_learning_rate')
    elif FLAGS.learning_rate_decay_type == 'polynomial':
      return tf.train.polynomial_decay(FLAGS.learning_rate,
                                       global_step,
                                       decay_steps,
                                       FLAGS.end_learning_rate,
                                       power=1.0,
                                       cycle=False,
                                       name='polynomial_decay_learning_rate')
    else:
      # Use % so the offending value is actually interpolated into the
      # message instead of being passed as a second exception argument.
      raise ValueError('learning_rate_decay_type [%s] was not recognized' %
                       FLAGS.learning_rate_decay_type)


  def _configure_optimizer(learning_rate):
    """Configures the optimizer used for training.

    Args:
      learning_rate: A scalar or `Tensor` learning rate.

    Returns:
      An instance of an optimizer.

    Raises:
      ValueError: if FLAGS.optimizer is not recognized.
    """
    if FLAGS.optimizer == 'adadelta':
      optimizer = tf.train.AdadeltaOptimizer(
          learning_rate,
          rho=FLAGS.adadelta_rho,
          epsilon=FLAGS.opt_epsilon)
    elif FLAGS.optimizer == 'adagrad':
      optimizer = tf.train.AdagradOptimizer(
          learning_rate,
          initial_accumulator_value=FLAGS.adagrad_initial_accumulator_value)
    elif FLAGS.optimizer == 'adam':
      optimizer = tf.train.AdamOptimizer(
          learning_rate,
          beta1=FLAGS.adam_beta1,
          beta2=FLAGS.adam_beta2,
          epsilon=FLAGS.opt_epsilon)
    elif FLAGS.optimizer == 'ftrl':
      optimizer = tf.train.FtrlOptimizer(
          learning_rate,
          learning_rate_power=FLAGS.ftrl_learning_rate_power,
          initial_accumulator_value=FLAGS.ftrl_initial_accumulator_value,
          l1_regularization_strength=FLAGS.ftrl_l1,
          l2_regularization_strength=FLAGS.ftrl_l2)
    elif FLAGS.optimizer == 'momentum':
      optimizer = tf.train.MomentumOptimizer(
          learning_rate,
          momentum=FLAGS.momentum,
          name='Momentum')
    elif FLAGS.optimizer == 'rmsprop':
      optimizer = tf.train.RMSPropOptimizer(
          learning_rate,
          decay=FLAGS.rmsprop_decay,
          momentum=FLAGS.momentum,
          epsilon=FLAGS.opt_epsilon)
    elif FLAGS.optimizer == 'sgd':
      optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    else:
      raise ValueError('Optimizer [%s] was not recognized' % FLAGS.optimizer)
    return optimizer


  def _add_variables_summaries(learning_rate):
    """Returns histogram summaries for model variables plus the learning rate.

    NOTE: not called anywhere inside main_fun; kept for parity with the
    other helpers.
    """
    summaries = []
    for variable in slim.get_model_variables():
      summaries.append(tf.summary.histogram(variable.op.name, variable))
    summaries.append(tf.summary.scalar('training/Learning Rate', learning_rate))
    return summaries


  def _get_init_fn():
    """Returns a function run by the chief worker to warm-start the training.

    Note that the init_fn is only run when initializing the model during the very
    first global step.

    Returns:
      An init function run by the supervisor, or None when no
      --checkpoint_path was given or train_dir already holds a checkpoint.
    """
    if FLAGS.checkpoint_path is None:
      return None

    # Warn the user if a checkpoint exists in the train_dir. Then we'll be
    # ignoring the checkpoint anyway.
    if tf.train.latest_checkpoint(FLAGS.train_dir):
      tf.logging.info(
          'Ignoring --checkpoint_path because a checkpoint already exists in %s'
          % FLAGS.train_dir)
      return None

    exclusions = []
    if FLAGS.checkpoint_exclude_scopes:
      exclusions = [scope.strip()
                    for scope in FLAGS.checkpoint_exclude_scopes.split(',')]

    # TODO(sguada) variables.filter_variables()
    # Keep every model variable whose op name does not start with one of the
    # excluded scope prefixes.
    variables_to_restore = [
        var for var in slim.get_model_variables()
        if not any(var.op.name.startswith(exclusion)
                   for exclusion in exclusions)]

    if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
      checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    else:
      checkpoint_path = FLAGS.checkpoint_path

    tf.logging.info('Fine-tuning from %s' % checkpoint_path)

    return slim.assign_from_checkpoint_fn(
        checkpoint_path,
        variables_to_restore,
        ignore_missing_vars=FLAGS.ignore_missing_vars)


  def _get_variables_to_train():
    """Returns a list of variables to train.

    Returns:
      A list of variables to train by the optimizer: all trainable variables
      when --trainable_scopes is unset, otherwise the trainable variables
      collected from each listed scope.
    """
    if FLAGS.trainable_scopes is None:
      return tf.trainable_variables()

    scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')]
    variables_to_train = []
    for scope in scopes:
      variables_to_train.extend(
          tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope))
    return variables_to_train

  # main
  cluster_spec, server = TFNode.start_cluster_server(ctx=ctx, num_gpus=FLAGS.num_gpus, rdma=FLAGS.rdma)
  if ctx.job_name == 'ps':
    # `ps` jobs wait for incoming connections from the workers.
    server.join()
  else:
    # `worker` jobs will actually do the work.
    if not FLAGS.dataset_dir:
      raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
      #######################
      # Config model_deploy #
      #######################
      deploy_config = model_deploy.DeploymentConfig(
          num_clones=FLAGS.num_clones,
          clone_on_cpu=FLAGS.clone_on_cpu,
          replica_id=FLAGS.task,
          num_replicas=FLAGS.worker_replicas,
          num_ps_tasks=FLAGS.num_ps_tasks)

      # Create global_step. It is pinned explicitly to ps task 0 rather than
      # created via deploy_config.variables_device() and
      # slim.create_global_step().
      with tf.device("/job:ps/task:0"):
        global_step = tf.Variable(0, name="global_step")

      ######################
      # Select the dataset #
      ######################
      dataset = dataset_factory.get_dataset(
          FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

      ######################
      # Select the network #
      ######################
      network_fn = nets_factory.get_network_fn(
          FLAGS.model_name,
          num_classes=(dataset.num_classes - FLAGS.labels_offset),
          weight_decay=FLAGS.weight_decay,
          is_training=True)

      #####################################
      # Select the preprocessing function #
      #####################################
      preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
      image_preprocessing_fn = preprocessing_factory.get_preprocessing(
          preprocessing_name,
          is_training=True)

      ##############################################################
      # Create a dataset provider that loads data from the dataset #
      ##############################################################
      with tf.device(deploy_config.inputs_device()):
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=FLAGS.num_readers,
            common_queue_capacity=20 * FLAGS.batch_size,
            common_queue_min=10 * FLAGS.batch_size)
        [image, label] = provider.get(['image', 'label'])
        label -= FLAGS.labels_offset

        train_image_size = FLAGS.train_image_size or network_fn.default_image_size

        image = image_preprocessing_fn(image, train_image_size, train_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)
        labels = slim.one_hot_encoding(
            labels, dataset.num_classes - FLAGS.labels_offset)
        batch_queue = slim.prefetch_queue.prefetch_queue(
            [images, labels], capacity=2 * deploy_config.num_clones)

      ####################
      # Define the model #
      ####################
      def clone_fn(batch_queue):
        """Allows data parallelism by creating multiple clones of network_fn."""
        images, labels = batch_queue.dequeue()
        logits, end_points = network_fn(images)

        #############################
        # Specify the loss function #
        #############################
        if 'AuxLogits' in end_points:
          tf.losses.softmax_cross_entropy(
              logits=end_points['AuxLogits'], onehot_labels=labels,
              label_smoothing=FLAGS.label_smoothing, weights=0.4, scope='aux_loss')
        tf.losses.softmax_cross_entropy(
            logits=logits, onehot_labels=labels,
            label_smoothing=FLAGS.label_smoothing, weights=1.0)
        return end_points

      # Gather initial summaries.
      summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

      clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
      first_clone_scope = deploy_config.clone_scope(0)
      # Gather update_ops from the first clone. These contain, for example,
      # the updates for the batch_norm variables created by network_fn.
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

      # Add summaries for end_points.
      end_points = clones[0].outputs
      for end_point in end_points:
        x = end_points[end_point]
        summaries.add(tf.summary.histogram('activations/' + end_point, x))
        summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                        tf.nn.zero_fraction(x)))

      # Add summaries for losses.
      for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
        summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

      # Add summaries for variables.
      for variable in slim.get_model_variables():
        summaries.add(tf.summary.histogram(variable.op.name, variable))

      #################################
      # Configure the moving averages #
      #################################
      if FLAGS.moving_average_decay:
        moving_average_variables = slim.get_model_variables()
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay, global_step)
      else:
        moving_average_variables, variable_averages = None, None

      #########################################
      # Configure the optimization procedure. #
      #########################################
      with tf.device(deploy_config.optimizer_device()):
        learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
        optimizer = _configure_optimizer(learning_rate)
        summaries.add(tf.summary.scalar('learning_rate', learning_rate))

      if FLAGS.sync_replicas:
        # If sync_replicas is enabled, the averaging will be done in the chief
        # queue runner.
        optimizer = tf.train.SyncReplicasOptimizer(
            opt=optimizer,
            replicas_to_aggregate=FLAGS.replicas_to_aggregate,
            variable_averages=variable_averages,
            variables_to_average=moving_average_variables,
            replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
            total_num_replicas=FLAGS.worker_replicas)
      elif FLAGS.moving_average_decay:
        # Update ops executed locally by trainer.
        update_ops.append(variable_averages.apply(moving_average_variables))

      # Variables to train.
      variables_to_train = _get_variables_to_train()

      #  and returns a train_tensor and summary_op
      total_loss, clones_gradients = model_deploy.optimize_clones(
          clones,
          optimizer,
          var_list=variables_to_train)
      # Add total_loss to summary.
      summaries.add(tf.summary.scalar('total_loss', total_loss))

      # Create gradient updates.
      grad_updates = optimizer.apply_gradients(clones_gradients,
                                               global_step=global_step)
      update_ops.append(grad_updates)

      # Evaluating train_tensor yields total_loss only after every update op
      # (batch-norm updates, moving averages, gradient step) has run.
      update_op = tf.group(*update_ops)
      train_tensor = control_flow_ops.with_dependencies([update_op], total_loss,
                                                        name='train_op')

      # Add the summaries from the first clone. These contain the summaries
      # created by model_fn and either optimize_clones() or _gather_clone_loss().
      summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                         first_clone_scope))

      # Merge all summaries together.
      summary_op = tf.summary.merge(list(summaries), name='summary_op')


      ###########################
      # Kicks off the training. #
      ###########################
      # Each worker writes its summaries to its own tensorboard_<worker_num>
      # directory (relative to the current working directory).
      summary_writer = tf.summary.FileWriter("tensorboard_%d" %(ctx.worker_num), graph=tf.get_default_graph())
      slim.learning.train(
          train_tensor,
          logdir=FLAGS.train_dir,
          master=server.target,
          is_chief=(FLAGS.task == 0),
          init_fn=_get_init_fn(),
          summary_op=summary_op,
          number_of_steps=FLAGS.max_number_of_steps,
          log_every_n_steps=FLAGS.log_every_n_steps,
          save_summaries_secs=FLAGS.save_summaries_secs,
          save_interval_secs=FLAGS.save_interval_secs,
          summary_writer=summary_writer,
          sync_optimizer=optimizer if FLAGS.sync_replicas else None)
Пример #29
0
def main(_):
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    #######################
    # Config model_deploy #
    #######################
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

    ######################
    # Select the network #
    ######################
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        weight_decay=FLAGS.weight_decay,
        is_training=True)

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=True)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    with tf.device(deploy_config.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=FLAGS.num_readers,
          common_queue_capacity=20 * FLAGS.batch_size,
          common_queue_min=10 * FLAGS.batch_size)
      [image, label] = provider.get(['image', 'label'])
      label -= FLAGS.labels_offset

      train_image_size = FLAGS.train_image_size or network_fn.default_image_size

      image = image_preprocessing_fn(image, train_image_size, train_image_size)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size)
      labels = slim.one_hot_encoding(
          labels, dataset.num_classes - FLAGS.labels_offset)
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * deploy_config.num_clones)

    ####################
    # Define the model #
    ####################
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      images, labels = batch_queue.dequeue()
      logits, end_points = network_fn(images)

      #############################
      # Specify the loss function #
      #############################
      if 'AuxLogits' in end_points:
        slim.losses.softmax_cross_entropy(
            end_points['AuxLogits'], labels,
            label_smoothing=FLAGS.label_smoothing, weights=0.4,
            scope='aux_loss')
      slim.losses.softmax_cross_entropy(
          logits, labels, label_smoothing=FLAGS.label_smoothing, weights=1.0)
      return end_points

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Add summaries for end_points.
    end_points = clones[0].outputs
    for end_point in end_points:
      x = end_points[end_point]
      summaries.add(tf.summary.histogram('activations/' + end_point, x))
      summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                      tf.nn.zero_fraction(x)))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    #################################
    # Configure the moving averages #
    #################################
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, global_step)
    else:
      moving_average_variables, variable_averages = None, None

    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    if FLAGS.sync_replicas:
      # If sync_replicas is enabled, the averaging will be done in the chief
      # queue runner.
      optimizer = tf.train.SyncReplicasOptimizer(
          opt=optimizer,
          replicas_to_aggregate=FLAGS.replicas_to_aggregate,
          total_num_replicas=FLAGS.worker_replicas,
          variable_averages=variable_averages,
          variables_to_average=moving_average_variables)
    elif FLAGS.moving_average_decay:
      # Update ops executed locally by trainer.
      update_ops.append(variable_averages.apply(moving_average_variables))

    # Variables to train.
    variables_to_train = _get_variables_to_train()

    #  and returns a train_tensor and summary_op
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones,
        optimizer,
        var_list=variables_to_train)
    # Add total_loss to summary.
    summaries.add(tf.summary.scalar('total_loss', total_loss))

    # Create gradient updates.
    grad_updates = optimizer.apply_gradients(clones_gradients,
                                             global_step=global_step)
    update_ops.append(grad_updates)

    update_op = tf.group(*update_ops)
    with tf.control_dependencies([update_op]):
      train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                       first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    # Create a saver.
    saver = tf.train.Saver(tf.global_variables())

    ###########################
    # Kicks off the training. #
    ###########################
    init = tf.global_variables_initializer()

    # Start running operations on the Graph. allow_soft_placement must be set to
    # True to build towers on GPU, as some of the ops do not have GPU
    # implementations.
    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=FLAGS.log_device_placement))
    sess.run(init)
    if FLAGS.checkpoint_path==FLAGS.train_dir:
        saver.restore(sess, tf.train.latest_checkpoint(FLAGS.train_dir))

    # load pretrained weights
    weight_ini_fn = _get_init_fn()
    weight_ini_fn(sess)

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

    for step in range(FLAGS.max_number_of_steps):
        start_time = time.time()
        # _, loss_value = sess.run([train_tensor, loss])
        # _, loss_value = sess.run([train_tensor, total_loss])
        loss_value = sess.run(train_tensor)
        duration = time.time() - start_time

        assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

        if step % FLAGS.log_every_n_steps == 0:
            # num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
            num_examples_per_step = FLAGS.batch_size
            examples_per_sec = num_examples_per_step / duration
            # sec_per_batch = duration / FLAGS.num_gpus
            sec_per_batch = duration

            format_str = ('step %d, loss = %.2f (%.1f examples/sec; %.3f '
                          'sec/batch)')
            print(format_str % (step, loss_value,
                                examples_per_sec, sec_per_batch))

        if step % FLAGS.summary_snapshot_steps == 0:
            summary_str = sess.run(summary_op)
            summary_writer.add_summary(summary_str, step)

        # Save the model checkpoint periodically.
        if step % FLAGS.model_snapshot_steps == 0 or (step + 1) == FLAGS.max_number_of_steps:
            checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)

    print('OK...')
def main(_):
    """Stylize ``FLAGS.image_file`` with a restored model and export the graph.

    Steps, in order:
      1. Decode the input image once to read its height and width.
      2. Build an inference graph around a uint8 placeholder named
         ``image_tensor`` whose final op is named ``output_image``.
      3. Restore the weights from ``FLAGS.model_file`` and pass the graph to
         ``save_graph_to_file`` with target ``models/new_freeze_graph.pb``.
      4. Show input and output side by side with matplotlib and write the
         JPEG-encoded result to ``generated/res.jpg``.

    Side effect: ``FLAGS.model_file`` is replaced by its absolute path.

    Args:
        _: Unused.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    # Get image's height and width.
    height = 0
    width = 0
    with tf.gfile.GFile(FLAGS.image_file, 'rb') as f:
        # `with tf.Session() as sess` both installs the session as default
        # and closes it on exit; `Session().as_default()` would leave the
        # session (and its resources) open.
        with tf.Session() as sess:
            if FLAGS.image_file.lower().endswith('png'):
                image = sess.run(tf.image.decode_png(f.read()))
            else:
                image = sess.run(tf.image.decode_jpeg(f.read()))
            height = image.shape[0]
            width = image.shape[1]
    tf.logging.info('Image size: %dx%d' % (width, height))

    with tf.Graph().as_default():
        with tf.Session() as sess:

            # Read image data.
            image_preprocessing_fn, _ = preprocessing_factory.get_preprocessing(
                FLAGS.loss_model,
                is_training=False)
            image = reader.get_image(
                FLAGS.image_file, height, width, image_preprocessing_fn)
            print(image)
            plt.subplot(121)
            np_image = sess.run(image)
            plt.imshow(np_image)

            # Inference entry point: an image of arbitrary height/width.
            input_shape = (None, None, 3)
            input_tensor = tf.placeholder(
                dtype=tf.uint8, shape=input_shape, name='image_tensor')
            print(input_tensor)
            with tf.variable_scope("input_process"):
                # Preprocessing image
                processed_image = utils.mean_image_subtraction(
                    input_tensor, [123.68, 116.779, 103.939])
                # Add batch dimension
                batched_image = tf.expand_dims(processed_image, 0)

            generated = model.net(batched_image, training=False)

            generated = tf.cast(generated, tf.uint8)
            # Remove batch dimension
            generated = tf.squeeze(generated, [0], name='output_image')

            # Restore model variables.
            saver = tf.train.Saver(
                tf.global_variables(), write_version=tf.train.SaverDef.V1)
            sess.run([tf.global_variables_initializer(),
                      tf.local_variables_initializer()])
            # Use absolute path
            FLAGS.model_file = os.path.abspath(FLAGS.model_file)
            saver.restore(sess, FLAGS.model_file)

            # Only the graph is logged, so the writer can be closed right
            # away; closing flushes the event file to disk.
            summary_writer = tf.summary.FileWriter("logs", sess.graph)
            summary_writer.close()
            save_graph_to_file(
                sess, sess.graph_def, "models/new_freeze_graph.pb")

            # Make sure 'generated' directory exists.
            generated_file = 'generated/res.jpg'
            if not os.path.exists('generated'):
                os.makedirs('generated')

            # Generate and write image data to file.
            with tf.gfile.GFile(generated_file, 'wb') as f:
                feed_dict = {input_tensor: np_image}
                plt.subplot(122)
                plt.imshow(sess.run(generated, feed_dict))
                plt.show()
                # Only the encode-and-write run below is timed.
                start_time = time.time()
                f.write(sess.run(tf.image.encode_jpeg(generated), feed_dict))
                end_time = time.time()
                tf.logging.info('Elapsed time: %fs' % (end_time - start_time))
                tf.logging.info('Done. Please check %s.' % generated_file)
Пример #31
0
def main(_):
    """Run one evaluation pass and dump logits, labels and names to disk.

    Builds the slim evaluation graph for ``FLAGS.model_name`` on the selected
    dataset split, restores the checkpoint given by ``FLAGS.checkpoint_path``
    (latest checkpoint if it is a directory), runs up to ``num_batches``
    batches and stores the collected arrays with ``np.savez`` next to the
    checkpoint.

    Args:
        _: Unused.

    Raises:
        ValueError: if ``FLAGS.dataset_dir`` is not set.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        [image, label, image_name] = provider.get(['image', 'label', 'name'])
        print(image, label, image_name)
        label -= FLAGS.labels_offset

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        # One-hot rows are summed over axis 0 into a single vector per image.
        # NOTE(review): presumably `label` holds several class ids per image
        # (multi-label) - confirm against the dataset definition.
        labels = slim.one_hot_encoding(
            label, dataset.num_classes - FLAGS.labels_offset)
        labels = tf.reduce_sum(labels, axis=0)
        images, labels, image_names = tf.train.batch(
            [image, labels, image_name],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        print(images, labels)
        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)
        print(logits)

        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        # A class counts as predicted when its logit exceeds 0.7.
        predictions = tf.greater(logits, .7)
        labels = tf.cast(labels, tf.bool)

        # A sample matches only if every class decision equals its label.
        match = tf.reduce_all(tf.equal(predictions, labels), axis=1)

        # Accuracy is the mean of the 0/1 match indicator. (The fraction of
        # indicators below 0.5 would be the error rate, not the accuracy.)
        accuracy = slim.metrics.streaming_mean(tf.cast(match, tf.float32))

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
            accuracy,
            'Precision':
            slim.metrics.streaming_precision(predictions, labels),
            'Recall':
            slim.metrics.streaming_recall(predictions, labels)
            #'Recall_5': slim.metrics.streaming_recall_at_k(
            #    logits, labels, 5),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(dataset.num_samples /
                                    float(FLAGS.batch_size))

        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s' % checkpoint_path)

        init = tf.global_variables_initializer()
        from tensorflow.python.training import saver as tf_saver
        saver = tf_saver.Saver(variables_to_restore)
        all_logits = []
        all_labels = []
        all_names = []

        # The coordinator is created before anything that can fail so the
        # cleanup path never refers to an unbound name and cannot mask the
        # original error.
        coord = tf.train.Coordinator()
        with tf.Session() as sess:
            sess.run(init)
            saver.restore(sess, checkpoint_path)
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            print('RUNNING')
            i = 0
            try:
                while not coord.should_stop():
                    print(i)
                    logits_val, labels_val, name_val = sess.run(
                        [logits, labels, image_names])
                    all_logits.append(logits_val)
                    all_labels.append(labels_val)
                    all_names.append(name_val)
                    i += 1
                    if i == num_batches:
                        break
            except tf.errors.OutOfRangeError:
                print('Done')
            finally:
                coord.request_stop()
            # Join the queue-runner threads while the session is still open;
            # the `with` block closes the session afterwards.
            coord.join(threads)

        import numpy as np
        all_logits = np.concatenate(all_logits)
        all_labels = np.concatenate(all_labels)
        all_names = np.concatenate(all_names)

        np.savez('%s.%s' %
                 (checkpoint_path, '%s-val.npz' % FLAGS.dataset_split_name),
                 logits=all_logits,
                 labels=all_labels,
                 names=all_names)
Пример #32
0
def main(_):
  """Evaluate a slim classifier and also track accuracy per coarse label.

  Builds the evaluation graph for `FLAGS.model_name` on the selected dataset
  split, defines streaming loss/accuracy metrics plus a per-coarse-label
  accuracy accumulator, and hands everything to
  `slim.evaluation.evaluation_loop`.

  Args:
    _: Unused.

  Raises:
    ValueError: if `FLAGS.dataset_dir` is not set.
  """
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    tf_global_step = slim.get_or_create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

    ####################
    # Select the model #
    ####################
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        is_training=False)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=False,
        common_queue_capacity=2 * FLAGS.batch_size,
        common_queue_min=FLAGS.batch_size)
    [image, label, coarse_label] = provider.get(
        ['image', 'label', 'coarse_label'])
    label -= FLAGS.labels_offset

    #####################################
    # Select the preprocessing function #
    #####################################
    # Fall back to the model name when no preprocessing name is given.
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=False)

    eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

#    image = tf.image.grayscale_to_rgb(image)

    image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

    images, labels, coarse_labels = tf.train.batch(
        [image, label, coarse_label],
        batch_size=FLAGS.batch_size,
        num_threads=FLAGS.num_preprocessing_threads,
        capacity=5 * FLAGS.batch_size)
    coarse_labels = tf.cast(coarse_labels, tf.int32)
    # NOTE(review): tf.image_summary, tf.scalar_summary and tf.select (used
    # below) look like pre-1.0 TensorFlow names - confirm the targeted version.
    tf.image_summary('image', images, max_images=5)

    ####################
    # Define the model #
    ####################
    logits, _ = network_fn(images)

    if FLAGS.moving_average_decay:
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, tf_global_step)
      variables_to_restore = variable_averages.variables_to_restore(
          slim.get_model_variables())
      variables_to_restore[tf_global_step.op.name] = tf_global_step
    else:
      variables_to_restore = slim.get_variables_to_restore()

    # NOTE(review): the one-hot depth is hard-coded to 2 although the network
    # is built with `dataset.num_classes - FLAGS.labels_offset` outputs -
    # confirm this is intentional.
    one_hot_labels = slim.one_hot_encoding(labels, 2)
    loss = slim.losses.softmax_cross_entropy(logits, one_hot_labels)

    predictions = tf.argmax(logits, 1)
    labels = tf.squeeze(labels)

    # Define the metrics:
    names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
        'Total_Loss': slim.metrics.streaming_mean(loss),
        'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
    })

  # NOTE(review): everything from here on is dedented out of the
  # `with tf.Graph().as_default()` block above. It presumably still builds
  # into the same graph because tensors from that graph are passed through
  # `values=` - confirm before re-indenting or reordering.
  with tf.variable_scope('coarse_label_accuracy',
                         values=[predictions, labels, coarse_labels]):
    # Running sum of correct predictions, one slot per coarse label.
    totals = tf.Variable(
        initial_value=tf.zeros([len(dataset.coarse_labels_to_names)]),
        trainable=False,
        collections=[tf.GraphKeys.LOCAL_VARIABLES],
        dtype=tf.float32,
        name='totals')

    # Running number of samples seen, one slot per coarse label.
    counts = tf.Variable(
        initial_value=tf.zeros([len(dataset.coarse_labels_to_names)]),
        trainable=False,
        collections=[tf.GraphKeys.LOCAL_VARIABLES],
        dtype=tf.float32,
        name='counts')

    # 1 where the prediction equals the label, 0 otherwise.
    correct = tf.cast(tf.equal(predictions, labels), tf.int32)
    accuracy_ops = []
    for index, coarse_key in list(enumerate(dataset.coarse_labels_to_names)):
      # Keep only the entries of this batch whose coarse label is coarse_key.
      label_correct = tf.boolean_mask(correct, tf.equal(coarse_key, coarse_labels))
      sum_correct = tf.reduce_sum(label_correct)
      sum_correct = tf.cast(tf.expand_dims(sum_correct, 0), tf.float32)
      # Single-entry sparse vectors: only slot `index` receives the batch's
      # correct count / sample count when densified and added below.
      delta_totals = tf.SparseTensor([[index]], sum_correct, totals.get_shape())
      label_count = tf.cast(tf.shape(label_correct), tf.float32)
      delta_counts = tf.SparseTensor([[index]], label_count, counts.get_shape())

      totals_compute_op = tf.assign_add(
          totals,
          tf.sparse_tensor_to_dense(delta_totals),
          use_locking=True)
      counts_compute_op = tf.assign_add(
          counts,
          tf.sparse_tensor_to_dense(delta_counts),
          use_locking=True)

      accuracy_ops.append(totals_compute_op)
      accuracy_ops.append(counts_compute_op)
    # Per-coarse-label accuracy = totals / counts, forced to 0 where no sample
    # of that coarse label has been counted; built under a control dependency
    # on all accumulation ops above.
    with tf.control_dependencies(accuracy_ops):
      update_op = tf.select(tf.equal(counts, 0),
                            tf.zeros_like(counts, tf.float32),
                            tf.div(totals, counts))
      # Registered as an update op only; there is no matching entry in
      # names_to_values.
      names_to_updates['Coarse_Label_Accuracy'] = update_op

    if FLAGS.recall:
      recall_value, recall_update = slim.metrics.streaming_recall_at_k(
          logits, labels, 5)
      names_to_values['Recall@5'] = recall_value
      names_to_updates['Recall@5'] = recall_update

    # Print the summaries to screen.
    # TODO(vonclites) list(d.items()) is for Python 3... check compatibility
    for name, value in list(names_to_values.items()):
      summary_name = 'eval/%s' % name
      op = tf.scalar_summary(summary_name, value, collections=[])
      op = tf.Print(op, [value], summary_name)
      tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

    # One scalar summary per coarse label, read from the accuracy vector.
    # NOTE(review): this pairs position `index` of
    # coarse_labels_to_names.values() with the slot filled from the key at
    # the same enumerate position above - relies on consistent dict ordering.
    for index, label_name in list(enumerate(dataset.coarse_labels_to_names.values())):
      summary_name = 'eval/%s' % label_name
      op = tf.scalar_summary(summary_name, update_op[index], collections=[])
      op = tf.Print(op, [update_op[index]], summary_name)
      tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

    # TODO(sguada) use num_epochs=1
    if FLAGS.max_num_batches:
      num_batches = FLAGS.max_num_batches
    else:
      # This ensures that we make a single pass over all of the data.
      num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size))

#    if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
#      checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
#    else:
#      checkpoint_path = FLAGS.checkpoint_path

    tf.logging.info('Evaluating %s' % FLAGS.checkpoint_path)

    # NOTE(review): the `variables_to_restore` computed above (including the
    # moving-average mapping) is not used here; a fresh
    # slim.get_variables_to_restore() is passed instead - confirm intent.
    slim.evaluation.evaluation_loop(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.checkpoint_path,
        logdir=FLAGS.eval_dir,
        num_evals=num_batches,
        eval_op=list(names_to_updates.values()),
        eval_interval_secs=FLAGS.eval_interval_secs,
        variables_to_restore=slim.get_variables_to_restore())
Пример #33
0
    tf.contrib.data.parallel_interleave(tf.data.TFRecordDataset,
                                        cycle_length=8))
# Apply `decode` to every element, using 16 parallel calls.
ds = ds.map(decode, num_parallel_calls=16)
# Shuffle with a buffer of 16 batches' worth of elements (fixed seed) and
# repeat the dataset.
ds = ds.apply(
    tf.contrib.data.shuffle_and_repeat(buffer_size=batch_size * 16, seed=1234))
# Group into batches of `batch_size`, dropping a final incomplete batch.
ds = ds.apply(tf.contrib.data.batch_and_drop_remainder(batch_size))
ds = ds.prefetch(buffer_size=batch_size * 16)

# Build an iterator from the dataset's types/shapes, plus an op that
# initializes it from `ds`.
iterator = tf.data.Iterator.from_structure(ds.output_types, ds.output_shapes)
images, labels = iterator.get_next()

training_init_op = iterator.make_initializer(ds)

# In[5]:

# Evaluation-mode ('is_training=False') preprocessing for 'inception_v1',
# called with None for both size arguments.
image_prep_fn = preprocessing_factory.get_preprocessing('inception_v1',
                                                        is_training=False)
images_preped = image_prep_fn(images, None, None)
# NOTE: Python 2 print statement.
print images, images_preped

import model
# NOTE(review): the meaning of the positional `True` depends on
# model.build_net, which is not visible here - presumably a training flag.
class_logits = model.build_net(images_preped, num_classes, True, args.model)

# One-hot float targets for the softmax cross-entropy below.
labels_oh = tf.one_hot(labels,
                       num_classes,
                       on_value=1.,
                       off_value=0.,
                       dtype=tf.float32)

# Classification loss: softmax cross-entropy averaged with reduce_mean.
cls_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels_oh,
                                                      logits=class_logits)
cls_loss = tf.reduce_mean(cls_loss)
Пример #34
0
def main(FLAGS):
    """Train the image transform network against a fixed loss network.

    Builds the input pipeline, the transform network and the loss network,
    combines the style, content and total-variation losses, and optimizes
    with Adam until the coordinator stops or the input pipeline raises
    ``OutOfRangeError``. Checkpoints are written under
    ``<FLAGS.model_path>/<FLAGS.naming>``; an existing checkpoint there is
    restored before training starts.

    :param FLAGS: configuration object. Attributes read here: model_path,
        naming, loss_model, batch_size, image_size, epoch, content_layers,
        style_layers, style_weight, content_weight, tv_weight. It is also
        passed whole to ``losses.get_style_features`` and
        ``utils._get_init_fn``.
    """
    style_features_t = losses.get_style_features(FLAGS)
    training_path = os.path.join(FLAGS.model_path, FLAGS.naming)
    if not os.path.exists(training_path):
        os.makedirs(training_path)

    with tf.Graph().as_default(), tf.Session() as sess:
        # Create the loss network.
        network_fn = nets_factory.get_network_fn(
            FLAGS.loss_model,
            num_classes=1,
            is_training=False)

        image_preprocessing_fn, image_unprocessing_fn = (
            preprocessing_factory.get_preprocessing(
                FLAGS.loss_model,
                is_training=False))

        # Preprocess the training images and run them through the
        # transform network.
        processed_images = reader.batch_image(
            FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size,
            'train2014/', image_preprocessing_fn, epochs=FLAGS.epoch)
        generated = model.transform_network(processed_images, training=True)

        # The generated images get the same preprocessing, one image at a
        # time, before they are fed to the loss network.
        generated_images = tf.unstack(
            generated, axis=0, num=FLAGS.batch_size)
        processed_generated = []
        for single_image in generated_images:
            processed_generated.append(
                image_preprocessing_fn(
                    single_image, FLAGS.image_size, FLAGS.image_size))
        processed_generated = tf.stack(processed_generated)

        loss_network_input = tf.concat(
            [processed_generated, processed_images], 0)
        _, endpoints_dict = network_fn(
            loss_network_input, spatial_squeeze=False)
        tf.logging.info('Loss network layers(You can define them in "content_layers" and "style_layers"):')
        for layer_name in endpoints_dict:
            tf.logging.info(layer_name)

        # Create the losses.
        content_loss = losses.content_loss(
            endpoints_dict, FLAGS.content_layers)
        style_loss, style_loss_summary = losses.style_loss(
            endpoints_dict, style_features_t, FLAGS.style_layers)
        # use the unprocessed image
        tv_loss = losses.total_variation_loss(generated)

        loss = FLAGS.style_weight * style_loss + FLAGS.content_weight * content_loss + FLAGS.tv_weight * tv_loss

        # Prepare training. Only variables of the generator network are
        # trained and saved; loss-network variables are left out.
        global_step = tf.Variable(0, name="global_step", trainable=False)
        variable_to_train = [
            var for var in tf.trainable_variables()
            if not var.name.startswith(FLAGS.loss_model)
        ]

        # Optimizer.
        train_op = tf.train.AdamOptimizer(1e-3).minimize(
            loss, global_step=global_step, var_list=variable_to_train)

        variables_to_restore = [
            var for var in tf.global_variables()
            if not var.name.startswith(FLAGS.loss_model)
        ]
        saver = tf.train.Saver(
            variables_to_restore, write_version=tf.train.SaverDef.V1)
        sess.run([tf.global_variables_initializer(),
                  tf.local_variables_initializer()])
        init_func = utils._get_init_fn(FLAGS)
        init_func(sess)
        last_file = tf.train.latest_checkpoint(training_path)
        if last_file:
            tf.logging.info('Restoring model from {}'.format(last_file))
            saver.restore(sess, last_file)

        # Start training.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        checkpoint_prefix = os.path.join(
            training_path, FLAGS.naming + '.ckpt')
        start_time = time.time()
        try:
            while not coord.should_stop():
                _, loss_t, step = sess.run([train_op, loss, global_step])
                elapsed_time = time.time() - start_time
                start_time = time.time()
                if step % 10 == 0:
                    tf.logging.info(
                        'step: %d,  total Loss %f, secs/step: %f,%s' % (step, loss_t, elapsed_time, time.asctime()))
                # Checkpoint.
                if step % 50 == 0:
                    tf.logging.info('saving check point...')
                    saver.save(sess, checkpoint_prefix, global_step=step)
        except tf.errors.OutOfRangeError:
            saver.save(sess, os.path.join(training_path, 'fast-style-model.ckpt-done'))
            tf.logging.info('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
            tf.logging.info('coordinator stop')
        coord.join(threads)
def use_tensorflow_get_feature(base_path, save_path,
                               checkpoint_dir='/home/lee/Downloads/logs/'):
    """Extract ResNet-v1-50 ``pool5`` features for every class folder.

    Each sub-directory of ``base_path`` is treated as one class. For every
    class the ``.jpg`` files are preprocessed, pushed through the network in
    batches of 64, and the resulting ``[feature, class_name, image_path]``
    records are saved to ``tensorflow-resnet-50_feature.npy`` in the mirrored
    directory under ``save_path``. A class is skipped when such a file already
    exists and holds one record per image. Finally all per-class files are
    merged into ``tensorflow-feature.npy``, ``tensorflow-class_name.npy`` and
    ``tensorflow-file_path.npy`` inside ``save_path``.

    Args:
        base_path: directory containing one sub-directory of images per class.
        save_path: output root; class directories are mapped into it by
            replacing ``base_path`` with ``save_path`` in their path.
        checkpoint_dir: directory whose checkpoint state names the model
            checkpoint to restore (defaults to the previously hard-coded
            location).
    """
    feature_file_name = 'tensorflow-resnet-50_feature.npy'
    batch_size = 64

    def list_jpgs(folder):
        """Return the names of the .jpg files directly inside ``folder``."""
        return [name for name in os.listdir(folder) if name.endswith('.jpg')]

    def feature_file_for(class_dir):
        """Return the per-class feature file path mirrored under save_path."""
        return os.path.join(class_dir.replace(base_path, save_path),
                            feature_file_name)

    checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    network_fn = nets_factory.get_network_fn('resnet_v1_50',
                                             num_classes=100000,
                                             is_training=False)

    placeholder = tf.placeholder(name='input',
                                 dtype=tf.float32,
                                 shape=[None, 224, 224, 3])
    network_fn(placeholder)

    saver = tf.train.Saver()

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    saver.restore(sess, input_checkpoint)

    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        'resnet_v1_50', is_training=False)

    img_pla = tf.placeholder(dtype=tf.float32,
                             shape=[None, None, 3],
                             name='img')

    image_preprocessing = image_preprocessing_fn(img_pla, 224, 224)

    class_name_and_path_list = [
        [folder, os.path.join(base_path, folder)]
        for folder in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, folder))
    ]

    # Total number of images, used only for progress reporting.
    max_num = sum(len(list_jpgs(class_dir))
                  for _, class_dir in class_name_and_path_list)
    now_num = 0
    last_num = 0
    last_time = time.time()
    try:
        for class_name, class_dir in class_name_and_path_list:
            image_path_list = [os.path.join(class_dir, image_file)
                               for image_file in list_jpgs(class_dir)]
            feature_path = feature_file_for(class_dir)

            if os.path.exists(feature_path):
                # NOTE(review): these files hold pickled object arrays; newer
                # NumPy releases need allow_pickle=True to load them -
                # confirm against the NumPy version in use.
                t_npy = np.load(feature_path)
                # Reuse the cached file only if it has one record per image;
                # otherwise fall through and recompute the class.
                if len(t_npy) == len(image_path_list):
                    now_num = now_num + len(image_path_list)
                    continue

            # Preprocess every image and group the results into batches.
            all_image_list = []
            image_list = []
            for image_path in image_path_list:
                image = imutils.opencv2matplotlib(cv2.imread(image_path))
                image_list.append(
                    sess.run(image_preprocessing, feed_dict={'img:0': image}))
                if len(image_list) >= batch_size:
                    all_image_list.append(image_list)
                    image_list = []
            # Trailing partial batch (possibly empty; skipped below).
            all_image_list.append(image_list)

            result_list = []
            for image_batch in all_image_list:
                if len(image_batch) != 0:
                    temp = sess.run("resnet_v1_50/pool5:0",
                                    feed_dict={'input:0': image_batch})
                    for j in temp:
                        result_list.append(np.ravel(j))

            # Features come back in the same order as image_path_list.
            feature_list = [
                [feature, class_name, image_path_list[idx]]
                for idx, feature in enumerate(result_list)
            ]
            now_num = now_num + len(image_path_list)
            print('正在提取中...%d/%d (%.2f/sec)' % (
                now_num, max_num,
                (now_num - last_num) / float(time.time() - last_time)))
            last_num = now_num
            last_time = time.time()
            file_tools.check_fold(class_dir.replace(base_path, save_path))
            # Saved even when the class has no images, because the merge pass
            # below loads one feature file per class directory.
            np.save(feature_path, feature_list)
    finally:
        # The session is only needed for extraction; release it even when an
        # image or batch fails.
        sess.close()

    # Merge pass: concatenate the per-class records into three flat arrays.
    max_num = sum(len(list_jpgs(class_dir))
                  for _, class_dir in class_name_and_path_list)
    now_num = 0

    feature_list = []
    class_name_list = []
    file_path_list = []

    for _, class_dir in class_name_and_path_list:
        npy = np.load(feature_file_for(class_dir))
        for t in npy:
            feature_list.append(t[0])
            class_name_list.append(t[1])
            file_path_list.append(t[2])
            now_num = now_num + 1
        print('正在提取中...%d / %d' % (now_num, max_num))

    np.save(os.path.join(save_path, 'tensorflow-feature.npy'), feature_list)
    np.save(os.path.join(save_path, 'tensorflow-class_name.npy'),
            class_name_list)
    np.save(os.path.join(save_path, 'tensorflow-file_path.npy'),
            file_path_list)
Пример #36
0
def main(_):
  """Run one evaluation pass of a classification model over a dataset split.

  Builds, in a fresh graph, the input pipeline, the network selected by
  `FLAGS.model_name` and streaming Accuracy / Recall@5 metrics, then calls
  `slim.evaluation.evaluate_once` on the checkpoint named by
  `FLAGS.checkpoint_path`.

  Args:
    _: Unused.

  Raises:
    ValueError: If `--dataset_dir` is not supplied, or if
      `FLAGS.checkpoint_path` is a directory in which no checkpoint is found.
  """
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    tf_global_step = slim.get_or_create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

    ####################
    # Select the model #
    ####################
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        is_training=False)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=False,
        common_queue_capacity=2 * FLAGS.batch_size,
        common_queue_min=FLAGS.batch_size)
    [image, label] = provider.get(['image', 'label'])
    # Shift labels by the same offset that was removed from num_classes above.
    label -= FLAGS.labels_offset

    #####################################
    # Select the preprocessing function #
    #####################################
    # Fall back to the model name when no explicit preprocessing is requested.
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=False)

    eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

    image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

    images, labels = tf.train.batch(
        [image, label],
        batch_size=FLAGS.batch_size,
        num_threads=FLAGS.num_preprocessing_threads,
        capacity=5 * FLAGS.batch_size)

    ####################
    # Define the model #
    ####################
    logits, _ = network_fn(images)

    if FLAGS.moving_average_decay:
      # Restore model variables through the moving-average name mapping, and
      # add the global step explicitly since it is not a model variable.
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, tf_global_step)
      variables_to_restore = variable_averages.variables_to_restore(
          slim.get_model_variables())
      variables_to_restore[tf_global_step.op.name] = tf_global_step
    else:
      variables_to_restore = slim.get_variables_to_restore()

    predictions = tf.argmax(logits, 1)
    labels = tf.squeeze(labels)

    # Define the metrics:
    names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
        'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
        'Recall_5': slim.metrics.streaming_recall_at_k(
            logits, labels, 5),
    })

    # Print the summaries to screen. The scalar summary is created with
    # collections=[] and only its tf.Print-wrapped version is registered, so
    # each metric is both logged and written exactly once.
    for name, value in names_to_values.items():
      summary_name = 'eval/%s' % name
      op = tf.summary.scalar(summary_name, value, collections=[])
      op = tf.Print(op, [value], summary_name)
      tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

    # TODO(sguada) use num_epochs=1
    if FLAGS.max_num_batches:
      num_batches = FLAGS.max_num_batches
    else:
      # This ensures that we make a single pass over all of the data.
      # math.ceil returns a float on Python 2; num_evals is a count.
      num_batches = int(
          math.ceil(dataset.num_samples / float(FLAGS.batch_size)))

    if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
      checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
      # latest_checkpoint returns None when the directory holds no
      # checkpoint; fail loudly rather than hand None to the evaluator.
      if checkpoint_path is None:
        raise ValueError(
            'No checkpoint found in directory %s' % FLAGS.checkpoint_path)
    else:
      checkpoint_path = FLAGS.checkpoint_path

    tf.logging.info('Evaluating %s' % checkpoint_path)

    slim.evaluation.evaluate_once(
        master=FLAGS.master,
        checkpoint_path=checkpoint_path,
        logdir=FLAGS.eval_dir,
        num_evals=num_batches,
        eval_op=list(names_to_updates.values()),
        variables_to_restore=variables_to_restore)
def main(_):
    """Run one evaluation pass of a classification model over a dataset split.

    Builds, in a fresh graph, the input pipeline, the network selected by
    `FLAGS.model_name` and streaming Accuracy / Recall@5 metrics, then calls
    `slim.evaluation.evaluate_once` on the checkpoint named by
    `FLAGS.checkpoint_path`.

    Args:
        _: Unused.

    Raises:
        ValueError: If `--dataset_dir` is not supplied, or if
            `FLAGS.checkpoint_path` is a directory in which no checkpoint
            is found.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        [image, label] = provider.get(['image', 'label'])
        # Shift labels by the same offset removed from num_classes above.
        label -= FLAGS.labels_offset

        #####################################
        # Select the preprocessing function #
        #####################################
        # Fall back to the model name when no preprocessing is requested.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        if FLAGS.moving_average_decay:
            # Restore model variables through the moving-average name
            # mapping, and add the global step explicitly since it is not a
            # model variable.
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
            slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5':
            slim.metrics.streaming_recall_at_k(logits, labels, 5),
        })

        # Print the summaries to screen. The scalar summary is created with
        # collections=[] and only its tf.Print-wrapped version is registered,
        # so each metric is both logged and written exactly once.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            # math.ceil returns a float on Python 2; num_evals is a count.
            num_batches = int(
                math.ceil(dataset.num_samples / float(FLAGS.batch_size)))

        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            # latest_checkpoint returns None when the directory holds no
            # checkpoint; fail loudly rather than hand None to the evaluator.
            if checkpoint_path is None:
                raise ValueError('No checkpoint found in directory %s' %
                                 FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s' % checkpoint_path)
        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore)
def main_fun(argv, ctx):
  """Define the evaluation flags, start the cluster server and evaluate once.

  All imports and flag definitions happen inside the function body. After
  `sys.argv` is replaced with `argv`, the flags are read, the cluster server
  is started through `TFNode.start_cluster_server(ctx)`, and a classification
  model is evaluated once with streaming Accuracy / Recall@5 metrics.

  Args:
    argv: Argument list assigned to `sys.argv` before the flags are read.
    ctx: Opaque context object forwarded to `TFNode.start_cluster_server`.

  Raises:
    ValueError: If `--dataset_dir` is not supplied, or if
      `FLAGS.checkpoint_path` is a directory in which no checkpoint is found.
  """
  import math
  import six
  import tensorflow as tf

  from datasets import dataset_factory
  from nets import nets_factory
  from preprocessing import preprocessing_factory

  # The flags below are parsed from sys.argv, so install the caller's argv.
  sys.argv = argv

  slim = tf.contrib.slim

  tf.app.flags.DEFINE_integer(
      'batch_size', 100, 'The number of samples in each batch.')

  tf.app.flags.DEFINE_integer(
      'max_num_batches', None,
      'Max number of batches to evaluate by default use all.')

  tf.app.flags.DEFINE_string(
      'master', '', 'The address of the TensorFlow master to use.')

  tf.app.flags.DEFINE_string(
      'checkpoint_path', '/tmp/tfmodel/',
      'The directory where the model was written to or an absolute path to a '
      'checkpoint file.')

  tf.app.flags.DEFINE_string(
      'eval_dir', '/tmp/tfmodel/', 'Directory where the results are saved to.')

  tf.app.flags.DEFINE_integer(
      'num_preprocessing_threads', 4,
      'The number of threads used to create the batches.')

  tf.app.flags.DEFINE_string(
      'dataset_name', 'imagenet', 'The name of the dataset to load.')

  tf.app.flags.DEFINE_string(
      'dataset_split_name', 'test', 'The name of the train/test split.')

  tf.app.flags.DEFINE_string(
      'dataset_dir', None, 'The directory where the dataset files are stored.')

  tf.app.flags.DEFINE_integer(
      'labels_offset', 0,
      'An offset for the labels in the dataset. This flag is primarily used to '
      'evaluate the VGG and ResNet architectures which do not use a background '
      'class for the ImageNet dataset.')

  tf.app.flags.DEFINE_string(
      'model_name', 'inception_v3', 'The name of the architecture to evaluate.')

  tf.app.flags.DEFINE_string(
      'preprocessing_name', None, 'The name of the preprocessing to use. If left '
      'as `None`, then the model_name flag is used.')

  # Note the trailing space: the two literals are concatenated into one help
  # string.
  tf.app.flags.DEFINE_float(
      'moving_average_decay', None,
      'The decay to use for the moving average. '
      'If left as None, then moving averages are not used.')

  tf.app.flags.DEFINE_integer(
      'eval_image_size', None, 'Eval image size')

  FLAGS = tf.app.flags.FLAGS

  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  # The return values are not used below; the call is kept for its effect of
  # starting the server.
  cluster_spec, server = TFNode.start_cluster_server(ctx)

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    #tf_global_step = slim.get_or_create_global_step()
    tf_global_step = tf.Variable(0, name="global_step")

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

    ####################
    # Select the model #
    ####################
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        is_training=False)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=False,
        common_queue_capacity=2 * FLAGS.batch_size,
        common_queue_min=FLAGS.batch_size)
    [image, label] = provider.get(['image', 'label'])
    # Shift labels by the same offset that was removed from num_classes above.
    label -= FLAGS.labels_offset

    #####################################
    # Select the preprocessing function #
    #####################################
    # Fall back to the model name when no explicit preprocessing is requested.
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=False)

    eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

    image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

    images, labels = tf.train.batch(
        [image, label],
        batch_size=FLAGS.batch_size,
        num_threads=FLAGS.num_preprocessing_threads,
        capacity=5 * FLAGS.batch_size)

    ####################
    # Define the model #
    ####################
    logits, _ = network_fn(images)

    if FLAGS.moving_average_decay:
      # Restore model variables through the moving-average name mapping, and
      # add the global step explicitly since it is not a model variable.
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, tf_global_step)
      variables_to_restore = variable_averages.variables_to_restore(
          slim.get_model_variables())
      variables_to_restore[tf_global_step.op.name] = tf_global_step
    else:
      variables_to_restore = slim.get_variables_to_restore()

    predictions = tf.argmax(logits, 1)
    labels = tf.squeeze(labels)

    # Define the metrics:
    names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
        'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
        'Recall_5': slim.metrics.streaming_recall_at_k(
            logits, labels, 5),
    })

    # Print the summaries to screen. The scalar summary is created with
    # collections=[] and only its tf.Print-wrapped version is registered, so
    # each metric is both logged and written exactly once.
    for name, value in six.iteritems(names_to_values):
      summary_name = 'eval/%s' % name
      op = tf.summary.scalar(summary_name, value, collections=[])
      op = tf.Print(op, [value], summary_name)
      tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

    # TODO(sguada) use num_epochs=1
    if FLAGS.max_num_batches:
      num_batches = FLAGS.max_num_batches
    else:
      # This ensures that we make a single pass over all of the data.
      # math.ceil returns a float on Python 2; num_evals is a count.
      num_batches = int(
          math.ceil(dataset.num_samples / float(FLAGS.batch_size)))

    if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
      checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
      # latest_checkpoint returns None when the directory holds no
      # checkpoint; fail loudly rather than hand None to the evaluator.
      if checkpoint_path is None:
        raise ValueError(
            'No checkpoint found in directory %s' % FLAGS.checkpoint_path)
    else:
      checkpoint_path = FLAGS.checkpoint_path

    tf.logging.info('Evaluating %s' % checkpoint_path)

    slim.evaluation.evaluate_once(
        master=FLAGS.master,
        checkpoint_path=checkpoint_path,
        logdir=FLAGS.eval_dir,
        num_evals=num_batches,
        eval_op=list(names_to_updates.values()),
        variables_to_restore=variables_to_restore)
Пример #39
0
def main(_):
    """Build the input pipeline and display the first image of ten batches.

    Constructs the dataset provider, preprocessing and batching pipeline plus
    the network selected by `FLAGS.model_name`, then pulls batches from the
    prefetch queue and shows the first image of each with matplotlib, titled
    with the class name looked up in the dataset's `labels.txt`.

    Args:
        _: Unused.

    Raises:
        ValueError: If `--dataset_dir` is not supplied.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=False)

        #####################################
        # Select the preprocessing function #
        #####################################
        # Fall back to the model name when no preprocessing is requested.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            #       label -= FLAGS.labels_offset

            train_image_size = FLAGS.train_image_size or network_fn.default_image_size
            print(train_image_size)

            image = image_preprocessing_fn(image, train_image_size,
                                           train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - FLAGS.labels_offset)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        images, labels = batch_queue.dequeue()
        print(images, labels)
        # The network outputs are not used below; the call only builds the
        # model in the graph.
        logits, end_points = network_fn(images)

        labels_to_class_names = dataset_utils.read_label_file(
            FLAGS.dataset_dir, filename='labels.txt')
        print(labels_to_class_names)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            # Always stop and join the queue-runner threads, even when
            # fetching or plotting a batch raises.
            try:
                images_np, labels_np = sess.run([images, labels])
                print(images_np.shape, labels_np.shape)

                for _batch_index in range(10):
                    image_np, label_np = sess.run([images, labels])

                    # Labels are one-hot encoded above, so argmax recovers
                    # the class id of the first image in the batch.
                    plt.imshow(image_np[0, :, :, :])
                    plt.title('label name:' + str(
                        labels_to_class_names[np.argmax(label_np[0])]))
                    plt.show()
            finally:
                coord.request_stop()
                coord.join(threads)
def main(_):
    """Evaluate an SSD detection checkpoint once and report VOC12 AP / mAP.

    Builds, in a fresh graph, the dataset provider, SSD preprocessing, anchor
    encoding, the SSD network and its losses, decodes and filters the
    detections, matches them against the ground truth, and registers
    streaming loss, TP/FP and per-class average-precision summaries before
    calling `slim.evaluation.evaluate_once`.

    Args:
        _: Unused.

    Raises:
        ValueError: If `--dataset_dir` is not supplied, or if
            `FLAGS.checkpoint_path` is a directory in which no checkpoint
            is found.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        ssd_model = ssd_vgg_300.SSDNet()
        ssd_model.set_batch_size(FLAGS.batch_size)
        network_fn = nets_factory.get_network_fn(ssd_model, is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            common_queue_capacity=20 * FLAGS.batch_size,
            common_queue_min=10 * FLAGS.batch_size)
        [image, labels,
         bboxes] = provider.get(['image', 'object/label', 'object/bbox'])
        labels -= FLAGS.labels_offset

        if FLAGS.remove_difficult:
            # provider.get returns a list of tensors; unpack the single entry
            # so both branches yield a tensor of the same kind.
            [difficults_gt] = provider.get(['object/difficult'])
        else:
            # No difficulty filtering: mark every object as not difficult.
            difficults_gt = tf.zeros(tf.shape(labels), dtype=tf.int64)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name)

        # Fall back to the model's own input size when no flag is given.
        eval_image_size_height = (FLAGS.eval_image_size_height or
                                  ssd_model.ssd_params.image_size[0])
        eval_image_size_width = (FLAGS.eval_image_size_width or
                                 ssd_model.ssd_params.image_size[1])

        image, labels_gt, bboxes_gt = image_preprocessing_fn(
            image,
            labels,
            bboxes,
            eval_image_size_height,
            eval_image_size_width,
            data_format=DATA_FORMAT,
            is_training=False)

        anchors = ssd_model.anchors_for_all_layer()
        labels_en, scores_en, bboxes_en = ssd_model.bboxes_encode(
            anchors, labels_gt, bboxes_gt)

        # dynamic_pad=True pads variable-sized tensors within each batch.
        images, labels_gt, bboxes_gt, difficults_gt, labels_en, scores_en, bboxes_en = \
            tf.train.batch(
            [image, labels_gt, bboxes_gt, difficults_gt, labels_en, scores_en, bboxes_en],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size,
            dynamic_pad=True)

        ################################
        # SSD Model + outputs decoding #
        ################################
        logits, locs, endpoints = network_fn(images)
        # The return value is unused; the losses are picked up further down
        # from the LOSSES / EXTRA_LOSSES graph collections.
        ssd_model.ssd_class_and_loc_losses(logits, locs, labels_en, bboxes_en,
                                           scores_en)

        # Performing post_processing on CPU: loop-intensive, usually more efficient.
        with tf.device('/device:CPU:0'):
            # Detect objects from SSD Model outputs
            locs_aggr = ssd_model.bboxes_decode(locs, anchors)
            scores_nms, bboxes_nms = ssd_model.detected_bboxes(
                logits, locs_aggr, FLAGS.select_threshold, FLAGS.nms_threshold,
                FLAGS.select_top_k, FLAGS.keep_top_k)

            num_bboxes_gt, tp, fp = bboxes_matching_batch(
                scores_nms.keys(),
                scores_nms,
                bboxes_nms,
                labels_gt,
                bboxes_gt,
                difficults_gt,
                matching_threshold=FLAGS.matching_threshold)

        if FLAGS.moving_average_decay:
            # Restore model variables through the moving-average name
            # mapping, and add the global step explicitly since it is not a
            # model variable.
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        # Define the metrics:
        with tf.device('/device:CPU:0'):
            dict_metrics = {}
            # First add all losses.
            for loss in tf.get_collection(tf.GraphKeys.LOSSES):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)
            # Extra losses as well.
            for loss in tf.get_collection('EXTRA_LOSSES'):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)

            # Add metrics to summaries. metric[0] is the value tensor of the
            # (value, update_op) pair.
            for name, metric in dict_metrics.items():
                summary_name = name
                op = tf.summary.scalar(summary_name, metric[0], collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

            # FP and TP metrics.
            tp_fp_metric = streaming_tp_fp_arrays(num_bboxes_gt, tp, fp,
                                                  scores_nms)
            for c in tp_fp_metric[0].keys():
                dict_metrics['tp_fp_%s' % c] = (tp_fp_metric[0][c],
                                                tp_fp_metric[1][c])

            # Add to summaries precision/recall values.
            aps_voc12 = {}
            for c in tp_fp_metric[0].keys():
                # Precision and recall values.
                prec, rec = precision_recall(*tp_fp_metric[0][c])

                # Average precision VOC12.
                v = average_precision_voc12(prec, rec)
                summary_name = 'AP_VOC12/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_voc12[c] = v

            # Mean average precision VOC12 (also printed via tf.Print).
            summary_name = 'AP_VOC12/mAP'
            mAP = tf.add_n(list(aps_voc12.values())) / len(aps_voc12)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # Split into values and updates ops.
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(
            dict_metrics)

        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            # math.ceil returns a float on Python 2; num_evals is a count.
            num_batches = int(
                math.ceil(dataset.num_samples / float(FLAGS.batch_size)))

        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            # latest_checkpoint returns None when the directory holds no
            # checkpoint; fail loudly rather than hand None to the evaluator.
            if checkpoint_path is None:
                raise ValueError('No checkpoint found in directory %s' %
                                 FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s' % checkpoint_path)

        # Pass the session config so the GPU memory fraction built above is
        # actually applied to the evaluation session.
        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore,
            session_config=config)
Пример #41
0
def main(FLAGS):
    """Train the image-generating network against a fixed loss network.

    Builds the loss network named by `FLAGS.loss_model`, feeds it both the
    generated and the original (preprocessed) images, combines content, style
    and total-variation losses with the weights from `FLAGS`, and runs an Adam
    training loop until the input queue signals the end of its epochs.
    Summaries and checkpoints are written under
    `<FLAGS.model_path>/<FLAGS.naming>`.

    Args:
        FLAGS: Configuration object; the attributes read here include
            `model_path`, `naming`, `loss_model`, `batch_size`, `image_size`,
            `epoch`, `content_layers`, `style_layers` and the three loss
            weights.
    """
    target_style_features = losses.get_style_features(FLAGS)

    # Make sure the training path exists.
    run_dir = os.path.join(FLAGS.model_path, FLAGS.naming)
    if not os.path.exists(run_dir):
        os.makedirs(run_dir)

    with tf.Graph().as_default(), tf.Session() as sess:
        # ---- Build network ----
        network_fn = nets_factory.get_network_fn(
            FLAGS.loss_model, num_classes=1, is_training=False)

        image_preprocessing_fn, image_unprocessing_fn = (
            preprocessing_factory.get_preprocessing(
                FLAGS.loss_model, is_training=False))
        processed_images = reader.image(
            FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size,
            'train2014/', image_preprocessing_fn, epochs=FLAGS.epoch)
        generated = model.net(processed_images, training=True)
        # Preprocess every generated image individually, then re-stack them.
        processed_generated = tf.stack([
            image_preprocessing_fn(single, FLAGS.image_size, FLAGS.image_size)
            for single in tf.unstack(
                generated, axis=0, num=FLAGS.batch_size)
        ])
        # Generated images first, originals second, in one loss-network pass.
        loss_net_input = tf.concat([processed_generated, processed_images], 0)
        _, endpoints_dict = network_fn(loss_net_input, spatial_squeeze=False)

        # Log the structure of loss network
        tf.logging.info('Loss network layers(You can define them in "content_layers" and "style_layers"):')
        for endpoint_name in endpoints_dict:
            tf.logging.info(endpoint_name)

        # ---- Build losses ----
        content_loss = losses.content_loss(
            endpoints_dict, FLAGS.content_layers)
        style_loss, style_loss_summary = losses.style_loss(
            endpoints_dict, target_style_features, FLAGS.style_layers)
        # Total variation is computed on the unprocessed generated image.
        tv_loss = losses.total_variation_loss(generated)

        weighted_style = FLAGS.style_weight * style_loss
        weighted_content = FLAGS.content_weight * content_loss
        weighted_tv = FLAGS.tv_weight * tv_loss
        loss = weighted_style + weighted_content + weighted_tv

        # ---- Add summaries for visualization in tensorboard ----
        tf.summary.scalar('losses/content_loss', content_loss)
        tf.summary.scalar('losses/style_loss', style_loss)
        tf.summary.scalar('losses/regularizer_loss', tv_loss)

        tf.summary.scalar('weighted_losses/weighted_content_loss',
                          content_loss * FLAGS.content_weight)
        tf.summary.scalar('weighted_losses/weighted_style_loss',
                          style_loss * FLAGS.style_weight)
        tf.summary.scalar('weighted_losses/weighted_regularizer_loss',
                          tv_loss * FLAGS.tv_weight)
        tf.summary.scalar('total_loss', loss)

        for layer_name in FLAGS.style_layers:
            tf.summary.scalar('style_losses/' + layer_name,
                              style_loss_summary[layer_name])
        tf.summary.image('generated', generated)
        originals = tf.stack([
            image_unprocessing_fn(single)
            for single in tf.unstack(
                processed_images, axis=0, num=FLAGS.batch_size)
        ])
        tf.summary.image('origin', originals)
        summary = tf.summary.merge_all()
        writer = tf.summary.FileWriter(run_dir)

        # ---- Prepare to train ----
        global_step = tf.Variable(0, name="global_step", trainable=False)

        # Only variables outside the loss network's name prefix are trained.
        variable_to_train = [
            var for var in tf.trainable_variables()
            if not var.name.startswith(FLAGS.loss_model)
        ]
        optimizer = tf.train.AdamOptimizer(1e-3)
        train_op = optimizer.minimize(
            loss, global_step=global_step, var_list=variable_to_train)

        # Likewise, only non-loss-network variables are saved and restored.
        variables_to_restore = [
            var for var in tf.global_variables()
            if not var.name.startswith(FLAGS.loss_model)
        ]
        saver = tf.train.Saver(
            variables_to_restore, write_version=tf.train.SaverDef.V1)

        sess.run([tf.global_variables_initializer(),
                  tf.local_variables_initializer()])

        # Restore variables for loss network.
        init_func = utils._get_init_fn(FLAGS)
        init_func(sess)

        # Restore variables for training model if the checkpoint file exists.
        last_file = tf.train.latest_checkpoint(run_dir)
        if last_file:
            tf.logging.info('Restoring model from {}'.format(last_file))
            saver.restore(sess, last_file)

        # ---- Start training ----
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        checkpoint_prefix = os.path.join(run_dir, 'fast-style-model.ckpt')
        final_checkpoint = os.path.join(run_dir, 'fast-style-model.ckpt-done')
        start_time = time.time()
        try:
            while not coord.should_stop():
                _, loss_t, step = sess.run([train_op, loss, global_step])
                elapsed_time = time.time() - start_time
                start_time = time.time()
                # Logging every 10 steps.
                if step % 10 == 0:
                    tf.logging.info('step: %d,  total Loss %f, secs/step: %f' % (step, loss_t, elapsed_time))
                # Summary every 25 steps.
                if step % 25 == 0:
                    tf.logging.info('adding summary...')
                    summary_str = sess.run(summary)
                    writer.add_summary(summary_str, step)
                    writer.flush()
                # Checkpoint every 1000 steps.
                if step % 1000 == 0:
                    saver.save(sess, checkpoint_prefix, global_step=step)
        except tf.errors.OutOfRangeError:
            # The input queue is exhausted; save a final checkpoint.
            saver.save(sess, final_checkpoint)
            tf.logging.info('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
        coord.join(threads)
Пример #42
0
def main(_):
    """Write noisy, Bayer-masked PNG copies of the images under --input_dir.

    A graph is built that preprocesses one uint8 image fed through a
    placeholder, applies ``sensor_model.sensor_noise_rand_light_level`` and
    multiplies the result by ``sensor_model.get_bayer_mask``. Every entry of
    ``FLAGS.input_dir`` whose name contains no '.' is treated as a synset
    directory; each of its images is processed individually and written to
    ``<output_dir>/<synset>/<name up to the first '.'>.png``. Outputs that
    already exist are skipped, so an interrupted run can be resumed.

    Args:
        _: unused positional argument.

    Raises:
        ValueError: if --input_dir or --output_dir is not set.
    """
    if not FLAGS.input_dir:
        raise ValueError(
            'You must supply the input directory with --input_dir')
    if not FLAGS.output_dir:
        raise ValueError(
            'You must supply the output directory with --output_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():

        # Preprocess the images so that they all have the same size
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = FLAGS.eval_image_size
        orig_image = tf.placeholder(tf.uint8, shape=(None, None, 3))
        # NOTE(review): the placeholder is passed twice; presumably this
        # preprocessing function takes an (image, target) pair -- confirm.
        image = image_preprocessing_fn(orig_image, orig_image, eval_image_size,
                                       eval_image_size)
        images = tf.expand_dims(image, 0)

        # Add noise.
        noisy_batch, alpha, sigma = sensor_model.sensor_noise_rand_light_level(
            images, [FLAGS.ll_low, FLAGS.ll_high],
            scale=1.0,
            sensor=FLAGS.sensor)

        bayer_mask = sensor_model.get_bayer_mask(eval_image_size,
                                                 eval_image_size)
        inputs = noisy_batch * bayer_mask

        # makedirs also creates missing parent directories.
        if not os.path.isdir(FLAGS.output_dir):
            os.makedirs(FLAGS.output_dir)

        with tf.Session() as sess:
            count = 0
            synsets = [
                path for path in os.listdir(FLAGS.input_dir) if '.' not in path
            ]

            for synset in synsets:
                path = os.path.join(FLAGS.input_dir, synset)
                image_names = os.listdir(path)
                print("Found %d images in %s" % (len(image_names), synset))

                synset_path = os.path.join(FLAGS.output_dir, synset)
                if not os.path.isdir(synset_path):
                    os.makedirs(synset_path)

                for imagename in image_names:
                    output_imgfn = os.path.join(
                        synset_path, imagename.split('.')[0] + '.png')
                    if os.path.isfile(output_imgfn):
                        continue

                    input_imgfn = os.path.join(path, imagename)
                    loaded_image = cv2.imread(input_imgfn)
                    if loaded_image is None:
                        # cv2.imread signals an unreadable / non-image file by
                        # returning None instead of raising; skip it rather
                        # than crash on the channel flip below.
                        tf.logging.warning(
                            'Skipping unreadable image: %s', input_imgfn)
                        continue

                    # BGR to RGB
                    loaded_image = loaded_image[..., ::-1]
                    noisy_images, _, _ = sess.run(
                        [inputs, alpha, sigma],
                        feed_dict={orig_image: loaded_image})
                    # Clip before narrowing: values outside [0, 255] would
                    # otherwise wrap around when cast to uint8.
                    img = np.clip(255.0 * noisy_images[0, :, :, :],
                                  0.0, 255.0).astype(np.uint8)

                    # RGB to BGR
                    img = img[..., ::-1]

                    if count % 1000 == 0:
                        print("%d processed images." % (count))
                    cv2.imwrite(output_imgfn, img)
                    count += 1

        print('Total images processed:', count)
def main(_):
  """Restore a checkpoint and print the model output for a single batch.

  Builds the evaluation input pipeline and the network selected by
  ``FLAGS.model_name``, restores ``FLAGS.checkpoint_path`` (the latest
  checkpoint when that path is a directory), runs one batch through the
  network and prints the logits. The wall-clock time spent in each
  graph-construction step is collected in a dict and printed at the end.

  Args:
    _: unused positional argument.

  Raises:
    ValueError: if --dataset_dir is not set or no checkpoint can be resolved.
  """
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')
  times = {}
  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    start = time.time()
    tf_global_step = slim.get_or_create_global_step()
    times['global_step'] = time.time() - start

    ######################
    # Select the dataset #
    ######################
    start = time.time()
    dataset = dataset_factory.get_dataset(
      FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir, suffix=FLAGS.dataset_name_suffix)
    times['get_dataset'] = time.time() - start

    ####################
    # Select the model #
    ####################
    start = time.time()
    network_fn = nets_factory.get_network_fn(
      FLAGS.model_name,
      num_classes=(dataset.num_classes - FLAGS.labels_offset),
      is_training=False)
    times['select_model'] = time.time() - start

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    start = time.time()
    provider = slim.dataset_data_provider.DatasetDataProvider(
      dataset,
      shuffle=False,
      common_queue_capacity=2 * FLAGS.batch_size,
      common_queue_min=FLAGS.batch_size)
    times['get_provider'] = time.time() - start
    start = time.time()
    [image] = provider.get(['image'])
    times['get_image'] = time.time() - start

    #####################################
    # Select the preprocessing function #
    #####################################
    start = time.time()
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
      preprocessing_name,
      is_training=False)
    times['get_preprocessing'] = time.time() - start

    eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

    start = time.time()
    image = image_preprocessing_fn(image, eval_image_size, eval_image_size)
    times['preprocessing'] = time.time() - start

    start = time.time()
    images = tf.train.batch(
      [image],
      batch_size=FLAGS.batch_size,
      num_threads=FLAGS.num_preprocessing_threads,
      capacity=5 * FLAGS.batch_size)
    times['get_batch'] = time.time() - start

    start = time.time()
    tf.image_summary('test_images', images, FLAGS.batch_size)
    times['image_summary'] = time.time() - start

    ####################
    # Define the model #
    ####################
    start = time.time()
    logits, _ = network_fn(images)
    times['do_network'] = time.time() - start

    # With a moving-average decay the shadow (averaged) values are restored
    # into the model variables; otherwise restore every restorable variable.
    if FLAGS.moving_average_decay:
      variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, tf_global_step)
      variables_to_restore = variable_averages.variables_to_restore(
        slim.get_model_variables())
      variables_to_restore[tf_global_step.op.name] = tf_global_step
    else:
      variables_to_restore = slim.get_variables_to_restore()

    # Flatten 4-D network output to [batch, -1].
    if len(logits.get_shape()) == 4:
      logits = tf.reshape(logits, [int(logits.get_shape()[0]), -1])

    softmax = tf.nn.softmax(logits)

    # TODO(sguada) use num_epochs=1
    # NOTE(review): num_batches is not consumed by the single sess.run below;
    # only one batch is evaluated.
    if FLAGS.max_num_batches:
      num_batches = FLAGS.max_num_batches
    else:
      # This ensures that we make a single pass over all of the data.
      num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size))

    start = time.time()
    if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
      checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    else:
      checkpoint_path = FLAGS.checkpoint_path
    times['load_checkpoint'] = time.time() - start

    # latest_checkpoint returns None when the directory holds no checkpoint;
    # fail here with a clear message instead of inside saver.restore.
    if not checkpoint_path:
      raise ValueError(
        'No checkpoint found at %s' % FLAGS.checkpoint_path)

    tf.logging.info('Evaluating %s' % checkpoint_path)

    from tensorflow.contrib.framework.python.ops import variables
    from tensorflow.core.protobuf import saver_pb2
    from tensorflow.python.training import saver as tf_saver
    from tensorflow.python.framework import ops
    from tensorflow.python.training import supervisor
    saver = tf_saver.Saver(
      variables_to_restore or variables.get_variables_to_restore(),
      write_version=saver_pb2.SaverDef.V1)
    # The supervisor only manages the session here: its own saver, summary
    # writer and global step handling are all disabled.
    sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                               logdir=FLAGS.eval_dir,
                               summary_op=None,
                               summary_writer=None,
                               global_step=None,
                               saver=None)
    with sv.managed_session(FLAGS.master, start_standard_services=False) as sess:
      saver.restore(sess, checkpoint_path)
      sv.start_queue_runners(sess)
      start = time.time()
      # Fetch both tensors as a list so that index 1 below really is the
      # logits array (a bare `sess.run(logits)` returns a single ndarray and
      # indexing it with [1] would select the second sample instead).
      final_op_value = sess.run([softmax, logits])
      times['exec'] = time.time() - start

    print(final_op_value[1].shape)
    result_predict = np.reshape(final_op_value[1], (FLAGS.batch_size, final_op_value[1].shape[-1]))
    print(result_predict)
    # NOTE(review): this ranks the samples of the batch by their class-1
    # score; if a per-sample top-5 over classes was intended, the slice
    # should be over axis 1 instead -- confirm.
    print(np.argsort(result_predict[:, 1])[-5:])
  print(times)
Пример #44
0
def main(_):
    """Build the SSD training graph and run it with ``slim.learning.train``.

    In order, the function sets up: a single-replica deployment config, the
    global step, the dataset, the SSD network with its anchors, the training
    preprocessing, a batched and prefetched input queue, one model clone per
    ``FLAGS.num_clones``, summaries, optional moving averages, the optimizer
    and gradient updates, and finally the training loop.

    Args:
        _: unused positional argument.

    Raises:
        ValueError: if --dataset_dir is not set.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():
        # model_deploy keeps the TF-Slim structure; only one replica and no
        # parameter servers are configured here.
        deployment = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=0,
            num_replicas=1,
            num_ps_tasks=0)

        # The global step is created on the variables device.
        with tf.device(deployment.variables_device()):
            step_var = slim.create_global_step()

        # Dataset to train on.
        train_set = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        # SSD network instance, its input shape and the anchors for that shape.
        net_cls = nets_factory.get_network(FLAGS.model_name)
        net_params = net_cls.default_params._replace(num_classes=FLAGS.num_classes)
        detector = net_cls(net_params)
        input_shape = detector.params.img_shape
        anchors = detector.anchors(input_shape)

        # Preprocessing falls back to the one registered under the model name.
        preproc_name = FLAGS.preprocessing_name or FLAGS.model_name
        preprocess = preprocessing_factory.get_preprocessing(
            preproc_name, is_training=True)

        tf_utils.print_configuration(FLAGS.__flags, net_params,
                                     train_set.data_sources, FLAGS.train_dir)

        # ------------------------------------------------------------------- #
        # Input pipeline: provider -> preprocessing -> batch -> prefetch.
        # ------------------------------------------------------------------- #
        with tf.device(deployment.inputs_device()):
            with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    train_set,
                    num_readers=FLAGS.num_readers,
                    common_queue_capacity=20 * FLAGS.batch_size,
                    common_queue_min=10 * FLAGS.batch_size,
                    shuffle=True)

            # Raw sample: image, its shape, ground-truth labels and boxes.
            image, shape, glabels, gbboxes = provider.get(
                ['image', 'shape', 'object/label', 'object/bbox'])

            # Training-time preprocessing of image, labels and boxes.
            image, glabels, gbboxes = preprocess(
                image, glabels, gbboxes,
                out_shape=input_shape,
                data_format=DATA_FORMAT)

            # Encode the ground truth against the anchors.
            gclasses, glocalisations, gscores = detector.bboxes_encode(
                glabels, gbboxes, anchors)

            # Grouping used by reshape_list: presumably one image tensor plus
            # len(anchors) tensors for each of classes, localisations, scores.
            batch_shape = [1] + [len(anchors)] * 3

            # Batch the flattened sample, then restore the grouping.
            flat_sample = tf_utils.reshape_list(
                [image, gclasses, glocalisations, gscores])
            flat_batch = tf.train.batch(
                flat_sample,
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            b_image, b_gclasses, b_glocalisations, b_gscores = \
                tf_utils.reshape_list(flat_batch, batch_shape)

            # One shared prefetch queue feeds every clone.
            flat_prefetch = tf_utils.reshape_list(
                [b_image, b_gclasses, b_glocalisations, b_gscores])
            batch_queue = slim.prefetch_queue.prefetch_queue(
                flat_prefetch,
                capacity=2 * deployment.num_clones)

        # ------------------------------------------------------------------- #
        # Per-clone model definition.
        # ------------------------------------------------------------------- #
        def clone_fn(queue):
            """Dequeue one batch, run the SSD net on it and register losses."""
            dequeued = queue.dequeue()
            batch_img, batch_cls, batch_loc, batch_scores = \
                tf_utils.reshape_list(dequeued, batch_shape)

            # Forward pass under the network's arg scope.
            net_scope = detector.arg_scope(weight_decay=FLAGS.weight_decay,
                                           data_format=DATA_FORMAT)
            with slim.arg_scope(net_scope):
                predictions, localisations, logits, end_points = \
                    detector.net(batch_img, is_training=True)

            # Loss terms are added by the network's own loss builder.
            detector.losses(logits, localisations,
                            batch_cls, batch_loc, batch_scores,
                            match_threshold=FLAGS.match_threshold,
                            negative_ratio=FLAGS.negative_ratio,
                            alpha=FLAGS.loss_alpha,
                            label_smoothing=FLAGS.label_smoothing)
            return end_points

        # Summaries that exist before any clone is built.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # ------------------------------------------------------------------- #
        # Build the clones; summaries are taken from the first one only.
        # ------------------------------------------------------------------- #
        clones = model_deploy.create_clones(deployment, clone_fn, [batch_queue])
        first_scope = deployment.clone_scope(0)
        # Update ops of the first clone (e.g. batch-norm statistics updates).
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_scope)

        # Activation histogram and sparsity for every end point.
        first_end_points = clones[0].outputs
        for name in first_end_points:
            activation = first_end_points[name]
            summaries.add(tf.summary.histogram('activations/' + name, activation))
            summaries.add(tf.summary.scalar('sparsity/' + name,
                                            tf.nn.zero_fraction(activation)))

        # Scalar summaries for the losses and the extra losses.
        for collection_key in (tf.GraphKeys.LOSSES, 'EXTRA_LOSSES'):
            for loss_tensor in tf.get_collection(collection_key, first_scope):
                summaries.add(
                    tf.summary.scalar(loss_tensor.op.name, loss_tensor))

        # Histogram for every model variable.
        for model_var in slim.get_model_variables():
            summaries.add(tf.summary.histogram(model_var.op.name, model_var))

        # ------------------------------------------------------------------- #
        # Moving averages (optional).
        # ------------------------------------------------------------------- #
        moving_average_variables, variable_averages = None, None
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, step_var)

        # ------------------------------------------------------------------- #
        # Optimizer and learning rate.
        # ------------------------------------------------------------------- #
        with tf.device(deployment.optimizer_device()):
            learning_rate = tf_utils.configure_learning_rate(
                FLAGS, train_set.num_samples, step_var)
            optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.moving_average_decay:
            # The averages are updated locally as part of the train step.
            update_ops.append(variable_averages.apply(moving_average_variables))

        trainable = tf_utils.get_variables_to_train(FLAGS)

        # Total loss and gradients aggregated over all clones.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones,
            optimizer,
            var_list=trainable)
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # The train tensor evaluates to total_loss once all updates have run.
        apply_grads = optimizer.apply_gradients(clones_gradients,
                                                global_step=step_var)
        update_ops.append(apply_grads)
        all_updates = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies(
            [all_updates], total_loss, name='train_op')

        # Pick up the summaries created inside the first clone's scope and
        # merge everything into a single op.
        first_clone_summaries = set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_scope))
        summaries |= first_clone_summaries
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # ------------------------------------------------------------------- #
        # Session configuration, saver and training loop.
        # ------------------------------------------------------------------- #
        session_config = tf.ConfigProto(
            log_device_placement=False,
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction))
        saver = tf.train.Saver(max_to_keep=5,
                               keep_checkpoint_every_n_hours=1.0,
                               write_version=2,
                               pad_step_number=False)
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master='',
            is_chief=True,
            init_fn=tf_utils.get_init_fn(FLAGS),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            saver=saver,
            save_interval_secs=FLAGS.save_interval_secs,
            session_config=session_config,
            sync_optimizer=None)
Пример #45
0
def main(_):
  """Assemble the MobileNet detection training graph and start training.

  Sets up the deployment config, global step, dataset, network and
  preprocessing functions, an input pipeline that encodes the annotations
  against ``config.ANCHOR_SHAPE``, one model clone per ``FLAGS.num_clones``
  with a prediction head on top of the ``conv_ds_14`` end point, summaries,
  optional moving averages / synchronous replicas, and finally calls
  ``slim.learning.train``.

  Args:
    _: unused positional argument.

  Raises:
    ValueError: if --dataset_dir is not set.
  """
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    # ---- Deployment configuration ----
    deployment = model_deploy.DeploymentConfig(
      num_clones=FLAGS.num_clones,
      clone_on_cpu=FLAGS.clone_on_cpu,
      replica_id=FLAGS.task,
      num_replicas=FLAGS.worker_replicas,
      num_ps_tasks=FLAGS.num_ps_tasks)

    # The global step is created on the variables device.
    with tf.device(deployment.variables_device()):
      step_var = slim.create_global_step()

    # ---- Dataset ----
    train_set = dataset_factory.get_dataset(
      FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

    # ---- Network ----
    network_fn = nets_factory.get_network_fn(
      FLAGS.model_name,
      num_classes=(train_set.num_classes - FLAGS.labels_offset),
      weight_decay=FLAGS.weight_decay,
      is_training=True,
      width_multiplier=FLAGS.width_multiplier)

    # ---- Preprocessing (defaults to the one named after the model) ----
    preproc_name = FLAGS.preprocessing_name or FLAGS.model_name
    preprocess = preprocessing_factory.get_preprocessing(
      preproc_name,
      is_training=True)

    # ---- Input pipeline ----
    with tf.device(deployment.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
        train_set,
        num_readers=FLAGS.num_readers,
        common_queue_capacity=20 * FLAGS.batch_size,
        common_queue_min=10 * FLAGS.batch_size)

      # gt_bboxes format [ymin, xmin, ymax, xmax]
      image, img_shape, gt_labels, gt_bboxes = provider.get(
        ['image', 'shape', 'object/label', 'object/bbox'])

      # Resize / augment the image together with its labels and boxes.
      image, gt_labels, gt_bboxes = preprocess(
        image,
        config.IMG_HEIGHT,
        config.IMG_WIDTH,
        labels=gt_labels,
        bboxes=gt_bboxes,
      )

      # Encode the annotations for the loss computation.
      # anchors format [cx, cy, w, h]
      anchor_boxes = tf.convert_to_tensor(config.ANCHOR_SHAPE, dtype=tf.float32)

      # box_input format [cx, cy, w, h]
      input_mask, labels_input, box_delta_input, box_input = encode_annos(
        gt_labels, gt_bboxes, anchor_boxes, config.NUM_CLASSES)

      # Batch the encoded sample and prefetch batches for the clones.
      batched = tf.train.batch(
        [image, input_mask, labels_input, box_delta_input, box_input],
        batch_size=FLAGS.batch_size,
        num_threads=FLAGS.num_preprocessing_threads,
        capacity=5 * FLAGS.batch_size)

      batch_queue = slim.prefetch_queue.prefetch_queue(
        list(batched), capacity=2 * deployment.num_clones)

    # ---- Per-clone model definition ----
    def clone_fn(queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      (batch_images, batch_mask, batch_labels,
       batch_box_delta, batch_boxes) = queue.dequeue()
      anchors = tf.convert_to_tensor(config.ANCHOR_SHAPE, dtype=tf.float32)

      end_points = network_fn(batch_images)
      end_points["viz_images"] = batch_images

      # Prediction head: dropout followed by a 3x3 convolution that emits,
      # per anchor, the class scores, one confidence and four box deltas.
      features = end_points['MobileNet/conv_ds_14/depthwise_conv']
      dropped = slim.dropout(features, keep_prob=0.5, is_training=True)
      num_output = config.NUM_ANCHORS * (config.NUM_CLASSES + 1 + 4)
      predict = slim.conv2d(
        dropped, num_output,
        kernel_size=(3, 3), stride=1, padding='SAME',
        activation_fn=None,
        weights_initializer=tf.truncated_normal_initializer(stddev=0.0001),
        scope="MobileNet/conv_predict")

      with tf.name_scope("Interpre_prediction"):
        (pred_box_delta, pred_class_probs, pred_conf, ious,
         det_probs, det_boxes, det_class) = interpre_prediction(
           predict, batch_mask, anchors, batch_boxes)
        end_points["viz_det_probs"] = det_probs
        end_points["viz_det_boxes"] = det_boxes
        end_points["viz_det_class"] = det_class

      with tf.name_scope("Losses"):
        losses(batch_mask, batch_labels, ious, batch_box_delta,
               pred_class_probs, pred_conf, pred_box_delta)

      return end_points

    # Summaries that exist before any clone is built.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deployment, clone_fn, [batch_queue])
    first_scope = deployment.clone_scope(0)
    # Update ops of the first clone (e.g. batch-norm statistics updates
    # created by network_fn).
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_scope)

    # Activation histogram and sparsity for every end point except the
    # visualisation-only entries added in clone_fn.
    viz_only = ["viz_images", "viz_det_probs", "viz_det_boxes", "viz_det_class"]
    first_end_points = clones[0].outputs
    for name in first_end_points:
      if name in viz_only:
        continue
      activation = first_end_points[name]
      summaries.add(tf.summary.histogram('activations/' + name, activation))
      summaries.add(tf.summary.scalar('sparsity/' + name,
                                      tf.nn.zero_fraction(activation)))

    # TODO(shizehao): add summaries that visualize the detection results.

    # Scalar summary per loss of the first clone.
    for loss_tensor in tf.get_collection(tf.GraphKeys.LOSSES, first_scope):
      summaries.add(
        tf.summary.scalar('losses/%s' % loss_tensor.op.name, loss_tensor))

    # Histogram per model variable.
    for model_var in slim.get_model_variables():
      summaries.add(tf.summary.histogram(model_var.op.name, model_var))

    # ---- Moving averages (optional) ----
    moving_average_variables, variable_averages = None, None
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, step_var)

    # ---- Optimizer ----
    with tf.device(deployment.optimizer_device()):
      learning_rate = _configure_learning_rate(train_set.num_samples, step_var)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    if FLAGS.sync_replicas:
      # With sync_replicas the averaging is done in the chief queue runner.
      optimizer = tf.train.SyncReplicasOptimizer(
        opt=optimizer,
        replicas_to_aggregate=FLAGS.replicas_to_aggregate,
        variable_averages=variable_averages,
        variables_to_average=moving_average_variables,
        replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
        total_num_replicas=FLAGS.worker_replicas)
    elif FLAGS.moving_average_decay:
      # Otherwise the averages are updated locally by the trainer.
      update_ops.append(variable_averages.apply(moving_average_variables))

    trainable = _get_variables_to_train()

    # Total loss and gradients aggregated over all clones.
    total_loss, clones_gradients = model_deploy.optimize_clones(
      clones,
      optimizer,
      var_list=trainable)
    summaries.add(tf.summary.scalar('total_loss', total_loss))

    # The train tensor evaluates to total_loss once all updates have run.
    apply_grads = optimizer.apply_gradients(clones_gradients,
                                            global_step=step_var)
    update_ops.append(apply_grads)

    all_updates = tf.group(*update_ops)
    train_tensor = control_flow_ops.with_dependencies(
      [all_updates], total_loss, name='train_op')

    # Pick up the summaries created inside the first clone's scope (by the
    # model function and by optimize_clones) and merge everything.
    first_clone_summaries = set(
      tf.get_collection(tf.GraphKeys.SUMMARIES, first_scope))
    summaries |= first_clone_summaries
    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    # ---- Training loop ----
    sync_optimizer = optimizer if FLAGS.sync_replicas else None
    slim.learning.train(
      train_tensor,
      logdir=FLAGS.train_dir,
      master=FLAGS.master,
      is_chief=(FLAGS.task == 0),
      init_fn=_get_init_fn(),
      summary_op=summary_op,
      number_of_steps=FLAGS.max_number_of_steps,
      log_every_n_steps=FLAGS.log_every_n_steps,
      save_summaries_secs=FLAGS.save_summaries_secs,
      save_interval_secs=FLAGS.save_interval_secs,
      sync_optimizer=sync_optimizer)
Пример #46
0
def main(_):
    """Build the SSD training graph and run it through ``slim.learning.train``.

    Construction order: deployment config and global step, dataset / network /
    preprocessing selection, the queued input pipeline, the model clones,
    summaries, optional moving averages, the optimizer, and the train op.
    Training always runs as a single local replica (``master=''``,
    ``is_chief=True``).

    Raises:
        ValueError: if ``--dataset_dir`` was not supplied.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():
        # The TF-Slim model_deploy layout is kept even though replica and
        # parameter-server counts are fixed here, so clones can be added later.
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=0,
            num_replicas=1,
            num_ps_tasks=0)
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        # Dataset to train on.
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        # SSD network object, its input shape and the anchors for that shape.
        network_cls = nets_factory.get_network(FLAGS.model_name)
        net_params = network_cls.default_params._replace(
            num_classes=FLAGS.num_classes)
        ssd_net = network_cls(net_params)
        input_shape = ssd_net.params.img_shape
        anchors = ssd_net.anchors(input_shape)

        # Preprocessing defaults to the one registered under the model name.
        preproc_name = FLAGS.preprocessing_name
        if not preproc_name:
            preproc_name = FLAGS.model_name
        preprocess = preprocessing_factory.get_preprocessing(
            preproc_name, is_training=True)

        tf_utils.print_configuration(
            FLAGS.__flags, net_params, dataset.data_sources, FLAGS.train_dir)

        # ------------------------------------------------------------------- #
        # Input pipeline: provider -> preprocessing -> encoding -> batching.
        # ------------------------------------------------------------------- #
        with tf.device(deploy_config.inputs_device()):
            with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
                data_provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    num_readers=FLAGS.num_readers,
                    common_queue_capacity=20 * FLAGS.batch_size,
                    common_queue_min=10 * FLAGS.batch_size,
                    shuffle=True)
            # Raw sample: image plus ground-truth labels and boxes.
            image, _shape, gt_labels, gt_bboxes = data_provider.get(
                ['image', 'shape', 'object/label', 'object/bbox'])
            image, gt_labels, gt_bboxes = preprocess(
                image, gt_labels, gt_bboxes,
                out_shape=input_shape,
                data_format=DATA_FORMAT)
            # Encode the ground truth against the anchors.
            enc_classes, enc_locs, enc_scores = ssd_net.bboxes_encode(
                gt_labels, gt_bboxes, anchors)
            # Layout used by reshape_list to regroup the flattened tensors:
            # one image, then one group per anchor layer for each encoding.
            batch_shape = [1] + [len(anchors)] * 3

            # Batch the flattened tensor list, then regroup it.
            flat_batch = tf.train.batch(
                tf_utils.reshape_list(
                    [image, enc_classes, enc_locs, enc_scores]),
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            batch_image, batch_classes, batch_locs, batch_scores = \
                tf_utils.reshape_list(flat_batch, batch_shape)

            # A single prefetch queue feeds every clone.
            batch_queue = slim.prefetch_queue.prefetch_queue(
                tf_utils.reshape_list(
                    [batch_image, batch_classes, batch_locs, batch_scores]),
                capacity=2 * deploy_config.num_clones)

        # ------------------------------------------------------------------- #
        # Model definition executed once per clone.
        # ------------------------------------------------------------------- #
        def clone_fn(batch_queue):
            """Dequeue one batch, build the SSD net on it and register its losses."""
            images, tgt_classes, tgt_locs, tgt_scores = tf_utils.reshape_list(
                batch_queue.dequeue(), batch_shape)

            net_scope = ssd_net.arg_scope(weight_decay=FLAGS.weight_decay,
                                          data_format=DATA_FORMAT)
            with slim.arg_scope(net_scope):
                _, localisations, logits, end_points = ssd_net.net(
                    images, is_training=True)
            ssd_net.losses(logits, localisations,
                           tgt_classes, tgt_locs, tgt_scores,
                           match_threshold=FLAGS.match_threshold,
                           negative_ratio=FLAGS.negative_ratio,
                           alpha=FLAGS.loss_alpha,
                           label_smoothing=FLAGS.label_smoothing)
            return end_points

        # Summaries that exist before any clone is created.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # ------------------------------------------------------------------- #
        # Clones, and the summaries / update ops taken from the first one.
        # ------------------------------------------------------------------- #
        clones = model_deploy.create_clones(
            deploy_config, clone_fn, [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Update ops of the first clone only (e.g. batch-norm statistics).
        update_ops = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # Activation histogram and sparsity for every end point.
        for name, activation in clones[0].outputs.items():
            summaries.add(
                tf.summary.histogram('activations/' + name, activation))
            summaries.add(
                tf.summary.scalar('sparsity/' + name,
                                  tf.nn.zero_fraction(activation)))
        # One scalar per loss, regular losses first, then the extra ones.
        for collection_key in (tf.GraphKeys.LOSSES, 'EXTRA_LOSSES'):
            for loss in tf.get_collection(collection_key, first_clone_scope):
                summaries.add(tf.summary.scalar(loss.op.name, loss))

        # One histogram per model variable.
        for model_var in slim.get_model_variables():
            summaries.add(tf.summary.histogram(model_var.op.name, model_var))

        # ------------------------------------------------------------------- #
        # Moving averages (optional).
        # ------------------------------------------------------------------- #
        moving_average_variables = None
        variable_averages = None
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)

        # ------------------------------------------------------------------- #
        # Optimizer.
        # ------------------------------------------------------------------- #
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = tf_utils.configure_learning_rate(
                FLAGS, dataset.num_samples, global_step)
            optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.moving_average_decay:
            # The averaging update runs locally alongside the other update ops.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        trainable = tf_utils.get_variables_to_train(FLAGS)

        # Total loss and gradients aggregated over all clones.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=trainable)
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Gradient application joins the update ops; the train tensor returns
        # the total loss once every update has run.
        update_ops.append(
            optimizer.apply_gradients(clones_gradients,
                                      global_step=global_step))
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies(
            [update_op], total_loss, name='train_op')

        # Pick up the summaries created inside the first clone's scope and
        # merge everything into one op.
        summaries.update(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # ------------------------------------------------------------------- #
        # Session configuration, checkpointing and the training loop.
        # ------------------------------------------------------------------- #
        session_config = tf.ConfigProto(
            log_device_placement=False,
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction))
        saver = tf.train.Saver(max_to_keep=5,
                               keep_checkpoint_every_n_hours=1.0,
                               write_version=2,
                               pad_step_number=False)
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master='',
            is_chief=True,
            init_fn=tf_utils.get_init_fn(FLAGS),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            saver=saver,
            save_interval_secs=FLAGS.save_interval_secs,
            session_config=session_config,
            sync_optimizer=None)
Пример #47
0
def eval(eval_op_feed_dict=None, session_config=None, max_num_batches=None, sample_percentage = None, masking_variable_value=None,
        compute_delta_cost_per_layer_solution=None, compute_delta_cost_per_layer_solution2=None):
  """Evaluate the checkpointed network, building one tower per visible device.

  The batch is split evenly across the devices listed in
  ``CUDA_VISIBLE_DEVICES`` (a single CPU tower when that variable is empty or
  unset). When the masking-variable manager ``DnnUtili.mvm`` is not empty, a
  masking-variable solution is selected on the first tower and fed to every
  tower through the session feed dict.

  Args:
    eval_op_feed_dict: must be None (asserted); merging a caller-supplied feed
      dict with the per-tower masking-variable feeds is not implemented.
    session_config: accepted for interface compatibility but not used; the
      session is always created with ``allow_soft_placement=True``.
    max_num_batches: if not None, stored into ``FLAGS.max_num_batches`` and
      turns input shuffling on. Cannot be combined with ``sample_percentage``.
    sample_percentage: fraction in (0, 1] of the dataset to evaluate; values
      below 0.99 turn input shuffling on.
    masking_variable_value: masking-variable solution used only when none of
      ``FLAGS.call_gurobi``, ``FLAGS.solution_path`` or ``FLAGS.K_heuristic``
      selects one.
    compute_delta_cost_per_layer_solution: if not None (and the manager is not
      empty), it is forwarded together with
      ``compute_delta_cost_per_layer_solution2`` to
      ``mvm.compute_delta_cost_per_layer`` and the function returns None
      without evaluating.
    compute_delta_cost_per_layer_solution2: second argument of that call.

  Returns:
    An ``OrderedDict`` with keys ``accuracy`` (top-1), ``accuracy_5`` (top-5),
    ``loss``, ``computation_cost``, ``memory_cost`` (both -1 when not
    computed) and ``tf_run_time`` (seconds spent in the ``sess.run`` loop).
    None on the two early-exit paths (delta-cost computation, or
    ``FLAGS.only_compute_mask_variable``).

  Raises:
    ValueError: if ``--dataset_dir`` is missing or ``FLAGS.solution_path`` has
      an unsupported extension.
    AssertionError: on inconsistent arguments / flags.

  Side effects: sets ``FLAGS.is_training`` (and possibly
  ``FLAGS.max_num_batches``), may rewrite the ``.mat`` file at
  ``FLAGS.solution_path`` and ``/tmp/sqp_solution.mat``, writes
  ``/tmp/mask_variable_value_dict.pickle`` and deletes ``FLAGS.eval_dir``.
  """
  FLAGS.is_training=False
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  assert not ((max_num_batches is not None) and (sample_percentage is not None)), 'argument of eval max_num_batches and sample_percentage cannot be both specified'

  # Shuffle whenever only part of the dataset is going to be evaluated.
  shuffle = (max_num_batches is not None) or (
      sample_percentage is not None and sample_percentage < 0.99)

  # Set the number of batches to be evaluated; None keeps the flag's value.
  if max_num_batches is not None:
      FLAGS.max_num_batches=max_num_batches

  # Reported as-is when no masking-variable cost is computed below.
  computation_percentage, memory_percentage = -1, -1

  tf.logging.set_verbosity(tf.logging.WARN)
  with tf.Graph().as_default(), tf.device('/cpu:0'):

    tf_global_step = tf.train.get_or_create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

    ####################
    # Select the model #
    ####################
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        is_training=False)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle= shuffle,
        num_readers=32,
        common_queue_capacity=10 * FLAGS.batch_size,
        common_queue_min=3*FLAGS.batch_size)
    [image, label] = provider.get(['image', 'label'])
    label -= FLAGS.labels_offset

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=False)

    eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

    image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

    images, labels = tf.train.batch(
        [image, label],
        batch_size=FLAGS.batch_size,
        num_threads=FLAGS.num_preprocessing_threads,
        capacity=5 * FLAGS.batch_size)

    # An unset CUDA_VISIBLE_DEVICES is treated like an empty one (single CPU
    # tower) instead of failing with KeyError.
    visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "")
    gpus = visible_devices.strip().split(',')
    if gpus == ['']:
        gpus = ['CPU']
    else:
        assert all(g.isdigit() for g in gpus), 'invalid gpu string : %s'%visible_devices
    num_gpus = len(gpus)

    # One split of the batch per tower; `gpus` always has at least one entry.
    num_splits = num_gpus

    assert FLAGS.batch_size % num_splits == 0, 'batch_size %d cannot be divided by num_splits %d'%(FLAGS.batch_size, num_splits)

    images_splits = tf.split(axis=0, num_or_size_splits=num_splits, value=images)
    labels_splits = tf.split(axis=0, num_or_size_splits=num_splits, value=labels)

    def _tower_logit(tower_images, reuse_variables=None):
        """Build the network on one tower's images and return its logits."""
        with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
            tower_logits, _ = network_fn(tower_images)
        return tower_logits

    # Per-tower loss and hit-count tensors.
    tower_loss_list = []
    tower_top_1_op_list = []
    tower_top_5_op_list = []

    # MaskingVariableManager
    mvm = DnnUtili.mvm

    # Each element holds the masking variables of one tower.
    variables_list = []

    for idx, gpu_id in enumerate(gpus):
        if gpu_id == 'CPU':
            device_string = '/cpu:0'
        else:
            # Addressed by position in the list, not by the physical id.
            device_string = '/gpu:%d' % idx

        with tf.device(device_string):
            # Variables are created by the first tower and reused afterwards.
            logits = _tower_logit(images_splits[idx], reuse_variables=True if idx>0 else None)

            tower_labels = tf.squeeze(labels_splits[idx])
            tower_loss = tf.losses.sparse_softmax_cross_entropy(
                    logits = logits, labels = tower_labels)
            # Number of correct top-1 / top-5 predictions in this tower.
            tower_top_1 = tf.reduce_sum(tf.to_float(tf.nn.in_top_k(logits, tower_labels, 1)))
            tower_top_5 = tf.reduce_sum(tf.to_float(tf.nn.in_top_k(logits, tower_labels, 5)))

            tower_loss_list.append(tower_loss)
            tower_top_1_op_list.append(tower_top_1)
            tower_top_5_op_list.append(tower_top_5)

        # Remember this tower's masking variables, if there are any.
        if not mvm.is_empty():
            variables = DnnUtili.mvm.get_variables()
            variables_list.append(variables)

        # Compute the masking-variable values to feed. Only done for the
        # first tower, and only if the manager is not empty.
        if idx == 0 and not mvm.is_empty():
            if FLAGS.only_compute_mask_variable is False:
                print_mvm_parameters()

            if FLAGS.K_heuristic is None:
                # Don't print when computing the reduced index.
                mvm.print_variable_index()

            # The message reports the same three options the condition tests.
            assert bool(FLAGS.call_gurobi) + bool(FLAGS.solution_path is not None) + bool(FLAGS.K_heuristic is not None) <= 1, 'no more than one of these options can be true, got %s, %s, %s'%(FLAGS.call_gurobi, FLAGS.solution_path, FLAGS.K_heuristic)

            # Only run compute_delta_cost_per_layer and stop.
            if compute_delta_cost_per_layer_solution is not None:
                mvm.compute_delta_cost_per_layer(compute_delta_cost_per_layer_solution, compute_delta_cost_per_layer_solution2)
                return

            if FLAGS.call_gurobi:
                masking_variable_value = mvm.call_gurobi_miqp(hessian_pickle_path=FLAGS.hessian_pickle_path,
                        computation_max=FLAGS.computation_max, memory_max=FLAGS.memory_max,
                        monotonic=False, timelimit=FLAGS.timelimit)
            elif FLAGS.solution_path is not None:
                print('---Loading solution from %s'%FLAGS.solution_path)
                if str(FLAGS.solution_path).endswith('.pickle'):
                    # NOTE(security): pickle.load can execute arbitrary code;
                    # only point --solution_path at trusted files.
                    with open(FLAGS.solution_path, 'rb') as f:
                        masking_variable_value = pickle.load(f)
                elif str(FLAGS.solution_path).endswith('.mat'):
                    assert int(FLAGS.solution_random_rounding) + int(FLAGS.cross_entropy_rounding) + int(FLAGS.add_and_svd_rounding) <= 1, 'only choose one type of rounding'
                    masking_variable_value = scipy.io.loadmat(FLAGS.solution_path)['x']
                    # The full solution covers all the variables, including
                    # the reduced ones; it is written back into the .mat file.
                    full_solution = mvm.expand_reduced_mask_variables_np(np.squeeze(masking_variable_value), exact_size=True)
                    mat_content = scipy.io.loadmat(FLAGS.solution_path)
                    mat_content['full_x'] = full_solution
                    scipy.io.savemat(FLAGS.solution_path, mat_content, do_compression=True)
                    print('eval_functions_multi: adding full_solution to sqp_solution.mat')

                    if FLAGS.solution_random_rounding:
                        masking_variable_value = DnnUtili.solution_random_rounding(masking_variable_value)
                    elif FLAGS.cross_entropy_rounding:
                        masking_variable_value = mvm.cross_entropy_rounding(masking_variable_value, FLAGS.computation_max, FLAGS.memory_max)
                    elif FLAGS.add_and_svd_rounding:
                        # All the values are calculated in my_slim_layer.py
                        # when the network is being constructed.
                        masking_variable_value = None
                    else:
                        assert masking_variable_value.shape[1]==1, 'expected a column vector, got %s'%str(masking_variable_value.shape)
                        masking_variable_value = np.reshape(masking_variable_value,(masking_variable_value.shape[0]))
                else:
                    raise ValueError('invalid solution_path: %s'%FLAGS.solution_path)
            elif FLAGS.K_heuristic is not None:
                print('---Using heuristic %d in get_mask_variable_value_using_heuristic()'%(FLAGS.K_heuristic))
                masking_variable_value = mvm.get_mask_variable_value_using_heuristic(FLAGS.K_heuristic,
                        computation_max=FLAGS.computation_max, memory_max=FLAGS.memory_max,
                        monotonic=False, timelimit=FLAGS.timelimit)

                # If an sqp solution exists and contains a full_x entry,
                # convert it according to the reduced index just computed.
                if os.path.isfile('/tmp/sqp_solution.mat'):
                    mat_content = scipy.io.loadmat('/tmp/sqp_solution.mat')
                    if 'full_x' in mat_content:
                        reduced_x = mvm.reduce_mask_variables_np(np.squeeze(mat_content['full_x']), exact_size=True)
                        mat_content['reduced_x'] = reduced_x
                        scipy.io.savemat('/tmp/sqp_solution.mat',mat_content, do_compression=True)
                        print('eval_functions_multi: added reduced_x to sqp_solution.mat, based on existing full_x and current reduced_index.')
            elif masking_variable_value is not None:
                print('---Using masking variable solution from argument')
            else:
                print('---!!! No approximation is specified,  all mask are enabled, no approximation to the network')
                masking_variable_value = np.zeros([mvm.get_num_mask_variables()],dtype=np.float32)

            # Save the computation and memory cost coefficients to a pickle.
            mvm.save_coefficients_to_pickle()

            if FLAGS.only_compute_mask_variable:
                assert FLAGS.call_gurobi or FLAGS.K_heuristic is not None, 'should be computing a mask variable solution'
                mvm.save_variable_index_to_pickle()
                print('---mask_variable solution computed.')
                return

            masking_variable_value_dict = mvm.get_variable_to_value_dict(masking_variable_value)
            if FLAGS.add_and_svd_rounding:
                # All the values are calculated in my_slim_layer.py when the
                # network is being constructed, so nothing is fed.
                masking_variable_value_dict = dict()

            if not FLAGS.add_and_svd_rounding:
                computation_percentage, memory_percentage = mvm.calculate_percentage_computation_memory_cost(masking_variable_value)
            # end MaskingVariableManager

        # Clear the masking variable manager, but not for the last tower, so
        # a copy of the masking variables is still available afterwards.
        if idx != num_gpus-1:
            DnnUtili.mvm.__init__()

    with slim.arg_scope([slim.model_variable,slim.variable], device='/cpu:0'):
        loss_op = tf.reduce_sum(tower_loss_list)
        top_1_op = tf.reduce_sum(tower_top_1_op_list)
        top_5_op = tf.reduce_sum(tower_top_5_op_list)

    if FLAGS.moving_average_decay:
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, tf_global_step)
      variables_to_restore = variable_averages.variables_to_restore(
          slim.get_model_variables())
      variables_to_restore[tf_global_step.op.name] = tf_global_step
    else:
      variables_to_restore = slim.get_variables_to_restore()

    # Resolve the checkpoint file to restore the weights from.
    if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
      checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    else:
      checkpoint_path = FLAGS.checkpoint_path
    tf.logging.info('Evaluating %s' % checkpoint_path)

    # Compute the number of iterations.
    # TODO(sguada) use num_epochs=1
    if FLAGS.max_num_batches:
      num_batches = FLAGS.max_num_batches
    elif sample_percentage:
      assert 0<sample_percentage <= 1, 'invalid sample_percentage %s'%sample_percentage
      num_batches = math.ceil(dataset.num_samples*sample_percentage / float(FLAGS.batch_size))
    else:
      # This ensures that we make a single pass over all of the data.
      num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size))
    assert num_batches > 4, 'only evaluate so few batches? num_batches = %d'%num_batches
    # The number of samples that are actually evaluated.
    total_sample_count = num_batches * FLAGS.batch_size

    # Duplicate the values of the masking variables to each tower.
    if not mvm.is_empty():
        # At this point masking_variable_value_dict has been computed for the
        # first tower; the other towers get the same values by position.
        duplicated_masking_variable_value_dict = copy.copy(masking_variable_value_dict)
        masking_variable_value_dict_values = list(masking_variable_value_dict.values())
        for tower_variables in variables_list[1:]:
            for j, var in enumerate(tower_variables):
                duplicated_masking_variable_value_dict[var] = masking_variable_value_dict_values[j]

        # Save a dict mapping from the name of each variable to its value.
        name_value_dict = OrderedDict()
        for var,value in masking_variable_value_dict.items():
            name_value_dict[var.op.name] = value
        with open('/tmp/mask_variable_value_dict.pickle', 'wb') as f:
            pickle.dump(name_value_dict, f, protocol=-1)

        masking_variable_value_dict = duplicated_masking_variable_value_dict
        print_eval_parameters()

    assert eval_op_feed_dict is None, 'because the feed_dict has to be duplicated for each tower for the masking variables, this is not implemented yet'
    if not mvm.is_empty():
        eval_op_feed_dict = masking_variable_value_dict

    loss_sum = np.longdouble(0.0)
    top_1_sum = np.longdouble(0.0)
    top_5_sum = np.longdouble(0.0)

    # No initializer is run: the weights come from the checkpoint.
    sess = tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=False))
    try:
        saver = tf.train.Saver(variables_to_restore)
        saver.restore(sess, checkpoint_path)

        tf_run_start = time.time()
        # Without the queue runners sess.run would block forever.
        with slim.queues.QueueRunners(sess):
            for _ in range(num_batches):
                loss_np, top_1_op_np, top_5_op_np = sess.run([loss_op, top_1_op, top_5_op], feed_dict=eval_op_feed_dict)
                loss_sum += np.longdouble(loss_np)
                top_1_sum += np.longdouble(top_1_op_np)
                top_5_sum += np.longdouble(top_5_op_np)

        tf_run_time = time.time() - tf_run_start
    finally:
        # Release the session even if restoring or a run step fails.
        sess.close()

    # loss_op sums the per-tower mean losses, hence the division by num_gpus.
    loss = loss_sum/np.longdouble(num_batches)/num_gpus
    top_1 = top_1_sum/np.longdouble(total_sample_count)
    top_5 = top_5_sum/np.longdouble(total_sample_count)

    # Delete the eval directory, so the summary files do not accumulate.
    shutil.rmtree(FLAGS.eval_dir, ignore_errors=True)

    results = OrderedDict()

    results['accuracy'] = top_1
    results['accuracy_5'] = top_5
    results['loss'] = loss

    if FLAGS.add_and_svd_rounding:
        computation_percentage, memory_percentage = DnnUtili.calculate_percentage_add_and_svd(FLAGS.computation_max, FLAGS.memory_max)

    results['computation_cost'] = computation_percentage
    results['memory_cost'] = memory_percentage

    results['tf_run_time'] = tf_run_time

    return results
Пример #48
0
def main(_):
    """Evaluate an SSD detection network on one dataset split.

    Builds, in a fresh graph: the dataset provider and evaluation batches,
    the SSD network with its losses, the decoded/filtered detections, and
    streaming metrics (mean of every registered loss, per-class TP/FP arrays,
    per-class VOC07/VOC12 average precision and the two mAP summaries).

    The metrics are then run either once against a single checkpoint or,
    when ``FLAGS.wait_for_checkpoints`` is set, in a loop that keeps
    evaluating checkpoints found under ``FLAGS.checkpoint_path``.

    Args:
        _: Unused positional argument passed in by the application runner.

    Raises:
        ValueError: If ``FLAGS.dataset_dir`` is empty, or if
            ``FLAGS.checkpoint_path`` is a directory in which no checkpoint
            can be found (single-evaluation mode only).
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        # =================================================================== #
        # Dataset + SSD model + Pre-processing
        # =================================================================== #
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        # Get the SSD network and its anchors.
        ssd_class = nets_factory.get_network(FLAGS.model_name)
        ssd_params = ssd_class.default_params._replace(
            num_classes=FLAGS.num_classes)
        ssd_net = ssd_class(ssd_params)

        # Evaluation shape and associated anchors: eval_image_size
        ssd_shape = ssd_net.params.img_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)

        # Select the preprocessing function; fall back to the model name when
        # no explicit preprocessing name is given.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        tf_utils.print_configuration(FLAGS.__flags, ssd_params,
                                     dataset.data_sources, FLAGS.eval_dir)

        # =================================================================== #
        # Create a dataset provider and batches.
        # =================================================================== #
        with tf.device('/cpu:0'):
            with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    common_queue_capacity=2 * FLAGS.batch_size,
                    common_queue_min=FLAGS.batch_size,
                    shuffle=False)
            # Get for SSD network: image, labels, bboxes.
            [image, shape, glabels, gbboxes] = provider.get(
                ['image', 'shape', 'object/label', 'object/bbox'])
            if FLAGS.remove_difficult:
                [gdifficults] = provider.get(['object/difficult'])
            else:
                # No object is treated as difficult: all-zero flags shaped
                # like the labels.
                gdifficults = tf.zeros(tf.shape(glabels), dtype=tf.int64)

            # Pre-processing image, labels and bboxes.
            image, glabels, gbboxes, gbbox_img = \
                image_preprocessing_fn(image, glabels, gbboxes,
                                       out_shape=ssd_shape,
                                       data_format=DATA_FORMAT,
                                       resize=FLAGS.eval_resize,
                                       difficults=None)

            # Encode groundtruth labels and bboxes.
            gclasses, glocalisations, gscores = \
                ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors)
            # Layout used to un-flatten the batched tensors below: five
            # single entries followed by three groups of len(ssd_anchors)
            # entries (presumably one per anchor layer -- confirm against
            # tf_utils.reshape_list).
            batch_shape = [1] * 5 + [len(ssd_anchors)] * 3

            # Evaluation batch.
            r = tf.train.batch(tf_utils.reshape_list([
                image, glabels, gbboxes, gdifficults, gbbox_img, gclasses,
                glocalisations, gscores
            ]),
                               batch_size=FLAGS.batch_size,
                               num_threads=FLAGS.num_preprocessing_threads,
                               capacity=5 * FLAGS.batch_size,
                               dynamic_pad=True)
            (b_image, b_glabels, b_gbboxes, b_gdifficults, b_gbbox_img,
             b_gclasses, b_glocalisations,
             b_gscores) = tf_utils.reshape_list(r, batch_shape)

        # =================================================================== #
        # SSD Network + Outputs decoding.
        # =================================================================== #
        arg_scope = ssd_net.arg_scope(data_format=DATA_FORMAT)
        with slim.arg_scope(arg_scope):
            predictions, localisations, logits, end_points = \
                ssd_net.net(b_image, is_training=False)
        # Add losses functions.
        ssd_net.losses(logits, localisations, b_gclasses, b_glocalisations,
                       b_gscores)

        # Performing post-processing on CPU: loop-intensive, usually more
        # efficient.
        with tf.device('/device:CPU:0'):
            # Detected objects from SSD output.
            localisations = ssd_net.bboxes_decode(localisations, ssd_anchors)
            rscores, rbboxes = \
                ssd_net.detected_bboxes(predictions, localisations,
                                        select_threshold=FLAGS.select_threshold,
                                        nms_threshold=FLAGS.nms_threshold,
                                        clipping_bbox=None,
                                        top_k=FLAGS.select_top_k,
                                        keep_top_k=FLAGS.keep_top_k)
            # Compute TP and FP statistics.
            num_gbboxes, tp, fp, rscores = \
                tfe.bboxes_matching_batch(rscores.keys(), rscores, rbboxes,
                                          b_glabels, b_gbboxes, b_gdifficults,
                                          matching_threshold=FLAGS.matching_threshold)

        # Variables to restore: moving avg. or normal weights.
        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        # =================================================================== #
        # Evaluation metrics.
        # =================================================================== #
        with tf.device('/device:CPU:0'):
            dict_metrics = {}
            # First add all losses.
            for loss in tf.get_collection(tf.GraphKeys.LOSSES):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)
            # Extra losses as well.
            for loss in tf.get_collection('EXTRA_LOSSES'):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)

            # Add the loss metrics to summaries. Summaries are created with
            # an empty collection list and then registered explicitly.
            for name, metric in dict_metrics.items():
                op = tf.summary.scalar(name, metric[0], collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

            # FP and TP metrics: one (value, update) pair per class.
            tp_fp_metric = tfe.streaming_tp_fp_arrays(num_gbboxes, tp, fp,
                                                      rscores)
            for c in tp_fp_metric[0].keys():
                dict_metrics['tp_fp_%s' % c] = (tp_fp_metric[0][c],
                                                tp_fp_metric[1][c])

            # Add to summaries precision/recall values.
            aps_voc07 = {}
            aps_voc12 = {}
            for c in tp_fp_metric[0].keys():
                # Precision and recall values.
                prec, rec = tfe.precision_recall(*tp_fp_metric[0][c])

                # Average precision VOC07.
                v = tfe.average_precision_voc07(prec, rec)
                summary_name = 'AP_VOC07/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_voc07[c] = v

                # Average precision VOC12.
                v = tfe.average_precision_voc12(prec, rec)
                summary_name = 'AP_VOC12/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_voc12[c] = v

            # Mean average precision VOC07 (also printed via tf.Print).
            summary_name = 'AP_VOC07/mAP'
            mAP = tf.add_n(list(aps_voc07.values())) / len(aps_voc07)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

            # Mean average precision VOC12 (also printed via tf.Print).
            summary_name = 'AP_VOC12/mAP'
            mAP = tf.add_n(list(aps_voc12.values())) / len(aps_voc12)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # Split into values and updates ops.
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(
            dict_metrics)
        # The tp_fp_* entries are tuples of update ops, so flatten the values
        # into a single flat list; both evaluation modes below share it.
        eval_op = flatten(list(names_to_updates.values()))

        # =================================================================== #
        # Evaluation loop.
        # =================================================================== #
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)

        # Number of batches...
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # int() keeps the count integral on Python 2, where math.ceil
            # returns a float.
            num_batches = int(math.ceil(dataset.num_samples /
                                        float(FLAGS.batch_size)))

        if not FLAGS.wait_for_checkpoints:
            if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
                checkpoint_path = tf.train.latest_checkpoint(
                    FLAGS.checkpoint_path)
                # latest_checkpoint returns None when the directory holds no
                # checkpoint; fail with a clear message instead of passing
                # None on to the evaluator.
                if checkpoint_path is None:
                    raise ValueError('No checkpoint found in directory %s' %
                                     FLAGS.checkpoint_path)
            else:
                checkpoint_path = FLAGS.checkpoint_path
            tf.logging.info('Evaluating %s' % checkpoint_path)

            # Standard evaluation loop.
            start = time.time()
            slim.evaluation.evaluate_once(
                master=FLAGS.master,
                checkpoint_path=checkpoint_path,
                logdir=FLAGS.eval_dir,
                num_evals=num_batches,
                eval_op=eval_op,
                variables_to_restore=variables_to_restore,
                session_config=config)
            # Log time spent.
            elapsed = time.time() - start
            print('Time spent : %.3f seconds.' % elapsed)
            print('Time spent per BATCH: %.3f seconds.' %
                  (elapsed / num_batches))

        else:
            checkpoint_path = FLAGS.checkpoint_path
            tf.logging.info('Evaluating %s' % checkpoint_path)

            # Waiting loop: re-evaluates every 60 seconds with no upper bound
            # on the number of evaluations and no timeout.
            slim.evaluation.evaluation_loop(
                master=FLAGS.master,
                checkpoint_dir=checkpoint_path,
                logdir=FLAGS.eval_dir,
                num_evals=num_batches,
                eval_op=eval_op,
                variables_to_restore=variables_to_restore,
                eval_interval_secs=60,
                max_number_of_evaluations=np.inf,
                session_config=config,
                timeout=None)
def main(_):
  """Build the multi-clone training graph and start training.

  Steps, in order: restrict visible GPUs from ``FLAGS.gpus``, configure
  ``model_deploy``, create the dataset/network/preprocessing function, build
  the input queue, create the model clones and their losses, collect
  summaries, configure moving averages and the optimizer, build the train
  op(s), and finally call ``slim.learning.train``.

  When ``FLAGS.iter_size`` is 1 a single train tensor is passed to the
  training loop. Otherwise gradients are accumulated into local reference
  variables over ``FLAGS.iter_size`` sub-iterations and ``train_ops`` is a
  dict keyed by sub-iteration index.

  Args:
    _: Unused positional argument passed in by the application runner.

  Raises:
    ValueError: If ``FLAGS.dataset_dir`` is empty.
  """
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  # Restrict the process to the requested GPUs; -1 clones means "one clone per
  # listed GPU".
  os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpus
  if FLAGS.num_clones == -1:
    FLAGS.num_clones = len(FLAGS.gpus.split(','))

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    # tf.set_random_seed(42)
    tf.set_random_seed(0)
    ######################
    # Config model_deploy#
    ######################
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    ######################
    # Select the dataset #
    ######################
    # FLAGS.dataset_dir is a comma-separated list of directories here.
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name,
        FLAGS.dataset_dir.split(','),
        dataset_list_dir=FLAGS.dataset_list_dir,
        num_samples=FLAGS.frames_per_video,
        modality=FLAGS.modality,
        split_id=FLAGS.split_id)

    ######################
    # Select the network #
    ######################
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        batch_size=FLAGS.batch_size,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        dropout_keep_prob=(1.0-FLAGS.dropout),
        pooled_dropout_keep_prob=(1.0-FLAGS.pooled_dropout),
        batch_norm=FLAGS.netvlad_batch_norm)

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=True)  # in case of pooling images,
                           # now preprocessing is done video-level

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    with tf.device(deploy_config.inputs_device()):
      provider = dataset_data_provider.DatasetDataProvider(
        dataset,
        num_readers=FLAGS.num_readers,
        common_queue_capacity=20 * FLAGS.batch_size,
        common_queue_min=10 * FLAGS.batch_size,
        bgr_flips=FLAGS.bgr_flip)
      [image, label] = provider.get(['image', 'label'])
      # now note that the above image might be a 23 channel image if you have
      # both RGB and flow streams. It will need to split later, but all the
      # preprocessing will be done consistently for all frames over all streams
      # The label arrives as a string; convert it to a scalar int32 before
      # applying the label offset.
      label = tf.string_to_number(label, tf.int32)
      label.set_shape(())
      label -= FLAGS.labels_offset

      train_image_size = FLAGS.train_image_size or network_fn.default_image_size

      # NOTE(review): the trailing comma on the next line makes scale_ratios a
      # 1-tuple wrapping the list of floats, not the list itself. This looks
      # accidental -- confirm what image_preprocessing_fn expects before
      # removing it.
      scale_ratios=[float(el) for el in FLAGS.scale_ratios.split(',')],
      image = image_preprocessing_fn(image, train_image_size,
                                     train_image_size,
                                     scale_ratios=scale_ratios,
                                     out_dim_scale=FLAGS.out_dim_scale,
                                     model_name=FLAGS.model_name)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size)
      if FLAGS.debug:
        # Prints the batch labels whenever the images tensor is evaluated.
        images = tf.Print(images, [labels], 'Read batch')
      labels = slim.one_hot_encoding(
          labels, dataset.num_classes - FLAGS.labels_offset)
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * deploy_config.num_clones)
      summarize_images(images, provider.num_channels_stream)

    ####################
    # Define the model #
    ####################
    # Optional keyword arguments forwarded to network_fn.
    kwargs = {}
    if FLAGS.conv_endpoint is not None:
      kwargs['conv_endpoint'] = FLAGS.conv_endpoint
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      images, labels = batch_queue.dequeue()
      logits, end_points = network_fn(
          images, pool_type=FLAGS.pooling,
          classifier_type=FLAGS.classifier_type,
          num_channels_stream=provider.num_channels_stream,
          netvlad_centers=FLAGS.netvlad_initCenters.split(','),
          stream_pool_type=FLAGS.stream_pool_type,
          **kwargs)

      #############################
      # Specify the loss function #
      #############################
      # The auxiliary head, when present, contributes with weight 0.4.
      if 'AuxLogits' in end_points:
        slim.losses.softmax_cross_entropy(
            end_points['AuxLogits'], labels,
            label_smoothing=FLAGS.label_smoothing, weight=0.4, scope='aux_loss')
      slim.losses.softmax_cross_entropy(
          logits, labels, label_smoothing=FLAGS.label_smoothing, weight=1.0)
      return end_points

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Add summaries for end_points.
    # A copy of the first clone's end points, plus the batched images and
    # labels, is published through the module-level name end_points_debug.
    global end_points_debug
    end_points = clones[0].outputs
    end_points_debug = dict(end_points)
    end_points_debug['images'] = images
    end_points_debug['labels'] = labels
    for end_point in end_points:
      x = end_points[end_point]
      summaries.add(tf.histogram_summary('activations/' + end_point, x))
      summaries.add(tf.scalar_summary('sparsity/' + end_point,
                                      tf.nn.zero_fraction(x)))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.scalar_summary('losses/%s' % loss.op.name, loss))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
      summaries.add(tf.histogram_summary(variable.op.name, variable))

    #################################
    # Configure the moving averages #
    #################################
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, global_step)
    else:
      moving_average_variables, variable_averages = None, None

    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.scalar_summary('learning_rate', learning_rate,
                                      name='learning_rate'))

    if FLAGS.sync_replicas:
      # If sync_replicas is enabled, the averaging will be done in the chief
      # queue runner.
      optimizer = tf.train.SyncReplicasOptimizer(
          opt=optimizer,
          replicas_to_aggregate=FLAGS.replicas_to_aggregate,
          variable_averages=variable_averages,
          variables_to_average=moving_average_variables,
          replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
          total_num_replicas=FLAGS.worker_replicas)
    elif FLAGS.moving_average_decay:
      # Update ops executed locally by trainer.
      update_ops.append(variable_averages.apply(moving_average_variables))

    # Variables to train.
    variables_to_train = _get_variables_to_train()
    logging.info('Training the following variables: %s' % (
      ' '.join([el.name for el in variables_to_train])))

    #  and returns a train_tensor and summary_op
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones,
        optimizer,
        var_list=variables_to_train)

    # clip the gradients if needed
    if FLAGS.clip_gradients > 0:
      logging.info('Clipping gradients by %f' % FLAGS.clip_gradients)
      with tf.name_scope('clip_gradients'):
        clones_gradients = slim.learning.clip_gradient_norms(
            clones_gradients,
            FLAGS.clip_gradients)

    # Add total_loss to summary.
    summaries.add(tf.scalar_summary('total_loss', total_loss,
                                    name='total_loss'))

    # Create gradient updates.
    # NOTE: train_ops ends up as a single tensor when iter_size == 1 and as a
    # dict {sub-iteration index: op} otherwise; the train_step function passed
    # to slim.learning.train presumably handles both forms -- confirm.
    train_ops = {}
    if FLAGS.iter_size == 1:
      grad_updates = optimizer.apply_gradients(clones_gradients,
                                               global_step=global_step)
      update_ops.append(grad_updates)

      update_op = tf.group(*update_ops)
      train_tensor = control_flow_ops.with_dependencies([update_op], total_loss,
                                                        name='train_op')
      train_ops = train_tensor
    else:
      # Gradient accumulation: one zero-initialised local variable per trained
      # variable holds the running sum of its gradients.
      gvs = [(grad, var) for grad, var in clones_gradients]
      varnames = [var.name for grad, var in gvs]
      varname_to_var = {var.name: var for grad, var in gvs}
      varname_to_grad = {var.name: grad for grad, var in gvs}
      varname_to_ref_grad = {}
      for vn in varnames:
        grad = varname_to_grad[vn]
        print("accumulating ... ", (vn, grad.get_shape()))
        with tf.variable_scope("ref_grad"):
          with tf.device(deploy_config.variables_device()):
            # vn[:-2] drops the last two characters of the variable name
            # (presumably the ':0' output suffix -- confirm).
            ref_var = slim.local_variable(
                np.zeros(grad.get_shape(),dtype=np.float32),
                name=vn[:-2])
            varname_to_ref_grad[vn] = ref_var

      # assign: overwrite the accumulators with the current gradients;
      # assign_add: add the current gradients to the accumulators.
      all_assign_ref_op = [ref.assign(varname_to_grad[vn]) for vn, ref in varname_to_ref_grad.items()]
      all_assign_add_ref_op = [ref.assign_add(varname_to_grad[vn]) for vn, ref in varname_to_ref_grad.items()]
      assign_gradients_ref_op = tf.group(*all_assign_ref_op)
      accmulate_gradients_op = tf.group(*all_assign_add_ref_op)
      # The final step first accumulates once more (control dependency), then
      # applies the accumulated gradients divided by iter_size.
      with tf.control_dependencies([accmulate_gradients_op]):
        final_gvs = [(varname_to_ref_grad[var.name] / float(FLAGS.iter_size), var) for grad, var in gvs]
        apply_gradients_op = optimizer.apply_gradients(final_gvs, global_step=global_step)
        update_ops.append(apply_gradients_op)
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op],
            total_loss, name='train_op')
      # Sub-iteration schedule: 0 -> reset accumulators to current gradients,
      # 1..iter_size-2 -> accumulate, iter_size-1 -> accumulate and apply.
      for i in range(FLAGS.iter_size):
        if i == 0:
          train_ops[i] = assign_gradients_ref_op
        elif i < FLAGS.iter_size - 1:  # because apply_gradients also computes
                                       # (see control_dependency), so
                                       # no need of running an extra iteration
          train_ops[i] = accmulate_gradients_op
        else:
          train_ops[i] = train_tensor


    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                       first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.merge_summary(list(summaries), name='summary_op')

    # Session configuration: grow GPU memory on demand and cap intra-op CPU
    # threads at FLAGS.cpu_threads.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.intra_op_parallelism_threads = FLAGS.cpu_threads
    # config.allow_soft_placement = True
    # config.gpu_options.per_process_gpu_memory_fraction=0.7

    ###########################
    # Kicks off the training. #
    ###########################
    logging.info('RUNNING ON SPLIT %d' % FLAGS.split_id)
    slim.learning.train(
        train_ops,
        train_step_fn=train_step,
        logdir=FLAGS.train_dir,
        master=FLAGS.master,
        is_chief=(FLAGS.task == 0),
        init_fn=_get_init_fn(),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=FLAGS.log_every_n_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        sync_optimizer=optimizer if FLAGS.sync_replicas else None,
        session_config=config)
Пример #50
0
def _load_label_map(labels_path):
    """Parse a label file with one ``id:name`` entry per line.

    Args:
        labels_path: Path to the UTF-8 encoded label file.

    Returns:
        dict mapping the integer class id to ``{'id': int, 'name': str}``.
    """
    category_index = {}
    with open(labels_path, 'r', encoding='utf-8') as label_file:
        for raw_line in label_file:
            line = raw_line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            fields = line.split(':')
            class_id = int(fields[0])
            category_index[class_id] = {'id': class_id, 'name': fields[1]}
    return category_index


def _min_max_normalize(values):
    """Rescale an array to [0, 1]; a constant array maps to all zeros."""
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        # Avoid a division by zero (NaN heatmap) on a flat activation map.
        return np.zeros_like(values)
    return (values - lowest) / span


def main(_):
    """Run class-activation-map localisation on validation data.

    Reads serialized examples from four validation TFRecord shards under
    ``FLAGS.dataset_dir``, classifies them with the network named by
    ``FLAGS.model_name`` restored from ``FLAGS.checkpoint_path``, and, for
    the first sample of each of ``FLAGS.max_num_batches`` batches:

    * saves the original image, one heatmap per top class and an image
      annotated with the predicted boxes under ``FLAGS.output_file``;
    * derives bounding boxes from the combined activation maps;
    * when the top prediction matches the label, records the IoU between
      the first predicted box and the first box returned by
      ``cam_utils.get_boxes``.

    The mean of the recorded IoU values is printed at the end (``nan`` when
    none were recorded).

    Args:
        _: Unused positional argument passed in by the application runner.

    Raises:
        ValueError: If ``FLAGS.dataset_dir`` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')
    tf.logging.set_verbosity(tf.logging.INFO)

    ####################
    #  Get the label map #
    ####################
    category_index = _load_label_map(
        os.path.join(FLAGS.dataset_dir, 'labels.txt'))

    ####################
    #  Get the input data #
    ####################
    filename_queue = tf.train.string_input_producer([
        FLAGS.dataset_dir +
        '/pj_vehicle_validation_0000%d-of-00004.tfrecord' % i
        for i in range(0, 4)
    ], )
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'image/class/label':
                                           tf.FixedLenFeature([], tf.int64),
                                           'image/encoded':
                                           tf.FixedLenFeature([], tf.string),
                                       })

    image = features['image/encoded']
    label = features['image/class/label']

    # NOTE: everything is built in the default graph. A never-entered
    # ``tf.Graph().as_default()`` context object used to be created here; it
    # had no effect and was removed.

    ####################
    #  Select the model #
    ####################
    network_fn = nets_factory.get_network_fn(FLAGS.model_name,
                                             num_classes=NUM_CLASSES,
                                             is_training=False)

    if hasattr(network_fn, 'default_image_size'):
        image_size = network_fn.default_image_size
    else:
        image_size = FLAGS.default_image_size

    #####################################
    #  Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name, is_training=False)

    image_processed = tf.image.decode_jpeg(image, channels=3)
    image_processed = image_preprocessing_fn(image_processed, image_size,
                                             image_size)

    # The still-encoded image bytes are batched alongside the preprocessed
    # tensor so the original picture can be saved and annotated later.
    images_processed, images, labels = tf.train.batch(
        [image_processed, image, label],
        batch_size=FLAGS.batch_size,
        num_threads=FLAGS.num_preprocessing_threads,
        capacity=5 * FLAGS.batch_size)
    labels = tf.squeeze(labels)

    ####################
    #  Define the model #
    #####################
    logits, end_points = network_fn(images_processed)

    checkpoint_path = FLAGS.checkpoint_path
    variables_to_restore = slim.get_variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    n_top = 1          # number of top-scoring classes to localise
    threshold = 0.75   # threshold passed to cam_utils.bounding_box
    annotations_dir = os.path.join(FLAGS.dataset_dir, 'test_data/annotations')
    iou = []

    with tf.Session() as sess:
        saver.restore(sess, checkpoint_path)

        # Create a coordinator to manage the input threads, then start the
        # queue runners so the filename queue begins to fill.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            for i in range(FLAGS.max_num_batches):
                images_, labels_, logits_, end_points_ = sess.run(
                    [images, labels, logits, end_points])
                # tf.squeeze collapses the labels to a scalar when the batch
                # size is 1; restore the batch axis so indexing works.
                labels_ = np.atleast_1d(labels_)

                # Only the first sample of every batch is visualised and
                # scored.
                j = 0

                image_ = Image.open(BytesIO(images_[j]), 'r')
                image_.save(
                    os.path.join(
                        FLAGS.output_file,
                        'test_{0}_label_{1}.jpg'.format(i, labels_[j])))
                image_np = np.array(image_)

                logit_value = logits_[j]
                feature_maps_A = end_points_['features_A'][j]
                # NOTE(review): feature_maps_B was referenced but never
                # defined (NameError). The 'features_B' key is assumed by
                # analogy with 'features_A' -- confirm against the network's
                # end points.
                feature_maps_B = end_points_['features_B'][j]

                # Softmax with the maximum subtracted first so that large
                # logits cannot overflow np.exp; the result is unchanged.
                exp_logits = np.exp(logit_value - np.max(logit_value))
                softmax = exp_logits / np.sum(exp_logits, axis=0)

                predictions = np.argsort(-logit_value)[:n_top]
                scores = -np.sort(-softmax)[:n_top]

                print(predictions)
                print(scores)
                print(labels_[j])

                # Generate the heatmaps: normalise each branch to [0, 1] and
                # keep the element-wise maximum of the two.
                cam_A = cam_utils.CAMmap(feature_maps_A, logit_value, n_top)
                cam_B = cam_utils.CAMmap(feature_maps_B, logit_value, n_top)
                for k in range(n_top):
                    cam_A[:, :, k] = _min_max_normalize(cam_A[:, :, k])
                    cam_B[:, :, k] = _min_max_normalize(cam_B[:, :, k])
                cam = np.maximum(cam_A, cam_B)
                im_height = image_np.shape[0]
                im_width = image_np.shape[1]

                # Save the heatmaps, resized to the original image size.
                cam_resize = np.zeros((im_height, im_width, n_top))
                for k in range(n_top):
                    heatmap_resize = Image.fromarray(cam[:, :, k]).resize(
                        (im_width, im_height), Image.BILINEAR)
                    cam_resize[:, :, k] = np.array(heatmap_resize)
                    heatmap = cam_utils.grey2rainbow(cam_resize[:, :, k] *
                                                     255)
                    heatmap = Image.fromarray(heatmap)
                    heatmap.save(
                        os.path.join(
                            FLAGS.output_file,
                            'test_{0}_heatmap_{1}.jpg'.format(i, k)))

                # Generate the bounding boxes.
                boxes = cam_utils.bounding_box(cam_resize, threshold)

                # Draw the detections on the image and save it.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    boxes,
                    predictions.astype(np.int32),
                    scores,
                    category_index,
                    use_normalized_coordinates=True,
                    min_score_thresh=0.001,
                    line_thickness=5)
                plt.imsave(
                    os.path.join(FLAGS.output_file,
                                 'test_{0}_output.jpg'.format(i)),
                    image_np)

                # Compute the evaluation metric. The boxes were drawn as
                # normalised coordinates above; scale them by the image
                # height/width before comparing with the annotation boxes.
                gt_boxes, _ = cam_utils.get_boxes(annotations_dir, i)
                for k in range(boxes.shape[0]):
                    boxes[k, 0] = boxes[k, 0] * im_height
                    boxes[k, 1] = boxes[k, 1] * im_width
                    boxes[k, 2] = boxes[k, 2] * im_height
                    boxes[k, 3] = boxes[k, 3] * im_width

                # IoU is only recorded for correctly classified samples.
                if predictions[0] == labels_[j]:
                    iou.append(np_box_ops.iou(boxes, gt_boxes)[0][0])
        finally:
            # Always stop and join the input threads, even if a batch failed.
            coord.request_stop()
            coord.join(threads)

        # Mean of an empty list is reported as nan without a NumPy warning.
        mean_iou = np.array(iou).mean() if iou else float('nan')
        print(mean_iou)
def main(_):
  """Builds the training graph for an image classifier and runs the train loop.

  The graph is assembled in this order: deployment config and global step,
  dataset, network function, preprocessing function, input pipeline
  (provider -> preprocessing -> batching -> prefetch queue), model clones with
  their losses, summaries, optional moving averages, optimizer, and finally a
  `train_tensor` that is handed to `slim.learning.train`.

  All configuration is read from the module-level `FLAGS`.

  Args:
    _: Unused positional argument (presumably the argv list passed by
      `tf.app.run` -- confirm against the script entry point).

  Raises:
    ValueError: If `FLAGS.dataset_dir` is not set.
  """
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    #######################
    # Config model_deploy #
    #######################
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

    ######################
    # Select the network #
    ######################
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        weight_decay=FLAGS.weight_decay,
        is_training=True)

    #####################################
    # Select the preprocessing function #
    #####################################
    # Fall back to the model name when no preprocessing name is given.
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=True)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    with tf.device(deploy_config.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=FLAGS.num_readers,
          common_queue_capacity=20 * FLAGS.batch_size,
          common_queue_min=10 * FLAGS.batch_size)
      [image, label] = provider.get(['image', 'label'])
      # Shift the labels by the same offset that was subtracted from
      # num_classes when the network was created above.
      label -= FLAGS.labels_offset

      # Fall back to the network's default input size when none is given.
      train_image_size = FLAGS.train_image_size or network_fn.default_image_size

      image = image_preprocessing_fn(image, train_image_size, train_image_size)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size)
      labels = slim.one_hot_encoding(
          labels, dataset.num_classes - FLAGS.labels_offset)
      # Each clone dequeues its own batch from this queue (see clone_fn).
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * deploy_config.num_clones)

    ####################
    # Define the model #
    ####################
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      with tf.device(deploy_config.inputs_device()):
        images, labels = batch_queue.dequeue()
      logits, end_points = network_fn(images)

      #############################
      # Specify the loss function #
      #############################
      # The auxiliary head, when the network exposes one, contributes with a
      # weight of 0.4; the main logits contribute with a weight of 1.0.
      if 'AuxLogits' in end_points:
        tf.losses.softmax_cross_entropy(
            logits=end_points['AuxLogits'], onehot_labels=labels,
            label_smoothing=FLAGS.label_smoothing, weights=0.4, scope='aux_loss')
      tf.losses.softmax_cross_entropy(
          logits=logits, onehot_labels=labels,
          label_smoothing=FLAGS.label_smoothing, weights=1.0)
      return end_points

    # Gather initial summaries.
    # This snapshot is taken before the clones are created, so it holds only
    # the summaries that already exist in the graph at this point.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Add summaries for end_points.
    end_points = clones[0].outputs
    for end_point in end_points:
      x = end_points[end_point]
      summaries.add(tf.summary.histogram('activations/' + end_point, x))
      summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                      tf.nn.zero_fraction(x)))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    #################################
    # Configure the moving averages #
    #################################
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, global_step)
    else:
      moving_average_variables, variable_averages = None, None

    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    if FLAGS.sync_replicas:
      # If sync_replicas is enabled, the averaging will be done in the chief
      # queue runner.
      optimizer = tf.train.SyncReplicasOptimizer(
          opt=optimizer,
          replicas_to_aggregate=FLAGS.replicas_to_aggregate,
          variable_averages=variable_averages,
          variables_to_average=moving_average_variables,
          replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
          total_num_replicas=FLAGS.worker_replicas)
    elif FLAGS.moving_average_decay:
      # Update ops executed locally by trainer.
      update_ops.append(variable_averages.apply(moving_average_variables))

    # Variables to train.
    variables_to_train = _get_variables_to_train()

    # Compute the total loss and the gradients for the selected variables
    # across all clones.
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones,
        optimizer,
        var_list=variables_to_train)
    # Add total_loss to summary.
    summaries.add(tf.summary.scalar('total_loss', total_loss))

    # Create gradient updates.
    grad_updates = optimizer.apply_gradients(clones_gradients,
                                             global_step=global_step)
    update_ops.append(grad_updates)

    # Evaluating train_tensor yields total_loss; the control dependency makes
    # every op grouped in update_op run first.
    update_op = tf.group(*update_ops)
    with tf.control_dependencies([update_op]):
      train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                       first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    ###########################
    # Kicks off the training. #
    ###########################
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_dir,
        master=FLAGS.master,
        is_chief=(FLAGS.task == 0),
        init_fn=_get_init_fn(),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=FLAGS.log_every_n_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        sync_optimizer=optimizer if FLAGS.sync_replicas else None)
def main(_):
  """Evaluates a checkpoint once, or dumps selected end points to an HDF5 file.

  Builds the evaluation graph (dataset, network, preprocessing, batching,
  streaming metrics) and then takes one of two paths:

  * If `FLAGS.store_feat` is set, restores the checkpoint, runs
    `num_batches` batches and writes the requested end points (a
    comma-separated list of keys of `end_points`) to `FLAGS.store_feat_path`,
    one gzip-compressed dataset per end point.
  * Otherwise runs `slim.evaluation.evaluate_once`, which updates the
    'Accuracy' and 'Recall@5' streaming metrics and writes summaries to
    `FLAGS.eval_dir`.

  All configuration is read from the module-level `FLAGS`; `FLAGS.eval_dir`
  is overwritten with an `eval` directory located next to the checkpoint(s).

  Args:
    _: Unused positional argument (presumably the argv list passed by
      `tf.app.run` -- confirm against the script entry point).

  Raises:
    ValueError: If `FLAGS.dataset_dir` is not set, if no checkpoint can be
      found in the directory `FLAGS.checkpoint_path`, or if `FLAGS.store_feat`
      is set without `FLAGS.store_feat_path`.
    KeyError: If a name listed in `FLAGS.store_feat` is not an end point.
  """
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  # Place the eval directory next to the checkpoint(s). Test for a directory
  # rather than for a file: a checkpoint given as a path prefix is neither,
  # and must not be turned into a directory by the makedirs() call below.
  if os.path.isdir(FLAGS.checkpoint_path):
    checkpoint_dir = FLAGS.checkpoint_path
  else:
    checkpoint_dir = os.path.dirname(FLAGS.checkpoint_path)
  FLAGS.eval_dir = os.path.join(checkpoint_dir, 'eval')

  try:
    os.makedirs(FLAGS.eval_dir)
  except OSError:
    # Best effort: typically the directory already exists.
    pass

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    tf_global_step = slim.get_or_create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name,
        FLAGS.dataset_dir.split(','),
        FLAGS.dataset_list_dir,
        num_samples=FLAGS.frames_per_video,
        modality=FLAGS.modality,
        split_id=FLAGS.split_id)

    ####################
    # Select the model #
    ####################
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        batch_size=FLAGS.batch_size,
        is_training=False)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    provider = dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=FLAGS.force_random_shuffle,
        common_queue_capacity=2 * FLAGS.batch_size,
        common_queue_min=FLAGS.batch_size,
        bgr_flips=FLAGS.bgr_flip)
    [image, label] = provider.get(['image', 'label'])
    # The provider yields the label as a string; parse it to int64 and pin
    # the shape to a scalar so that it can be batched.
    label = tf.cast(tf.string_to_number(label, tf.int32),
        tf.int64)
    label.set_shape(())
    label -= FLAGS.labels_offset

    #####################################
    # Select the preprocessing function #
    #####################################
    # Fall back to the model name when no preprocessing name is given.
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=False)

    # Fall back to the network's default input size when none is given.
    eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

    image = image_preprocessing_fn(image, eval_image_size, eval_image_size,
                                   model_name=FLAGS.model_name,
                                   ncrops=FLAGS.ncrops,
                                   out_dim_scale=FLAGS.out_dim_scale)

    # A single batching thread is used when storing features (presumably so
    # that the stored rows keep the input order -- confirm).
    images, labels = tf.train.batch(
        [image, label],
        batch_size=FLAGS.batch_size,
        num_threads=1 if FLAGS.store_feat is not None else FLAGS.num_preprocessing_threads,
        capacity=5 * FLAGS.batch_size)

    ####################
    # Define the model #
    ####################
    kwargs = {}
    if FLAGS.conv_endpoint is not None:
      kwargs['conv_endpoint'] = FLAGS.conv_endpoint
    logits, end_points = network_fn(
        images, pool_type=FLAGS.pooling,
        classifier_type=FLAGS.classifier_type,
        num_channels_stream=provider.num_channels_stream,
        netvlad_centers=FLAGS.netvlad_initCenters.split(','),
        stream_pool_type=FLAGS.stream_pool_type,
        **kwargs)
    # Expose the inputs as end points too, so they can be stored via
    # --store_feat like any other end point.
    end_points['images'] = images
    end_points['labels'] = labels

    if FLAGS.moving_average_decay:
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, tf_global_step)
      variables_to_restore = variable_averages.variables_to_restore(
          slim.get_model_variables())
      variables_to_restore[tf_global_step.op.name] = tf_global_step
    else:
      variables_to_restore = slim.get_variables_to_restore()

    predictions = tf.argmax(logits, 1)
    # rgirdhar: Because of the following, can't use with batch_size=1
    if FLAGS.batch_size > 1:
      labels = tf.squeeze(labels)

    # Define the metrics:
    names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
        'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
        'Recall@5': slim.metrics.streaming_recall_at_k(
            logits, labels, 5),
    })

    # Print the summaries to screen.
    # items() rather than iteritems() so this also runs on Python 3.
    for name, value in names_to_values.items():
      summary_name = 'eval/%s' % name
      op = tf.scalar_summary(summary_name, value, collections=[])
      op = tf.Print(op, [value], summary_name)
      tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

    # TODO(sguada) use num_epochs=1
    if FLAGS.max_num_batches:
      num_batches = FLAGS.max_num_batches
    else:
      # This ensures that we make a single pass over all of the data.
      num_batches = int(math.ceil(dataset.num_samples /
                                  float(FLAGS.batch_size)))

    if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
      checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
      # latest_checkpoint() returns None when the directory has no
      # checkpoint; report that here instead of failing inside restore.
      if checkpoint_path is None:
        raise ValueError(
            'No checkpoint found in directory %s' % FLAGS.checkpoint_path)
    else:
      checkpoint_path = FLAGS.checkpoint_path

    tf.logging.info('Evaluating %s' % checkpoint_path)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    if FLAGS.store_feat is not None:
      # Explicit check instead of assert, which is stripped under `python -O`.
      if FLAGS.store_feat_path is None:
        raise ValueError(
            'You must supply --store_feat_path when using --store_feat')
      from tensorflow.python.training import supervisor
      from tensorflow.python.framework import ops
      import h5py
      saver = tf.train.Saver(variables_to_restore)
      # The supervisor is created with its standard services (summaries,
      # checkpointing, global step) disabled; the checkpoint is restored
      # explicitly through `saver` below.
      sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                                 logdir=None,
                                 summary_op=None,
                                 summary_writer=None,
                                 global_step=None,
                                 saver=None)
      ept_names_to_store = FLAGS.store_feat.split(',')
      try:
        ept_to_store = [end_points[el] for el in ept_names_to_store]
      except KeyError:
        logging.error('Endpoint not found')
        logging.error('Choose from %s' % ','.join(end_points.keys()))
        # Re-raise the original error so the missing end point name is kept.
        raise
      # One list of per-batch arrays for each requested end point.
      res = {epname: [] for epname in ept_names_to_store}
      with sv.managed_session(
          FLAGS.master, start_standard_services=False,
          config=config) as sess:
        saver.restore(sess, checkpoint_path)
        sv.start_queue_runners(sess)
        for j in range(num_batches):
          if j % 10 == 0:
            logging.info('Doing batch %d/%d' % (j, num_batches))
          feats = sess.run(ept_to_store)
          for eid, epname in enumerate(ept_names_to_store):
            res[epname].append(feats[eid])
      logging.info('Writing out features to %s' % FLAGS.store_feat_path)
      with h5py.File(FLAGS.store_feat_path, 'w') as fout:
        for epname, batches in res.items():
          # Stack the per-batch arrays along the first axis.
          fout.create_dataset(epname,
              data=np.concatenate(batches, axis=0),
              compression='gzip',
              compression_opts=FLAGS.feat_store_compression_opt)
    else:
      slim.evaluation.evaluate_once(
          master=FLAGS.master,
          checkpoint_path=checkpoint_path,
          logdir=FLAGS.eval_dir,
          num_evals=num_batches,
          # Materialize the dict view so a real list is passed on Python 3.
          eval_op=list(names_to_updates.values()),
          variables_to_restore=variables_to_restore,
          session_config=config)