示例#1
0
def train_face_net(img_data,
                   lab_data,
                   nclass,
                   image_size,
                   nrof_preprocess_threads,
                   face_class,
                   alfa,
                   max_epoch,
                   batch_size,
                   epoch_num,
                   learning_rate=0.0001):
    read_train_dir = './botid/1010/'
    seave_train_dir = './botid/1010/'
    batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')

    phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

    image_paths_placeholder = tf.placeholder(tf.string,
                                             shape=(None, 1),
                                             name='image_paths')

    labels_placeholder = tf.placeholder(tf.int32,
                                        shape=(None, 1),
                                        name='labels')

    control_placeholder = tf.placeholder(tf.int32,
                                         shape=(None, 1),
                                         name='control')

    input_queue = data_flow_ops.FIFOQueue(
        capacity=2000000,
        dtypes=[tf.string, tf.int32, tf.int32],
        shapes=[(1, ), (1, ), (1, )],
        shared_name=None,
        name=None)
    enqueue_op = input_queue.enqueue_many(
        [image_paths_placeholder, labels_placeholder, control_placeholder],
        name='enqueue_op')

    image_batch, label_batch = create_input_pipeline(input_queue, image_size,
                                                     nrof_preprocess_threads,
                                                     batch_size_placeholder)

    image_batch = tf.identity(image_batch, 'image_batch')
    image_batch = tf.identity(image_batch, 'input')
    label_batch = tf.identity(label_batch, 'label_batch')

    labels = ops.convert_to_tensor(lab_data, dtype=tf.int32)
    range_size = array_ops.shape(labels)[0]
    index_queue = tf.train.range_input_producer(range_size,
                                                num_epochs=None,
                                                shuffle=True,
                                                seed=None,
                                                capacity=32)
    index_dequeue_op = index_queue.dequeue_many(batch_size * epoch_num,
                                                'index_dequeue')
    prelogits = face_id_net(image_batch, face_class, phase_train_placeholder)
    prelogits_center_loss, _ = center_loss(prelogits, label_batch, alfa,
                                           nclass)

    logits = slim.fully_connected(
        prelogits,
        nclass,
        activation_fn=None,
        weights_initializer=slim.initializers.xavier_initializer(),
        weights_regularizer=slim.l2_regularizer(0.0005),
        scope='Logits',
        reuse=False)
    eps = 1e-4
    prelogits_norm = tf.reduce_mean(
        tf.norm(tf.abs(prelogits) + eps, ord=1, axis=1))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                         prelogits_norm * 0.0005)

    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                         prelogits_center_loss * 0.00)

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=label_batch, logits=logits, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)

    correct_prediction = tf.cast(
        tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)),
        tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

    # Calculate the total losses
    regularization_losses = tf.get_collection(
        tf.GraphKeys.REGULARIZATION_LOSSES)
    print('看看你的真面目', [cross_entropy_mean], regularization_losses,
          prelogits_center_loss)
    total_loss = tf.add_n([cross_entropy_mean] + regularization_losses +
                          [prelogits_center_loss],
                          name='total_loss')

    train_op = trian_optimizer(total_loss, learning_rate=learning_rate)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)

    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(read_train_dir)
    y_step = 0
    if ckpt and ckpt.model_checkpoint_path:
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        saver.restore(sess, ckpt.model_checkpoint_path)
        print('原有训练次数', global_step)
        y_step = int(float(global_step))

    coord = tf.train.Coordinator()
    tf.train.start_queue_runners(coord=coord, sess=sess)
    with sess.as_default():
        for epoch in range(1, max_epoch + 1):

            # print('你说这是什么事:img_data',type(img_data),len(img_data),img_data)
            # print('你说这是什么事:lab_data',type(lab_data), len(lab_data), lab_data)
            index_epoch = sess.run(index_dequeue_op)
            # print(index_epoch)
            label_epoch = np.array(lab_data)[index_epoch]
            image_epoch = np.array(img_data)[index_epoch]

            # Enqueue one epoch of image paths and labels
            labels_array = np.expand_dims(np.array(label_epoch), 1)
            # print('这里的图片是什么', image_epoch.shape, image_epoch)
            # print('到这里标签变成什么了', labels_array.shape, labels_array)
            image_paths_array = np.expand_dims(np.array(image_epoch), 1)
            control_value = 14
            control_array = np.ones_like(labels_array) * control_value
            sess.run(
                enqueue_op, {
                    image_paths_placeholder: image_paths_array,
                    labels_placeholder: labels_array,
                    control_placeholder: control_array
                })

            batch_number = 0
            while batch_number < epoch_num:
                tensor_list = [
                    total_loss, train_op, regularization_losses, prelogits,
                    cross_entropy_mean, prelogits_norm, accuracy,
                    prelogits_center_loss
                ]
                feed_dict = {
                    phase_train_placeholder: True,
                    batch_size_placeholder: batch_size
                }
                loss_, _, reg_losses_, prelogits_, cross_entropy_mean_, prelogits_norm_, accuracy_, center_loss_ = sess.run(
                    tensor_list, feed_dict=feed_dict)

                print(
                    'Epoch: [%d][%d/%d]\t总loss %2.3f\t交叉熵平均 %2.3f\t正则化loss %2.3f\t准确率 %2.3f\t中心loss %2.3f'
                    % (epoch, batch_number + 1,
                       epoch_num, loss_, cross_entropy_mean_,
                       np.sum(reg_losses_), accuracy_, center_loss_))
                batch_number += 1

            gd = sess.graph.as_graph_def()

            # fix batch norm nodes
            for node in gd.node:
                # print(node.name)
                if node.op == 'RefSwitch':
                    node.op = 'Switch'
                    for index in range(len(node.input)):
                        if 'moving_' in node.input[index]:
                            node.input[index] = node.input[index] + '/read'
                elif node.op == 'AssignSub':
                    node.op = 'Sub'
                    if 'use_locking' in node.attr: del node.attr['use_locking']

            constant_graph = graph_util.convert_variables_to_constants(
                sess, sess.graph_def, ['output/output'])
            with tf.gfile.FastGFile(seave_train_dir + 'face72.pb',
                                    mode='wb') as f:
                f.write(constant_graph.SerializeToString())

            checkpoint_path = os.path.join(seave_train_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=epoch + y_step)
    return True
示例#2
0
def _read_keyed_batch_examples_helper(file_pattern,
                                      batch_size,
                                      reader,
                                      randomize_input=True,
                                      num_epochs=None,
                                      queue_capacity=10000,
                                      num_threads=1,
                                      read_batch_size=1,
                                      filter_fn=None,
                                      parse_fn=None,
                                      setup_shared_queue=False,
                                      name=None,
                                      seed=None):
    """Adds operations to read, queue, batch `Example` protos.

  Args:
    file_pattern: List of files or patterns of file paths containing
        `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` specifying the batch size to use.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    randomize_input: Whether the input should be randomized.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If `None`, cycles through the dataset forever.
      NOTE - If specified, creates a variable that must be initialized, so call
      `tf.global_variables_initializer()` and run the op in a session.
    queue_capacity: Capacity for input queue.
    num_threads: The number of threads enqueuing examples.
    read_batch_size: An int or scalar `Tensor` specifying the number of
      records to read at once
    filter_fn: Filtering function, takes both keys as well `Example` Tensors
      and returns a boolean mask of the same shape as the input Tensors to
      be applied for filtering. If `None`, no filtering is done.
    parse_fn: Parsing function, takes `Example` Tensor returns parsed
      representation. If `None`, no parsing is done.
    setup_shared_queue: Whether to set up a shared queue for file names.
    name: Name of resulting op.
    seed: An integer (optional). Seed used if randomize_input == True.

  Returns:
    Returns tuple of:
    - `Tensor` of string keys.
    - String `Tensor` of batched `Example` proto.

  Raises:
    ValueError: for invalid inputs.
  """
    # Retrieve files to read.
    file_names = _get_file_names(file_pattern, randomize_input)

    # Check input parameters are given and reasonable.
    if (not queue_capacity) or (queue_capacity <= 0):
        raise ValueError('Invalid queue_capacity %s.' % queue_capacity)
    if (batch_size is None) or (
        (not isinstance(batch_size, ops.Tensor)) and
        (batch_size <= 0 or batch_size > queue_capacity)):
        raise ValueError('Invalid batch_size %s, with queue_capacity %s.' %
                         (batch_size, queue_capacity))
    if (read_batch_size is None) or (
        (not isinstance(read_batch_size, ops.Tensor)) and
        (read_batch_size <= 0)):
        raise ValueError('Invalid read_batch_size %s.' % read_batch_size)
    if (not num_threads) or (num_threads <= 0):
        raise ValueError('Invalid num_threads %s.' % num_threads)
    if (num_epochs is not None) and (num_epochs <= 0):
        raise ValueError('Invalid num_epochs %s.' % num_epochs)

    with ops.name_scope(name, 'read_batch_examples', [file_pattern]) as scope:
        with ops.name_scope('file_name_queue') as file_name_queue_scope:
            if setup_shared_queue:
                file_name_queue = data_flow_ops.FIFOQueue(
                    capacity=1, dtypes=[dtypes.string], shapes=[[]])
                enqueue_op = file_name_queue.enqueue(
                    input_pipeline_ops.seek_next(file_names,
                                                 shuffle=randomize_input,
                                                 num_epochs=num_epochs,
                                                 seed=seed))
                queue_runner.add_queue_runner(
                    queue_runner.QueueRunner(file_name_queue, [enqueue_op]))
            else:
                file_name_queue = input_ops.string_input_producer(
                    constant_op.constant(file_names, name='input'),
                    shuffle=randomize_input,
                    num_epochs=num_epochs,
                    name=file_name_queue_scope,
                    seed=seed)

        example_list = _get_examples(file_name_queue, reader, num_threads,
                                     read_batch_size, filter_fn, parse_fn)

        enqueue_many = read_batch_size > 1

        if num_epochs is None:
            allow_smaller_final_batch = False
        else:
            allow_smaller_final_batch = True

        # Setup batching queue given list of read example tensors.
        if randomize_input:
            if isinstance(batch_size, ops.Tensor):
                min_after_dequeue = int(queue_capacity * 0.4)
            else:
                min_after_dequeue = max(queue_capacity - (3 * batch_size),
                                        batch_size)
            queued_examples_with_keys = input_ops.shuffle_batch_join(
                example_list,
                batch_size,
                capacity=queue_capacity,
                min_after_dequeue=min_after_dequeue,
                enqueue_many=enqueue_many,
                name=scope,
                allow_smaller_final_batch=allow_smaller_final_batch,
                seed=seed)
        else:
            queued_examples_with_keys = input_ops.batch_join(
                example_list,
                batch_size,
                capacity=queue_capacity,
                enqueue_many=enqueue_many,
                name=scope,
                allow_smaller_final_batch=allow_smaller_final_batch)
        if parse_fn and isinstance(queued_examples_with_keys, dict):
            queued_keys = queued_examples_with_keys.pop(KEY_FEATURE_NAME)
            return queued_keys, queued_examples_with_keys
        return queued_examples_with_keys
示例#3
0
def main(args):

    network = importlib.import_module(args.model_def)

    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    train_set = facenet.get_dataset(args.data_dir)
    if args.filter_filename:
        train_set = filter_dataset(train_set, os.path.expanduser(args.filter_filename),
                                   args.filter_percentile, args.filter_min_nrof_images_per_class)
    nrof_classes = len(train_set)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        pretrained_model = os.path.expanduser(args.pretrained_model)
        print('Pre-trained model: %s' % pretrained_model)

    if args.lfw_dir:
        print('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Get a list of image paths and their labels
        image_list, label_list = facenet.get_image_paths_and_labels(train_set)
        assert len(image_list) > 0, 'The dataset should not be empty'

        # Create a queue that produces indices into the image_list and label_list
        labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
        range_size = array_ops.shape(labels)[0]
        index_queue = tf.train.range_input_producer(range_size, num_epochs=None,
                                                    shuffle=True, seed=None, capacity=32)

        index_dequeue_op = index_queue.dequeue_many(args.batch_size * args.epoch_size, 'index_dequeue')

        learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')

        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')

        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths')

        labels_placeholder = tf.placeholder(tf.int64, shape=(None, 1), name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(1,), (1,)],
                                              shared_name=None, name=None)
        enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder], name='enqueue_op')

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_png(file_contents)
                if args.random_rotate:
                    image = tf.py_func(facenet.random_rotate_image, [image], tf.uint8)
                if args.random_crop:
                    image = tf.random_crop(image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)

                # pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels.append([images, label])

        image_batch, label_batch = tf.train.batch_join(
            images_and_labels, batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        label_batch = tf.identity(label_batch, 'label_batch')

        print('Total number of classes: %d' % nrof_classes)
        print('Total number of examples: %d' % len(image_list))

        print('Building training graph')

        # Build the inference graph
        prelogits, _ = network.inference(image_batch, args.keep_probability,
                                         phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size,
                                         weight_decay=args.weight_decay)
        logits = slim.fully_connected(prelogits, len(train_set), activation_fn=None,
                                      weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                      weights_regularizer=slim.l2_regularizer(args.weight_decay),
                                      scope='Logits', reuse=False)

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Add center loss
        if args.center_loss_factor > 0.0:
            prelogits_center_loss, _ = facenet.center_loss(prelogits, label_batch, args.center_loss_alfa, nrof_classes)
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * args.center_loss_factor)

        learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step,
                                                   args.learning_rate_decay_epochs * args.epoch_size, args.learning_rate_decay_factor, staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_batch, logits=logits, name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

        # Calculate the total losses
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay, tf.global_variables(), args.log_histograms)

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():

            if pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                saver.restore(sess, pretrained_model)

            # Training and validation loop
            print('Running training')
            epoch = 0
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder,
                      learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, global_step,
                      total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file)

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step)

                # Evaluate on LFW
                if args.lfw_dir:
                    evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder,
                             embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer)
    sess.close()
    return model_dir
示例#4
0
    def testContainer(self):
        """Set containers outside & inside of cond_v2.

    Make sure the containers are set correctly for both variable creation
    (tested by variables.Variable) and for stateful ops (tested by FIFOQueue)
    """
        self.skipTest("b/113048653")
        with ops.Graph().as_default() as g:
            with self.session(graph=g):

                v0 = variables.Variable([0])
                q0 = data_flow_ops.FIFOQueue(1, dtypes.float32)

                def container(node):
                    return node.op.get_attr("container")

                self.assertEqual(compat.as_bytes(""), container(v0))
                self.assertEqual(compat.as_bytes(""), container(q0.queue_ref))

                def true_fn():
                    # When this branch is created in cond below,
                    # the container should begin with 'l1'
                    v1 = variables.Variable([1])
                    q1 = data_flow_ops.FIFOQueue(1, dtypes.float32)

                    with ops.container("l2t"):
                        v2 = variables.Variable([2])
                        q2 = data_flow_ops.FIFOQueue(1, dtypes.float32)

                    v3 = variables.Variable([1])
                    q3 = data_flow_ops.FIFOQueue(1, dtypes.float32)

                    self.assertEqual(compat.as_bytes("l1"), container(v1))
                    self.assertEqual(compat.as_bytes("l1"),
                                     container(q1.queue_ref))
                    self.assertEqual(compat.as_bytes("l2t"), container(v2))
                    self.assertEqual(compat.as_bytes("l2t"),
                                     container(q2.queue_ref))
                    self.assertEqual(compat.as_bytes("l1"), container(v3))
                    self.assertEqual(compat.as_bytes("l1"),
                                     container(q3.queue_ref))

                    return constant_op.constant(2.0)

                def false_fn():
                    # When this branch is created in cond below,
                    # the container should begin with 'l1'
                    v1 = variables.Variable([1])
                    q1 = data_flow_ops.FIFOQueue(1, dtypes.float32)

                    with ops.container("l2f"):
                        v2 = variables.Variable([2])
                        q2 = data_flow_ops.FIFOQueue(1, dtypes.float32)

                    v3 = variables.Variable([1])
                    q3 = data_flow_ops.FIFOQueue(1, dtypes.float32)

                    self.assertEqual(compat.as_bytes("l1"), container(v1))
                    self.assertEqual(compat.as_bytes("l1"),
                                     container(q1.queue_ref))
                    self.assertEqual(compat.as_bytes("l2f"), container(v2))
                    self.assertEqual(compat.as_bytes("l2f"),
                                     container(q2.queue_ref))
                    self.assertEqual(compat.as_bytes("l1"), container(v3))
                    self.assertEqual(compat.as_bytes("l1"),
                                     container(q3.queue_ref))

                    return constant_op.constant(6.0)

                with ops.container("l1"):
                    cnd_true = cond_v2.cond_v2(constant_op.constant(True),
                                               true_fn, false_fn)
                    self.assertEquals(cnd_true.eval(), 2)

                    cnd_false = cond_v2.cond_v2(constant_op.constant(False),
                                                true_fn, false_fn)
                    self.assertEquals(cnd_false.eval(), 6)

                    v4 = variables.Variable([3])
                    q4 = data_flow_ops.FIFOQueue(1, dtypes.float32)
                v5 = variables.Variable([4])
                q5 = data_flow_ops.FIFOQueue(1, dtypes.float32)

            self.assertEqual(compat.as_bytes("l1"), container(v4))
            self.assertEqual(compat.as_bytes("l1"), container(q4.queue_ref))
            self.assertEqual(compat.as_bytes(""), container(v5))
            self.assertEqual(compat.as_bytes(""), container(q5.queue_ref))
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Apply gradients to variables.

    This contains most of the synchronization implementation and also wraps the
    apply_gradients() from the real optimizer.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        compute_gradients().
      global_step: Optional Variable to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the Optimizer constructor.

    Returns:
      train_op: The op to dequeue a token so the replicas can exit this batch
      and start the next one. This is executed by each replica.

    Raises:
      ValueError: If the grads_and_vars is empty.
      ValueError: If global step is not provided, the staleness cannot be
        checked.
    """
        if not grads_and_vars:
            raise ValueError("Must supply at least one variable")

        if global_step is None:
            raise ValueError("Global step is required to check staleness")

        self._global_step = global_step
        train_ops = []
        aggregated_grad = []
        var_list = []

        # local_anchor op will be placed on this worker task by default.
        local_anchor = control_flow_ops.no_op()
        # Colocating local_step variable prevents it being placed on the PS.
        distribution_strategy = distribution_strategy_context.get_strategy()
        with distribution_strategy.extended.colocate_vars_with(local_anchor):
            self._local_step = variable_scope.variable(
                initial_value=0,
                trainable=False,
                collections=[ops.GraphKeys.LOCAL_VARIABLES],
                dtype=global_step.dtype.base_dtype,
                name="sync_rep_local_step")

        self.local_step_init_op = state_ops.assign(self._local_step,
                                                   global_step)
        chief_init_ops = [self.local_step_init_op]
        self.ready_for_local_init_op = variables.report_uninitialized_variables(
            variables.global_variables())

        with ops.name_scope(None, self._name):
            for grad, var in grads_and_vars:
                var_list.append(var)
                with ops.device(var.device):
                    # Dense gradients.
                    if grad is None:
                        aggregated_grad.append(None)  # pass-through.
                        continue
                    elif isinstance(grad, ops.Tensor):
                        grad_accum = data_flow_ops.ConditionalAccumulator(
                            grad.dtype,
                            shape=var.get_shape(),
                            shared_name=var.name + "/grad_accum")
                        train_ops.append(
                            grad_accum.apply_grad(grad,
                                                  local_step=self._local_step))
                        aggregated_grad.append(
                            grad_accum.take_grad(self._replicas_to_aggregate))
                    else:
                        if not isinstance(grad, indexed_slices.IndexedSlices):
                            raise ValueError("Unknown grad type!")
                        grad_accum = data_flow_ops.SparseConditionalAccumulator(
                            grad.dtype,
                            shape=(),
                            shared_name=var.name + "/grad_accum")
                        train_ops.append(
                            grad_accum.apply_indexed_slices_grad(
                                grad, local_step=self._local_step))
                        aggregated_grad.append(
                            grad_accum.take_indexed_slices_grad(
                                self._replicas_to_aggregate))

                    self._accumulator_list.append((grad_accum, var.device))

            aggregated_grads_and_vars = zip(aggregated_grad, var_list)

            # sync_op will be assigned to the same device as the global step.
            with ops.device(global_step.device), ops.name_scope(""):
                update_op = self._opt.apply_gradients(
                    aggregated_grads_and_vars, global_step)

            # Create token queue.
            with ops.device(global_step.device), ops.name_scope(""):
                sync_token_queue = (data_flow_ops.FIFOQueue(
                    -1,
                    global_step.dtype.base_dtype,
                    shapes=(),
                    name="sync_token_q",
                    shared_name="sync_token_q"))
                self._sync_token_queue = sync_token_queue

            with ops.device(global_step.device), ops.name_scope(""):
                # Replicas have to wait until they can get a token from the token queue.
                with ops.control_dependencies(train_ops):
                    token = sync_token_queue.dequeue()
                train_op = state_ops.assign(self._local_step, token)

                with ops.control_dependencies([update_op]):
                    # Sync_op needs to insert tokens to the token queue at the end of the
                    # step so the replicas can fetch them to start the next step.
                    tokens = array_ops.fill([self._tokens_per_step],
                                            global_step)
                    sync_op = sync_token_queue.enqueue_many((tokens, ))

                if self._variable_averages is not None:
                    with ops.control_dependencies([sync_op
                                                   ]), ops.name_scope(""):
                        sync_op = self._variable_averages.apply(
                            self._variables_to_average)

                self._chief_queue_runner = queue_runner.QueueRunner(
                    sync_token_queue, [sync_op])
            for accum, dev in self._accumulator_list:
                with ops.device(dev):
                    chief_init_ops.append(
                        accum.set_global_step(global_step,
                                              name="SetGlobalStep"))
            self.chief_init_op = control_flow_ops.group(*(chief_init_ops))
            self._gradients_applied = True
            return train_op
示例#6
0
def main(args):

    network = importlib.import_module(args.model_def)

    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(
            log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(
            model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Write arguments to a text file
    facenet.write_arguments_to_file(args, os.path.join(log_dir,
                                                       'arguments.txt'))

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    train_set = facenet.get_dataset(args.data_dir)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    if args.pretrained_model:
        print('Pre-trained model: %s' %
              os.path.expanduser(args.pretrained_model))

    if args.lfw_dir:
        print('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(
            os.path.expanduser(args.lfw_dir), pairs)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Placeholder for the learning rate
        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')

        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')

        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 3),
                                                 name='image_paths')
        labels_placeholder = tf.placeholder(tf.int64,
                                            shape=(None, 3),
                                            name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(3, ), (3, )],
                                              shared_name=None,
                                              name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder])

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_jpeg(file_contents, channels=3)

                image = tf.image.resize_images(
                    image, [args.image_size + 22, args.image_size + 22])

                if args.random_crop:
                    image = tf.random_crop(
                        image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_images(
                        image, [args.image_size, args.image_size])
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)

                #pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels.append([images, label])

        image_batch, labels_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        labels_batch = tf.identity(labels_batch, 'label_batch')

        # Build the inference graph
        if args.optimizer == 'MOMW' or args.optimizer == 'ADAMW' or args.optimizer == 'ADABOUND':
            print("\nNot using L2 regularization\n")
            prelogits, _ = network.inference(
                image_batch,
                args.keep_probability,
                phase_train=phase_train_placeholder,
                bottleneck_layer_size=args.embedding_size,
                weight_decay=0.0)
        else:
            prelogits, _ = network.inference(
                image_batch,
                args.keep_probability,
                phase_train=phase_train_placeholder,
                bottleneck_layer_size=args.embedding_size,
                weight_decay=args.weight_decay)
        if args.fraction != 2.0:
            print("\n Using fractional distance metric\n")
            #embeddings = lk_normalize(prelogits, axis=1, fraction=args.fraction, name='embeddings')
            embeddings = tf.identity(prelogits, name='embeddings')
        else:
            embeddings = tf.nn.l2_normalize(prelogits,
                                            1,
                                            1e-10,
                                            name='embeddings')
        # Split embeddings into anchor, positive and negative and calculate triplet loss

        anchor, positive, negative = tf.unstack(
            tf.reshape(embeddings, [-1, 3, args.embedding_size]), 3, 1)
        triplet_loss = facenet.triplet_loss(anchor, positive, negative,
                                            args.alpha, args.fraction)

        # USING CYCLICAL LEARNING RATE
        if args.max_lr != 0:
            print("\nUsing cyclic learning rate\n")
            learning_rate = clr.cyclic_learning_rate(global_step,
                                                     learning_rate_placeholder,
                                                     max_lr=args.max_lr,
                                                     step_size=args.cycle_size,
                                                     mode=args.cycle_policy)
        else:
            print("\nUsing exponentially decaying learning rate\n")
            learning_rate = tf.train.exponential_decay(
                learning_rate_placeholder,
                global_step,
                args.learning_rate_decay_epochs * args.epoch_size,
                args.learning_rate_decay_factor,
                staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the total losses
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([triplet_loss] + regularization_losses,
                              name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(total_loss,
                                 global_step,
                                 args.optimizer,
                                 learning_rate,
                                 args.moving_average_decay,
                                 tf.global_variables(),
                                 weight_decay=args.weight_decay)

        # Create a saver
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # Initialize variables
        sess.run(tf.global_variables_initializer(),
                 feed_dict={phase_train_placeholder: True})
        sess.run(tf.local_variables_initializer(),
                 feed_dict={phase_train_placeholder: True})

        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():

            if args.pretrained_model:
                print('Restoring pretrained model: %s' % args.pretrained_model)
                saver.restore(sess, os.path.expanduser(args.pretrained_model))

            # Training and validation loop
            epoch = 0
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, train_set, epoch, image_paths_placeholder,
                      labels_placeholder, labels_batch, batch_size_placeholder,
                      learning_rate_placeholder, phase_train_placeholder,
                      enqueue_op, input_queue, global_step, embeddings,
                      total_loss, train_op, summary_op, summary_writer,
                      args.learning_rate_schedule_file, args.embedding_size,
                      anchor, positive, negative, triplet_loss, learning_rate,
                      regularization_losses)

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, step)

                # Evaluate on LFW
                if args.lfw_dir:
                    evaluate(sess, lfw_paths, embeddings, labels_batch,
                             image_paths_placeholder, labels_placeholder,
                             batch_size_placeholder, learning_rate_placeholder,
                             phase_train_placeholder, enqueue_op,
                             actual_issame, args.batch_size,
                             args.lfw_nrof_folds, log_dir, step,
                             summary_writer, args.embedding_size,
                             args.fraction)

    return model_dir
def main(pretrained_model: str,
         logs_base_dir: str = '~/logs/facenet',
         models_base_dir: str = '~/models/facenet',
         gpu_memory_fraction: float = 1.0,
         data_dir: str = '~/datasets/casia/casia_maxpy_mtcnnalign_182_160',
         model_def: str = 'models.inception_resnet_v1',
         max_nrof_epochs: int = 500,
         batch_size: int = 100,
         image_size: int = 160,
         epoch_size: int = 1000,
         embedding_size: int = 128,
         random_crop: bool = False,
         random_flip: bool = False,
         random_rotate: bool = False,
         use_fixed_image_standardization: bool = False,
         keep_probability: float = 1.0,
         weight_decay: float = 0.0,
         center_loss_factor: float = 0.0,
         center_loss_alfa: float = 0.95,
         prelogits_norm_loss_factor: float = 0.0,
         prelogits_norm_p: float = 1.0,
         prelogits_hist_max: float = 10.0,
         optimizer: str = 'ADAGRAD',
         learning_rate: float = 0.1,
         learning_rate_decay_epochs: int = 100,
         learning_rate_decay_factor: float = 1.0,
         moving_average_decay: float = 0.9999,
         seed: int = 666,
         nrof_preprocess_threads: int = 4,
         log_histograms: bool = False,
         learning_rate_schedule_file: str = 'data/learning_rate_schedule.txt',
         filter_filename: str = '',
         filter_percentile: float = 100.0,
         filter_min_nrof_images_per_class: int = 0,
         validate_every_n_epochs: int = 5,
         validation_set_split_ratio: float = 0.0,
         min_nrof_val_images_per_class: int = 0,
         lfw_pairs: str = 'data/pairs.txt',
         lfw_dir: str = '',
         lfw_batch_size: int = 100,
         lfw_nrof_folds: int = 10,
         lfw_distance_metric: int = 0,
         lfw_use_flipped_images: bool = False,
         lfw_subtract_mean: bool = False):
    """Train with softmax

    Arguments:
        pretrained_model {str} -- Load a pretrained model before training starts.

    Keyword Arguments:
        logs_base_dir {str} -- Directory where to write event logs. (default: {'~/logs/facenet'})
        models_base_dir {str} -- Directory where to write trained models and checkpoints. (default: {'~/models/facenet'})
        gpu_memory_fraction {float} -- Upper bound on the amount of GPU memory that will be used by the process. (default: {1.0})
        data_dir {str} -- Path to the data directory containing aligned face patches. (default: {'~/datasets/casia/casia_maxpy_mtcnnalign_182_160'})
        model_def {str} -- Model definition. Points to a module containing the definition of the inference graph. (default: {'models.inception_resnet_v1'})
        max_nrof_epochs {int} -- Number of epochs to run. (default: {500})
        batch_size {int} -- Number of images to process in a batch. (default: {100})
        image_size {int} -- Image size (height, width) in pixels. (default: {160})
        epoch_size {int} -- Number of batches per epoch. (default: {1000})
        embedding_size {int} -- Dimensionality of the embedding. (default: {128})
        random_crop {bool} -- Performs random cropping of training images. If false, the center image_size pixels from the training images are used. If the size of the images in the data directory is equal to image_size no cropping is performed (default: {False})
        random_flip {bool} -- Performs random horizontal flipping of training images. (default: {False})
        random_rotate {bool} -- Performs random rotations of training images. (default: {False})
        use_fixed_image_standardization {bool} -- Performs fixed standardization of images. (default: {False})
        keep_probability {float} -- Keep probability of dropout for the fully connected layer(s). (default: {1.0})
        weight_decay {float} -- L2 weight regularization. (default: {0.0})
        center_loss_factor {float} -- Center loss factor. (default: {0.0})
        center_loss_alfa {float} -- Center update rate for center loss. (default: {0.95})
        prelogits_norm_loss_factor {float} -- Loss based on the norm of the activations in the prelogits layer. (default: {0.0})
        prelogits_norm_p {float} -- Norm to use for prelogits norm loss. (default: {1.0})
        prelogits_hist_max {float} -- The max value for the prelogits histogram. (default: {10.0})
        optimizer {str} -- The optimization algorithm to use (default: {'ADAGRAD'})
        learning_rate {float} -- Initial learning rate. If set to a negative value a learning rate schedule can be specified in the file "learning_rate_schedule.txt" (default: {0.1})
        learning_rate_decay_epochs {int} -- Number of epochs between learning rate decay. (default: {100})
        learning_rate_decay_factor {float} -- Learning rate decay factor. (default: {1.0})
        moving_average_decay {float} -- Exponential decay for tracking of training parameters. (default: {0.9999})
        seed {int} -- Random seed. (default: {666})
        nrof_preprocess_threads {int} -- Number of preprocessing (data loading and augmentation) threads. (default: {4})
        log_histograms {bool} -- Enables logging of weight/bias histograms in tensorboard. (default: {False})
        learning_rate_schedule_file {str} -- File containing the learning rate schedule that is used when learning_rate is set to to -1. (default: {'data/learning_rate_schedule.txt'})
        filter_filename {str} -- File containing image data used for dataset filtering (default: {''})
        filter_percentile {float} -- Keep only the percentile images closed to its class center (default: {100.0})
        filter_min_nrof_images_per_class {int} -- Keep only the classes with this number of examples or more (default: {0})
        validate_every_n_epochs {int} -- Number of epoch between validation (default: {5})
        validation_set_split_ratio {float} -- The ratio of the total dataset to use for validation (default: {0.0})
        min_nrof_val_images_per_class {int} -- Classes with fewer images will be removed from the validation set (default: {0})
        lfw_pairs {str} -- The file containing the pairs to use for validation. (default: {'data/pairs.txt'})
        lfw_dir {str} -- Path to the data directory containing aligned face patches. (default: {''})
        lfw_batch_size {int} -- Number of images to process in a batch in the LFW test set. (default: {100})
        lfw_nrof_folds {int} -- Number of folds to use for cross validation. Mainly used for testing. (default: {10})
        lfw_distance_metric {int} -- Type of distance metric to use. 0: Euclidian, 1:Cosine similarity distance. (default: {0})
        lfw_use_flipped_images {bool} -- Concatenates embeddings for the image and its horizontally flipped counterpart. (default: {False})
        lfw_subtract_mean {bool} -- Subtract feature mean before calculating distance. (default: {False})

    Returns:
        [type] -- [description]
    """

    network = importlib.import_module(model_def)
    image_size = (image_size, image_size)

    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(logs_base_dir), subdir)
    os.makedirs(log_dir, exist_ok=True)
    model_dir = os.path.join(os.path.expanduser(models_base_dir), subdir)
    os.makedirs(model_dir, exist_ok=True)

    stat_file_name = os.path.join(log_dir, 'stat.h5')

    # Write arguments to a text file
    # facenet.write_arguments_to_file(
    #     args, os.path.join(log_dir, 'arguments.txt'))

    # Store some git revision info in a text file in the log directory
    # src_path, _ = os.path.split(os.path.realpath(__file__))
    # facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=seed)
    random.seed(seed)
    dataset = facenet.get_dataset(data_dir)
    if filter_filename:
        dataset = filter_dataset(dataset, os.path.expanduser(filter_filename),
                                 filter_percentile,
                                 filter_min_nrof_images_per_class)

    if validation_set_split_ratio > 0.0:
        train_set, val_set = facenet.split_dataset(
            dataset, validation_set_split_ratio, min_nrof_val_images_per_class,
            'SPLIT_IMAGES')
    else:
        train_set, val_set = dataset, []
    image_list, label_list = facenet.get_image_paths_and_labels(train_set)
    val_image_list, val_label_list = facenet.get_image_paths_and_labels(
        val_set)
    assert len(image_list) > 0, 'The training set should not be empty'

    nrof_classes = len(train_set)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    pretrained_model = None
    if pretrained_model:
        pretrained_model = os.path.expanduser(pretrained_model)
        print('Pre-trained model: %s' % pretrained_model)

    if lfw_dir:
        print('LFW directory: %s' % lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, lfw_labels = lfw.get_paths(os.path.expanduser(lfw_dir),
                                              pairs)

    with tf.Graph().as_default():
        tf.set_random_seed(seed)
        global_step = tf.Variable(0, trainable=False)

        # Create a queue that produces indices into the image_list and
        # label_list
        labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
        range_size = array_ops.shape(labels)[0]
        index_queue = tf.train.range_input_producer(range_size,
                                                    num_epochs=None,
                                                    shuffle=True,
                                                    seed=None,
                                                    capacity=32)

        index_dequeue_op = index_queue.dequeue_many(batch_size * epoch_size,
                                                    'index_dequeue')

        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name='image_paths')
        labels_placeholder = tf.placeholder(tf.int32,
                                            shape=(None, 1),
                                            name='labels')
        control_placeholder = tf.placeholder(tf.int32,
                                             shape=(None, 1),
                                             name='control')

        nrof_preprocess_threads = 4
        input_queue = data_flow_ops.FIFOQueue(
            capacity=2000000,
            dtypes=[tf.string, tf.int32, tf.int32],
            shapes=[(1, ), (1, ), (1, )],
            shared_name=None,
            name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder, control_placeholder],
            name='enqueue_op')
        image_batch, label_batch = facenet.create_input_pipeline(
            input_queue, image_size, nrof_preprocess_threads,
            batch_size_placeholder)

        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        label_batch = tf.identity(label_batch, 'label_batch')

        print('Number of classes in training set: %d' % nrof_classes)
        print('Number of examples in training set: %d' % len(image_list))

        print('Number of classes in validation set: %d' % len(val_set))
        print('Number of examples in validation set: %d' % len(val_image_list))

        print('Building training graph')

        # Build the inference graph
        prelogits, _ = network.inference(image_batch,
                                         keep_probability,
                                         phase_train=phase_train_placeholder,
                                         bottleneck_layer_size=embedding_size,
                                         weight_decay=weight_decay)
        logits = slim.fully_connected(
            prelogits,
            len(train_set),
            activation_fn=None,
            weights_initializer=slim.initializers.xavier_initializer(),
            weights_regularizer=slim.l2_regularizer(weight_decay),
            scope='Logits',
            reuse=False)

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Norm for the prelogits
        eps = 1e-4
        prelogits_norm = tf.reduce_mean(
            tf.norm(tf.abs(prelogits) + eps, ord=prelogits_norm_p, axis=1))
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             prelogits_norm * prelogits_norm_loss_factor)

        # Add center loss
        prelogits_center_loss, _ = facenet.center_loss(prelogits, label_batch,
                                                       center_loss_alfa,
                                                       nrof_classes)
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             prelogits_center_loss * center_loss_factor)

        learning_rate = tf.train.exponential_decay(learning_rate_placeholder,
                                                   global_step,
                                                   learning_rate_decay_epochs *
                                                   epoch_size,
                                                   learning_rate_decay_factor,
                                                   staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_batch,
            logits=logits,
            name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy,
                                            name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

        correct_prediction = tf.cast(
            tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)),
            tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

        # Calculate the total losses
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses,
                              name='total_loss')

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters
        train_op = facenet.train(total_loss, global_step, optimizer,
                                 learning_rate, moving_average_decay,
                                 tf.global_variables(), log_histograms)

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():

            if pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                saver.restore(sess, pretrained_model)

            # Training and validation loop
            print('Running training')
            nrof_steps = max_nrof_epochs * epoch_size
            # Validate every validate_every_n_epochs as well as in the last
            # epoch
            nrof_val_samples = int(
                math.ceil(max_nrof_epochs / validate_every_n_epochs))
            stat = {
                'loss': np.zeros((nrof_steps, ), np.float32),
                'center_loss': np.zeros((nrof_steps, ), np.float32),
                'reg_loss': np.zeros((nrof_steps, ), np.float32),
                'xent_loss': np.zeros((nrof_steps, ), np.float32),
                'prelogits_norm': np.zeros((nrof_steps, ), np.float32),
                'accuracy': np.zeros((nrof_steps, ), np.float32),
                'val_loss': np.zeros((nrof_val_samples, ), np.float32),
                'val_xent_loss': np.zeros((nrof_val_samples, ), np.float32),
                'val_accuracy': np.zeros((nrof_val_samples, ), np.float32),
                'lfw_accuracy': np.zeros((max_nrof_epochs, ), np.float32),
                'lfw_valrate': np.zeros((max_nrof_epochs, ), np.float32),
                'learning_rate': np.zeros((max_nrof_epochs, ), np.float32),
                'time_train': np.zeros((max_nrof_epochs, ), np.float32),
                'time_validate': np.zeros((max_nrof_epochs, ), np.float32),
                'time_evaluate': np.zeros((max_nrof_epochs, ), np.float32),
                'prelogits_hist': np.zeros((max_nrof_epochs, 1000),
                                           np.float32),
            }
            for epoch in range(1, max_nrof_epochs + 1):
                step = sess.run(global_step, feed_dict=None)
                # Train for one epoch
                t = time.time()
                cont = train(
                    args, sess, epoch, image_list, label_list,
                    index_dequeue_op, enqueue_op, image_paths_placeholder,
                    labels_placeholder, learning_rate_placeholder,
                    phase_train_placeholder, batch_size_placeholder,
                    control_placeholder, global_step, total_loss, train_op,
                    summary_op, summary_writer, regularization_losses,
                    learning_rate_schedule_file, stat, cross_entropy_mean,
                    accuracy, learning_rate, prelogits, prelogits_center_loss,
                    random_rotate, random_crop, random_flip, prelogits_norm,
                    prelogits_hist_max, use_fixed_image_standardization)
                stat['time_train'][epoch - 1] = time.time() - t

                if not cont:
                    break

                t = time.time()
                if len(val_image_list) > 0 and (
                    (epoch - 1) % validate_every_n_epochs
                        == validate_every_n_epochs - 1
                        or epoch == max_nrof_epochs):
                    validate(args, sess, epoch, val_image_list, val_label_list,
                             enqueue_op, image_paths_placeholder,
                             labels_placeholder, control_placeholder,
                             phase_train_placeholder, batch_size_placeholder,
                             stat, total_loss, regularization_losses,
                             cross_entropy_mean, accuracy,
                             validate_every_n_epochs,
                             use_fixed_image_standardization)
                stat['time_validate'][epoch - 1] = time.time() - t

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, epoch)

                # Evaluate on LFW
                t = time.time()
                if lfw_dir:
                    evaluate(sess, enqueue_op, image_paths_placeholder,
                             labels_placeholder, phase_train_placeholder,
                             batch_size_placeholder, control_placeholder,
                             embeddings, label_batch, lfw_paths, lfw_labels,
                             lfw_batch_size, lfw_nrof_folds, log_dir, step,
                             summary_writer, stat, epoch, lfw_distance_metric,
                             lfw_subtract_mean, lfw_use_flipped_images,
                             use_fixed_image_standardization)
                stat['time_evaluate'][epoch - 1] = time.time() - t

                print('Saving statistics')
                with h5py.File(stat_file_name, 'w') as f:
                    for key, value in stat.iteritems():
                        f.create_dataset(key, data=value)

    return model_dir
示例#8
0
def main():
    model_def = 'inception_resnet_v1'
    network = importlib.import_module(model_def)
    gpu_memory_fraction = 0.96
    # Learning params
    FC_SIZE = 1
    learning_rate = 0.0001
    batch_size = 8
    epoch_size = 2
    keep_probability = 1.0
    weight_decay = 0.0

    # Read the file containing the pairs used for testing
    pairs_train, labels_train, labels_sigle_train = read_pairs_path_label_test(
        image_directory, train_filename)
    pairs_validation, labels_validation, labels_sigle_validation = read_pairs_path_label_test(
        image_directory, validation_filename)
    with tf.Graph().as_default():
        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 2),
                                                 name='image_path')  #
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        labels_placeholder = tf.placeholder(tf.int32,
                                            shape=(None, 2),
                                            name='labels')  #

        # read image path
        input_queue = data_flow_ops.FIFOQueue(capacity=300000,
                                              dtypes=[tf.string, tf.int32],
                                              shapes=[(2, ), (2, )],
                                              shared_name=None,
                                              name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder])

        nrof_preprocess_threads = 3
        images_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, labels = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)
                # pylint: disable=no-member
                image.set_shape((224, 224, 3))
                images.append(tf.image.per_image_standardization(image))
            images_labels.append([images, labels])

        image_batch, labels_batch = tf.train.batch_join(
            images_labels,
            batch_size=batch_size_placeholder,
            shapes=[(224, 224, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'image_batch')
        labels_batch = tf.identity(labels_batch, 'label_batch')

        # Build the graph
        prelogits, _ = network.inference(image_batch,
                                         keep_probability,
                                         phase_train=phase_train_placeholder,
                                         bottleneck_layer_size=1,
                                         weight_decay=weight_decay)

        predictions = tf.nn.l2_normalize(prelogits,
                                         1,
                                         1e-10,
                                         name='embeddings')
        # # model_ = Model(inputs=resnet.input, outputs=predictions)

        preds_left, preds_right = tf.unstack(
            tf.reshape(predictions, [-1, 2, 1]), 2, 1)
        labels_1, _ = tf.unstack(tf.reshape(labels_batch, [-1, 2, 1]), 2, 1)
        difference = tf.sigmoid(tf.subtract(preds_left, preds_right))
        loss = log_loss_(labels_1, difference)  #
        # optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(loss)
        train_op = _train_op(loss, 'MOM', learning_rate, tf.global_variables())
        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        # Initialize variables
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)
        with sess.as_default():
            # Training and validation loop
            max_nrof_epochs = 30
            epoch = 0
            while epoch < max_nrof_epochs:
                batch_number = 0
                nrof_pairs = len(pairs_train)
                while batch_number < epoch_size:
                    nrof_batches = int(np.ceil(nrof_pairs * 2 / batch_size))
                    pair_paths = list(itertools.chain(*pairs_train))
                    labels_array = np.reshape(labels_train, (-1, 2))
                    pair_paths_array = np.reshape(
                        np.expand_dims(np.array(pair_paths), 1), (-1, 2))
                    sess.run(enqueue_op,
                             feed_dict={
                                 image_paths_placeholder: pair_paths_array,
                                 labels_placeholder: labels_array
                             })
                    nrof_examples = len(pair_paths)
                    train_time = 0
                    batch_err = 0
                    batch_preds = 0
                    i = 0
                    while i < nrof_batches:
                        start_time = time.time()
                        batch_size = min(nrof_examples - i * batch_size,
                                         batch_size)
                        # preds = sess.run(predictions, feed_dict={batch_size_placeholder:batch_size})
                        # err, _ = sess.run([loss, train_op], feed_dict={batch_size_placeholder:batch_size, phase_train_placeholder: True})
                        err, _, preds, lab = sess.run(
                            [loss, train_op, predictions, labels_batch],
                            feed_dict={
                                batch_size_placeholder: batch_size,
                                phase_train_placeholder: True
                            })
                        duration = time.time() - start_time
                        i += 1
                        train_time += duration
                        batch_err += err
                        batch_preds += preds
                    print('Epoch: [%d][%d/%d]\tTime %.3f\tLoss %2.3f\t' %
                          (epoch, batch_number + 1, epoch_size, train_time,
                           batch_err / i))
                    batch_number += 1
                # ### validate ##########
                nro_size = pairs_validation.shape[0]
                labels_array = np.reshape(labels_validation, (-1, 2))
                image_paths_array = np.reshape(
                    np.expand_dims(np.array(pairs_validation), 1), (-1, 2))
                sess.run(enqueue_op,
                         feed_dict={
                             image_paths_placeholder: image_paths_array,
                             labels_placeholder: labels_array
                         })

                # for i in xrange(nrof_batches):
                #     batch_size = min(nrof_images - i * batch_size, batch_size)
                #     emb, lab = sess.run([embeddings, labels_batch], feed_dict={batch_size_placeholder: batch_size})
                feed_dict = {
                    batch_size_placeholder: nro_size,
                    phase_train_placeholder: False
                }
                err, _, preds, lab = sess.run(
                    [loss, train_op, predictions, labels_batch],
                    feed_dict=feed_dict)
                y = np.reshape(lab, (lab.shape[0], 1))
                accuracy = compute_accuracy(preds, y)
                print('epoch %d Accuracy validate set %0.2f' %
                      (epoch, accuracy))

                epoch += 1
def main(args):
    #Importing network based on model.def path
    network = importlib.import_module(args.model_def)
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)

    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    #Create a model file directory
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    palmPrintsNet.write_arguments_to_file(
        args, os.path.join(log_dir, 'arguments.txt'))
    src_path, _ = os.path.split(os.path.realpath(__file__))
    palmPrintsNet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    train_set = palmPrintsNet.get_dataset(args.data_dir)
    #Total class's number
    nrof_classes = len(train_set)
    print('model_path: %s' % model_dir)
    print('log_path: %s' % log_dir)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)
        image_list, label_list = palmPrintsNet.get_image_paths_and_labels(
            train_set)
        assert len(image_list) > 0, 'The dataset should not be empty'
        labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
        range_size = array_ops.shape(labels)[0]

        index_queue = tf.train.range_input_producer(range_size,
                                                    num_epochs=None,
                                                    shuffle=True,
                                                    seed=None,
                                                    capacity=32)

        index_dequeue_op = index_queue.dequeue_many(
            args.batch_size * args.epoch_size, 'index_dequeue')
        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name='image_paths')
        labels_placeholder = tf.placeholder(tf.int64,
                                            shape=(None, 1),
                                            name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(1, ), (1, )],
                                              shared_name=None,
                                              name=None)

        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder], name='enqueue_op')

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=1)
                images.append(tf.image.per_image_standardization(image))
            images_and_labels.append([images, label])
        image_batch, label_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size_placeholder,
            shapes=[(160, 160, 1), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        label_batch = tf.identity(label_batch, 'label_batch')
        print('the number of class: %d' % nrof_classes)
        print('the number of image: %d' % len(image_list))

        #perlogits is a 128-v feature vector
        prelogits, _ = network.inference(
            image_batch,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay)
        logits = slim.fully_connected(
            prelogits,
            len(train_set),
            activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            weights_regularizer=slim.l2_regularizer(args.weight_decay),
            scope='Logits',
            reuse=False)

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')
        #center loss
        if args.center_loss_factor > 0.0:
            prelogits_center_loss, _ = palmPrintsNet.center_loss(
                prelogits, label_batch, args.center_loss_alfa, nrof_classes)
            tf.add_to_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES,
                prelogits_center_loss * args.center_loss_factor)
        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)
        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_batch,
            logits=logits,
            name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy,
                                            name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)
        #total loss
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses,
                              name='total_loss')

        #train
        train_op = palmPrintsNet.train(total_loss, global_step, args.optimizer,
                                       learning_rate,
                                       args.moving_average_decay,
                                       tf.global_variables(),
                                       args.log_histograms)
        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)
        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)
        with sess.as_default():
            epoch = 0
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size

                train(args, sess, epoch, image_list, label_list,
                      index_dequeue_op, enqueue_op, image_paths_placeholder,
                      labels_placeholder, learning_rate_placeholder,
                      phase_train_placeholder, batch_size_placeholder,
                      global_step, total_loss, train_op, summary_op,
                      summary_writer, regularization_losses,
                      args.learning_rate_schedule_file)

                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, step)
    return model_dir
示例#10
0
def main(args):

    network = importlib.import_module(args.model_def)

    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(
            log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(
            model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Write arguments to a text file
    facenet.write_arguments_to_file(args, os.path.join(log_dir,
                                                       'arguments.txt'))

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    train_set_ID = facenet.get_dataset(args.data_dir_ID)
    train_set_camera = facenet.get_dataset(args.data_dir_camera)

    logger.info('Model directory: %s' % model_dir)
    logger.info('Log directory: %s' % log_dir)
    if args.pretrained_model:
        logger.info('Pre-trained model: %s' %
                    os.path.expanduser(args.pretrained_model))

    if args.lfw_dir:
        logger.info('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(
            os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)

    # associative, fengchen
    assoc = Associative(network, args)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Placeholder for the learning rate
        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        image_paths_placeholder_ID = tf.placeholder(tf.string,
                                                    shape=(None, 3),
                                                    name='image_paths_ID')
        image_paths_placeholder_camera = tf.placeholder(
            tf.string, shape=(None, 3), name='image_paths_camera')
        image_paths_placeholder_valid = tf.placeholder(
            tf.string, shape=(None, 3), name='image_paths_valid')
        labels_placeholder_ID = tf.placeholder(tf.int64,
                                               shape=(None, 3),
                                               name='labels_ID')
        labels_placeholder_camera = tf.placeholder(tf.int64,
                                                   shape=(None, 3),
                                                   name='labels_camera')
        labels_placeholder_valid = tf.placeholder(tf.int64,
                                                  shape=(None, 3),
                                                  name='labels_valid')
        input_queue_ID = data_flow_ops.FIFOQueue(capacity=100000,
                                                 dtypes=[tf.string, tf.int64],
                                                 shapes=[(3, ), (3, )],
                                                 shared_name=None,
                                                 name=None)
        input_queue_camera = data_flow_ops.FIFOQueue(
            capacity=100000,
            dtypes=[tf.string, tf.int64],
            shapes=[(3, ), (3, )],
            shared_name=None,
            name=None)
        input_queue_valid = data_flow_ops.FIFOQueue(
            capacity=100000,
            dtypes=[tf.string, tf.int64],
            shapes=[(3, ), (3, )],
            shared_name=None,
            name=None)
        enqueue_op_ID = input_queue_ID.enqueue_many(
            [image_paths_placeholder_ID, labels_placeholder_ID])
        enqueue_op_camera = input_queue_camera.enqueue_many(
            [image_paths_placeholder_camera, labels_placeholder_camera])
        enqueue_op_valid = input_queue_valid.enqueue_many(
            [image_paths_placeholder_valid, labels_placeholder_valid])
        nrof_preprocess_threads = 4

        images_and_labels_ID = []
        images_and_labels_camera = []
        images_and_labels_valid = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue_ID.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)

                if args.random_crop:
                    image = tf.random_crop(
                        image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(
                        image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)

                #pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels_ID.append([images, label])

        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue_camera.dequeue()

            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)

                if args.random_crop:
                    image = tf.random_crop(
                        image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(
                        image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)

                # pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels_camera.append([images, label])

        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue_valid.dequeue()

            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)

                if args.random_crop:
                    image = tf.random_crop(
                        image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(
                        image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)

                # pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels_valid.append([images, label])

        image_batch_ID, labels_batch_ID = tf.train.batch_join(
            images_and_labels_ID,
            batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch_ID = tf.identity(image_batch_ID, 'image_batch_ID')
        image_batch_ID = tf.identity(image_batch_ID, 'input_ID')
        labels_batch_ID = tf.identity(labels_batch_ID, 'label_batch_ID')

        image_batch_camera, labels_batch_camera = tf.train.batch_join(
            images_and_labels_camera,
            batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch_camera = tf.identity(image_batch_camera,
                                         'image_batch_camera')
        image_batch_camera = tf.identity(image_batch_camera, 'input_camera')
        labels_batch_camera = tf.identity(labels_batch_camera,
                                          'label_batch_camera')

        image_batch_valid, labels_batch_valid = tf.train.batch_join(
            images_and_labels_valid,
            batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)

        image_batch_valid = tf.identity(image_batch_valid, 'image_batch_valid')
        labels_batch_valid = tf.identity(labels_batch_valid,
                                         'label_batch_valid')

        # Build the inference graph
        prelogits_ID, _, _, _, _ = network.inference(
            image_batch_ID,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay)
        prelogits_camera, _, _, _, _ = network.inference(
            image_batch_camera,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay)
        prelogits_valid, _, _, _, _ = network.inference(
            image_batch_valid,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay)

        embeddings_ID = tf.nn.l2_normalize(prelogits_ID,
                                           1,
                                           1e-10,
                                           name='embeddings_ID')
        embeddings_camera = tf.nn.l2_normalize(prelogits_camera,
                                               1,
                                               1e-10,
                                               name='embeddings_camera')
        embeddings_valid = tf.nn.l2_normalize(prelogits_valid,
                                              1,
                                              1e-10,
                                              name='embeddings_valid')

        # Split embeddings into anchor, positive and negative and calculate triplet loss
        anchor_ID, positive_ID, negative_ID = tf.unstack(
            tf.reshape(embeddings_ID, [-1, 3, args.embedding_size]), 3, 1)
        triplet_loss_ID = facenet.triplet_loss(anchor_ID, positive_ID,
                                               negative_ID, args.alpha)

        anchor_camera, positive_camera, negative_camera = tf.unstack(
            tf.reshape(embeddings_camera, [-1, 3, args.embedding_size]), 3, 1)
        triplet_loss_camera = facenet.triplet_loss(anchor_camera,
                                                   positive_camera,
                                                   negative_camera, args.alpha)

        images_mmd_ID, images_mmd_camera, _, _ = assoc.get_image_and_label()
        feature_map3_ID, _, _, _, _ = network.inference(
            images_mmd_ID,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay)
        feature_map3_camera, _, _, _, _ = network.inference(
            images_mmd_camera,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay)

        feature_map3_ID = tf.nn.l2_normalize(feature_map3_ID,
                                             1,
                                             1e-10,
                                             name='feature_map3_ID')
        feature_map3_camera = tf.nn.l2_normalize(feature_map3_camera,
                                                 1,
                                                 1e-10,
                                                 name='feature_map3_camera')
        loss_feature_map3 = losses.mmd_loss(feature_map3_ID,
                                            feature_map3_camera)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # # associative, fengchen
        # associative_loss = assoc.loss() * 10

        # Calculate the total losses
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)

        triplet_loss = tf.add_n([triplet_loss_ID] + [triplet_loss_camera] +
                                regularization_losses,
                                name='triplet_loss')

        # associative, fengchen
        loss_total = tf.add_n([triplet_loss_ID] + [triplet_loss_camera] +
                              [loss_feature_map3] + regularization_losses,
                              name='loss_total')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(loss_total, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay,
                                 tf.global_variables())
        train_op_triplet = facenet.train(triplet_loss, global_step,
                                         args.optimizer, learning_rate,
                                         args.moving_average_decay,
                                         tf.global_variables())

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # Initialize variables
        sess.run(tf.global_variables_initializer(),
                 feed_dict={phase_train_placeholder: True})
        sess.run(tf.local_variables_initializer(),
                 feed_dict={phase_train_placeholder: True})

        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():

            if args.pretrained_model:
                logger.info('Restoring pretrained model: %s' %
                            args.pretrained_model)
                saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)
                saver.restore(sess, os.path.expanduser(args.pretrained_model))
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)

            # Training and validation loop
            epoch = 0
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, train_set_ID, train_set_camera, epoch,
                      image_paths_placeholder_ID,
                      image_paths_placeholder_camera, labels_placeholder_ID,
                      labels_placeholder_camera, labels_batch_ID,
                      labels_batch_camera, batch_size_placeholder,
                      learning_rate_placeholder, phase_train_placeholder,
                      enqueue_op_ID, enqueue_op_camera, global_step,
                      embeddings_ID, embeddings_camera, triplet_loss,
                      loss_total, triplet_loss_ID, triplet_loss_camera,
                      loss_feature_map3, regularization_losses, train_op,
                      train_op_triplet, summary_writer,
                      args.learning_rate_schedule_file, args.embedding_size)

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, step)

                # Evaluate on LFW
                if args.lfw_dir:
                    evaluate(sess, lfw_paths, embeddings_valid,
                             labels_batch_valid, image_paths_placeholder_valid,
                             labels_placeholder_valid, batch_size_placeholder,
                             learning_rate_placeholder,
                             phase_train_placeholder, enqueue_op_valid,
                             actual_issame, args.batch_size,
                             args.lfw_nrof_folds, log_dir, step,
                             summary_writer, args.embedding_size)

    return model_dir
示例#11
0
def main(data_dir, log_dir, model_dir, batch_size_train, epoch_size_train):
    """
    main process for tripless lot train
    :return:
    """
    batch_size_train = int(batch_size_train)
    epoch_size_train = int(epoch_size_train)
    network = importlib.import_module(constants.MODEL_DEF_DEFAULT)

    subdir = datetime.strftime(datetime.now(), '%Y%m%d%H%M%S')
    logdir = log_dir
    print(logdir)

    if not os.path.isdir(logdir):
        os.makedirs(logdir)

    #model_dir = os.path.join(os.path.expanduser(constants.MODELS_BASE_DIR_DEFAULT), subdir)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    np.random.seed(seed=constants.SEED_DEFAULT)

    # Load traing set
    print('Data dir : {0}'.format(data_dir))
    train_set = face_net.load_dataset(data_dir)

    print('Model dir : {0}'.format(model_dir))
    print('Log dir : {0}'.format(logdir))

    # TODO load pre-trained model

    # Load LFW
    if constants.LFW_DIR_DEFAULT:
        print('LFW dir : {0}'.format(constants.LFW_DIR_DEFAULT))
        # Read the file which contain the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(constants.LFW_PAIRS_DEFAULT))
        lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(constants.LFW_DIR_DEFAULT), pairs)

    with tf.Graph().as_default():
        tf.set_random_seed(constants.SEED_DEFAULT)
        global_step = tf.Variable(0, trainable=False)

        # Placeholder for the learning rate
        learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 3), name='labels')
        labels_placeholder = tf.placeholder(tf.int64, shape=(None, 3), name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(3,), (3,)],
                                              shared_name=None,
                                              name=None)
        enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder])

        nrof_preprocess_thread = constants.LFW_NROF_PREPROCESS_THREAD_DEFAULT
        images_and_labels = []
        for i in range(nrof_preprocess_thread):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_content = tf.read_file(filename)
                image = tf.to_float(tf.image.decode_image(file_content, channels=3))

                if constants.RANDOM_CROP_DEFAULT:
                    image = tf.random_crop(image, [constants.IMAGE_SIZE, constants.IMAGE_SIZE, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(image, constants.IMAGE_SIZE, constants.IMAGE_SIZE)

                if constants.RANDOM_FLIP_DEFAULT:
                    image = tf.image.random_flip_left_right(image)

                # Pylink : disable=no-member
                image.set_shape((constants.IMAGE_SIZE, constants.IMAGE_SIZE, 3))
                images.append(image)
            images_and_labels.append([images, label])

        image_batch, labels_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size_placeholder,
            shapes=[(constants.IMAGE_SIZE, constants.IMAGE_SIZE, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_thread * batch_size_train,
            allow_smaller_final_batch=True)

        # TODO something strange
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        labels_batch = tf.identity(labels_batch, 'label_batch')

        # Buld the inference graph
        prelogits, _ = network.inference(image_batch, constants.KEEP_PROBABILITY_DEFAULT,
                                         phase_train=phase_train_placeholder,
                                         bottleneck_layer_size=constants.EMBEDDING_SIZE_DEFAULT,
                                         weight_decay=constants.WEIGHT_DECAY_DEFAULT)

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Split embeddings into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(tf.reshape(embeddings, [-1, 3, constants.EMBEDDING_SIZE_DEFAULT]), 3, 1)
        triplet_loss = face_net.triplet_lot(anchor, positive, negative, constants.ALPHA_DEFAULT)

        learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step,
                                                   constants.LEARNING_RATE_DECAY_EPOCHS_DEFAULT * epoch_size_train,
                                                   constants.LEARNING_RATE_DECAY_EPOCHS_FACTOR, staircase=True)
        tf.summary.scalar('learing_rate', learning_rate)

        # Calculate the total losses
        regularization_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([triplet_loss] + regularization_loss, name='total_loss')

        # Build a Graph that train the model with one batch of example and update the model parameter
        train_op = face_net.train(total_loss, global_step, constants.OPTIMIZER_DEFAULT, learning_rate,
                                  constants.MOVING_AVERAGE_DECAY_DEFAULT, tf.global_variables(), log_dir)

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of summaries
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=constants.GPU_MEMORY_FRACTION_DEFAULT)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # Initialize variables
        sess.run(tf.global_variables_initializer(), feed_dict={phase_train_placeholder: True})
        sess.run(tf.local_variables_initializer(), feed_dict={phase_train_placeholder: True})

        summary_writer = tf.summary.FileWriter(logdir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():
            if constants.PRETRAINED_MODEL_DEFAULT:
                print('Restoring pretrained model: {0}'.format(constants.PRETRAINED_MODEL_DEFAULT))
                saver.restore(sess, os.path.expanduser(constants.PRETRAINED_MODEL_DEFAULT))

            # Training and validation loop
            epoch = 0
            while epoch < constants.MAX_NROF_EPOCHS_DEFAULT:
                step = sess.run(global_step, feed_dict=None)
                epoch = step / epoch_size_train
                print('epoch for train {0}'.format(epoch))

                # Training for each one epoch
                train(sess, batch_size_train, epoch_size_train, train_set, epoch, image_paths_placeholder, labels_placeholder,
                      labels_batch, batch_size_placeholder, learning_rate_placeholder,
                      phase_train_placeholder, enqueue_op, input_queue, global_step,
                      embeddings, total_loss, train_op, summary_op, summary_writer,
                      constants.LEARNING_RATE_SCHEDULE_FILE_DEFAULT, constants.EMBEDDING_SIZE_DEFAULT,
                      anchor, positive, negative, triplet_loss)

                # Save variables and the metagraph if it does not exist already
                save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step)

                # Evaluate on LW
                if constants.LFW_DIR_DEFAULT:
                    evaluate(sess, lfw_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder,
                             batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op,
                             actual_issame, batch_size_train, constants.LFW_NROF_FOLDS_DEFAULT, logdir, step,
                             summary_writer, constants.EMBEDDING_SIZE_DEFAULT)

    print('End main process.')
    return model_dir
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Apply gradients to variables.

    This contains most of the synchronization implementation and also wraps the
    apply_gradients() from the real optimizer.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        compute_gradients().
      global_step: Optional Variable to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the Optimizer constructor.

    Returns:
      train_op: The op to dequeue a token so the replicas can exit this batch
      and start the next one. This is executed by each replica.

    Raises:
      ValueError: If the grads_and_vars is empty.
      ValueError: If global step is not provided, the staleness cannot be
        checked.
    """
    if not grads_and_vars:
      raise ValueError("Must supply at least one variable")

    if global_step is None:
      raise ValueError("Global step is required to check staleness")

    self._global_step = global_step
    train_ops = []
    aggregated_grad = []
    inputs = []
    var_list = []
    for x in grads_and_vars:
      inputs.extend(list(x))

    with ops.device(global_step.device):
      self._local_steps = variables.Variable(
          array_ops.zeros(
              [self._total_num_replicas],
              dtype=global_step.dtype),
          trainable=False,
          name="local_steps")

    # Check staleness. Note that this has to be ref(), otherwise identity will
    # be accessed and it will be old values.
    local_step = array_ops.slice(self._local_steps.ref(),
                                 array_ops.reshape(self._replica_id, (1,)),
                                 [1],
                                 name="get_local_step")
    local_step = array_ops.reshape(local_step, ())
    is_stale = math_ops.less(local_step, global_step)

    with ops.op_scope(inputs, None, self._name):
      for grad, var in grads_and_vars:
        var_list.append(var)
        with ops.device(var.device):
          if isinstance(grad, ops.Tensor):
            gradient_queue = (data_flow_ops.FIFOQueue(self._tokens_per_step * 2,
                                                      grad.dtype,
                                                      shapes=var.get_shape(),
                                                      shared_name=var.name))
            self._one_element_queue_list.append((gradient_queue, var.device))
            train_ops.append(gradient_queue.enqueue([grad]))

            # Aggregate all gradients
            gradients = gradient_queue.dequeue_many(
                self._replicas_to_aggregate)
            aggregated_grad.append(math_ops.reduce_sum(gradients, [0]))
          elif grad is None:
            aggregated_grad.append(None)  # pass-through.
          else:
            if not isinstance(grad, ops.IndexedSlices):
              raise ValueError("Unknown grad type!")
            aggregated_grad.append(self._aggregate_sparse_grad(grad, var,
                                                               train_ops))

      aggregated_grads_and_vars = zip(aggregated_grad, var_list)

      # sync_op will be assigned to the same device as the global step.
      with ops.device(global_step.device), ops.name_scope(""):
        update_op = self._opt.apply_gradients(aggregated_grads_and_vars,
                                              global_step)

      # Create token queue.
      with ops.device(global_step.device), ops.name_scope(""):
        sync_token_queue = (
            data_flow_ops.FIFOQueue(-1,
                                    global_step.dtype.base_dtype,
                                    shapes=(),
                                    shared_name="sync_token_q"))
        self._sync_token_queue = sync_token_queue

        # dummy_queue is passed to the queue runner. Don't use the real queues
        # because the queue runner doesn't automatically reopen it once it
        # closed queues in PS devices.
        dummy_queue = (
            data_flow_ops.FIFOQueue(1,
                                    types_pb2.DT_INT32,
                                    shapes=(),
                                    shared_name="dummy_queue"))
      # Clear all the gradients queues in case there are stale gradients.
      clear_queue_ops = []
      with ops.control_dependencies([update_op]):
        for queue, dev in self._one_element_queue_list:
          with ops.device(dev):
            stale_grads = queue.dequeue_many(queue.size())
            clear_queue_ops.append(stale_grads)

        for queue, dev in self._sparse_grad_queues_and_devs:
          with ops.device(dev):
            _, stale_indices = queue.dequeue_many(queue.size())
            clear_queue_ops.append(stale_indices)

      with ops.device(global_step.device):
        self._clean_up_op = control_flow_ops.abort(
            error_msg="From sync_replicas")

      # According to the staleness, select between the enqueue op (real_grad)
      # or no-op (no_op_grad). Effectively dropping all the stale gradients.
      no_op_grad = lambda: [control_flow_ops.no_op(name="no_grad_enqueue")]
      real_grad = lambda: [control_flow_ops.group(*train_ops)]
      final_train_ops = control_flow_ops.cond(is_stale, no_op_grad, real_grad)

      with ops.device(global_step.device), ops.name_scope(""):
        # Replicas have to wait until they can get a token from the token queue.
        with ops.control_dependencies([final_train_ops]):
          token = sync_token_queue.dequeue()
          train_op = state_ops.scatter_update(self._local_steps,
                                              self._replica_id, token)

        with ops.control_dependencies(clear_queue_ops):
          # Sync_op needs to insert tokens to the token queue at the end of the
          # step so the replicas can fetch them to start the next step.
          # Note that ref() is used to avoid reading from the identity with old
          # the step.
          tokens = array_ops.fill([self._tokens_per_step], global_step.ref())
          sync_op = sync_token_queue.enqueue_many((tokens,))

        if self._variable_averages is not None:
          with ops.control_dependencies([sync_op]), ops.name_scope(""):
            sync_op = self._variable_averages.apply(
                self._variables_to_average)

        self._chief_queue_runner = queue_runner.QueueRunner(dummy_queue,
                                                            [sync_op])
        self._gradients_applied = True
        return train_op
示例#13
0
 def simple_fn(x):
   del x
   queue = data_flow_ops.FIFOQueue(10, dtypes.int32, ())
   # Blocking
   return queue.dequeue_many(5)
示例#14
0
def main(args):

    network = importlib.import_module(args.model_def)
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    train_set = facenet.get_dataset(args.data_dir)

    if args.filter_filename:
        train_set = filter_dataset(train_set,
                                   os.path.expanduser(args.filter_filename),
                                   args.filter_percentile,
                                   args.filter_min_nrof_images_per_class)
    nrof_classes = len(train_set)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        pretrained_model = os.path.expanduser(args.pretrained_model)
        print('Pre-trained model: %s' % pretrained_model)

    if args.lfw_dir:
        print('LFW directory: %s' % args.lfw_dir)
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        lfw_paths, actual_issame = lfw.get_paths(
            os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)
        image_list, label_list = facenet.get_image_paths_and_labels(train_set)
        assert len(image_list) > 0, 'The dataset should not be empty'
        labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
        range_size = array_ops.shape(labels)[0]
        index_queue = tf.train.range_input_producer(range_size,
                                                    num_epochs=None,
                                                    shuffle=True,
                                                    seed=None,
                                                    capacity=32)
        index_dequeue_op = index_queue.dequeue_many(
            args.batch_size * args.epoch_size, 'index_dequeue')

        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')

        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')

        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name='image_paths')

        labels_placeholder = tf.placeholder(tf.int64,
                                            shape=(None, 1),
                                            name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=256000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(1, ), (1, )],
                                              shared_name=None,
                                              name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder], name='enqueue_op')

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.cast(
                    tf.image.decode_image(file_contents, channels=3),
                    tf.float32)
                # if args.random_crop:
                #     image = tf.random_crop(image, [args.image_size, args.image_size, 3])
                #     #image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size)
                # else:
                #     image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)
                #image = tf.image.random_brightness(image,max_delta=30)
                #image = tf.image.random_contrast(image,lower=0.8,upper=1.2)
                #image = tf.image.random_saturation(image,lower=0.8,upper=1.2)
                image.set_shape((112, 96, 3))
                images.append(tf.subtract(image, 127.5) * 0.0078125)
            images_and_labels.append([images, label])

        image_batch, label_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size_placeholder,
            shapes=[(112, 96, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'input')
        label_batch = tf.identity(label_batch, 'label_batch')

        print('Total number of classes: %d' % nrof_classes)
        print('Total number of examples: %d' % len(image_list))
        print('Building training graph')

        # Build the inference graph
        prelogits, _ = network.inference(
            image_batch,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay)

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')
        AM_logits = AM_logits_compute(embeddings, label_batch, args,
                                      nrof_classes)
        #AM_logits = Arc_logits(embeddings, label_batch, args, nrof_classes)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_batch,
            logits=AM_logits,
            name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy,
                                            name='cross_entropy')

        #print('test',tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

        for weights in slim.get_variables_by_name('kernel'):
            kernel_regularization = tf.contrib.layers.l2_regularizer(
                args.weight_decay)(weights)
            print(weights)
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                                 kernel_regularization)

        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)

        if args.weight_decay == 0:
            total_loss = tf.add_n([cross_entropy_mean], name='total_loss')
        else:
            total_loss = tf.add_n([cross_entropy_mean] + regularization_losses,
                                  name='total_loss')
        tf.add_to_collection('losses', total_loss)

        #define two saver in case under 'finetuning on different dataset' situation
        saver_load = tf.train.Saver(tf.trainable_variables(), max_to_keep=1)
        saver_save = tf.train.Saver(tf.trainable_variables(), max_to_keep=1)

        #train_op = facenet.train(total_loss, global_step, args.optimizer,
        #    learning_rate, args.moving_average_decay, tf.trainable_variables(), args.log_histograms)
        #train_op = tf.train.AdamOptimizer(learning_rate).minimize(total_loss,global_step = global_step,var_list=tf.trainable_variables())
        train_op = tf.train.MomentumOptimizer(
            learning_rate,
            momentum=0.9).minimize(total_loss,
                                   global_step=global_step,
                                   var_list=tf.trainable_variables())
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():
            if pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                saver_load.restore(sess, pretrained_model)

            print('Running training')
            epoch = 0
            best_accuracy = 0.0

            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                train(args, sess, epoch, image_list, label_list,
                      index_dequeue_op, enqueue_op, image_paths_placeholder,
                      labels_placeholder, learning_rate_placeholder,
                      phase_train_placeholder, batch_size_placeholder,
                      global_step, total_loss, train_op, summary_op,
                      summary_writer, regularization_losses,
                      args.learning_rate_schedule_file)

                print('validation running...')
                if args.lfw_dir:
                    #best_accuracy = evaluate_double(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, embeddings,
                    #	label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer,best_accuracy, saver_save,model_dir,subdir,image_batch,args)

                    best_accuracy = evaluate(
                        sess, enqueue_op, image_paths_placeholder,
                        labels_placeholder, phase_train_placeholder,
                        batch_size_placeholder, embeddings, label_batch,
                        lfw_paths, actual_issame, args.lfw_batch_size,
                        args.lfw_nrof_folds, log_dir, step, summary_writer,
                        best_accuracy, saver_save, model_dir, subdir)
    return model_dir
示例#15
0
  def apply_gradients(self, grads_and_vars, worker_id, global_step=None, name=None, collect_cdfs=False):
    """Apply gradients to variables.
    This contains most of the synchronization implementation and also wraps the
    apply_gradients() from the real optimizer.
    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        compute_gradients().
      global_step: Optional Variable to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the Optimizer constructor.
    Returns:
      train_op: The op to dequeue a token so the replicas can exit this batch
      and start the next one. This is executed by each replica.
    Raises:
      ValueError: If the grads_and_vars is empty.
      ValueError: If global step is not provided, the staleness cannot be
        checked.
    """
    if not grads_and_vars:
      raise ValueError("Must supply at least one variable")

    if global_step is None:
      raise ValueError("Global step is required to check staleness")

    self._global_step = global_step
    train_ops = []
    aggregated_grad = []
    var_list = []

#      worker_id_list_printer = logging_ops.Print(global_step,
#                  [a for a in self._worker_idx_list] + [worker_id] + [global_step],
#                  message="Worker ID list status")
#      train_ops.append(worker_id_list_printer)

    self._local_step = variables.Variable(
        initial_value=0,
        trainable=False,
        collections=[ops.GraphKeys.LOCAL_VARIABLES],
        dtype=global_step.dtype.base_dtype,
        name="sync_rep_local_step")
    self.local_step_init_op = state_ops.assign(self._local_step, global_step._ref())
    chief_init_ops = [self.local_step_init_op]
    self.ready_for_local_init_op = variables.report_uninitialized_variables(
      variables.all_variables())

    # The wait op waits for the current worker to dequeue a token from its respective token queue
    self._wait_op = self._sync_token_queues[worker_id].dequeue()

    # Replicas have to wait until they can get a token from the token queue
    # BEFORE begining to compute gradients.
    with ops.device(global_step.device):
      queue_size = self._sync_token_queues[worker_id].size()
      update_local_step_op = state_ops.assign(self._local_step, global_step._ref())

    # Gradient accum creation
    with ops.name_scope(None, self._name):
      worker_idx_list = []
      worker_counter = 0
      for grad, var in grads_and_vars:
        var_list.append(var)
        tf.logging.info("Grad " + str(grad) + " assigned to " + str(var.device))
        with ops.device(var.device):
          if grad is None:
            continue
          elif isinstance(grad, ops.Tensor):
            grad_accum = data_flow_ops.ConditionalAccumulator(
              grad.dtype,
              shape=var.get_shape(),
              shared_name=var.name + "/grad_accum")
          else:
            if not isinstance(grad, ops.IndexedSlices):
              raise ValueError("Unknown grad type!")
            grad_accum = data_flow_ops.SparseConditionalAccumulator(
              grad.dtype, shape=(), shared_name=var.name + "/grad_accum")
          self._accumulator_list.append((grad_accum, var))

      with ops.device(global_step.device):
        worker_idx_list.append(worker_id)
        worker_counter += 1
        worker_id_list_printer = logging_ops.Print(global_step,
                  [a for a in worker_idx_list] + [worker_id] + [global_step],
                  message="Worker ID list status")
        train_ops.append(worker_id_list_printer)
        counter_printer = logging_ops.Print(global_step, [worker_counter], message="Test for the counter")
        train_ops.append(counter_printer)

      """# Phase 1 gradient computation
      with ops.control_dependencies([update_local_step_op]):
        for index, (grad, var) in enumerate(grads_and_vars):
          with ops.device(var.device):
            if grad is None:
              continue

            elif isinstance(grad, ops.Tensor):
              grad_accum = self._accumulator_list[index][0]

              train_ops.append(grad_accum.apply_grad(grad,
                                                     local_step=self._local_step._ref()))

            else:
              if not isinstance(grad, ops.IndexedSlices):
                raise ValueError("Unknown grad type!")
              grad_accum = self._accumulator_list[index][0]

              train_ops.append(grad_accum.apply_indexed_slices_grad(
                grad, local_step=self._local_step._ref()))"""

      # Phase 1 gradient computation
      with ops.control_dependencies([update_local_step_op]):
        for index, (grad, var) in enumerate(grads_and_vars):
          print_start_op = logging_ops.Print(global_step, [global_step], message="Starting to apply grads for variable %d" % index)
          train_ops.append(print_start_op)
          with ops.device(var.device):
            if grad is None:
              continue

            elif isinstance(grad, ops.Tensor):
              grad_accum = self._accumulator_list[index][0]

              with ops.control_dependencies([print_start_op]):               
                with tf.device("job:worker/task:%d" % worker_id):
                  apply_grad_op = grad_accum.apply_grad(grad,
                                                        local_step=self._local_step._ref())
                  with ops.control_dependencies([apply_grad_op]):
                    accum_sizes_printer = logging_ops.Print(global_step,
                          [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id] + [global_step],
                          message="Accum aggregated status")
                    ret = tf.cond(tf.greater(self._accumulator_list[0][0].num_accumulated(), self._constant_for_comparison),
                           lambda: tf.constant(1), lambda: tf.constant(0))
                    notification_printer = logging_ops.Print(global_step, [ret], message="should stop notification")
                    train_ops.append(notification_printer)
                    '''else:
                      notification_printer = logging_ops.Print(global_step, ["shouldn't stop"], message="shouldn't stop notification")
                      train_ops.append(notification_printer)'''
                    train_ops.append(accum_sizes_printer)
#                    worker_id_list_printer = logging_ops.Print(global_step,
#                          [len(self._worker_list)] + [worker_id] + [global_step],
#                          message="Worker ID list status")
#                    train_ops.append(worker_id_list_printer)
                    finished_print_op = logging_ops.Print(global_step, [global_step], message="Done applying grads for variable %d" % index)
                    train_ops.append(finished_print_op)

            else:
              if not isinstance(grad, ops.IndexedSlices):
                raise ValueError("Unknown grad type!")
              grad_accum = self._accumulator_list[index][0]

              with ops.control_dependencies([print_start_op]):
                with tf.device("job:worker/task:%d" % worker_id):
                  apply_grad_op = grad_accum.apply_indexed_slices_grad(
                    grad, local_step=self._local_step._ref())
                  with ops.control_dependencies([apply_grad_op]):
                    accum_sizes_printer_parse = logging_ops.Print(global_step,
                          [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id] + [global_step],
                          message="Accum aggregated status")
                    ret_sparse = tf.cond(tf.greater(self._accumulator_list[0][0].num_accumulated(), self._constant_for_comparison),
                                  lambda: tf.constant(1), lambda: tf.constant(0))
                    notification_printer_sparse = logging_ops.Print(global_step, [ret_sparse], message="should stop notification")
                    train_ops.append(notification_printer_sparse)
                    #else:
                    #  notification_printer_sparse = logging_ops.Print(global_step, ["shouldn't stop"], message="shouldn't stop notification")
                    #  train_ops.append(notification_printer_sparse)                      
                    train_ops.append(accum_sizes_printer_parse)
#                    worker_id_list_printer_sparse = logging_ops.Print(global_step,
#                          [len(self._worker_list)] + [worker_id] + [global_step],
#                          message="Worker ID list status")
#                    train_ops.append(worker_id_list_printer_sparse)                    
                    finished_print_op = logging_ops.Print(global_step, [global_step], message="Done applying grads for variable %d" % index)
                    train_ops.append(finished_print_op)         
            with ops.control_dependencies([apply_grad_op]):          
              accum_sizes_printer = logging_ops.Print(global_step,
                                                   [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id] + [global_step],
                                                   message="Accum aggregated status on ps")
              train_ops.append(accum_sizes_printer)

      # Phase 2 gradient applying
      for index, (grad, var) in enumerate(grads_and_vars):
        with ops.device(var.device):
          grad_accum = self._accumulator_list[index][0]
          if grad is None:
            aggregated_grad.append(None)
          elif isinstance(grad, ops.Tensor):
            if collect_cdfs:
              aggregated_grad.append(grad_accum.take_grad(self._total_num_replicas))
            else:
              aggregated_grad.append(grad_accum.take_grad(1))
          else:
            if collect_cdfs:
              aggregated_grad.append(grad_accum.take_grad(self._total_num_replicas))
            else:
              aggregated_grad.append(grad_accum.take_indexed_slices_grad(1))

      aggregated_grads_and_vars = zip(aggregated_grad, var_list)

      # Some debug operations
      self.print_sizes = logging_ops.Print(global_step, [self._sync_token_queues[i].size() for i in range(self._total_num_replicas)], message="queue sizes")
      self.print_accum_sizes = logging_ops.Print(self._local_step,
                                                 [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id],
                                                 message="Accum sizes")
      self.print_local_step = logging_ops.Print(self._local_step, [self._local_step._ref(), global_step._ref()], message="local vs global step")

      # sync_op will be assigned to the same device as the global step.
      with ops.device(global_step.device), ops.name_scope(""):
        with ops.control_dependencies([self.print_accum_sizes]):
          update_op = self._opt.apply_gradients(aggregated_grads_and_vars, global_step)
          self._update_op = update_op
          with ops.control_dependencies([update_op]):
            sync_op = []
            for cur_worker_id in range(self._total_num_replicas):
              sync_op.append(self._sync_token_queues[cur_worker_id].enqueue(global_step))
            sync_op = control_flow_ops.group(*(sync_op))

        # dummy_queue is passed to the queue runner. Don't use the real queues
        # because the queue runner doesn't automatically reopen it once it
        # closed queues in PS devices.
        dummy_queue = (
            data_flow_ops.FIFOQueue(1,
                                    types_pb2.DT_INT32,
                                    shapes=(),
                                    shared_name="dummy_queue"))

        self._chief_queue_runner = queue_runner.QueueRunner(dummy_queue,
                                                            [sync_op])

      with ops.device(global_step.device), ops.name_scope(""):
        with ops.control_dependencies(train_ops):
          # Worker finished applying gradients. Add token to phase1_finished_queue
          train_op = logging_ops.Print(self._local_step._ref(),
                                       [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id] + [global_step],
                                       message="Finished worker updates",
                                       name="FinishedWorkerUpdatesPrint")

      for accum, var in self._accumulator_list:
        with ops.device(var.device):
          chief_init_ops.append(
              accum.set_global_step(
                  global_step, name="SetGlobalStep"))
      self.chief_init_op = control_flow_ops.group(*(chief_init_ops))
      self._gradients_applied = True

      return train_op
示例#16
0
def main(args):
    """basic info"""
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), "logs")
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.logs_base_dir), "models")
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    argumentfile = os.path.join(log_dir, 'arguments.txt')
    utils.write_arguments_to_file(args, argumentfile)
    src_path, _ = os.path.split(os.path.realpath(__file__))
    # utils.store_revision_info(src_path, log_dir, ' '.join(sys.argv))
    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    if args.pretrained_model:
        print('Pre-trained model: %s' %
              os.path.expanduser(args.pretrained_model))
    """trainset"""
    # np.random.seed(seed=args.seed)
    train_set = utils.get_dataset(args.data_path, classnum, argumentfile)
    """开启图"""
    with tf.Graph().as_default():
        # tf.set_random_seed(args.seed)
        """占位符"""
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name='image_paths')
        labels_placeholder = tf.placeholder(tf.int64,
                                            shape=(None, 1),
                                            name='labels')
        """通过队列读取批量数据"""
        input_queue = data_flow_ops.FIFOQueue(capacity=1000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(1, ), (1, )],
                                              shared_name=None,
                                              name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder])

        nrof_preprocess_threads = 17
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()  # 路径、标签出队
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(
                    file_contents,
                    channels=1)  # uint8 with shape [h, w, num_channels]

                image = tf.image.crop_to_bounding_box(image, 0, 0, 850, 2100)
                image = tf.image.resize_images(
                    image, (args.image_size_h, args.image_size_w), method=1)

                if args.transform:
                    if random.random() < 1:
                        height, width, d = image.shape
                        mask = utils.geterasebox(height, width)
                        image = tf.multiply(image, mask)

                image = tf.cast(image, tf.float32)
                images.append(image)
            images_and_labels.append([images, label])

        image_batch, labels_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size_placeholder,
            shapes=[(args.image_size_h, args.image_size_w, 1), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=False)
        tf.summary.image("trainimage", image_batch, max_outputs=16)

        # 阶梯下降学习率
        with tf.name_scope("lr"):
            global_step = tf.Variable(0, name='global_step', trainable=False)
            learning_rate = tf.train.piecewise_constant(global_step,
                                                        boundaries=boundaries,
                                                        values=learning_rates)
        """运行网络 输出特征向量"""
        model = DentNet_ATT.DentNet_ATT(image_batch, classnum, True)
        embeddings = model.dropout2
        Cosin_logits = cosineface_losses(embedding=embeddings,
                                         labels=labels_batch,
                                         out_num=classnum)
        # loss
        with tf.name_scope("loss"):
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels_batch,
                logits=Cosin_logits,
                name='cross_entropy_per_example')
            cross_entropy_mean = tf.reduce_mean(cross_entropy,
                                                name='cross_entropy')
            L2_loss = tf.add_n(
                [tf.nn.l2_loss(var)
                 for var in tf.trainable_variables()]) * 0.001  #0.001需要对比设置
            total_loss = tf.add_n([cross_entropy_mean + L2_loss * weightdecay],
                                  name='total_loss')

        # optimizer
        with tf.name_scope('optimizer'):
            if args.opt == 'ADAGRAD':
                optimizer = tf.train.AdagradOptimizer(learning_rate)
            elif args.opt == 'ADADELTA':
                optimizer = tf.train.AdadeltaOptimizer(learning_rate,
                                                       rho=0.9,
                                                       epsilon=1e-6)
            elif args.opt == 'ADAM':
                optimizer = tf.train.AdamOptimizer(learning_rate,
                                                   beta1=0.9,
                                                   beta2=0.999,
                                                   epsilon=0.1)
            elif args.opt == 'RMSPROP':
                optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                      decay=0.9,
                                                      momentum=0.9,
                                                      epsilon=1.0)
            elif args.opt == 'MOM':
                optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                       0.95,
                                                       use_nesterov=True)
            else:
                raise ValueError('Invalid optimization algorithm')
            train_op = optimizer.minimize(total_loss, global_step=global_step)

        # accuracy 计算准确度
        with tf.name_scope("total_accuracy"):
            prob = tf.nn.softmax(Cosin_logits)
            one_hot_label = tf.one_hot(labels_batch, classnum)
            correct_pred = tf.equal(tf.argmax(prob, 1),
                                    tf.argmax(one_hot_label, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        # Tensorboard
        tf.summary.scalar('loss', total_loss)
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('lr', learning_rate)

        saver = tf.train.Saver(max_to_keep=200)
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(log_dir)
        """开启session"""
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer(),
                     feed_dict={phase_train_placeholder: True})
            sess.run(tf.local_variables_initializer(),
                     feed_dict={phase_train_placeholder: True})
            coord = tf.train.Coordinator()
            tf.train.start_queue_runners(coord=coord, sess=sess)

            if args.pretrained_model:
                print('Restoring pretrained model: %s' % args.pretrained_model)
                saver.restore(sess, os.path.expanduser(args.pretrained_model))
            if args.param_num:
                count_params()
            summary_writer.add_graph(sess.graph)  # 把图的数据写入

            # Training
            epoch = 0
            while epoch < args.max_nrof_epochs:
                batch_number = 0  # 一代内的步数
                while batch_number < args.epoch_size:
                    """选择训练样本"""
                    thestep = sess.run(global_step)
                    image_paths, num_per_class, batch_truelabel = sample_people(
                        train_set, args.people_per_batch,
                        args.images_per_person, log_dir, thestep)

                    nrof_examples = args.people_per_batch * args.images_per_person
                    image_paths_array = np.reshape(
                        np.expand_dims(np.array(image_paths), 1), (-1, 1))
                    sess.run(
                        enqueue_op, {
                            image_paths_placeholder: image_paths_array,
                            labels_placeholder: batch_truelabel
                        })
                    nrof_batches = int(np.ceil(nrof_examples /
                                               args.batch_size))
                    """计算特征向量"""
                    for i in range(nrof_batches):
                        batch_size = min(nrof_examples - i * args.batch_size,
                                         args.batch_size)
                        emb, lab, err, _, ministep, acc, s, lrr = sess.run(
                            [
                                embeddings, labels_batch, total_loss, train_op,
                                global_step, accuracy, summary_op,
                                learning_rate
                            ],
                            feed_dict={
                                batch_size_placeholder: batch_size,
                                phase_train_placeholder: True
                            })
                        if (ministep + 1) % 100 == 0:
                            timenow = str(time.strftime('%Y-%m-%d %H:%M:%S'))
                            print(timenow, "step:", ministep + 1, "lr:", lrr,
                                  "Loss:", err, "TrainAcc:", acc)
                            summary_writer.add_summary(s, ministep)

                        # save model
                        checkpoint_name = os.path.join(
                            model_dir,
                            'models-step' + str(ministep + 1) + '.ckpt')
                        if (ministep + 1) % 1000 == 0:
                            print("Saving checkpoint of model:",
                                  checkpoint_name)
                            saver.save(sess, checkpoint_name)
                    batch_number += 1
                epoch += 1
    return
def parallel_read(data_sources,
                  reader_class,
                  num_epochs=None,
                  num_readers=4,
                  reader_kwargs=None,
                  shuffle=True,
                  dtypes=None,
                  capacity=256,
                  min_after_dequeue=128,
                  seed=None,
                  scope=None):
  """Reads multiple records in parallel from data_sources using n readers.

  It uses a ParallelReader to read from multiple files in parallel using
  multiple readers created using `reader_class` with `reader_kwargs'.

  If shuffle is True the common_queue would be a RandomShuffleQueue otherwise
  it would be a FIFOQueue.

  Usage:
      data_sources = ['path_to/train*']
      key, value = parallel_read(data_sources, tf.CSVReader, num_readers=4)

  Args:
    data_sources: a list/tuple of files or the location of the data, i.e.
      /path/to/train@128, /path/to/train* or /tmp/.../train*
    reader_class: one of the io_ops.ReaderBase subclasses ex: TFRecordReader
    num_epochs: The number of times each data source is read. If left as None,
        the data will be cycled through indefinitely.
    num_readers: a integer, number of Readers to create.
    reader_kwargs: an optional dict, of kwargs for the reader.
    shuffle: boolean, wether should shuffle the files and the records by using
      RandomShuffleQueue as common_queue.
    dtypes:  A list of types.  The length of dtypes must equal the number
        of elements in each record. If it is None it will default to
        [tf.string, tf.string] for (key, value).
    capacity: integer, capacity of the common_queue.
    min_after_dequeue: integer, minimum number of records in the common_queue
      after dequeue. Needed for a good shuffle.
    seed: A seed for RandomShuffleQueue.
    scope: Optional name scope for the ops.

  Returns:
    key, value: a tuple of keys and values from the data_source.
  """
  data_files = get_data_files(data_sources)
  with ops.name_scope(scope, 'parallel_read'):
    filename_queue = tf_input.string_input_producer(
        data_files, num_epochs=num_epochs, shuffle=shuffle, seed=seed,
        name='filenames')
    dtypes = dtypes or [tf_dtypes.string, tf_dtypes.string]
    if shuffle:
      common_queue = data_flow_ops.RandomShuffleQueue(
          capacity=capacity,
          min_after_dequeue=min_after_dequeue,
          dtypes=dtypes,
          seed=seed,
          name='common_queue')
    else:
      common_queue = data_flow_ops.FIFOQueue(
          capacity=capacity, dtypes=dtypes, name='common_queue')

    summary.scalar('fraction_of_%d_full' % capacity,
                   math_ops.to_float(common_queue.size()) * (1. / capacity))

    return ParallelReader(
        reader_class,
        common_queue,
        num_readers=num_readers,
        reader_kwargs=reader_kwargs).read(filename_queue)
示例#18
0
def main(args):

    network = importlib.import_module(args.model_def)
    image_size = (args.image_size, args.image_size)

    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir),
                           subdir)  #日志保存地址
    if not os.path.isdir(
            log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(
            model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Write arguments to a text file
    facenet.write_arguments_to_file(args, os.path.join(log_dir,
                                                       'arguments.txt'))

    # Store some git revision info in a text file in the log directory
    # os.path.realpath(__file__)代表返回当前模块真实路径
    # os.path.split(os.path.realpath(__file__))代表返回路径的目录和文件名(元组形式)
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    # dataset是列表,列表元素为每一个类的ImageClass对象,定义见facenet.py文件
    dataset = facenet.get_dataset(args.data_dir)
    if args.filter_filename:
        dataset = filter_dataset(dataset,
                                 os.path.expanduser(args.filter_filename),
                                 args.filter_percentile,
                                 args.filter_min_nrof_images_per_class)

    # 划分训练集和测试集,train_set和val_set的形式和dataset一样
    if args.validation_set_split_ratio > 0.0:
        train_set, val_set = facenet.split_dataset(
            dataset, args.validation_set_split_ratio,
            args.min_nrof_val_images_per_class, 'SPLIT_IMAGES')
    else:
        train_set, val_set = dataset, []

    nrof_classes = len(train_set)  #类目数(即有多少人)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        pretrained_model = os.path.expanduser(args.pretrained_model)
        print('Pre-trained model: %s' % pretrained_model)

    if args.lfw_dir:
        print('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing
        # pairs也是列表,子元素也是列表,每个子列表包含pairs.txt的每一行
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        #lfw_paths为两两比较的列表,列表的元素为有2个元素的元祖,actual_issame为列表,表示每个元素是否为同一个人
        lfw_paths, actual_issame = lfw.get_paths(
            os.path.expanduser(args.lfw_dir), pairs)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        # global_step对应的是全局批的个数,根据这个参数可以更新学习率
        global_step = tf.Variable(0, trainable=False)

        # Get a list of image paths and their labels
        image_list, label_list = facenet.get_image_paths_and_labels(train_set)
        assert len(image_list) > 0, 'The training set should not be empty'

        val_image_list, val_label_list = facenet.get_image_paths_and_labels(
            val_set)

        # Create a queue that produces indices into the image_list and label_list
        labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
        # range_size的大小和样本个数一样
        range_size = array_ops.shape(labels)[0]
        #QueueRunner:保存的是队列中的入列操作,保存在一个list当中,其中每个enqueue运行在一个线程当中
        #range_input_producer:返回的是一个队列,队列中有打乱的整数,范围是从0到range size
        #range_size大小和总的样本个数一样
        #并将一个QueueRunner添加到当前图的QUEUE_RUNNER集合中
        index_queue = tf.train.range_input_producer(
            range_size, num_epochs=None, shuffle=True, seed=None,
            capacity=32)  #capacity代表队列容量

        # 返回的是一个出列操作,每次出列一个epoch需要用到的样本个数
        index_dequeue_op = index_queue.dequeue_many(
            args.batch_size * args.epoch_size, 'index_dequeue')

        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name='image_paths')
        labels_placeholder = tf.placeholder(tf.int32,
                                            shape=(None, 1),
                                            name='labels')
        control_placeholder = tf.placeholder(tf.int32,
                                             shape=(None, 1),
                                             name='control')

        # 上面的队列是一个样本索引的出队队列,只用来出列
        # 用来每次出列一个epoch中用到的样本
        # 这里是第二个队列,这个队列用来入列,每个元素的大小为shape=[(1,),(1,),(1,)]
        nrof_preprocess_threads = 4
        # 这里的shape代表每一个元素的维度
        input_queue = data_flow_ops.FIFOQueue(
            capacity=600000,
            dtypes=[tf.string, tf.int32, tf.int32],
            shapes=[(1, ), (1, ), (1, )],
            shared_name=None,
            name=None)

        # 这时一个入列的操作,这个操作将在session run的时候用到
        # 每次入列的是image_paths_placeholder, labels_placeholder对
        # 注意,这里只有2个队列,一个用来出列打乱的元素序号
        # 一个根据对应的需要读取指定的文件
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder, control_placeholder],
            name='enqueue_op')
        image_batch, label_batch = facenet.create_input_pipeline(
            input_queue, image_size, nrof_preprocess_threads,
            batch_size_placeholder)

        # image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        label_batch = tf.identity(label_batch, 'label_batch')

        print('Total number of classes: %d' % nrof_classes)
        print('Total number of examples: %d' % len(image_list))
        print('Building training graph')

        # Build the inference graph
        prelogits, _ = network.inference(
            image_batch,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay)

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')
        # arcface_logits = arcface_logits_compute(embeddings, label_batch, args, nrof_classes)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)
        with tf.variable_scope('Logits'):
            arcface_logits = arcface_logits_compute(embeddings, label_batch,
                                                    args, nrof_classes)
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=label_batch,
                logits=arcface_logits,
                name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy,
                                            name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

        correct_prediction = tf.cast(
            tf.equal(tf.argmax(arcface_logits, 1),
                     tf.cast(label_batch, tf.int64)), tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

        # for weights in slim.get_variables_by_name('embedding_weights'):
        #    weight_regularization = tf.contrib.layers.l2_regularizer(args.weight_decay)(weights)
        #    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, weight_regularization)

        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)

        if args.weight_decay == 0:
            total_loss = tf.add_n([cross_entropy_mean], name='total_loss')
        else:
            total_loss = tf.add_n([cross_entropy_mean] + regularization_losses,
                                  name='total_loss')

        #define two saver in case under 'finetuning on different dataset' situation
        saver_save = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        train_op = facenet.train(total_loss, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay,
                                 tf.global_variables(), args.log_histograms)
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():
            if pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                facenet.load_model(pretrained_model)

            print('Running arcface training')

            best_accuracy = 0.0
            for epoch in range(1, args.max_nrof_epochs + 1):
                step = sess.run(global_step, feed_dict=None)
                cont = train(
                    args, sess, epoch, image_list, label_list,
                    index_dequeue_op, enqueue_op, image_paths_placeholder,
                    labels_placeholder, learning_rate_placeholder,
                    phase_train_placeholder, batch_size_placeholder,
                    global_step, total_loss, accuracy, train_op, summary_op,
                    summary_writer, regularization_losses,
                    args.learning_rate_schedule_file, args.random_rotate,
                    args.random_crop, args.random_flip,
                    args.use_fixed_image_standardization, control_placeholder)

                if not cont:
                    break
                print('validation running...')
                if args.lfw_dir:
                    best_accuracy = evaluate(
                        sess, enqueue_op, image_paths_placeholder,
                        labels_placeholder, phase_train_placeholder,
                        batch_size_placeholder, control_placeholder,
                        embeddings, label_batch, lfw_paths, actual_issame,
                        args.lfw_batch_size, args.lfw_nrof_folds, log_dir,
                        step, summary_writer, best_accuracy, saver_save,
                        model_dir, subdir, args.lfw_subtract_mean,
                        args.lfw_use_flipped_images,
                        args.use_fixed_image_standardization,
                        args.lfw_distance_metric)
    return model_dir
示例#19
0
def _custom_shuffle_batch(tensors,
                          batch_size,
                          capacity,
                          min_after_dequeue,
                          keep_input,
                          num_threads=1,
                          seed=None,
                          enqueue_many=False,
                          shapes=None,
                          allow_smaller_final_batch=False,
                          shared_name=None,
                          name=None,
                          shuffle=False):
    """Helper function for `shuffle_batch` and `maybe_shuffle_batch`."""

    if context.executing_eagerly():
        raise ValueError(
            "Input pipelines based on Queues are not supported when eager execution"
            " is enabled. Please use tf.data to ingest data into your model"
            " instead.")
    tensor_list = tf_input._as_tensor_list(tensors)
    with ops.name_scope(name, "shuffle_batch",
                        list(tensor_list) + [keep_input]) as name:
        if capacity <= min_after_dequeue:
            raise ValueError(
                "capacity %d must be bigger than min_after_dequeue %d." %
                (capacity, min_after_dequeue))
        tensor_list = tf_input._validate(tensor_list)
        keep_input = tf_input._validate_keep_input(keep_input, enqueue_many)
        tensor_list, sparse_info = tf_input._store_sparse_tensors(
            tensor_list, enqueue_many, keep_input)
        types = tf_input._dtypes([tensor_list])
        shapes = tf_input._shapes([tensor_list], shapes, enqueue_many)

        ###########################################################################################
        if shuffle:
            queue = data_flow_ops.RandomShuffleQueue(
                capacity=capacity,
                min_after_dequeue=min_after_dequeue,
                seed=seed,
                dtypes=types,
                shapes=shapes,
                shared_name=shared_name)
        else:
            # Remove shuffle property
            queue = data_flow_ops.FIFOQueue(capacity=capacity,
                                            dtypes=types,
                                            shapes=shapes,
                                            shared_name=shared_name)
        ###########################################################################################

        tf_input._enqueue(queue, tensor_list, num_threads, enqueue_many,
                          keep_input)
        full = (math_ops.to_float(
            math_ops.maximum(0,
                             queue.size() - min_after_dequeue)) *
                (1. / (capacity - min_after_dequeue)))

        summary_name = ("fraction_over_%d_of_%d_full" %
                        (min_after_dequeue, capacity - min_after_dequeue))
        summary.scalar(summary_name, full)

        if allow_smaller_final_batch:
            dequeued = queue.dequeue_up_to(batch_size, name=name)
        else:
            dequeued = queue.dequeue_many(batch_size, name=name)

        dequeued = tf_input._restore_sparse_tensors(dequeued, sparse_info)

        return tf_input._as_original_type(tensors, dequeued)
示例#20
0
def main(args):
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(
            log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(
            model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    train_set = facenet.get_dataset(args.data_dir)
    logging.info('Model directory: %s' % model_dir)
    logging.info('Log directory: %s' % log_dir)
    logging.info('Train set size:%d' % len(train_set))

    # Build graph
    logging.info('building graph...')
    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)
        ae = autoencoder()
        optimizer = tf.train.AdamOptimizer(args.learning_rate).minimize(
            ae['cost'])

        sess = tf.Session()
        sess.run(tf.global_variables_initializer())

        datasets = get_dataset(args.data_dir)

        batch_size = args.batch_size
        n_epochs = args.epoch_size

        batch_xs = []
        batch_num = 0

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=100)

        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name='image_paths')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string],
                                              shapes=[(1, )],
                                              shared_name=None,
                                              name=None)
        enqueue_op = input_queue.enqueue_many([image_paths_placeholder])

        nrof_preprocess_threads = 4
        images = []
        for _ in range(nrof_preprocess_threads):
            filenames = input_queue.dequeue()
            images_ = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_jpeg(file_contents)
                image.set_shape((args.image_size, args.image_size, 3))
                images_.append(tf.image.per_image_standardization(image))
            images.append([images_])

        image_batch = tf.train.batch_join(
            images,
            batch_size=args.batch_size,
            shapes=[(args.image_size, args.image_size, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)

        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')

        sess.run(enqueue_op, {image_paths_placeholder: datasets})

        logging.info('training ...')
        logging.info('dataset len:%d' % len(datasets))
        for epoch_i in range(n_epochs):
            for idx in range(len(datasets)):
                filename = datasets[idx]
                file_contents = tf.read_file(filename)
                image = tf.image.decode_jpeg(file_contents)
                # image = tf.subtract(image, 0.5)
                # image = tf.multiply(image, 2.0)
                image = tf.cast(image, tf.float32)
                batch_xs.append(sess.run(image))
                if (idx + 1) % batch_size == 0:

                    train = np.array([img for img in batch_xs])
                    if batch_num % 10 == 0:
                        _ae, _ = sess.run([ae, optimizer],
                                          feed_dict={ae['x']: train})
                        x_ = _ae['x']
                        z_ = _ae['z']
                        y_ = _ae['y']
                        logging.info('type x_ : %s' % type(z_))
                        logging.info('%s, %s, %s' %
                                     (x_.shape, z_.shape, y_.shape))
                        cost_ = _ae['cost']
                        logging.info('epoch:%d, loss:%s' %
                                     (epoch_i, str(cost_)))
                    else:
                        sess.run(optimizer, feed_dict={ae['x']: train})

                    batch_num += 1
                    batch_xs[:] = []
                    logging.info('batch num:%d' % batch_num)

                # if batch_num % 100 == 0:
                #     pass

            # save model
            logging.info('Saving variables')
            start_time = time.time()
            checkpoint_path = os.path.join(model_dir, 'model-%s.ckpt' % subdir)
            saver.save(sess,
                       checkpoint_path,
                       global_step=batch_num,
                       write_meta_graph=False)
            save_time_variables = time.time() - start_time
            logging.info('Variables saved in %.2f seconds' %
                         save_time_variables)
            metagraph_filename = os.path.join(model_dir,
                                              'model-%s.meta' % subdir)
            save_time_metagraph = 0
            if not os.path.exists(metagraph_filename):
                logging.info('Saving metagraph')
                start_time = time.time()
                saver.export_meta_graph(metagraph_filename)
                save_time_metagraph = time.time() - start_time
                logging.info('Metagraph saved in %.2f seconds' %
                             save_time_metagraph)
示例#21
0
def main(args):

    with tf.Graph().as_default():

        with tf.Session() as sess:

            # Read the file containing the pairs used for testing
            pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))

            # Get the paths for the corresponding images
            paths, actual_issame = lfw.get_paths(
                os.path.expanduser(args.lfw_dir), pairs)

            image_paths_placeholder = tf.placeholder(tf.string,
                                                     shape=(None, 1),
                                                     name='image_paths')
            labels_placeholder = tf.placeholder(tf.int32,
                                                shape=(None, 1),
                                                name='labels')
            batch_size_placeholder = tf.placeholder(tf.int32,
                                                    name='batch_size')
            control_placeholder = tf.placeholder(tf.int32,
                                                 shape=(None, 1),
                                                 name='control')
            phase_train_placeholder = tf.placeholder(tf.bool,
                                                     name='phase_train')

            nrof_preprocess_threads = 4
            image_size = (args.image_size, args.image_size)
            eval_input_queue = data_flow_ops.FIFOQueue(
                capacity=2000000,
                dtypes=[tf.string, tf.int32, tf.int32],
                shapes=[(1, ), (1, ), (1, )],
                shared_name=None,
                name=None)
            eval_enqueue_op = eval_input_queue.enqueue_many(
                [
                    image_paths_placeholder, labels_placeholder,
                    control_placeholder
                ],
                name='eval_enqueue_op')
            image_batch, label_batch = facenet.create_input_pipeline(
                eval_input_queue, image_size, nrof_preprocess_threads,
                batch_size_placeholder)

            # Load the model
            input_map = {
                'image_batch': image_batch,
                'label_batch': label_batch,
                'phase_train': phase_train_placeholder
            }
            facenet.load_model(args.model, input_map=input_map)

            # Get output tensor
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            #
            coord = tf.train.Coordinator()
            tf.train.start_queue_runners(coord=coord, sess=sess)

            evaluate(sess, eval_enqueue_op, image_paths_placeholder,
                     labels_placeholder, phase_train_placeholder,
                     batch_size_placeholder, control_placeholder, embeddings,
                     label_batch, paths, actual_issame, args.lfw_batch_size,
                     args.lfw_nrof_folds, args.distance_metric,
                     args.subtract_mean, args.use_flipped_images,
                     args.use_fixed_image_standardization)
示例#22
0
def input_producer(input_tensor,
                   element_shape=None,
                   num_epochs=None,
                   shuffle=True,
                   seed=None,
                   capacity=32,
                   shared_name=None,
                   summary_name=None,
                   name=None,
                   cancel_op=None):
    """Output the rows of `input_tensor` to a queue for an input pipeline.

  Note: if `num_epochs` is not `None`, this function creates local counter
  `epochs`. Use `local_variable_initializer()` to initialize local variables.

  Args:
    input_tensor: A tensor with the rows to produce. Must be at least
      one-dimensional. Must either have a fully-defined shape, or
      `element_shape` must be defined.
    element_shape: (Optional.) A `TensorShape` representing the shape of a
      row of `input_tensor`, if it cannot be inferred.
    num_epochs: (Optional.) An integer. If specified `input_producer` produces
      each row of `input_tensor` `num_epochs` times before generating an
      `OutOfRange` error. If not specified, `input_producer` can cycle through
      the rows of `input_tensor` an unlimited number of times.
    shuffle: (Optional.) A boolean. If true, the rows are randomly shuffled
      within each epoch.
    seed: (Optional.) An integer. The seed to use if `shuffle` is true.
    capacity: (Optional.) The capacity of the queue to be used for buffering
      the input.
    shared_name: (Optional.) If set, this queue will be shared under the given
      name across multiple sessions.
    summary_name: (Optional.) If set, a scalar summary for the current queue
      size will be generated, using this name as part of the tag.
    name: (Optional.) A name for queue.
    cancel_op: (Optional.) Cancel op for the queue

  Returns:
    A queue with the output rows.  A `QueueRunner` for the queue is
    added to the current `QUEUE_RUNNER` collection of the current
    graph.

  Raises:
    ValueError: If the shape of the input cannot be inferred from the arguments.
  """
    with ops.name_scope(name, "input_producer", [input_tensor]):
        input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor")
        element_shape = input_tensor.get_shape()[1:].merge_with(element_shape)
        if not element_shape.is_fully_defined():
            raise ValueError(
                "Either `input_tensor` must have a fully defined shape "
                "or `element_shape` must be specified")

        if shuffle:
            input_tensor = random_ops.random_shuffle(input_tensor, seed=seed)

        input_tensor = limit_epochs(input_tensor, num_epochs)

        q = data_flow_ops.FIFOQueue(capacity=capacity,
                                    dtypes=[input_tensor.dtype.base_dtype],
                                    shapes=[element_shape],
                                    shared_name=shared_name,
                                    name=name)
        enq = q.enqueue_many([input_tensor])
        queue_runner.add_queue_runner(
            queue_runner.QueueRunner(q, [enq], cancel_op=cancel_op))
        if summary_name is not None:
            summary.scalar(
                "queue/%s/%s" % (q.name, summary_name),
                math_ops.cast(q.size(), dtypes.float32) * (1. / capacity))
        return q
示例#23
0
def main(args):

    network = importlib.import_module(args.model_def)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Placeholder for the learning rate
        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')

        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')

        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 3),
                                                 name='image_paths')
        labels_placeholder = tf.placeholder(tf.int64,
                                            shape=(None, 3),
                                            name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(3, ), (3, )],
                                              shared_name=None,
                                              name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder])

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)

                if args.random_crop:
                    image = tf.random_crop(
                        image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(
                        image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)

                #pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels.append([images, label])

        image_batch, labels_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        labels_batch = tf.identity(labels_batch, 'label_batch')

        # Build the inference graph
        prelogits, _ = network.inference(
            image_batch,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay)

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')
        # Split embeddings into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embeddings, [-1, 3, args.embedding_size]), 3, 1)
        triplet_loss = facenet.triplet_loss(anchor, positive, negative,
                                            args.alpha)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the total losses
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([triplet_loss] + regularization_losses,
                              name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay,
                                 tf.global_variables())

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # Initialize variables
        sess.run(tf.global_variables_initializer(),
                 feed_dict={phase_train_placeholder: True})
        sess.run(tf.local_variables_initializer(),
                 feed_dict={phase_train_placeholder: True})

        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():

            if args.pretrained_model:
                print('Restoring pretrained model: %s' % args.pretrained_model)
                saver.restore(sess, os.path.expanduser(args.pretrained_model))

            # Training and validation loop
            epoch = 0
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, train_set, epoch, image_paths_placeholder,
                      labels_placeholder, labels_batch, batch_size_placeholder,
                      learning_rate_placeholder, phase_train_placeholder,
                      enqueue_op, input_queue, global_step, embeddings,
                      total_loss, train_op, summary_op, summary_writer,
                      args.learning_rate_schedule_file, args.embedding_size,
                      anchor, positive, negative, triplet_loss)

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, step)

                # Evaluate on LFW
                if args.lfw_dir:
                    evaluate(sess, lfw_paths, embeddings, labels_batch,
                             image_paths_placeholder, labels_placeholder,
                             batch_size_placeholder, learning_rate_placeholder,
                             phase_train_placeholder, enqueue_op,
                             actual_issame, args.batch_size,
                             args.lfw_nrof_folds, log_dir, step,
                             summary_writer, args.embedding_size)

    return model_dir
示例#24
0
def read_keyed_batch_features(file_pattern,
                              batch_size,
                              features,
                              reader,
                              randomize_input=True,
                              num_epochs=None,
                              queue_capacity=10000,
                              reader_num_threads=1,
                              feature_queue_capacity=100,
                              num_queue_runners=2,
                              parser_num_threads=None,
                              name=None):
    """Adds operations to read, queue, batch and parse `Example` protos.

  Given file pattern (or list of files), will setup a queue for file names,
  read `Example` proto using provided `reader`, use batch queue to create
  batches of examples of size `batch_size` and parse example given `features`
  specification.

  All queue runners are added to the queue runners collection, and may be
  started via `start_queue_runners`.

  All ops are added to the default graph.

  Args:
    file_pattern: List of files or pattern of file paths containing
        `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` specifying the batch size to use.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    randomize_input: Whether the input should be randomized.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever. NOTE - If specified,
      creates a variable that must be initialized, so call
      tf.initialize_local_variables() as shown in the tests.
    queue_capacity: Capacity for input queue.
    reader_num_threads: The number of threads to read examples.
    feature_queue_capacity: Capacity of the parsed features queue.
    num_queue_runners: Number of queue runners to start for the feature queue,
      Adding multiple queue runners for the parsed example queue helps maintain
      a full queue when the subsequent computations overall are cheaper than
      parsing.
    parser_num_threads: (Deprecated) The number of threads to parse examples.
    name: Name of resulting op.

  Returns:
    A dict of `Tensor` or `SparseTensor` objects for each in `features`.
    If `keep_keys` is `True`, returns tuple of string `Tensor` and above dict.

  Raises:
    ValueError: for invalid inputs.
  """

    if parser_num_threads:
        # TODO(sibyl-Aix6ihai): Remove on Sept 3 2016.
        logging.warning(
            'parser_num_threads is deprecated, it will be removed on'
            'Sept 3 2016')
    with ops.name_scope(name, 'read_batch_features', [file_pattern]) as scope:
        keys, examples = read_keyed_batch_examples(
            file_pattern,
            batch_size,
            reader,
            randomize_input=randomize_input,
            num_epochs=num_epochs,
            queue_capacity=queue_capacity,
            num_threads=reader_num_threads,
            read_batch_size=batch_size,
            name=scope)

        # Parse the example.
        feature_map = parsing_ops.parse_example(examples, features)

        # Lets also add preprocessed tensors into the queue types for each item of
        # the queue.
        tensors_to_enqueue = []
        # Each entry contains the key, and a boolean which indicates whether the
        # tensor was a sparse tensor.
        tensors_mapping = []
        # TODO(sibyl-Aix6ihai): Most of the functionality here is about pushing sparse
        # tensors into a queue. This could be taken care in somewhere else so others
        # can reuse it. Also, QueueBase maybe extended to handle sparse tensors
        # directly.
        for key in sorted(feature_map.keys()):
            tensor = feature_map[key]
            if isinstance(tensor, ops.SparseTensor):
                tensors_mapping.append((key, True))
                tensors_to_enqueue.extend(
                    [tensor.indices, tensor.values, tensor.shape])
            else:
                tensors_mapping.append((key, False))
                tensors_to_enqueue.append(tensor)
        tensors_to_enqueue.append(keys)

        queue_dtypes = [x.dtype for x in tensors_to_enqueue]
        input_queue = data_flow_ops.FIFOQueue(feature_queue_capacity,
                                              queue_dtypes)

        # Add a summary op to debug if our feature queue is full or not.
        logging_ops.scalar_summary(
            'queue/parsed_features/%s/fraction_of_%d_full' %
            (input_queue.name, feature_queue_capacity),
            math_ops.cast(input_queue.size(), dtypes.float32) *
            (1. / feature_queue_capacity))

        # Add multiple queue runners so that the queue is always full. Adding more
        # than two queue-runners may hog the cpu on the worker to fill up the queue.
        for _ in range(num_queue_runners):
            queue_runner.add_queue_runner(
                queue_runner.QueueRunner(
                    input_queue, [input_queue.enqueue(tensors_to_enqueue)]))

        dequeued_tensors = input_queue.dequeue()

        # Reset shapes on dequeued tensors.
        for i in range(len(tensors_to_enqueue)):
            dequeued_tensors[i].set_shape(tensors_to_enqueue[i].get_shape())

        # Recreate feature mapping according to the original dictionary.
        dequeued_feature_map = {}
        index = 0
        for key, is_sparse_tensor in tensors_mapping:
            if is_sparse_tensor:
                # Three tensors are (indices, values, shape).
                dequeued_feature_map[key] = ops.SparseTensor(
                    dequeued_tensors[index], dequeued_tensors[index + 1],
                    dequeued_tensors[index + 2])
                index += 3
            else:
                dequeued_feature_map[key] = dequeued_tensors[index]
                index += 1
        dequeued_keys = dequeued_tensors[-1]

        return dequeued_keys, dequeued_feature_map
示例#25
0
def main(args):

    network = importlib.import_module(args.model_def)

    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)

    # Write arguments to a text file
    facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt'))

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    train_set = facenet.get_dataset(args.data_dir)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    if args.pretrained_model:
        print('Pre-trained model: %s' % os.path.expanduser(args.pretrained_model))

    if args.lfw_dir:
        print('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs)

    with tf.Graph().as_default():
        tf.compat.v1.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Placeholder for the learning rate
        learning_rate_placeholder = tf.compat.v1.placeholder(tf.float32, name='learning_rate')

        batch_size_placeholder = tf.compat.v1.placeholder(tf.int32, name='batch_size')

        phase_train_placeholder = tf.compat.v1.placeholder(tf.bool, name='phase_train')

        image_paths_placeholder = tf.compat.v1.placeholder(tf.string, shape=(None, 3), name='image_paths')
        image_placeholder = tf.compat.v1.placeholder(tf.int8, shape=(None, 3, args.image_size, args.image_size, 3), name='image_in')
        labels_placeholder = tf.compat.v1.placeholder(tf.int64, shape=(None, 3), name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.int8, tf.int64],
                                              shapes=[(3, args.image_size, args.image_size, 3), (3,)],
                                              shared_name=None, name=None)
        enqueue_op = input_queue.enqueue_many([
            image_placeholder,
            # image_paths_placeholder,
            labels_placeholder
        ])

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                """file_contents = tf.io.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)

                if args.random_crop:
                    image = tf.image.random_crop(image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_with_crop_or_pad(image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)

                #pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))"""
                images.append(tf.image.per_image_standardization(filename))
            images_and_labels.append([
                images,
                label
            ])

        image_batch, labels_batch = tf.compat.v1.train.batch_join(
            images_and_labels, batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        labels_batch = tf.identity(labels_batch, 'label_batch')

        # Build the inference graph
        prelogits, _ = network.inference(image_batch, args.keep_probability,
                                         phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size,
                                         weight_decay=args.weight_decay)

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')
        # Split embeddings into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(tf.reshape(embeddings, [-1, 3, args.embedding_size]), 3, 1)
        triplet_loss = facenet.triplet_loss(anchor, positive, negative, args.alpha)

        learning_rate = tf.compat.v1.train.exponential_decay(learning_rate_placeholder, global_step,
                                                             args.learning_rate_decay_epochs * args.epoch_size, args.learning_rate_decay_factor, staircase=True)
        tf.compat.v1.summary.scalar('learning_rate', learning_rate)

        # Calculate the total losses
        regularization_losses = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([triplet_loss] + regularization_losses, name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op, opt = facenet.train(total_loss, global_step, args.optimizer,
                                      learning_rate, args.moving_average_decay, tf.compat.v1.global_variables())

        # Create a saver
        saver = tf.compat.v1.train.Saver(tf.compat.v1.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.compat.v1.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))

        # Initialize variables
        sess.run(tf.compat.v1.global_variables_initializer(), feed_dict={phase_train_placeholder: True})
        sess.run(tf.compat.v1.local_variables_initializer(), feed_dict={phase_train_placeholder: True})

        summary_writer = tf.compat.v1.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.compat.v1.train.start_queue_runners(coord=coord, sess=sess)

        train_dataset_path = os.path.join(args.data_dir, 'train')
        test_dataset_path = os.path.join(args.data_dir, 'test')

        train_dataset = AWEDataset(train_dataset_path)
        test_dataset = AWEDataset(test_dataset_path)

        with sess.as_default():

            if args.pretrained_model:
                print('Restoring pretrained model: %s' % args.pretrained_model)
                saver.restore(sess, os.path.expanduser(args.pretrained_model))

            # Training and validation loop
            epoch = 0
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, train_dataset, epoch,
                      image_placeholder,
                      # image_paths_placeholder,
                      labels_placeholder, labels_batch,
                      batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, input_queue, global_step,
                      embeddings, total_loss, train_op, summary_op, summary_writer, args.learning_rate_schedule_file,
                      args.embedding_size, anchor, positive, negative, triplet_loss)

                # Save variables and the metagraph if it doesn't exist already
                if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
                    os.makedirs(model_dir)
                save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step)

                imgs = test_dataset.images
                img = []
                c_same = 0
                c_diff = 0
                for i in range(len(imgs)):
                    im2 = imgs[i]
                    if len(im2) > 1:
                        random.shuffle(im2)
                        img.append([
                            im2[0]['src'],
                            im2[1]['src'],
                            True
                        ])
                        c_same += 1
                while c_diff < (c_same * 5) or (c_diff + c_same) % 3 != 0:
                    a = random.randint(0, len(imgs) - 1)
                    b = random.randint(0, len(imgs) - 1)
                    if a != b:
                        img.append([
                            imgs[a][-1]['src'],
                            imgs[b][-1]['src'],
                            False
                        ])
                        c_diff += 1
                random.shuffle(img)
                is_same = [x[2] for x in img]
                img = [y for x in img for y in x[0:-1]]

                evaluate(sess, img, embeddings, labels_batch,
                         image_placeholder,
                         # image_paths_placeholder,
                         labels_placeholder,
                         batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, is_same, args.batch_size,
                         args.lfw_nrof_folds, log_dir, step, summary_writer, args.embedding_size, args)

    return model_dir
示例#26
0
def _enqueue_data(data,
                  capacity,
                  shuffle=False,
                  min_after_dequeue=None,
                  num_threads=1,
                  seed=None,
                  name="enqueue_input",
                  enqueue_size=1,
                  num_epochs=None,
                  pad_value=None):
    """Creates a queue filled from a numpy array or pandas `DataFrame`.

    Returns a queue filled with the rows of the given (`OrderedDict` of) array
    or `DataFrame`. In the case of a pandas `DataFrame`, the first enqueued
    `Tensor` corresponds to the index of the `DataFrame`. For (`OrderedDict` of)
    numpy arrays, the first enqueued `Tensor` contains the row number.

  Args:
    data: a numpy `ndarray`, `OrderedDict` of numpy arrays, or a generator
       yielding `dict`s of numpy arrays or pandas `DataFrame` that will be read
       into the queue.
    capacity: the capacity of the queue.
    shuffle: whether or not to shuffle the rows of the array.
    min_after_dequeue: minimum number of elements that can remain in the queue
    after a dequeue operation. Only used when `shuffle` is true. If not set,
    defaults to `capacity` / 4.
    num_threads: number of threads used for reading and enqueueing.
    seed: used to seed shuffling and reader starting points.
    name: a scope name identifying the data.
    enqueue_size: the number of rows to enqueue per step.
    num_epochs: limit enqueuing to a specified number of epochs, if provided.
    pad_value: default value for dynamic padding of data samples, if provided.

  Returns:
    A queue filled with the rows of the given (`OrderedDict` of) array or
      `DataFrame`.

  Raises:
    TypeError: `data` is not a Pandas `DataFrame`, an `OrderedDict` of numpy
      arrays, a numpy `ndarray`, or a generator producing these.
    NotImplementedError: padding and shuffling data at the same time.
    NotImplementedError: padding usage with non generator data type.
  """
    with ops.name_scope(name):
        if isinstance(data, np.ndarray):
            types = [dtypes.int64, dtypes.as_dtype(data.dtype)]
            queue_shapes = [(), data.shape[1:]]
            get_feed_fn = _ArrayFeedFn
        elif isinstance(data, collections.OrderedDict):
            types = [dtypes.int64
                     ] + [dtypes.as_dtype(col.dtype) for col in data.values()]
            queue_shapes = [()] + [col.shape[1:] for col in data.values()]
            get_feed_fn = _OrderedDictNumpyFeedFn
        elif isinstance(data, tp.FunctionType):
            x_first_el = six.next(data())
            x_first_keys = sorted(x_first_el.keys())
            x_first_values = [x_first_el[key] for key in x_first_keys]
            types = [dtypes.as_dtype(col.dtype) for col in x_first_values]
            queue_shapes = [col.shape for col in x_first_values]
            get_feed_fn = _GeneratorFeedFn
        elif HAS_PANDAS and isinstance(data, pd.DataFrame):
            types = [
                dtypes.as_dtype(dt)
                for dt in [data.index.dtype] + list(data.dtypes)
            ]
            queue_shapes = [() for _ in types]
            get_feed_fn = _PandasFeedFn
        else:
            raise TypeError(
                "data must be either a numpy array or pandas DataFrame if pandas is "
                "installed; got {}".format(type(data).__name__))

        pad_data = pad_value is not None
        if pad_data and get_feed_fn is not _GeneratorFeedFn:
            raise NotImplementedError(
                "padding is only available with generator usage")
        if shuffle and pad_data:
            raise NotImplementedError(
                "padding and shuffling data at the same time is not implemented"
            )

        # TODO(jamieas): TensorBoard warnings for all warnings below once available.

        if num_threads > 1 and num_epochs is not None:
            logging.warning(
                "enqueue_data was called with num_epochs and num_threads > 1. "
                "num_epochs is applied per thread, so this will produce more "
                "epochs than you probably intend. "
                "If you want to limit epochs, use one thread.")

        if shuffle and num_threads > 1 and num_epochs is not None:
            logging.warning(
                "enqueue_data was called with shuffle=True, num_threads > 1, and "
                "num_epochs. This will create multiple threads, all reading the "
                "array/dataframe in order adding to the same shuffling queue; the "
                "results will likely not be sufficiently shuffled.")

        if not shuffle and num_threads > 1:
            logging.warning(
                "enqueue_data was called with shuffle=False and num_threads > 1. "
                "This will create multiple threads, all reading the "
                "array/dataframe in order. If you want examples read in order, use"
                " one thread; if you want multiple threads, enable shuffling.")

        if shuffle:
            min_after_dequeue = int(
                capacity /
                4 if min_after_dequeue is None else min_after_dequeue)
            queue = data_flow_ops.RandomShuffleQueue(capacity,
                                                     min_after_dequeue,
                                                     dtypes=types,
                                                     shapes=queue_shapes,
                                                     seed=seed)
        elif pad_data:
            min_after_dequeue = 0  # just for the summary text
            queue_shapes = list(
                map(
                    lambda x: tuple(list(x[:-1]) + [None])
                    if len(x) > 0 else x, queue_shapes))
            queue = data_flow_ops.PaddingFIFOQueue(capacity,
                                                   dtypes=types,
                                                   shapes=queue_shapes)
        else:
            min_after_dequeue = 0  # just for the summary text
            queue = data_flow_ops.FIFOQueue(capacity,
                                            dtypes=types,
                                            shapes=queue_shapes)

        enqueue_ops = []
        feed_fns = []

        for i in range(num_threads):
            # Note the placeholders have no shapes, so they will accept any
            # enqueue_size.  enqueue_many below will break them up.
            placeholders = [array_ops.placeholder(t) for t in types]

            enqueue_ops.append(queue.enqueue_many(placeholders))
            seed_i = None if seed is None else (i + 1) * seed

            if not pad_data:
                feed_fns.append(
                    get_feed_fn(placeholders,
                                data,
                                enqueue_size,
                                random_start=shuffle,
                                seed=seed_i,
                                num_epochs=num_epochs))
            else:
                feed_fns.append(
                    get_feed_fn(placeholders,
                                data,
                                enqueue_size,
                                random_start=shuffle,
                                seed=seed_i,
                                num_epochs=num_epochs,
                                pad_value=pad_value))

        runner = fqr._FeedingQueueRunner(  # pylint: disable=protected-access
            queue=queue,
            enqueue_ops=enqueue_ops,
            feed_fns=feed_fns)
        queue_runner.add_queue_runner(runner)

        full = (math_ops.cast(
            math_ops.maximum(0,
                             queue.size() - min_after_dequeue), dtypes.float32)
                * (1. / (capacity - min_after_dequeue)))
        # Note that name contains a '/' at the end so we intentionally do not place
        # a '/' after %s below.
        summary_name = (
            "queue/%sfraction_over_%d_of_%d_full" %
            (queue.name, min_after_dequeue, capacity - min_after_dequeue))
        summary.scalar(summary_name, full)
        return queue
示例#27
0
def queue_parsed_features(parsed_features,
                          keys=None,
                          feature_queue_capacity=100,
                          num_enqueue_threads=2,
                          name=None):
    """Speeds up parsing by using queues to do it asynchronously.

  This function adds the tensors in `parsed_features` to a queue, which allows
  the parsing (or any other expensive op before this) to be asynchronous wrt the
  rest of the training graph. This greatly improves read latency and speeds up
  training since the data will already be parsed and ready when each step of
  training needs it.

  All queue runners are added to the queue runners collection, and may be
  started via `start_queue_runners`.

  All ops are added to the default graph.

  Args:
    parsed_features: A dict of string key to `Tensor` or `SparseTensor` objects.
    keys: `Tensor` of string keys.
    feature_queue_capacity: Capacity of the parsed features queue.
    num_enqueue_threads: Number of threads to enqueue the parsed example queue.
      Using multiple threads to enqueue the parsed example queue helps maintain
      a full queue when the subsequent computations overall are cheaper than
      parsing.
    name: Name of resulting op.

  Returns:
    Returns tuple of:
    - `Tensor` corresponding to `keys` if provided, otherwise `None`.
    -  A dict of string key to `Tensor` or `SparseTensor` objects corresponding
       to `parsed_features`.
  Raises:
    ValueError: for invalid inputs.
  """

    args = list(parsed_features.values())
    if keys is not None:
        args += [keys]

    with ops.name_scope(name, 'queue_parsed_features', args):
        # Lets also add preprocessed tensors into the queue types for each item of
        # the queue.
        tensors_to_enqueue = []
        # Each entry contains the key, and a boolean which indicates whether the
        # tensor was a sparse tensor.
        tensors_mapping = []
        # TODO(sibyl-Aix6ihai): Most of the functionality here is about pushing sparse
        # tensors into a queue. This could be taken care in somewhere else so others
        # can reuse it. Also, QueueBase maybe extended to handle sparse tensors
        # directly.
        for key in sorted(parsed_features.keys()):
            tensor = parsed_features[key]
            if isinstance(tensor, sparse_tensor.SparseTensor):
                tensors_mapping.append((key, True))
                tensors_to_enqueue.extend(
                    [tensor.indices, tensor.values, tensor.dense_shape])
            else:
                tensors_mapping.append((key, False))
                tensors_to_enqueue.append(tensor)

        if keys is not None:
            tensors_to_enqueue.append(keys)

        queue_dtypes = [x.dtype for x in tensors_to_enqueue]
        input_queue = data_flow_ops.FIFOQueue(feature_queue_capacity,
                                              queue_dtypes)

        # Add a summary op to debug if our feature queue is full or not.
        summary.scalar(
            'queue/parsed_features/%s/fraction_of_%d_full' %
            (input_queue.name, feature_queue_capacity),
            math_ops.cast(input_queue.size(), dtypes.float32) *
            (1. / feature_queue_capacity))

        # Use a single QueueRunner with multiple threads to enqueue so the queue is
        # always full. The threads are coordinated so the last batch will not be
        # lost.
        enqueue_ops = [
            input_queue.enqueue(tensors_to_enqueue)
            for _ in range(num_enqueue_threads)
        ]
        queue_runner.add_queue_runner(
            queue_runner.QueueRunner(
                input_queue,
                enqueue_ops,
                queue_closed_exception_types=(errors.OutOfRangeError,
                                              errors.CancelledError)))

        dequeued_tensors = input_queue.dequeue()
        if not isinstance(dequeued_tensors, list):
            # input_queue.dequeue() returns a single tensor instead of a list of
            # tensors if there is only one tensor to dequeue, which breaks the
            # assumption of a list below.
            dequeued_tensors = [dequeued_tensors]

        # Reset shapes on dequeued tensors.
        for i in range(len(tensors_to_enqueue)):
            dequeued_tensors[i].set_shape(tensors_to_enqueue[i].get_shape())

        # Recreate feature mapping according to the original dictionary.
        dequeued_parsed_features = {}
        index = 0
        for key, is_sparse_tensor in tensors_mapping:
            if is_sparse_tensor:
                # Three tensors are (indices, values, shape).
                dequeued_parsed_features[key] = sparse_tensor.SparseTensor(
                    dequeued_tensors[index], dequeued_tensors[index + 1],
                    dequeued_tensors[index + 2])
                index += 3
            else:
                dequeued_parsed_features[key] = dequeued_tensors[index]
                index += 1

        dequeued_keys = None
        if keys is not None:
            dequeued_keys = dequeued_tensors[-1]

        return dequeued_keys, dequeued_parsed_features
示例#28
0
  def __init__(self,
               opt,
               global_step,
               total_num_replicas=None,
               variable_averages=None,
               variables_to_average=None,
               use_locking=False,
               name="sync_replicas"):
    """Construct a sync_replicas optimizer.
    Args:
      opt: The actual optimizer that will be used to compute and apply the
        gradients. Must be one of the Optimizer classes.
      replicas_to_aggregate: number of replicas to aggregate for each variable
        update.
      total_num_replicas: Total number of tasks/workers/replicas, could be
        different from replicas_to_aggregate.
        If total_num_replicas > replicas_to_aggregate: it is backup_replicas +
        replicas_to_aggregate.
        If total_num_replicas < replicas_to_aggregate: Replicas compute
        multiple batches per update to variables.
      variable_averages: Optional `ExponentialMovingAverage` object, used to
        maintain moving averages for the variables passed in
        `variables_to_average`.
      variables_to_average: a list of variables that need to be averaged. Only
        needed if variable_averages is passed in.
      use_locking: If True use locks for update operation.
      name: string. Optional name of the returned operation.
    """
    if total_num_replicas is None:
      total_num_replicas = replicas_to_aggregate

    super(TimeoutReplicasOptimizer, self).__init__(use_locking, name)
    logging.info(
        "TimeoutReplicas: total_num_replicas=%s",
        total_num_replicas)
    self._n_updates = 0
    self._opt = opt
    self._gradients_applied = False
    self._variable_averages = variable_averages
    self._variables_to_average = variables_to_average
    self._total_num_replicas = total_num_replicas
    self._global_step = None

    # The synchronization op will be executed in a queue runner which should
    # only be executed by one of the replicas (usually the chief).
    self._chief_queue_runner = None

    # Remember which accumulator is on which device to set the initial step in
    # the accumulator to be global step. This list contains list of the
    # following format: (accumulator, device).
    self._accumulator_list = []
    self._constant_for_comparison = 1
    # For timeout, we have one token queue per worker. This makes it so that
    # a worker can not take the work of another worker if it finishes early.
    self._sync_token_queues = [0] * self._total_num_replicas
    for worker in range(self._total_num_replicas):
      with ops.device(global_step.device):
        self._sync_token_queues[worker] = data_flow_ops.FIFOQueue(-1,
                                                                  global_step.dtype.base_dtype,
                                                                  shapes=(),
                                                                  shared_name="sync_token_q_%d" % worker)
def main(args):

    network = importlib.import_module(args.model_def)
    image_size = (args.image_size, args.image_size)

    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(
            log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(
            model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    stat_file_name = os.path.join(log_dir, 'stat.h5')

    # Write arguments to a text file
    facenet.write_arguments_to_file(args, os.path.join(log_dir,
                                                       'arguments.txt'))

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    dataset = facenet.get_dataset(args.data_dir)
    if args.filter_filename:
        dataset = filter_dataset(dataset,
                                 os.path.expanduser(args.filter_filename),
                                 args.filter_percentile,
                                 args.filter_min_nrof_images_per_class)

    if args.validation_set_split_ratio > 0.0:
        train_set, val_set = facenet.split_dataset(
            dataset, args.validation_set_split_ratio,
            args.min_nrof_val_images_per_class, 'SPLIT_IMAGES')
    else:
        train_set, val_set = dataset, []

    nrof_classes = len(train_set)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        pretrained_model = os.path.expanduser(args.pretrained_model)
        print('Pre-trained model: %s' % pretrained_model)

    if args.lfw_dir:
        print('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(
            os.path.expanduser(args.lfw_dir), pairs)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Get a list of image paths and their labels
        image_list, label_list = facenet.get_image_paths_and_labels(train_set)
        assert len(image_list) > 0, 'The training set should not be empty'

        val_image_list, val_label_list = facenet.get_image_paths_and_labels(
            val_set)

        # Create a queue that produces indices into the image_list and label_list
        labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
        range_size = array_ops.shape(labels)[0]
        index_queue = tf.train.range_input_producer(range_size,
                                                    num_epochs=None,
                                                    shuffle=True,
                                                    seed=None,
                                                    capacity=32)

        index_dequeue_op = index_queue.dequeue_many(
            args.batch_size * args.epoch_size, 'index_dequeue')

        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name='image_paths')
        labels_placeholder = tf.placeholder(tf.int32,
                                            shape=(None, 1),
                                            name='labels')
        control_placeholder = tf.placeholder(tf.int32,
                                             shape=(None, 1),
                                             name='control')

        nrof_preprocess_threads = 4
        input_queue = data_flow_ops.FIFOQueue(
            capacity=2000000,
            dtypes=[tf.string, tf.int32, tf.int32],
            shapes=[(1, ), (1, ), (1, )],
            shared_name=None,
            name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder, control_placeholder],
            name='enqueue_op')
        image_batch, label_batch = facenet.create_input_pipeline(
            input_queue, image_size, nrof_preprocess_threads,
            batch_size_placeholder)

        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        label_batch = tf.identity(label_batch, 'label_batch')

        print('Number of classes in training set: %d' % nrof_classes)
        print('Number of examples in training set: %d' % len(image_list))

        print('Number of classes in validation set: %d' % len(val_set))
        print('Number of examples in validation set: %d' % len(val_image_list))

        print('Building training graph')

        # Build the inference graph
        prelogits, _ = network.inference(
            image_batch,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay)
        logits = slim.fully_connected(
            prelogits,
            len(train_set),
            activation_fn=None,
            weights_initializer=slim.initializers.xavier_initializer(),
            weights_regularizer=slim.l2_regularizer(args.weight_decay),
            scope='Logits',
            reuse=False)

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Norm for the prelogits
        eps = 1e-4
        prelogits_norm = tf.reduce_mean(
            tf.norm(tf.abs(prelogits) + eps, ord=args.prelogits_norm_p,
                    axis=1))
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             prelogits_norm * args.prelogits_norm_loss_factor)

        # Add center loss
        prelogits_center_loss, _ = facenet.center_loss(prelogits, label_batch,
                                                       args.center_loss_alfa,
                                                       nrof_classes)
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             prelogits_center_loss * args.center_loss_factor)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_batch,
            logits=logits,
            name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy,
                                            name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

        correct_prediction = tf.cast(
            tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)),
            tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

        # Calculate the total losses
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses,
                              name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay,
                                 tf.global_variables(), args.log_histograms)

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():

            if pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                saver.restore(sess, pretrained_model)

            # Training and validation loop
            print('Running training')
            nrof_steps = args.max_nrof_epochs * args.epoch_size
            # Validate every validate_every_n_epochs as well as in the last epoch
            nrof_val_samples = int(
                math.ceil(args.max_nrof_epochs / args.validate_every_n_epochs))
            stat = {
                'loss':
                np.zeros((nrof_steps, ), np.float32),
                'center_loss':
                np.zeros((nrof_steps, ), np.float32),
                'reg_loss':
                np.zeros((nrof_steps, ), np.float32),
                'xent_loss':
                np.zeros((nrof_steps, ), np.float32),
                'prelogits_norm':
                np.zeros((nrof_steps, ), np.float32),
                'accuracy':
                np.zeros((nrof_steps, ), np.float32),
                'val_loss':
                np.zeros((nrof_val_samples, ), np.float32),
                'val_xent_loss':
                np.zeros((nrof_val_samples, ), np.float32),
                'val_accuracy':
                np.zeros((nrof_val_samples, ), np.float32),
                'lfw_accuracy':
                np.zeros((args.max_nrof_epochs, ), np.float32),
                'lfw_valrate':
                np.zeros((args.max_nrof_epochs, ), np.float32),
                'learning_rate':
                np.zeros((args.max_nrof_epochs, ), np.float32),
                'time_train':
                np.zeros((args.max_nrof_epochs, ), np.float32),
                'time_validate':
                np.zeros((args.max_nrof_epochs, ), np.float32),
                'time_evaluate':
                np.zeros((args.max_nrof_epochs, ), np.float32),
                'prelogits_hist':
                np.zeros((args.max_nrof_epochs, 1000), np.float32),
            }
            for epoch in range(1, args.max_nrof_epochs + 1):
                step = sess.run(global_step, feed_dict=None)
                # Train for one epoch
                t = time.time()
                cont = train(
                    args, sess, epoch, image_list, label_list,
                    index_dequeue_op, enqueue_op, image_paths_placeholder,
                    labels_placeholder, learning_rate_placeholder,
                    phase_train_placeholder, batch_size_placeholder,
                    control_placeholder, global_step, total_loss, train_op,
                    summary_op, summary_writer, regularization_losses,
                    args.learning_rate_schedule_file, stat, cross_entropy_mean,
                    accuracy, learning_rate, prelogits, prelogits_center_loss,
                    args.random_rotate, args.random_crop, args.random_flip,
                    prelogits_norm, args.prelogits_hist_max,
                    args.use_fixed_image_standardization)
                stat['time_train'][epoch - 1] = time.time() - t

                if not cont:
                    break

                t = time.time()
                if len(val_image_list) > 0 and (
                    (epoch - 1) % args.validate_every_n_epochs
                        == args.validate_every_n_epochs - 1
                        or epoch == args.max_nrof_epochs):
                    validate(args, sess, epoch, val_image_list, val_label_list,
                             enqueue_op, image_paths_placeholder,
                             labels_placeholder, control_placeholder,
                             phase_train_placeholder, batch_size_placeholder,
                             stat, total_loss, regularization_losses,
                             cross_entropy_mean, accuracy,
                             args.validate_every_n_epochs,
                             args.use_fixed_image_standardization)
                stat['time_validate'][epoch - 1] = time.time() - t

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, epoch)

                # Evaluate on LFW
                t = time.time()
                if args.lfw_dir:
                    evaluate(sess, enqueue_op, image_paths_placeholder,
                             labels_placeholder, phase_train_placeholder,
                             batch_size_placeholder, control_placeholder,
                             embeddings, label_batch, lfw_paths, actual_issame,
                             args.lfw_batch_size, args.lfw_nrof_folds, log_dir,
                             step, summary_writer, stat, epoch,
                             args.lfw_distance_metric, args.lfw_subtract_mean,
                             args.lfw_use_flipped_images,
                             args.use_fixed_image_standardization)
                stat['time_evaluate'][epoch - 1] = time.time() - t

                print('Saving statistics')
                with h5py.File(stat_file_name, 'w') as f:
                    for key, value in stat.items():
                        f.create_dataset(key, data=value)

    return model_dir
示例#30
0
 def testFIFOSharedQueue(self):
     shared_queue = data_flow_ops.FIFOQueue(
         capacity=256, dtypes=[dtypes_lib.string, dtypes_lib.string])
     self._verify_all_data_sources_read(shared_queue)