Example #1
def graph_fn(global_step):
    boundaries = [2, 3, 7]
    rates = [1.0, 2.0, 3.0, 4.0]
    learning_rate = learning_schedules.manual_stepping(
        global_step, boundaries, rates)
    assert learning_rate.op.name.endswith('learning_rate')
    return (learning_rate, )
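For reference, manual_stepping builds a piecewise-constant schedule: rates[0] applies until global_step reaches boundaries[0], rates[1] until boundaries[1], and so on. A minimal pure-Python sketch of that lookup (not the TensorFlow implementation), using the values above:

import bisect

def manual_stepping_py(step, boundaries, rates):
    # rates[i] applies while boundaries[i-1] <= step < boundaries[i].
    return rates[bisect.bisect_right(boundaries, step)]

boundaries = [2, 3, 7]
rates = [1.0, 2.0, 3.0, 4.0]
print([manual_stepping_py(s, boundaries, rates) for s in range(10)])
# [1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0]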
Example #2
def graph_fn(global_step):
    boundaries = [4, 6, 8]
    rates = [0.02, 0.10, 0.01, 0.001]
    learning_rate = learning_schedules.manual_stepping(
        global_step, boundaries, rates, warmup=True)
    assert learning_rate.op.name.endswith('learning_rate')
    return (learning_rate, )
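Here warmup=True is assumed to ramp the learning rate linearly from rates[0] to rates[1] over the first boundaries[0] steps before the usual stepping takes over. A rough sketch of that ramp, under this assumption:

# Assumed warmup behaviour: linear interpolation from rates[0] to rates[1]
# across steps [0, boundaries[0]).
boundaries = [4, 6, 8]
rates = [0.02, 0.10, 0.01, 0.001]
slope = (rates[1] - rates[0]) / boundaries[0]
warmup_rates = [rates[0] + slope * step for step in range(boundaries[0])]
print(warmup_rates)  # ~[0.02, 0.04, 0.06, 0.08] (up to float rounding)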
Example #3
def _create_learning_rate(learning_rate_config):
  """Create optimizer learning rate based on config.

  Args:
    learning_rate_config: A LearningRate proto message.

  Returns:
    A learning rate.

  Raises:
    ValueError: when using an unsupported input data type.
  """
  learning_rate = None
  learning_rate_type = learning_rate_config.WhichOneof('learning_rate')
  if learning_rate_type == 'constant_learning_rate':
    config = learning_rate_config.constant_learning_rate
    learning_rate = tf.constant(config.learning_rate, dtype=tf.float32,
                                name='learning_rate')

  if learning_rate_type == 'exponential_decay_learning_rate':
    config = learning_rate_config.exponential_decay_learning_rate
    learning_rate = learning_schedules.exponential_decay_with_burnin(
        tf.train.get_or_create_global_step(),
        config.initial_learning_rate,
        config.decay_steps,
        config.decay_factor,
        burnin_learning_rate=config.burnin_learning_rate,
        burnin_steps=config.burnin_steps,
        min_learning_rate=config.min_learning_rate,
        staircase=config.staircase)

  if learning_rate_type == 'manual_step_learning_rate':
    config = learning_rate_config.manual_step_learning_rate
    if not config.schedule:
      raise ValueError('Empty learning rate schedule.')
    learning_rate_step_boundaries = [x.step for x in config.schedule]
    learning_rate_sequence = [config.initial_learning_rate]
    learning_rate_sequence += [x.learning_rate for x in config.schedule]
    learning_rate = learning_schedules.manual_stepping(
        tf.train.get_or_create_global_step(), learning_rate_step_boundaries,
        learning_rate_sequence, config.warmup)

  if learning_rate_type == 'cosine_decay_learning_rate':
    config = learning_rate_config.cosine_decay_learning_rate
    learning_rate = learning_schedules.cosine_decay_with_warmup(
        tf.train.get_or_create_global_step(),
        config.learning_rate_base,
        config.total_steps,
        config.warmup_learning_rate,
        config.warmup_steps,
        config.hold_base_rate_steps)

  if learning_rate is None:
    raise ValueError('Learning_rate %s not supported.' % learning_rate_type)

  return learning_rate
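A hypothetical usage sketch for the builder above, assuming the LearningRate proto from the TF Object Detection API (object_detection/protos/optimizer.proto); the numeric values are illustrative only:

from google.protobuf import text_format
from object_detection.protos import optimizer_pb2

config_text = """
  manual_step_learning_rate {
    initial_learning_rate: 0.02
    schedule { step: 4 learning_rate: 0.01 }
    schedule { step: 8 learning_rate: 0.001 }
  }
"""
learning_rate_config = optimizer_pb2.LearningRate()
text_format.Merge(config_text, learning_rate_config)
# Yields a manual_stepping schedule with boundaries [4, 8] and
# rates [0.02, 0.01, 0.001].
learning_rate = _create_learning_rate(learning_rate_config)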
Example #4
def testManualStepping(self):
    global_step = tf.placeholder(tf.int64, [])
    boundaries = [2, 3, 7]
    rates = [1.0, 2.0, 3.0, 4.0]
    exp_rates = [1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0]
    learning_rate = learning_schedules.manual_stepping(
        global_step, boundaries, rates)
    with self.test_session() as sess:
        output_rates = []
        for input_global_step in range(10):
            output_rate = sess.run(
                learning_rate, feed_dict={global_step: input_global_step})
            output_rates.append(output_rate)
        self.assertAllClose(output_rates, exp_rates)
Example #5
def _create_learning_rate(learning_rate_config, global_summaries):
    """Create optimizer learning rate based on config.

    Args:
      learning_rate_config: A LearningRate proto message.
      global_summaries: A set to attach learning rate summary to.

    Returns:
      A learning rate.

    Raises:
      ValueError: when using an unsupported input data type.
    """
    learning_rate = None
    learning_rate_type = learning_rate_config.WhichOneof('learning_rate')
    if learning_rate_type == 'constant_learning_rate':
        config = learning_rate_config.constant_learning_rate
        learning_rate = config.learning_rate

    if learning_rate_type == 'exponential_decay_learning_rate':
        config = learning_rate_config.exponential_decay_learning_rate
        learning_rate = tf.train.exponential_decay(
            config.initial_learning_rate,
            slim.get_or_create_global_step(),
            config.decay_steps,
            config.decay_factor,
            staircase=config.staircase)

    if learning_rate_type == 'manual_step_learning_rate':
        config = learning_rate_config.manual_step_learning_rate
        if not config.schedule:
            raise ValueError('Empty learning rate schedule.')
        learning_rate_step_boundaries = [x.step for x in config.schedule]
        learning_rate_sequence = [config.initial_learning_rate]
        learning_rate_sequence += [x.learning_rate for x in config.schedule]
        learning_rate = learning_schedules.manual_stepping(
            slim.get_or_create_global_step(), learning_rate_step_boundaries,
            learning_rate_sequence)

    if learning_rate is None:
        raise ValueError('Learning_rate %s not supported.' %
                         learning_rate_type)

    global_summaries.add(tf.summary.scalar('Learning_Rate', learning_rate))
    return learning_rate
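Compared with Example #3, this older slim-based variant takes an explicit global_summaries set and attaches the learning-rate scalar summary itself. A minimal usage sketch, assuming a learning_rate_config proto has already been built as in the previous example:

global_summaries = set()
learning_rate = _create_learning_rate(learning_rate_config, global_summaries)
summary_op = tf.summary.merge(list(global_summaries))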
Example #6
def main(_):

    with tf.Graph().as_default() as graph:
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
        global_summaries = set([])

        num_batches_epoch = num_samples // (FLAGS.batch_size *
                                            FLAGS.num_clones)
        print(num_batches_epoch)

        #######################
        # Config model_deploy #
        #######################
        config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.ps_tasks)

        # Create global_step
        with tf.device(config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        with tf.device(config.inputs_device()):
            # Train Process
            dataset = get_split('train', FLAGS.dataset_dir)
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=FLAGS.batch_size * 20,
                common_queue_min=FLAGS.batch_size * 10)
            [image_a, image_b,
             label] = provider.get(['image_a', 'image_b', 'label'])
            probe = image_a

            galleries = tf.unstack(image_b)
            galleries_process = []
            probe = process_image(probe)
            probe.set_shape([FLAGS.target_height, FLAGS.target_width, 3])

            gallery_target = tf.slice(image_b, [label, 0, 0, 0],
                                      [1, -1, -1, -1])
            gallery_target = tf.squeeze(gallery_target, axis=[0])
            gallery = process_image(gallery_target)
            gallery.set_shape([FLAGS.target_height, FLAGS.target_width, 3])
            galleries_process.append(gallery)

            for Idx in range(FLAGS.top_k - 1):

                imgIdx = tf.cond(Idx >= label, lambda: Idx + 1, lambda: Idx)
                gallery_other = tf.slice(image_b, [imgIdx, 0, 0, 0],
                                         [1, -1, -1, -1])
                gallery_other = tf.squeeze(gallery_other, axis=[0])
                gallery = process_image(gallery_other)
                gallery.set_shape([FLAGS.target_height, FLAGS.target_width, 3])
                galleries_process.append(gallery)

            label_new = 0
            galleries_process = tf.stack(galleries_process)

            probe_batch, galleries_batch, labels = tf.train.batch(
                [probe, galleries_process, label_new],
                batch_size=FLAGS.batch_size,
                num_threads=8,
                capacity=FLAGS.batch_size * 10)

            inputs_queue = prefetch_queue(
                [probe_batch, galleries_batch, labels])

        ######################
        # Select the network #
        ######################
        def model_fn(inputs_queue):
            probe_batch, galleries_batch, labels = inputs_queue.dequeue()
            probe_batch_tile = tf.tile(tf.expand_dims(probe_batch, axis=1),
                                       [1, FLAGS.top_k, 1, 1, 1])
            shape = probe_batch_tile.get_shape().as_list()
            probe_batch_reshape = tf.reshape(
                probe_batch_tile, [-1, shape[2], shape[3], shape[4]])
            galleries_batch_reshape = tf.reshape(
                galleries_batch, [-1, shape[2], shape[3], shape[4]])
            images_a = probe_batch_reshape
            images_b = galleries_batch_reshape

            model = find_class_by_name(FLAGS.model, [models])()

            logits = model.create_model(images_a,
                                        images_b,
                                        reuse=False,
                                        is_training=True)
            logits = tf.reshape(logits, [FLAGS.batch_size, -1])
            label_onehot = tf.one_hot(labels, FLAGS.top_k)
            crossentropy_loss = tf.losses.softmax_cross_entropy(
                onehot_labels=label_onehot, logits=logits)

            tf.summary.histogram('images_a', images_a)

        clones = model_deploy.create_clones(config, model_fn, [inputs_queue])
        first_clone_scope = clones[0].scope

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(config.optimizer_device()):

            learning_rate_step_boundaries = [
                int(num_batches_epoch * num_epoches * 0.60),
                int(num_batches_epoch * num_epoches * 0.75),
                int(num_batches_epoch * num_epoches * 0.90)
            ]
            learning_rate_sequence = [FLAGS.learning_rate]
            learning_rate_sequence += [
                FLAGS.learning_rate * 0.1, FLAGS.learning_rate * 0.01,
                FLAGS.learning_rate * 0.001
            ]
            learning_rate = learning_schedules.manual_stepping(
                global_step, learning_rate_step_boundaries,
                learning_rate_sequence)
            #             learning_rate = learning_schedules.exponential_decay_with_burnin(global_step,
            #                                   FLAGS.learning_rate,num_batches_epoch*num_epoches,0.001/FLAGS.learning_rate,
            #                                   burnin_learning_rate=0.01,
            #                                   burnin_steps=5000)
            if FLAGS.optimizer == 'adam':
                opt = tf.train.AdamOptimizer(learning_rate)
            elif FLAGS.optimizer == 'momentum':
                opt = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
            else:
                raise ValueError('Unsupported optimizer: %s' % FLAGS.optimizer)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)
        with tf.device(config.optimizer_device()):
            training_optimizer = opt

        # Create ops required to initialize the model from a given checkpoint. TODO!!
        init_fn = None
        if FLAGS.model == 'DCSL':
            if FLAGS.weights is None:
                # if not FLAGS.moving_average_decay:
                variables = slim.get_model_variables('InceptionResnetV2')
                init_fn = slim.assign_from_checkpoint_fn(
                    os.path.join(FLAGS.checkpoints_dir,
                                 'inception_resnet_v2.ckpt'),
                    slim.get_model_variables('InceptionResnetV2'))
        if FLAGS.model == 'DCSL_inception_v1':
            if FLAGS.weights is None:
                # if not FLAGS.moving_average_decay:
                variables = slim.get_model_variables('InceptionV1')
                init_fn = slim.assign_from_checkpoint_fn(
                    os.path.join(FLAGS.checkpoints_dir, 'inception_v1.ckpt'),
                    slim.get_model_variables('InceptionV1'))
        if FLAGS.model == 'DCSL_NAS':
            #             if FLAGS.weights is None:
            #                 # if not FLAGS.moving_average_decay:
            #                 variables = slim.get_model_variables('NAS')
            #                 init_fn = slim.assign_from_checkpoint_fn(
            #                     os.path.join(FLAGS.checkpoints_dir, 'nasnet-a_large_04_10_2017/model.ckpt'),
            #                     slim.get_model_variables('NAS'))
            def restore_map():
                variables_to_restore = {}
                for variable in tf.global_variables():
                    for scope_name in ['NAS']:
                        if variable.op.name.startswith(scope_name):
                            var_name = variable.op.name.replace(
                                scope_name + '/', '')
                            # var_name = variable.op.name
                            variables_to_restore[
                                var_name +
                                '/ExponentialMovingAverage'] = variable
                            # variables_to_restore[var_name] = variable
                return variables_to_restore

            var_map = restore_map()
            # restore_var = [v for v in tf.global_variables() if 'global_step' not in v.name]
            available_var_map = (
                variables_helper.get_variables_available_in_checkpoint(
                    var_map, FLAGS.weights))
            init_saver = tf.train.Saver(available_var_map)

            def initializer_fn(sess):
                init_saver.restore(sess, FLAGS.weights)

            init_fn = initializer_fn

        if FLAGS.model == 'MultiHeadAttentionBaseModel_set':
            if FLAGS.weights is None:
                # if not FLAGS.moving_average_decay:
                variables = slim.get_model_variables('InceptionV1')
                init_fn = slim.assign_from_checkpoint_fn(
                    os.path.join(FLAGS.checkpoints_dir, 'inception_v1.ckpt'),
                    slim.get_model_variables('InceptionV1'))
            else:
                restore_var = [
                    v for v in slim.get_model_variables()
                    if 'Score' not in v.name
                ]
                init_fn = slim.assign_from_checkpoint_fn(
                    FLAGS.weights, restore_var)
        if FLAGS.model == 'MultiHeadAttentionBaseModel_set_share':
            if FLAGS.weights is None:
                # if not FLAGS.moving_average_decay:
                variables = slim.get_model_variables('InceptionV1')
                init_fn = slim.assign_from_checkpoint_fn(
                    os.path.join(FLAGS.checkpoints_dir, 'inception_v1.ckpt'),
                    slim.get_model_variables('InceptionV1'))
            else:
                restore_var = [
                    v for v in slim.get_model_variables()
                    if 'Score' not in v.name
                ]
                init_fn = slim.assign_from_checkpoint_fn(
                    FLAGS.weights, restore_var)

        if FLAGS.model == 'MultiHeadAttentionBaseModel_set_share_softmatch':
            if FLAGS.weights is None:
                # if not FLAGS.moving_average_decay:
                variables = slim.get_model_variables('InceptionV1')
                init_fn = slim.assign_from_checkpoint_fn(
                    os.path.join(FLAGS.checkpoints_dir, 'inception_v1.ckpt'),
                    slim.get_model_variables('InceptionV1'))
            else:
                restore_var = [
                    v for v in slim.get_model_variables()
                    if 'Score' not in v.name
                ]
                init_fn = slim.assign_from_checkpoint_fn(
                    FLAGS.weights, restore_var)
        if FLAGS.model == 'MultiHeadAttentionBaseModel_set_share_softmatch_v2':
            if FLAGS.weights is None:
                # if not FLAGS.moving_average_decay:
                variables = slim.get_model_variables('InceptionV1')
                init_fn = slim.assign_from_checkpoint_fn(
                    os.path.join(FLAGS.checkpoints_dir, 'inception_v1.ckpt'),
                    slim.get_model_variables('InceptionV1'))
            else:
                restore_var = [
                    v for v in slim.get_model_variables()
                    if 'Score' not in v.name
                ]
                init_fn = slim.assign_from_checkpoint_fn(
                    FLAGS.weights, restore_var)

        if FLAGS.model == 'MultiHeadAttentionBaseModel_set_share_res50':
            if FLAGS.weights is None:
                # if not FLAGS.moving_average_decay:
                variables = slim.get_model_variables('resnet_v2_50')
                init_fn = slim.assign_from_checkpoint_fn(
                    os.path.join(FLAGS.checkpoints_dir, 'resnet_v2_50.ckpt'),
                    slim.get_model_variables('resnet_v2_50'))
        if FLAGS.model == 'MultiHeadAttentionBaseModel_set_inv3':
            # if not FLAGS.moving_average_decay:
            variables = slim.get_model_variables('InceptionV3')
            init_fn = slim.assign_from_checkpoint_fn(
                os.path.join(FLAGS.checkpoints_dir, 'inception_v3.ckpt'),
                slim.get_model_variables('InceptionV3'))

        # compute and update gradients
        with tf.device(config.optimizer_device()):
            if FLAGS.moving_average_decay:
                update_ops.append(
                    variable_averages.apply(moving_average_variables))

            # Variables to train.
            all_trainable = tf.trainable_variables()

            #  and returns a train_tensor and summary_op
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones,
                training_optimizer,
                regularization_losses=None,
                var_list=all_trainable)

            grad_mult = utils.get_model_gradient_multipliers(
                FLAGS.last_layer_gradient_multiplier)
            grads_and_vars = slim.learning.multiply_gradients(
                grads_and_vars, grad_mult)
            # Optionally clip gradients
            # with tf.name_scope('clip_grads'):
            #     grads_and_vars = slim.learning.clip_gradient_norms(grads_and_vars, 10)

            total_loss = tf.check_numerics(total_loss,
                                           'LossTensor is inf or nan.')

            # Create gradient updates.
            grad_updates = training_optimizer.apply_gradients(
                grads_and_vars, global_step=global_step)
            update_ops.append(grad_updates)

            update_op = tf.group(*update_ops)
            with tf.control_dependencies([update_op]):
                train_tensor = tf.identity(total_loss, name='train_op')

        # Add summaries.
        for loss_tensor in tf.losses.get_losses():
            global_summaries.add(
                tf.summary.scalar(loss_tensor.op.name, loss_tensor))
        global_summaries.add(
            tf.summary.scalar('TotalLoss', tf.losses.get_total_loss()))

        # Add the summaries from the first clone. These contain the summaries
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))
        summaries |= global_summaries
        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # GPU settings
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        session_config.gpu_options.allow_growth = False
        # Save checkpoints regularly.
        keep_checkpoint_every_n_hours = 2.0

        saver = tf.train.Saver(
            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)

        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(train_tensor,
                            logdir=logdir,
                            master=FLAGS.master,
                            is_chief=(FLAGS.task == 0),
                            session_config=session_config,
                            startup_delay_steps=10,
                            summary_op=summary_op,
                            init_fn=init_fn,
                            number_of_steps=num_batches_epoch *
                            FLAGS.num_epoches,
                            save_summaries_secs=240,
                            sync_optimizer=None,
                            saver=saver)
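The manual schedule above drops the learning rate by a factor of 10 at 60%, 75%, and 90% of the total training steps. A small worked sketch of that arithmetic, with assumed values for num_batches_epoch, num_epoches, and FLAGS.learning_rate:

# Assumed values, for illustration only.
num_batches_epoch, num_epoches, base_lr = 1000, 10, 0.01
total_steps = num_batches_epoch * num_epoches
boundaries = [int(total_steps * f) for f in (0.60, 0.75, 0.90)]
rates = [base_lr, base_lr * 0.1, base_lr * 0.01, base_lr * 0.001]
print(boundaries)  # [6000, 7500, 9000]
# rates: 0.01, 0.001, 0.0001, 1e-05 (up to float rounding)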
Example #7
def graph_fn(global_step):
    boundaries = []
    rates = [0.01]
    learning_rate = learning_schedules.manual_stepping(
        global_step, boundaries, rates)
    return (learning_rate, )

def graph_fn(global_step):
    boundaries = [2, 3, 7]
    rates = [1.0, 2.0, 3.0, 4.0]
    learning_rate = learning_schedules.manual_stepping(
        global_step, boundaries, rates)
    return (learning_rate, )
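The first snippet is the degenerate case: with an empty boundaries list and a single rate, manual_stepping amounts to a constant learning rate. A quick pure-Python check of that behaviour:

import bisect

boundaries, rates = [], [0.01]
# bisect_right([], step) is always 0, so the schedule stays at rates[0].
print([rates[bisect.bisect_right(boundaries, s)] for s in range(5)])
# [0.01, 0.01, 0.01, 0.01, 0.01]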