Exemplo n.º 1
0
def tfprint(tensor, fun=None, prefix=""):
    """Route `tensor` through a `tf.Print` op that logs `fun(tensor)`.

    Args:
        tensor: Value forwarded as the first argument of `tf.Print`.
        fun: Optional callable applied to `tensor` to produce the value that
            gets printed. When None, `tensor` itself is printed.
        prefix: Passed to `tf.Print` as its third positional argument.

    Returns:
        Whatever `tf.Print(tensor, [printed_value], prefix)` returns.
    """
    printed_value = tensor if fun is None else fun(tensor)
    return tf.Print(tensor, [printed_value], prefix)
Exemplo n.º 2
0
def get_masked_sent_lm_output(bert_config,
                              input_tensor,
                              cur_sent_reps_doc_unmask,
                              sent_masked_positions,
                              sent_masked_weights,
                              debugging=False):
  """Get the sentence level masked LM loss.

  Args:
      bert_config: BertConfig object. The configuration file for the document
        level BERT model.
      input_tensor: float Tensor. The contextualized representations of all
        sentences learned by the document level BERT model. The shape is [batch,
        loop_sent_number_per_doc, hidden]. This is the model prediction.
      cur_sent_reps_doc_unmask: float Tensor. The unmasked sentence
        representations of the current document. The shape is [batch,
        loop_sent_number_per_doc, hidden]. This is the source of the ground
        truth and negative examples in the masked sentence prediction.
      sent_masked_positions: int Tensor. The masked sentence positions in the
        current document. The shape is [batch, max_masked_sent_per_doc].
      sent_masked_weights: float Tensor. The masked sentence weights in the
        current document. The shape is [batch, max_masked_sent_per_doc].
      debugging: bool. Whether it is in the debugging mode. When true, the
        generated label ids are routed through a tf.Print op.

  Returns:
    A tuple (loss, per_example_loss, log_probs):
      loss: the weighted mean of per_example_loss, using the flattened
        sent_masked_weights as weights (1e-5 is added to the weight sum to
        avoid dividing by zero).
      per_example_loss: the negative log probability of the diagonal (ground
        truth) entry for every masked position, shape
        [batch * max_predictions_per_seq].
      log_probs: the log-softmax of the logits, shape
        [batch * max_predictions_per_seq, batch * max_predictions_per_seq].

  """
  # The current method for masked sentence prediction: we approach this problem
  # as a multi-class classification problem similar to the masked word LM task.
  # For each masked sentence position, the sentence in the current position is
  # the positive example. The other co-masked sentences in the current document
  # and in the other documents of the same batch are the negative examples. We
  # compute the cross entropy loss over the sentence prediction task following
  # the implementation of the masked word LM loss in the BERT model.

  input_tensor_shape = modeling.get_shape_list(input_tensor)
  batch_size = input_tensor_shape[0]
  masked_position_shape = modeling.get_shape_list(sent_masked_positions)
  max_predictions_per_seq = masked_position_shape[1]

  # In the context of masked sentence prediction, max_predictions_per_seq is
  # the same as max_masked_sent_per_doc.
  # Output Shape: [batch * max_predictions_per_seq, hidden].
  # Input_tensor is the model prediction for each masked position.
  input_tensor = gather_indexes(input_tensor, sent_masked_positions)
  # Independent_sent_embeddings is the ground truth input sentence embeddings
  # for the document level BERT model. The output shape is [batch *
  # max_predictions_per_seq, hidden].
  independent_sent_embeddings = gather_indexes(cur_sent_reps_doc_unmask,
                                               sent_masked_positions)

  with tf.variable_scope("cls/sent_predictions", reuse=tf.AUTO_REUSE):
    # We apply one more non-linear transformation before the output layer.
    # This matrix is not used after pre-training.
    with tf.variable_scope("transform"):
      input_tensor = tf.layers.dense(
          input_tensor,
          units=bert_config.hidden_size,
          activation=modeling.get_activation(bert_config.hidden_act),
          kernel_initializer=modeling.create_initializer(
              bert_config.initializer_range))
      # Output Shape: [batch * max_predictions_per_seq, hidden].
      input_tensor = modeling.layer_norm(input_tensor)

    # The output "weights" are the gathered unmasked sentence embeddings (used
    # in the matmul below); the only variable created here is an output-only
    # bias with one entry per predicted position.
    output_bias = tf.get_variable(
        "output_bias",
        shape=[batch_size * max_predictions_per_seq],
        initializer=tf.zeros_initializer())
    # Shape of input_tensor [batch * max_predictions_per_seq, hidden].
    # Shape of independent_sent_embeddings is [batch * max_predictions_per_seq,
    # hidden].
    # Shape of logits: [batch * max_predictions_per_seq,
    # batch * max_predictions_per_seq].
    logits = tf.matmul(
        input_tensor, independent_sent_embeddings, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    # Output Shape: [batch * max_predictions_per_seq,
    # batch * max_predictions_per_seq].
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # Output Shape: [batch * max_predictions_per_seq].
    # The label_ids are the label indices in the "sentence vocabulary", i.e.
    # the row index itself. Thus if batch=32 and max_predictions_per_seq = 2,
    # the label ids are [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ..., 63]. In the ground
    # truth one hot label matrix only the diagonal positions are 1; all other
    # positions are 0.
    label_ids = tf.range(
        0, batch_size * max_predictions_per_seq, dtype=tf.int32)
    if debugging:
      label_ids = tf.Print(
          label_ids, [label_ids],
          message="label_ids in get_masked_sent_lm_output",
          summarize=30)
    # Output Shape: [batch * max_predictions_per_seq].
    # The label_weights is the flattened sent_masked_weights vector.
    # NOTE(review): the original comment says the weight is 1.0 for sampled
    # real sentences and 0.0 for sampled masked sentences; this presumably
    # means 0.0 for padded prediction slots -- confirm against the input
    # pipeline.
    label_weights = tf.reshape(sent_masked_weights, [-1])

    # Output Shape: [batch * max_predictions_per_seq,
    # batch * max_predictions_per_seq].
    one_hot_labels = tf.one_hot(
        label_ids, depth=batch_size * max_predictions_per_seq, dtype=tf.float32)

    # Output Shape: [batch * max_predictions_per_seq].
    per_example_loss = -tf.reduce_sum(log_probs * one_hot_labels, axis=[-1])
    # Scalar: weighted sum of the per-position losses.
    numerator = tf.reduce_sum(label_weights * per_example_loss)
    # Scalar: sum of the weights, with a small epsilon so an all-zero weight
    # vector does not cause a division by zero.
    denominator = tf.reduce_sum(label_weights) + 1e-5
    # Scalar: weighted mean loss.
    loss = numerator / denominator
    # loss is a scalar.
    # Shape of per_example_loss is [batch * max_predictions_per_seq].
  return (loss, per_example_loss, log_probs)
Exemplo n.º 3
0
def _define_collect(batch_env,
                    ppo_hparams,
                    scope,
                    frame_stack_size,
                    eval_phase,
                    sampling_temp,
                    force_beginning_resets,
                    distributional_size=1):
    """Build the graph ops that collect a rollout from `batch_env`.

    Args:
        batch_env: Batch environment.
        ppo_hparams: PPO hparams, defined in tensor2tensor.models.research.rl.
        scope: var scope.
        frame_stack_size: Number of last observations to feed into the policy.
        eval_phase: Whether this is an evaluation rollout. When true, the
            collection loop keeps stepping while the running count of
            finished episodes is below the number of agents (instead of
            running for `epoch_length` steps), and the
            `effective_num_agents` reshaping of the memory is skipped.
        sampling_temp: Sampling temperature for the policy.
        force_beginning_resets: Whether to reset at the beginning of each
            episode.
        distributional_size: optional, number of buckets in distributional
            RL.

    Returns:
        A tuple `(memory, summaries, initialization_lambda)`:
        memory: list of tensors (observations, rewards, dones, actions,
            pdfs, value functions) containing a rollout of the environment
            from the nested wrapped structure.
        summaries: merged summary of the mean score (only when at least one
            episode finished) and the number of finished episodes.
        initialization_lambda: function taking a session and calling
            `initialize(sess)` on the environment and on every wrapper.
    """
    epoch_length = ppo_hparams.epoch_length

    to_initialize = []
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        num_agents = batch_env.batch_size

        to_initialize.append(batch_env)
        wrappers = [(StackWrapper, {
            "history": frame_stack_size
        }), (_MemoryWrapper, {})]
        for wrapper_cls, wrapper_kwargs in wrappers:
            tf.logging.info("Applying wrapper %s(%s) to env %s.",
                            str(wrapper_cls), str(wrapper_kwargs),
                            str(batch_env))
            batch_env = wrapper_cls(batch_env, **wrapper_kwargs)
            to_initialize.append(batch_env)

        # Both are read from the outermost wrapper, i.e. after the loop.
        rollout_metadata = _rollout_metadata(batch_env, distributional_size)
        speculum = batch_env.speculum

        def initialization_lambda(sess):
            """Initialize the raw environment and every wrapper around it."""
            for env in to_initialize:
                env.initialize(sess)

        # One non-trainable buffer of length epoch_length per rollout field.
        memory = [
            tf.get_variable(  # pylint: disable=g-complex-comprehension
                "collect_memory_%d_%s" % (epoch_length, name),
                shape=[epoch_length] + shape,
                dtype=dtype,
                initializer=tf.zeros_initializer(),
                trainable=False) for (shape, dtype, name) in rollout_metadata
        ]

        cumulative_rewards = tf.get_variable("cumulative_rewards",
                                             len(batch_env),
                                             trainable=False)

        eval_phase_t = tf.convert_to_tensor(eval_phase)
        should_reset_var = tf.Variable(True, trainable=False)
        zeros_tensor = tf.zeros(len(batch_env))

    force_beginning_resets = tf.convert_to_tensor(force_beginning_resets)

    def reset_ops_group():
        """Reset every environment and zero the cumulative rewards."""
        return tf.group(batch_env.reset(tf.range(len(batch_env))),
                        tf.assign(cumulative_rewards, zeros_tensor))

    # Reset on the very first run, or on every run when resets are forced.
    reset_op = tf.cond(
        tf.logical_or(should_reset_var.read_value(), force_beginning_resets),
        reset_ops_group, tf.no_op)

    with tf.control_dependencies([reset_op]):
        reset_once_op = tf.assign(should_reset_var, False)

    with tf.control_dependencies([reset_once_op]):

        def step(index, scores_sum, scores_num):
            """Single step."""
            index %= epoch_length  # Only needed in eval runs.
            # Note - the only way to ensure making a copy of tensor is to run
            # simple operation. We are waiting for tf.copy:
            # https://github.com/tensorflow/tensorflow/issues/11186
            obs_copy = batch_env.observ + 0
            value_fun_shape = (num_agents, )
            if distributional_size > 1:
                value_fun_shape = (num_agents, distributional_size)

            def env_step(arg1, arg2, arg3):  # pylint: disable=unused-argument
                """Step of the environment."""

                (logits, value_function) = get_policy(obs_copy, ppo_hparams,
                                                      batch_env.action_space,
                                                      distributional_size)
                action = common_layers.sample_with_temperature(
                    logits, sampling_temp)
                action = tf.cast(action, tf.int32)
                action = tf.reshape(action, shape=(num_agents, ))

                reward, done = batch_env.simulate(action)

                pdf = tfp.distributions.Categorical(logits=logits).prob(action)
                pdf = tf.reshape(pdf, shape=(num_agents, ))
                value_function = tf.reshape(value_function,
                                            shape=value_fun_shape)
                done = tf.reshape(done, shape=(num_agents, ))

                with tf.control_dependencies([reward, done]):
                    return tf.identity(pdf), tf.identity(value_function), \
                           tf.identity(done)

            # The environment is stepped only while the speculum queue is
            # empty.
            # TODO(piotrmilos): while_body is executed at most once,
            # thus should be replaced with tf.cond
            pdf, value_function, top_level_done = tf.while_loop(
                lambda _1, _2, _3: tf.equal(speculum.size(), 0),
                env_step,
                [
                    tf.constant(0.0, shape=(num_agents, )),
                    tf.constant(0.0, shape=value_fun_shape),
                    tf.constant(False, shape=(num_agents, ))
                ],
                parallel_iterations=1,
                back_prop=False,
            )

            with tf.control_dependencies([pdf, value_function]):
                obs, reward, done, action = speculum.dequeue()
                to_save = [obs, reward, done, action, pdf, value_function]
                # Write this step's values into slot `index` of each buffer.
                save_ops = [
                    tf.scatter_update(memory_slot, index, value)
                    for memory_slot, value in zip(memory, to_save)
                ]
                cumulate_rewards_op = cumulative_rewards.assign_add(reward)

                agent_indices_to_reset = tf.where(top_level_done)[:, 0]
            with tf.control_dependencies([cumulate_rewards_op]):
                # Read the rewards only after this step's reward was added.
                scores_sum_delta = tf.reduce_sum(
                    tf.gather(cumulative_rewards.read_value(),
                              agent_indices_to_reset))
                scores_num_delta = tf.count_nonzero(done, dtype=tf.int32)
            with tf.control_dependencies(save_ops +
                                         [scores_sum_delta, scores_num_delta]):
                reset_env_op = batch_env.reset(agent_indices_to_reset)
                reset_cumulative_rewards_op = tf.scatter_update(
                    cumulative_rewards, agent_indices_to_reset,
                    tf.gather(zeros_tensor, agent_indices_to_reset))
            with tf.control_dependencies(
                [reset_env_op, reset_cumulative_rewards_op]):
                return [
                    index + 1, scores_sum + scores_sum_delta,
                    scores_num + scores_num_delta
                ]

        def stop_condition(i, _, resets):
            """Loop condition: episode count in eval, step count otherwise."""
            return tf.cond(eval_phase_t, lambda: resets < num_agents,
                           lambda: i < epoch_length)

        init = [tf.constant(0), tf.constant(0.0), tf.constant(0)]
        index, scores_sum, scores_num = tf.while_loop(stop_condition,
                                                      step,
                                                      init,
                                                      parallel_iterations=1,
                                                      back_prop=False)

    # We handle force_beginning_resets differently. We assume that all envs
    # are reset at the end of the episode (though it actually happens at the
    # beginning of the next one).
    scores_num = tf.cond(force_beginning_resets,
                         lambda: scores_num + len(batch_env),
                         lambda: scores_num)

    with tf.control_dependencies([scores_sum]):
        scores_sum = tf.cond(
            force_beginning_resets, lambda: scores_sum + tf.reduce_sum(
                cumulative_rewards.read_value()), lambda: scores_sum)

    mean_score = tf.cond(tf.greater(scores_num, 0),
                         lambda: scores_sum / tf.cast(scores_num, tf.float32),
                         lambda: 0.)
    printing = tf.Print(0, [mean_score, scores_sum, scores_num],
                        "mean_score: ")
    with tf.control_dependencies([index, printing]):
        memory = [mem.read_value() for mem in memory]
        # When generating real data together with PPO training we must use
        # single agent. For PPO to work we reshape the history, as if it was
        # generated by real_ppo_effective_num_agents.
        if ppo_hparams.effective_num_agents is not None and not eval_phase:
            new_memory = []
            effective_num_agents = ppo_hparams.effective_num_agents
            assert epoch_length % ppo_hparams.effective_num_agents == 0, (
                "The rollout of ppo_hparams.epoch_length will be distributed "
                "amongst effective_num_agents of agents")
            # Exact because divisibility was asserted just above.
            new_epoch_length = epoch_length // effective_num_agents
            for mem, info in zip(memory, rollout_metadata):
                shape, _, name = info
                new_shape = [effective_num_agents, new_epoch_length
                             ] + shape[1:]
                # Permutation swapping the first two axes, used both before
                # and after the reshape.
                perm = list(range(len(shape) + 1))
                perm[0] = 1
                perm[1] = 0
                mem = tf.transpose(mem, perm=perm)
                mem = tf.reshape(mem, shape=new_shape)
                mem = tf.transpose(mem,
                                   perm=perm,
                                   name="collect_memory_%d_%s" %
                                   (new_epoch_length, name))
                new_memory.append(mem)
            memory = new_memory

        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            # `str` called with no arguments returns "", which serves as the
            # empty branch when no episode finished in this iteration.
            mean_score_summary = tf.cond(
                tf.greater(scores_num, 0),
                lambda: tf.summary.scalar("mean_score_this_iter", mean_score),
                str)
            summaries = tf.summary.merge([
                mean_score_summary,
                tf.summary.scalar("episodes_finished_this_iter", scores_num)
            ])
            return memory, summaries, initialization_lambda
Exemplo n.º 4
0
def main(_):
    """Build the style-transfer evaluation graph and run the evaluation loop.

    Reads its whole configuration from FLAGS. Optionally adds a 'Style Grid'
    image summary (FLAGS.style_grid) and per-style streaming loss metrics
    (FLAGS.learning_curves), then hands control to
    slim.evaluation.evaluation_loop.

    Raises:
        ValueError: if the number of style coefficients differs from
            FLAGS.num_styles.
    """
    with tf.Graph().as_default():
        # Create inputs in [0, 1], as expected by vgg_16.
        inputs, _ = image_utils.imagenet_inputs(FLAGS.batch_size,
                                                FLAGS.image_size)
        evaluation_images = image_utils.load_evaluation_images(
            FLAGS.image_size)

        # Parse the style and weight flags; every style gets a coefficient of
        # 1.0 unless the flag provides explicit values.
        if FLAGS.style_coefficients is not None:
            style_coefficients = ast.literal_eval(FLAGS.style_coefficients)
        else:
            style_coefficients = [1.0] * FLAGS.num_styles
        if len(style_coefficients) != FLAGS.num_styles:
            raise ValueError(
                'number of style coefficients differs from number of styles')
        content_weights = ast.literal_eval(FLAGS.content_weights)
        style_weights = ast.literal_eval(FLAGS.style_weights)

        # Load the style images, one per style, in dataset order.
        dataset_path = os.path.expanduser(FLAGS.style_dataset_file)
        style_images, labels, style_gram_matrices = (
            image_utils.style_image_inputs(dataset_path,
                                           batch_size=FLAGS.num_styles,
                                           image_size=FLAGS.image_size,
                                           square_crop=True,
                                           shuffle=False))
        labels = tf.unstack(labels)

        def _create_normalizer_params(style_label):
            """Builds the normalizer parameters for a single style label."""
            return dict(labels=tf.expand_dims(style_label, 0),
                        num_categories=FLAGS.num_styles,
                        center=True,
                        scale=True)

        def _stylize(images, style_label, reuse):
            """Runs the transform network on `images` for one style."""
            return model.transform(
                images,
                alpha=FLAGS.alpha,
                reuse=reuse,
                normalizer_params=_create_normalizer_params(style_label))

        # Dummy call that creates the variables, so every later call can
        # simply pass reuse=True.
        _stylize(inputs, labels[0], False)

        def _style_sweep(image):
            """Stacks `image` followed by its rendering in every style."""
            batched = tf.expand_dims(image, 0)
            renderings = [
                _stylize(batched, style_label, True) for style_label in labels
            ]
            return tf.concat([batched] + renderings, 0)

        if FLAGS.style_grid:
            blank_cell = tf.ones([1, FLAGS.image_size, FLAGS.image_size, 3])
            style_row = tf.concat([blank_cell, style_images], 0)
            stylized_training_example = _style_sweep(inputs[0])
            stylized_evaluation_images = [
                _style_sweep(image) for image in tf.unstack(evaluation_images)
            ]
            stylized_noise = _style_sweep(
                tf.random_uniform([FLAGS.image_size, FLAGS.image_size, 3]))
            stylized_style_images = [
                _style_sweep(image) for image in tf.unstack(style_images)
            ]

            # Rows: the styles themselves, one training example, noise, then
            # the evaluation images (plus the styles applied to each other
            # when crossover is requested).
            grid_rows = ([style_row, stylized_training_example, stylized_noise]
                         + stylized_evaluation_images)
            num_rows = 3 + evaluation_images.get_shape().as_list()[0]
            if FLAGS.style_crossover:
                grid_rows = grid_rows + stylized_style_images
                num_rows = num_rows + FLAGS.num_styles
            grid = tf.concat(grid_rows, 0)
            grid_shape = [num_rows, 1 + FLAGS.num_styles]

            grid_image = image_utils.form_image_grid(
                grid, grid_shape, [FLAGS.image_size, FLAGS.image_size], 3)
            tf.summary.image('Style Grid',
                             tf.cast(grid_image * 255.0, tf.uint8))

        if not FLAGS.learning_curves:
            eval_op = None
            num_evals = 1
        else:
            metrics = {}
            for i, label in enumerate(labels):
                # Keep only the i-th style's slice of every Gram matrix.
                gram_matrices = {
                    key: value[i:i + 1]
                    for key, value in style_gram_matrices.items()
                }
                stylized_inputs = _stylize(inputs, label, True)
                _, loss_dict = learning.total_loss(inputs,
                                                   stylized_inputs,
                                                   gram_matrices,
                                                   content_weights,
                                                   style_weights,
                                                   reuse=i > 0)
                for key, value in loss_dict.items():
                    metrics['{}_style_{}'.format(
                        key, i)] = slim.metrics.streaming_mean(value)

            names_values, names_updates = slim.metrics.aggregate_metric_map(
                metrics)
            for name, value in names_values.items():
                # Wrap every scalar summary in a print op and register the
                # print op in the SUMMARIES collection.
                summary_op = tf.summary.scalar(name, value, [])
                print_op = tf.Print(summary_op, [value], name)
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, print_op)
            eval_op = list(names_updates.values())
            num_evals = FLAGS.num_evals

        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=os.path.expanduser(FLAGS.train_dir),
            logdir=os.path.expanduser(FLAGS.eval_dir),
            eval_op=eval_op,
            num_evals=num_evals,
            eval_interval_secs=FLAGS.eval_interval_secs)
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

# Build the graph: minimize f(x) = x ** 2 over a single scalar variable.
tf.reset_default_graph()
x = tf.get_variable('x', shape=(), dtype=tf.float32)
f = x ** 2
# Log x and f with tf.Print every time f is evaluated.
f = tf.Print(f, [x, f], "x, f:")
# Minimize f with plain gradient descent.
optimizer = tf.train.GradientDescentOptimizer(0.1)
step = optimizer.minimize(f)
# Variables are trainable by default (the `trainable` argument of
# tf.get_variable), so there is no need to specify it again. All trainable
# variables can be listed with tf.trainable_variables().

# Create a session, initialize the variables and take the gradient steps.
sess = tf.InteractiveSession()
try:
    sess.run(tf.global_variables_initializer())

    # Take 10 gradient descent steps. `step` is an op, so the first element
    # of the fetched list is None; the second is the value of f.
    for i in range(10):
        print(sess.run([step, f]))
finally:
    # Close through the instance so InteractiveSession's own close() runs,
    # and do it in `finally` so the session is released even if a step fails.
    sess.close()
Exemplo n.º 6
0
def inception_model_fn(features, labels, mode, params):
    """Inception v4 model using Estimator API.

    Args:
        features: The input tensor, or a dict holding it under the
            'feature' key.
        labels: Class-id labels; they are one-hot encoded here with
            FLAGS.num_classes classes.
        mode: A tf.estimator.ModeKeys value.
        params: dict; its 'model_transpose_dims' entry is passed to
            tensor_transform_fn together with the features.

    Returns:
        A tf.estimator.EstimatorSpec in PREDICT mode, otherwise a
        contrib_tpu.TPUEstimatorSpec.

    Raises:
        ValueError: if FLAGS.precision is neither 'bfloat16' nor 'float32',
            or, when training, if FLAGS.optimizer is not one of 'sgd',
            'momentum' or 'RMS'.
    """
    num_classes = FLAGS.num_classes
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_eval = (mode == tf.estimator.ModeKeys.EVAL)

    if isinstance(features, dict):
        features = features['feature']

    features = tensor_transform_fn(features, params['model_transpose_dims'])

    # This nested function allows us to avoid duplicating the logic which
    # builds the network, for different values of --precision.
    def build_network():
        """Builds inception_v4 and returns float32 logits and end points."""
        if FLAGS.precision == 'bfloat16':
            with contrib_tpu.bfloat16_scope():
                logits, end_points = inception.inception_v4(
                    features, num_classes, is_training=is_training)
            logits = tf.cast(logits, tf.float32)
        elif FLAGS.precision == 'float32':
            logits, end_points = inception.inception_v4(
                features, num_classes, is_training=is_training)
        else:
            # Fail with a clear message instead of an unbound-variable error
            # at the return statement below.
            raise ValueError('Unknown precision: %s' % FLAGS.precision)
        return logits, end_points

    if FLAGS.clear_update_collections:
        with arg_scope(
                inception.inception_v4_arg_scope(
                    weight_decay=0.0,
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON,
                    updates_collections=None)):
            logits, end_points = build_network()
    else:
        with arg_scope(
                inception.inception_v4_arg_scope(
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON)):
            logits, end_points = build_network()

    predictions = {
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    if is_eval and FLAGS.display_tensors and not FLAGS.use_tpu:
        # Print the predictions, then the labels. `labels` is replaced by the
        # printing op so the prints fire whenever the labels are consumed.
        with tf.control_dependencies([
                tf.Print(predictions['classes'], [predictions['classes']],
                         summarize=FLAGS.eval_batch_size,
                         message='prediction: ')
        ]):
            labels = tf.Print(labels, [labels],
                              summarize=FLAGS.eval_batch_size,
                              message='label: ')

    one_hot_labels = tf.one_hot(labels, num_classes, dtype=tf.int32)

    # Both cross-entropy terms are registered in the losses collection and
    # summed through tf.losses.get_losses() below.
    if 'AuxLogits' in end_points:
        tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                        logits=tf.cast(end_points['AuxLogits'],
                                                       tf.float32),
                                        weights=0.4,
                                        label_smoothing=0.1,
                                        scope='aux_loss')

    tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                    logits=logits,
                                    weights=1.0,
                                    label_smoothing=0.1)

    losses = tf.add_n(tf.losses.get_losses())
    # L2 penalty over every trainable variable whose name contains 'weights'
    # but not 'BatchNorm'.
    l2_loss = []
    for v in tf.trainable_variables():
        tf.logging.info(v.name)
        if 'BatchNorm' not in v.name and 'weights' in v.name:
            l2_loss.append(tf.nn.l2_loss(v))
        tf.logging.info(len(l2_loss))
    loss = losses + WEIGHT_DECAY * tf.add_n(l2_loss)

    initial_learning_rate = FLAGS.learning_rate * FLAGS.train_batch_size / 256
    # Adjust the initial learning rate for warmup
    initial_learning_rate /= (
        FLAGS.learning_rate_decay**((FLAGS.warmup_epochs + FLAGS.cold_epochs) /
                                    FLAGS.learning_rate_decay_epochs))
    final_learning_rate = 0.0001 * initial_learning_rate

    host_call = None
    train_op = None
    if is_training:
        batches_per_epoch = _NUM_TRAIN_IMAGES / FLAGS.train_batch_size
        global_step = tf.train.get_or_create_global_step()
        current_epoch = tf.cast(
            (tf.cast(global_step, tf.float32) / batches_per_epoch), tf.int32)

        # Schedule: constant cold rate for the first cold_epochs, then a rate
        # growing linearly with the epoch until warmup_epochs + cold_epochs,
        # then staircase exponential decay.
        clr = FLAGS.cold_learning_rate
        wlr = initial_learning_rate / (FLAGS.warmup_epochs + FLAGS.cold_epochs)
        learning_rate = tf.where(
            tf.greater_equal(current_epoch, FLAGS.cold_epochs), (tf.where(
                tf.greater_equal(current_epoch,
                                 FLAGS.warmup_epochs + FLAGS.cold_epochs),
                tf.train.exponential_decay(
                    learning_rate=initial_learning_rate,
                    global_step=global_step,
                    decay_steps=int(
                        FLAGS.learning_rate_decay_epochs * batches_per_epoch),
                    decay_rate=FLAGS.learning_rate_decay,
                    staircase=True),
                tf.multiply(tf.cast(current_epoch, tf.float32), wlr))), clr)

        # Set a minimum boundary for the learning rate.
        learning_rate = tf.maximum(learning_rate,
                                   final_learning_rate,
                                   name='learning_rate')

        if FLAGS.optimizer == 'sgd':
            tf.logging.info('Using SGD optimizer')
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        elif FLAGS.optimizer == 'momentum':
            tf.logging.info('Using Momentum optimizer')
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                   momentum=0.9)
        elif FLAGS.optimizer == 'RMS':
            tf.logging.info('Using RMS optimizer')
            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  RMSPROP_DECAY,
                                                  momentum=RMSPROP_MOMENTUM,
                                                  epsilon=RMSPROP_EPSILON)
        else:
            # The message needs a %s placeholder for its argument, and the
            # function must stop here: `optimizer` would otherwise be unbound
            # on the lines below.
            tf.logging.fatal('Unknown optimizer: %s', FLAGS.optimizer)
            raise ValueError('Unknown optimizer: %s' % FLAGS.optimizer)

        if FLAGS.use_tpu:
            optimizer = contrib_tpu.CrossShardOptimizer(optimizer)

        # Run the UPDATE_OPS collection before the training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)
        if FLAGS.moving_average:
            ema = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY,
                                                    num_updates=global_step)
            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())
            with tf.control_dependencies([train_op
                                          ]), tf.name_scope('moving_average'):
                train_op = ema.apply(variables_to_average)

        # To log the loss, current learning rate, and epoch for Tensorboard,
        # the summary op needs to be run on the host CPU via host_call.
        # host_call expects [batch_size, ...] Tensors, thus reshape to
        # introduce a batch dimension. These Tensors are implicitly
        # concatenated to [params['batch_size']].
        gs_t = tf.reshape(global_step, [1])
        loss_t = tf.reshape(loss, [1])
        lr_t = tf.reshape(learning_rate, [1])
        ce_t = tf.reshape(current_epoch, [1])

        if not FLAGS.skip_host_call:

            def host_call_fn(gs, loss, lr, ce):
                """Training host call. Creates scalar summaries for metrics.

                This function is executed on the CPU and should not directly
                reference any Tensors in the rest of the `model_fn`. To pass
                Tensors from the model to the `metric_fn`, provide as part of
                the `host_call`. See
                https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
                for more information.

                Arguments should match the list of `Tensor` objects passed as
                the second element in the tuple passed to `host_call`.

                Args:
                    gs: `Tensor` with shape `[batch]` for the global_step.
                    loss: `Tensor` with shape `[batch]` for the training loss.
                    lr: `Tensor` with shape `[batch]` for the learning_rate.
                    ce: `Tensor` with shape `[batch]` for the current_epoch.

                Returns:
                    List of summary ops to run on the CPU host.
                """
                gs = gs[0]
                with summary.create_file_writer(FLAGS.model_dir).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', tf.reduce_mean(loss), step=gs)
                        summary.scalar('learning_rate',
                                       tf.reduce_mean(lr),
                                       step=gs)
                        summary.scalar('current_epoch',
                                       tf.reduce_mean(ce),
                                       step=gs)

                        return summary.all_summary_ops()

            host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    eval_metrics = None
    if is_eval:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `eval_metrics`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `eval_metrics`.

            Args:
                labels: `Tensor` with shape `[batch, ]`.
                logits: `Tensor` with shape `[batch, num_classes]`.

            Returns:
                A dict of the metrics to return from evaluation.
            """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'accuracy': top_1_accuracy,
                'accuracy@5': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

    return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                        loss=loss,
                                        train_op=train_op,
                                        host_call=host_call,
                                        eval_metrics=eval_metrics)
Exemplo n.º 7
0
def debugprint(x, name=''):
    """Return `x` routed through a tf.Print op that logs its min, mean and max.

    Args:
      x: tensor to inspect; its `.name` is appended to the log prefix.
      name: optional text placed before the tensor name in the log prefix.

    Returns:
      The result of tf.Print on `x`, i.e. a pass-through of `x` that prints
      the three summary statistics when evaluated.
    """
    label = name + '\t' + x.name
    stats = [tf.reduce_min(x), tf.reduce_mean(x), tf.reduce_max(x)]
    return tf.Print(x, stats, label)
Exemplo n.º 8
0
 def fail_push():
     """Build the op group that logs and increments the `failed_push` counter.

     `failed_push` comes from the enclosing scope. The returned group bundles
     a tf.Print of the counter with an `assign_add(1)` on it.
     """
     report = tf.Print(failed_push, [failed_push], "Failed to push")
     bump = failed_push.assign_add(1)
     return tf.group(bump, report, name="fail_push")
Exemplo n.º 9
0
def build_smith_dual_encoder(dual_encoder_config,
                             train_mode,
                             is_training,
                             input_ids_1,
                             input_mask_1,
                             masked_lm_positions_1,
                             masked_lm_ids_1,
                             masked_lm_weights_1,
                             input_ids_2,
                             input_mask_2,
                             masked_lm_positions_2,
                             masked_lm_ids_2,
                             masked_lm_weights_2,
                             use_one_hot_embeddings,
                             documents_match_labels,
                             debugging=False):
    """Build the dual encoder SMITH model.

    The function first learns sentence level representations for both
    documents, optionally masks some sentences and computes a masked sentence
    LM loss, runs the document level BERT model over the (masked or unmasked)
    sentence representations, combines the representations into one vector per
    document and finally computes the cosine based text matching loss.

    Args:
      dual_encoder_config: the configuration file for the dual encoder model.
      train_mode: string. The train mode of the current. It can be finetune,
        pretrain or joint_train.
      is_training: bool. Whether it in training mode.
      input_ids_1: int Tensor with shape [batch, max_seq_length]. The input ids
        of input examples of text 1.
      input_mask_1: int Tensor with shape [batch, max_seq_length]. The input
        masks of input examples of text 1.
      masked_lm_positions_1: int Tensor with shape [batch,
        max_predictions_per_seq]. The input masked LM prediction positions of
        input examples of text 1. This can be useful to compute the masked word
        prediction LM loss.
      masked_lm_ids_1: int Tensor with shape [batch, max_predictions_per_seq].
        The input masked LM prediction ids of input examples of text 1. It is
        the ground truth in the masked word LM prediction task. This can be
        useful to compute the masked word prediction LM loss.
      masked_lm_weights_1: float Tensor with shape [batch,
        max_predictions_per_seq]. The input masked LM prediction weights of
        input examples of text 1.
      input_ids_2: int Tensor with shape [batch, max_seq_length]. The input ids
        of input examples of text 2.
      input_mask_2: int Tensor with shape [batch, max_seq_length]. The input
        masks of input examples of text 2.
      masked_lm_positions_2: int Tensor with shape [batch,
        max_predictions_per_seq]. The input masked LM prediction positions of
        input examples of text 2. This can be useful to compute the masked word
        prediction LM loss.
      masked_lm_ids_2: int Tensor with shape [batch, max_predictions_per_seq].
        The input masked LM prediction ids of input examples of text 2. It is
        the ground truth in the masked word LM prediction task. This can be
        useful to compute the masked word prediction LM loss.
      masked_lm_weights_2: float Tensor with shape [batch,
        max_predictions_per_seq]. The input masked LM prediction weights of
        input examples of text 2.
      use_one_hot_embeddings: bool. Whether use one hot embeddings.
      documents_match_labels: float Tensor with shape [batch]. The ground truth
        labels for the input examples.
      debugging: bool. Whether it is in the debugging mode. In this function it
        only adds a tf.Print of the two document level input masks.

    Returns:
      A 21-tuple, in this order:
        masked_lm_loss_doc_1, masked_lm_loss_doc_2,
        masked_lm_example_loss_doc_1, masked_lm_example_loss_doc_2,
        masked_lm_weights_doc_1, masked_lm_weights_doc_2,
        masked_sent_lm_loss_1, masked_sent_lm_loss_2,
        masked_sent_per_example_loss_1, masked_sent_per_example_loss_2,
        masked_sent_weight_1, masked_sent_weight_2,
        final_doc_rep_1, final_doc_rep_2,
        input_sent_reps_doc_1_unmask, input_sent_reps_doc_2_unmask,
        output_sent_reps_doc_1, output_sent_reps_doc_2,
        siamese_loss, siamese_example_loss, siamese_logits.
      When use_masked_sentence_lm_loss is disabled in the encoder config, the
      two masked sentence LM losses are the Python int 0 and the masked
      sentence per example losses and weights are `tf.zeros(1)` placeholders.

    Raises:
      ValueError: if the doc_rep_combine_mode in dual_encoder_config is invalid.
    """
    # Two separate BERT configs: one for the sentence level encoder and one
    # for the document level encoder.
    bert_config = modeling.BertConfig.from_json_file(
        dual_encoder_config.encoder_config.bert_config_file)
    doc_bert_config = modeling.BertConfig.from_json_file(
        dual_encoder_config.encoder_config.doc_bert_config_file)
    # Sentence level pass over both documents. It yields the unmasked sentence
    # representations, the document level masks and the masked word LM
    # losses/weights for each document.
    (input_sent_reps_doc_1_unmask, input_mask_doc_level_1_tensor,
     input_sent_reps_doc_2_unmask, input_mask_doc_level_2_tensor,
     masked_lm_loss_doc_1, masked_lm_loss_doc_2, masked_lm_example_loss_doc_1,
     masked_lm_example_loss_doc_2, masked_lm_weights_doc_1,
     masked_lm_weights_doc_2) = layers.learn_sent_reps_normal_loop(
         dual_encoder_config, is_training, train_mode, input_ids_1,
         input_mask_1, masked_lm_positions_1, masked_lm_ids_1,
         masked_lm_weights_1, input_ids_2, input_mask_2, masked_lm_positions_2,
         masked_lm_ids_2, masked_lm_weights_2, use_one_hot_embeddings)
    if debugging:
        # Route the first mask through tf.Print so that both document level
        # masks are logged whenever the mask of document 1 is evaluated.
        input_mask_doc_level_1_tensor = tf.Print(
            input_mask_doc_level_1_tensor,
            [input_mask_doc_level_1_tensor, input_mask_doc_level_2_tensor],
            message="input_mask_doc_level_1_tensor in build_smith_dual_encoder",
            summarize=30)

    if dual_encoder_config.encoder_config.use_masked_sentence_lm_loss:
        # The masking helper needs a static batch size, which differs between
        # training and evaluation.
        batch_size_static = (
            dual_encoder_config.train_eval_config.train_batch_size
            if is_training else
            dual_encoder_config.train_eval_config.eval_batch_size)
        # Generates the sentence masked document representations.
        with tf.variable_scope("mask_sent_in_doc", reuse=tf.AUTO_REUSE):
            # Randomly initialize a masked sentence vector and reuse it.
            # We also need to return the masked sentence position index to get the
            # ground truth labels for the masked positions. The shape of
            # sent_mask_embedding is [hidden].
            sent_mask_embedding = tf.get_variable(
                name="sentence_mask_embedding",
                shape=[bert_config.hidden_size],
                initializer=tf.truncated_normal_initializer(
                    stddev=bert_config.initializer_range))
            # Output Shape: [batch, loop_sent_number_per_doc, hidden].
            (input_sent_reps_doc_1_masked, masked_sent_index_1,
             masked_sent_weight_1) = layers.get_doc_rep_with_masked_sent(
                 input_sent_reps_doc=input_sent_reps_doc_1_unmask,
                 sent_mask_embedding=sent_mask_embedding,
                 input_mask_doc_level=input_mask_doc_level_1_tensor,
                 batch_size_static=batch_size_static,
                 max_masked_sent_per_doc=dual_encoder_config.encoder_config.
                 max_masked_sent_per_doc,
                 loop_sent_number_per_doc=dual_encoder_config.encoder_config.
                 loop_sent_number_per_doc)
            (input_sent_reps_doc_2_masked, masked_sent_index_2,
             masked_sent_weight_2) = layers.get_doc_rep_with_masked_sent(
                 input_sent_reps_doc=input_sent_reps_doc_2_unmask,
                 sent_mask_embedding=sent_mask_embedding,
                 input_mask_doc_level=input_mask_doc_level_2_tensor,
                 batch_size_static=batch_size_static,
                 max_masked_sent_per_doc=dual_encoder_config.encoder_config.
                 max_masked_sent_per_doc,
                 loop_sent_number_per_doc=dual_encoder_config.encoder_config.
                 loop_sent_number_per_doc)
        # Learn the document representations based on masked sentence embeddings.
        # Note that the variables in the DocBert model are not within the
        # "mask_sent_in_doc" variable scope.
        model_doc_1 = modeling.DocBertModel(
            config=doc_bert_config,
            is_training=is_training,
            input_reps=input_sent_reps_doc_1_masked,
            input_mask=input_mask_doc_level_1_tensor)
        model_doc_2 = modeling.DocBertModel(
            config=doc_bert_config,
            is_training=is_training,
            input_reps=input_sent_reps_doc_2_masked,
            input_mask=input_mask_doc_level_2_tensor)
        # Shape of masked_sent_lm_loss_1 [1].
        # Shape of masked_sent_lm_example_loss_1 is [batch *
        # max_predictions_per_seq].
        # The unmasked sentence representations act as the prediction targets;
        # the third return value of the helper is not used here.
        (masked_sent_lm_loss_1, masked_sent_per_example_loss_1,
         _) = layers.get_masked_sent_lm_output(
             doc_bert_config, model_doc_1.get_sequence_output(),
             input_sent_reps_doc_1_unmask, masked_sent_index_1,
             masked_sent_weight_1)
        (masked_sent_lm_loss_2, masked_sent_per_example_loss_2,
         _) = layers.get_masked_sent_lm_output(
             doc_bert_config, model_doc_2.get_sequence_output(),
             input_sent_reps_doc_2_unmask, masked_sent_index_2,
             masked_sent_weight_2)
    else:
        # Learn the document representations based on unmasked sentence embeddings.
        model_doc_1 = modeling.DocBertModel(
            config=doc_bert_config,
            is_training=is_training,
            input_reps=input_sent_reps_doc_1_unmask,
            input_mask=input_mask_doc_level_1_tensor)
        model_doc_2 = modeling.DocBertModel(
            config=doc_bert_config,
            is_training=is_training,
            input_reps=input_sent_reps_doc_2_unmask,
            input_mask=input_mask_doc_level_2_tensor)
        # Placeholders so that the return tuple has the same arity when the
        # masked sentence LM loss is disabled. Note the losses are plain Python
        # ints here, not tensors.
        masked_sent_lm_loss_1 = 0
        masked_sent_lm_loss_2 = 0
        masked_sent_per_example_loss_1 = tf.zeros(1)
        masked_sent_per_example_loss_2 = tf.zeros(1)
        masked_sent_weight_1 = tf.zeros(1)
        masked_sent_weight_2 = tf.zeros(1)

    # AUTO_REUSE lets both documents go through the same variables created
    # inside this scope.
    with tf.variable_scope("seq_rep_from_bert_doc_dense", reuse=tf.AUTO_REUSE):
        normalized_doc_rep_1 = layers.get_seq_rep_from_bert(model_doc_1)
        normalized_doc_rep_2 = layers.get_seq_rep_from_bert(model_doc_2)

        # We also dump the contextualized sentence embedding output by document
        # level Transformer model. These representations maybe useful for sentence
        # level tasks.
        output_sent_reps_doc_1 = model_doc_1.get_sequence_output()
        output_sent_reps_doc_2 = model_doc_2.get_sequence_output()

    # Here we support multiple modes to generate the final document
    # representations based on the word/sentence/document level representations
    # 1. normal: only use the document level representation as the final document
    # representations.
    # 2. sum_concat: firstly compute the sum of all sentence level representations.
    # Then concatenate the sum vector with the document level representations.
    # 3. mean_concat: firstly compute the mean of all sentence level
    # representations. Then concatenate the mean vector with the document level
    # representations.
    # 4. attention: firstly compute the weighted sum of sentence level
    # representations with attention mechanism, then concatenate the weighted sum
    # vector with the document level representations.
    # The document level mask is to indicate whether each sentence is
    # a real sentence (1) or a padded sentence (0). The shape of
    # input_mask_doc_level_1_tensor is [batch, max_doc_length_by_sentence]. The
    # shape of input_sent_reps_doc_1_unmask is
    # [batch, max_doc_length_by_sentence, hidden].
    # NOTE(review): the sum/mean reductions below run over axis 1 of the
    # unmasked sentence representations without applying the document level
    # mask -- confirm that padded sentence positions are meant to contribute.
    final_doc_rep_combine_mode = dual_encoder_config.encoder_config.doc_rep_combine_mode
    if final_doc_rep_combine_mode == constants.DOC_COMBINE_NORMAL:
        final_doc_rep_1 = normalized_doc_rep_1
        final_doc_rep_2 = normalized_doc_rep_2
    elif final_doc_rep_combine_mode == constants.DOC_COMBINE_SUM_CONCAT:
        # Output Shape: [batch, 2*hidden].
        final_doc_rep_1 = tf.concat([
            tf.reduce_sum(input_sent_reps_doc_1_unmask, 1),
            normalized_doc_rep_1
        ],
                                    axis=1)
        final_doc_rep_2 = tf.concat([
            tf.reduce_sum(input_sent_reps_doc_2_unmask, 1),
            normalized_doc_rep_2
        ],
                                    axis=1)
    elif final_doc_rep_combine_mode == constants.DOC_COMBINE_MEAN_CONCAT:
        final_doc_rep_1 = tf.concat([
            tf.reduce_mean(input_sent_reps_doc_1_unmask, 1),
            normalized_doc_rep_1
        ],
                                    axis=1)
        final_doc_rep_2 = tf.concat([
            tf.reduce_mean(input_sent_reps_doc_2_unmask, 1),
            normalized_doc_rep_2
        ],
                                    axis=1)
    elif final_doc_rep_combine_mode == constants.DOC_COMBINE_ATTENTION:
        final_doc_rep_1 = tf.concat([
            layers.get_attention_weighted_sum(
                input_sent_reps_doc_1_unmask, bert_config, is_training,
                dual_encoder_config.encoder_config.
                doc_rep_combine_attention_size), normalized_doc_rep_1
        ],
                                    axis=1)
        final_doc_rep_2 = tf.concat([
            layers.get_attention_weighted_sum(
                input_sent_reps_doc_2_unmask, bert_config, is_training,
                dual_encoder_config.encoder_config.
                doc_rep_combine_attention_size), normalized_doc_rep_2
        ],
                                    axis=1)
    else:
        raise ValueError(
            "Only normal, sum_concat, mean_concat and attention are"
            " supported: %s" % final_doc_rep_combine_mode)
    # Text matching loss between the two final document representations.
    (siamese_loss, siamese_example_loss,
     siamese_logits) = loss_fns.get_prediction_loss_cosine(
         input_tensor_1=final_doc_rep_1,
         input_tensor_2=final_doc_rep_2,
         labels=documents_match_labels,
         similarity_score_amplifier=dual_encoder_config.loss_config.
         similarity_score_amplifier,
         neg_to_pos_example_ratio=dual_encoder_config.train_eval_config.
         neg_to_pos_example_ratio)

    # The shape of masked_lm_loss_doc is [1].
    # The shape of masked_lm_example_loss_doc is [batch * max_predictions_per_seq,
    # max_doc_length_by_sentence].
    return (masked_lm_loss_doc_1, masked_lm_loss_doc_2,
            masked_lm_example_loss_doc_1, masked_lm_example_loss_doc_2,
            masked_lm_weights_doc_1, masked_lm_weights_doc_2,
            masked_sent_lm_loss_1, masked_sent_lm_loss_2,
            masked_sent_per_example_loss_1, masked_sent_per_example_loss_2,
            masked_sent_weight_1, masked_sent_weight_2, final_doc_rep_1,
            final_doc_rep_2, input_sent_reps_doc_1_unmask,
            input_sent_reps_doc_2_unmask, output_sent_reps_doc_1,
            output_sent_reps_doc_2, siamese_loss, siamese_example_loss,
            siamese_logits)
Exemplo n.º 10
0
 def compute_teacher_loss(log_q, reward, baseline, std):
     """Return the mean of `-log_q` weighted by the absolute scaled advantage.

     The weight is `|(reward - baseline) / std|` and is wrapped in
     `tf.stop_gradient`, so gradients only flow through `log_q`. `log_q` is
     routed through tf.Print and is logged with the prefix "log_q: " whenever
     the loss is evaluated.
     """
     weight = tf.stop_gradient(tf.abs((reward - baseline) / std))
     traced_log_q = tf.Print(log_q, [log_q], "log_q: ")
     return tf.reduce_mean(-traced_log_q * weight)
Exemplo n.º 11
0
def main(_):
    """Evaluate one checkpoint of a slim classification network.

    Builds the evaluation graph (dataset provider, preprocessing, batching,
    network and streaming metrics), resolves which checkpoint to load and runs
    `slim.evaluation.evaluate_once` for the requested number of batches. All
    settings are read from the module level `FLAGS`.

    Args:
      _: unused positional argument.

    Raises:
      ValueError: if --dataset_dir is not supplied, or if --checkpoint_path is
        a directory in which no checkpoint can be found.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        [image, label] = provider.get(['image', 'label'])
        # Shift the labels by the same offset that was removed from
        # num_classes above.
        label -= FLAGS.labels_offset

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name,
            is_training=False,
            use_grayscale=FLAGS.use_grayscale)

        eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        if FLAGS.quantize:
            contrib_quantize.create_eval_graph()

        if FLAGS.moving_average_decay:
            # Restore the moving-average versions of the model variables;
            # the global step is added back explicitly.
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
            slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5':
            slim.metrics.streaming_recall_at_k(logits, labels, 5),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(dataset.num_samples /
                                    float(FLAGS.batch_size))

        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            # latest_checkpoint returns None when the directory holds no
            # checkpoint. Fail with a clear message instead of handing None
            # to the evaluator.
            if checkpoint_path is None:
                raise ValueError('No checkpoint found in directory %s' %
                                 FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s', checkpoint_path)

        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore)
Exemplo n.º 12
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: list of tensor. The first `num_layers` entries are the yolo_outputs
        (the output of yolo_body or tiny_yolo_body); the remaining entries are
        y_true (the output of preprocess_true_boxes).
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    print_loss: bool, if True the running loss is routed through tf.Print after
        every layer so the individual loss terms are logged.

    Returns
    -------
    loss: tensor, the xy, wh, confidence and class losses summed over all
        layers, each term being divided by the batch size.

    '''
    num_layers = len(anchors) // 3  # default setting
    # Split the flat argument list into network outputs and ground truth.
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    # Anchor indices assigned to each output layer.
    # NOTE(review): in the two-layer case index 3 appears in both masks and
    # anchor 0 is never used -- confirm this is intended.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    # The input size is derived as 32 times the spatial size of the first
    # output (presumably that output's stride is 32 -- TODO confirm).
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # Channel 4 of y_true is the objectness flag, channels 5+ the class
        # targets (channels 0:4 are the box, used below).
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        # Scale factor 2 - w*h: larger for boxes with a small w*h product.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # For batch item b: keep the true boxes flagged as objects,
            # compute the IoU of every predicted box against them and write
            # 1 where the best IoU is below ignore_thresh, 0 otherwise.
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # Run loop_body for b = 0 .. m-1 (m is the dynamic batch size).
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        # Object cells always contribute to the confidence loss; non-object
        # cells contribute only where ignore_mask is 1.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        # Sum each term over all elements and divide by the batch size.
        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            # Log the running total, this layer's terms and the number of
            # positions kept by the ignore mask.
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
Exemplo n.º 13
0
def debug_tensor(s, msg=None, summarize=10):
    """Return `s` routed through a tf.Print op that logs its shape and value.

    Args:
      s: tensor to inspect.
      msg: log prefix; when empty or None the tensor's own name is used.
      summarize: forwarded to tf.Print as its `summarize` argument.

    Returns:
      A new tensor produced by tf.Print that passes `s` through.
    """
    prefix = msg if msg else s.name
    return tf.Print(s, [tf.shape(s), s], prefix + " ", summarize=summarize)
# Recurrent head of the network. `x`, `input_tensor`, `n_class`, `n_len`,
# `ctc_lambda_func` and `model_path` are defined outside this fragment.
# Flatten every time step of `x` before feeding the recurrent layers.
x = TimeDistributed(Flatten())(x)
rnn_size = 128
# Two stacked bidirectional LSTM layers, both returning the full sequence.
x = Bidirectional(
    RNN(LSTMCell(rnn_size, recurrent_activation='sigmoid'),
        return_sequences=True))(x)
x = Bidirectional(
    RNN(LSTMCell(rnn_size, recurrent_activation='sigmoid'),
        return_sequences=True))(x)
# Softmax over n_class outputs.
x = Dense(n_class, activation='softmax')(x)

# Model that maps input_tensor to the softmax output only.
base_model = Model(inputs=input_tensor, outputs=x)

# Additional inputs handed to ctc_lambda_func together with the network output.
labels = Input(name='the_labels', shape=[n_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
# NOTE(review): tf.Print logs the values of x here, not its shape, although
# the message says "shape of output" -- confirm which one was intended.
print_node = tf.Print(x, [x], "shape of output")
loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                  name='ctc')([print_node, labels, input_length, label_length])

# Model whose output is the value computed by the 'ctc' Lambda layer; the
# weights are then loaded from the 'ctc_best.h5' file under model_path.
model = Model(inputs=[input_tensor, labels, input_length, label_length],
              outputs=loss_out)
model.load_weights(model_path + 'ctc_best.h5')

final_notification_info = ''

for phone_index, phone in enumerate(phone_list):

    table = None
    out = None

    while table is None:
Exemplo n.º 15
0
    def _policy_loss(self, mean, logstd, old_mean, old_logstd, action,
                     advantage, length):
        """Assemble the policy loss and its summaries.

        The loss is the sum of three terms:

        * a surrogate term: the negated masked mean of the probability ratio
          between the current and the behavioral policy, multiplied by the
          gradient-stopped advantage;
        * a KL penalty: `self._penalty` times the masked mean of the KL term
          computed from the behavioral and the current policy;
        * a KL cutoff: a quadratic penalty that is only non-zero where the KL
          exceeds `kl_target * kl_cutoff_factor`. A message is printed when
          at least one entry exceeds that threshold.

        Args:
          mean: Sequences of action means of the current policy.
          logstd: Sequences of action log stddevs of the current policy.
          old_mean: Sequences of action means of the behavioral policy.
          old_logstd: Sequences of action log stddevs of the behavioral policy.
          action: Sequences of actions.
          advantage: Sequences of advantages.
          length: Batch of sequence lengths.

        Returns:
          Tuple of loss tensor and summary tensor.
        """
        with tf.name_scope('policy_loss'):
            entropy = utility.diag_normal_entropy(mean, logstd)

            raw_kl = utility.diag_normal_kl(old_mean, old_logstd, mean, logstd)
            kl = tf.reduce_mean(self._mask(raw_kl, length), 1)

            # Importance ratio between the current and behavioral policy.
            new_logpdf = utility.diag_normal_logpdf(mean, logstd, action)
            old_logpdf = utility.diag_normal_logpdf(old_mean, old_logstd,
                                                    action)
            ratio = tf.exp(new_logpdf - old_logpdf)
            weighted_ratio = self._mask(ratio * tf.stop_gradient(advantage),
                                        length)
            surrogate_loss = -tf.reduce_mean(weighted_ratio, 1)

            kl_penalty = self._penalty * kl

            config = self._config
            cutoff_threshold = config.kl_target * config.kl_cutoff_factor
            cutoff_count = tf.reduce_sum(
                tf.cast(kl > cutoff_threshold, tf.int32))
            # Print only when at least one entry is over the threshold; `int`
            # serves as the no-op branch because calling it yields 0.
            report_cutoff = tf.cond(
                cutoff_count > 0,
                lambda: tf.Print(0, [cutoff_count], 'kl cutoff! '),
                int)
            with tf.control_dependencies([report_cutoff]):
                over_threshold = tf.cast(kl > cutoff_threshold, tf.float32)
                kl_cutoff = (config.kl_cutoff_coef * over_threshold *
                             (kl - cutoff_threshold)**2)

            policy_loss = surrogate_loss + kl_penalty + kl_cutoff

            histograms = (
                ('entropy', entropy),
                ('kl', kl),
                ('surrogate_loss', surrogate_loss),
                ('kl_penalty', kl_penalty),
                ('kl_cutoff', kl_cutoff),
                ('kl_penalty_combined', kl_penalty + kl_cutoff),
                ('policy_loss', policy_loss),
            )
            averages = (
                ('avg_surr_loss', surrogate_loss),
                ('avg_kl_penalty', kl_penalty),
                ('avg_policy_loss', policy_loss),
            )
            summary_ops = [
                tf.summary.histogram(tag, values) for tag, values in histograms
            ]
            summary_ops += [
                tf.summary.scalar(tag, tf.reduce_mean(values))
                for tag, values in averages
            ]
            summary = tf.summary.merge(summary_ops)

            mean_policy_loss = tf.reduce_mean(policy_loss, 0)
            checked_loss = tf.check_numerics(mean_policy_loss, 'policy_loss')
            return checked_loss, summary
Exemplo n.º 16
0
def define_ppo_epoch(memory, hparams, action_space, batch_size,
                     distributional_size=1, distributional_subscale=0.04,
                     distributional_threshold=0.0, epoch=-1):
  """Builds the ops for one PPO optimization epoch over collected rollouts.

  Gradients are stopped on every rollout tensor, advantages are computed with
  `calculate_generalized_advantage_estimator` and normalized, and
  `define_ppo_step` is run once per minibatch through `tf.scan` over shuffled
  indices.

  Args:
    memory: sequence unpacked as (observation, reward, done, action, old_pdf,
      value_sm).
    hparams: hyperparameters. Reads `gae_gamma`, `gae_lambda`, `epoch_length`,
      `optimization_epochs`, `optimization_batch_size`,
      `effective_num_agents` and, when present, `rewards_preprocessing_fun`.
    action_space: passed through unchanged to `define_ppo_step`.
    batch_size: only used when `hparams.effective_num_agents` is not None, to
      rescale the number of minibatches.
    distributional_size: when greater than 1, `value_sm` is converted to a
      value with `_distributional_to_value` and discounted rewards are mapped
      to bucket indices.
    distributional_subscale: scale applied to the bucket value range.
    distributional_threshold: forwarded to `_distributional_to_value`.
    epoch: forwarded to `define_ppo_step`.

  Returns:
    The merged summary tensor for "policy_loss", "value_loss", "entropy_loss"
    and "learning_rate", chained through `tf.Print` ops that print each of
    those values when the tensor is evaluated.

  Raises:
    AssertionError: if the computed number of minibatches is not positive.
  """
  observation, reward, done, action, old_pdf, value_sm = memory

  # This is to avoid propagating gradients through simulated environment.
  observation = tf.stop_gradient(observation)
  action = tf.stop_gradient(action)
  reward = tf.stop_gradient(reward)
  if hasattr(hparams, "rewards_preprocessing_fun"):
    reward = hparams.rewards_preprocessing_fun(reward)
  done = tf.stop_gradient(done)
  value_sm = tf.stop_gradient(value_sm)
  old_pdf = tf.stop_gradient(old_pdf)

  value = value_sm
  if distributional_size > 1:
    value = _distributional_to_value(
        value_sm, distributional_size, distributional_subscale,
        distributional_threshold)

  advantage = calculate_generalized_advantage_estimator(
      reward, value, done, hparams.gae_gamma, hparams.gae_lambda)

  if distributional_size > 1:
    # Create discounted reward values range.
    half = distributional_size // 2
    value_range = tf.to_float(tf.range(-half, half)) + 0.5  # Mid-bucket value.
    value_range *= distributional_subscale
    # Acquire new discounted rewards by using the above range as end-values.
    end_values = tf.expand_dims(value_range, 0)
    discounted_reward = discounted_rewards(
        reward, done, hparams.gae_gamma, end_values)
    # Re-normalize the discounted rewards to integers, in [0, dist_size] range.
    discounted_reward /= distributional_subscale
    discounted_reward += half
    discounted_reward = tf.maximum(discounted_reward, 0.0)
    discounted_reward = tf.minimum(discounted_reward, distributional_size)
    # Multiply the rewards by 2 for greater fidelity and round to integers.
    discounted_reward = tf.stop_gradient(tf.round(2 * discounted_reward))
    # The probabilities corresponding to the end values from old predictions.
    discounted_reward_prob = tf.stop_gradient(value_sm[-1])
    discounted_reward_prob = tf.nn.softmax(discounted_reward_prob, axis=-1)
  else:
    discounted_reward = tf.stop_gradient(advantage + value[:-1])
    discounted_reward_prob = discounted_reward  # Unused in this case.

  # Normalize advantages to zero mean / unit std over the first two axes; the
  # epsilon guards against division by zero when the variance vanishes.
  advantage_mean, advantage_variance = tf.nn.moments(advantage, axes=[0, 1],
                                                     keep_dims=True)
  advantage_normalized = tf.stop_gradient(
      (advantage - advantage_mean)/(tf.sqrt(advantage_variance) + 1e-8))

  def add_lists_elementwise(l1, l2):
    """Returns the pairwise sums of two equally long lists."""
    return [x + y for x, y in zip(l1, l2)]

  number_of_batches = ((hparams.epoch_length-1) * hparams.optimization_epochs
                       // hparams.optimization_batch_size)
  epoch_length = hparams.epoch_length
  if hparams.effective_num_agents is not None:
    number_of_batches *= batch_size
    number_of_batches //= hparams.effective_num_agents
    epoch_length //= hparams.effective_num_agents

  assert number_of_batches > 0, "Set the parameters so that number_of_batches>0"
  lr = learning_rate.learning_rate_schedule(hparams)

  # One independent shuffle of the indices per optimization epoch, truncated
  # so that they split evenly into minibatches.
  shuffled_indices = [tf.random.shuffle(tf.range(epoch_length - 1))
                      for _ in range(hparams.optimization_epochs)]
  shuffled_indices = tf.concat(shuffled_indices, axis=0)
  shuffled_indices = shuffled_indices[:number_of_batches *
                                      hparams.optimization_batch_size]
  indices_of_batches = tf.reshape(shuffled_indices,
                                  shape=(-1, hparams.optimization_batch_size))
  input_tensors = [observation, action, discounted_reward,
                   discounted_reward_prob, advantage_normalized, old_pdf]

  def accumulate_ppo_step(accumulated, i):
    """Runs one PPO step on minibatch `i` and adds its results to the totals."""
    step_rets = define_ppo_step(
        [tf.gather(t, indices_of_batches[i, :]) for t in input_tensors],
        hparams, action_space, lr,
        epoch=epoch,
        distributional_size=distributional_size,
        distributional_subscale=distributional_subscale)
    return add_lists_elementwise(accumulated, step_rets)

  # parallel_iterations=1 keeps the minibatch steps strictly sequential.
  ppo_step_rets = tf.scan(
      accumulate_ppo_step,
      tf.range(number_of_batches),
      [0., 0., 0.],
      parallel_iterations=1)

  ppo_summaries = [tf.reduce_mean(ret) / number_of_batches
                   for ret in ppo_step_rets]
  ppo_summaries.append(lr)
  summaries_names = [
      "policy_loss", "value_loss", "entropy_loss", "learning_rate"
  ]

  summaries = [tf.summary.scalar(summary_name, summary)
               for summary_name, summary in zip(summaries_names, ppo_summaries)]
  losses_summary = tf.summary.merge(summaries)

  # Chain Print ops so that evaluating the summary also prints every value.
  for summary_name, summary in zip(summaries_names, ppo_summaries):
    losses_summary = tf.Print(losses_summary, [summary], summary_name + ": ")

  return losses_summary
Exemplo n.º 17
0
            def mix_data(example):
                """Draw one example from a task chosen by the mixing schedule.

                The incoming `example` is discarded; the result is a
                single-element dataset wrapping the sampled task's next
                example.
                """
                del example
                schedule = hparams.multiproblem_mixing_schedule

                # `prob` is the probability of mixing the primary task with the
                # secondary tasks: 0 = only the primary task, 1 = only the
                # secondary tasks.
                if schedule == MixingSchedule.EXPONENTIAL:
                    prob = get_exp_sched_prob()
                    # Print the probability whenever problem_step is a multiple
                    # of 5e6.
                    at_print_step = tf.equal(
                        tf.floormod(problem_step,
                                    tf.cast(5e6, dtype=tf.int64)), 0)
                    prob = tf.cond(
                        at_print_step,
                        lambda: tf.Print(prob, [prob], message="Probability"),
                        lambda: prob)
                elif schedule == MixingSchedule.CONSTANT:
                    prob = get_const_sched_prob()
                elif schedule == MixingSchedule.PRETRAIN:
                    prob = get_pretrain_sched_prob()
                else:
                    raise ValueError("Unknown schedule %s" % str(schedule))
                tf.logging.info("Using the %s schedule to "
                                "train the MultiProblem." % str(schedule))
                tf.logging.info("Schedule mixing threshold "
                                "%.2f" %
                                hparams.multiproblem_schedule_threshold)

                # Optional per-task thresholds: parsed from a comma-separated
                # string, normalized to sum to one, then turned into cumulative
                # sums. Dropped again if their count does not match the tasks.
                thresholds = None
                raw_thresholds = hparams.multiproblem_per_task_threshold
                if raw_thresholds:
                    weights = [float(w) for w in raw_thresholds.split(",")]
                    weight_total = sum(weights)
                    tf.logging.info("Per-task thresholds: %s." % str(weights))
                    normalized = [w / weight_total for w in weights]
                    cumulative = [
                        sum(normalized[:end])
                        for end in range(1, len(normalized) + 1)
                    ]
                    tf.logging.info("Per-task threshold sums: %s." %
                                    str(cumulative))
                    if len(cumulative) == len(self.task_list):
                        thresholds = cumulative
                    else:
                        tf.logging.warn(
                            "Specified %d thresholds but encountered %d tasks."
                            % (len(cumulative), len(self.task_list)))

                def sample_task(curr_task, num_tasks_left, randnum):
                    """A recursive function to sample a task.

                    This function treats the probability as the threshold for
                    the primary task and divides the remaining probability
                    mass across the other tasks.

                    Args:
                      curr_task: The index of the task being considered for
                        sampling.
                      num_tasks_left: Number of tasks remaining to possibly
                        sample from.
                      randnum: The random number used to select the dataset.

                    Returns:
                      A Tensor representing an example from the task that was
                      sampled from.
                    """

                    def take_current():
                        return get_next_from_dataset(
                            dataset_iterators[curr_task])

                    def try_next():
                        return sample_task(curr_task + 1, num_tasks_left - 1,
                                           randnum)

                    # Last candidate: nothing left to fall through to.
                    if num_tasks_left == 0:
                        return take_current()

                    # Use per-task thresholds if specified.
                    if thresholds is not None:
                        return tf.cond(randnum < thresholds[curr_task],
                                       take_current, try_next)

                    # When curr_task is 0, the primary task, the new prob is
                    # the same as the original probability. `tf.greater`
                    # indicates that the primary task receives (1-prob) of the
                    # probability mass. Otherwise, `prob` is divided equally
                    # amongst all the secondary tasks.
                    new_prob = prob - (curr_task * prob /
                                       (len(self.task_list) - 1))
                    return tf.cond(tf.greater(randnum, new_prob),
                                   take_current, try_next)

                sampled = sample_task(0,
                                      len(self.task_list) - 1,
                                      tf.random_uniform([]))
                return tf.data.Dataset.from_tensors(sampled)
Exemplo n.º 18
0
def parse_example_proto(example_serialized, has_3d=False):
    """Decode one serialized Example proto into image and keypoint tensors.

    Features read from the proto:

        'image/encoded'      : string, passed to decode_jpeg
        'image/height'       : int64, shape [1]
        'image/width'        : int64, shape [1]
        'image/filename'     : string
        'image/center'       : int64, shape (2, 1)
        'image/visibility'   : int64, shape (1, 14)
        'image/x'            : float32, shape (1, 14)
        'image/y'            : float32, shape (1, 14)
        'image/face_pts'     : float32, shape (1, 15), defaults to zeros;
                               reshaped to [3, 5]

    If has_3d is on, it also reads:
        'mosh/pose'          : float32, shape (72,)
        'mosh/shape'         : float32, shape (10,)
        'mosh/gt3d'          : float32, shape (42,), reshaped to [14, 3]
        'meta/has_3d'        : int64, defaults to 0

    Returns:
        (image, image_size, label, center, fname), extended with
        (pose, shape, gt3d, has_smpl3d) when has_3d is on. `label` stacks
        x, y and visibility along axis 0 and appends the face points along
        axis 1; `image_size` is height and width concatenated.
    """
    fixed = tf.FixedLenFeature
    feature_map = {
        'image/encoded': fixed([], dtype=tf.string, default_value=''),
        'image/height': fixed([1], dtype=tf.int64, default_value=-1),
        'image/width': fixed([1], dtype=tf.int64, default_value=-1),
        'image/filename': fixed([], dtype=tf.string, default_value=''),
        'image/center': fixed((2, 1), dtype=tf.int64),
        'image/visibility': fixed((1, 14), dtype=tf.int64),
        'image/x': fixed((1, 14), dtype=tf.float32),
        'image/y': fixed((1, 14), dtype=tf.float32),
        'image/face_pts': fixed((1, 15),
                                dtype=tf.float32,
                                default_value=[0.] * 15),
    }
    if has_3d:
        feature_map['mosh/pose'] = fixed((72,), dtype=tf.float32)
        feature_map['mosh/shape'] = fixed((10,), dtype=tf.float32)
        feature_map['mosh/gt3d'] = fixed((14 * 3,), dtype=tf.float32)
        # has_3d is for pose and shape: 0 for mpi_inf_3dhp, 1 for h3.6m.
        feature_map['meta/has_3d'] = fixed(1,
                                           dtype=tf.int64,
                                           default_value=[0])

    parsed = tf.parse_single_example(example_serialized, feature_map)

    def as_type(key, dtype):
        """Cast the parsed feature stored under `key` to `dtype`."""
        return tf.cast(parsed[key], dtype=dtype)

    height = as_type('image/height', tf.int32)
    width = as_type('image/width', tf.int32)
    center = as_type('image/center', tf.int32)
    fname = as_type('image/filename', tf.string)
    # Prints the file name every time this tensor is evaluated.
    fname = tf.Print(fname, [fname], message="image name: ")

    face_pts = tf.reshape(as_type('image/face_pts', tf.float32), [3, 5])

    vis = as_type('image/visibility', tf.float32)
    x = as_type('image/x', tf.float32)
    y = as_type('image/y', tf.float32)

    # Rows are x, y, visibility; face points are appended as extra columns.
    body_label = tf.concat([x, y, vis], 0)
    label = tf.concat([body_label, face_pts], 1)

    image = decode_jpeg(parsed['image/encoded'])
    image_size = tf.concat([height, width], 0)

    if not has_3d:
        return image, image_size, label, center, fname

    pose = as_type('mosh/pose', tf.float32)
    shape = as_type('mosh/shape', tf.float32)
    gt3d = tf.reshape(as_type('mosh/gt3d', tf.float32), [14, 3])
    has_smpl3d = as_type('meta/has_3d', tf.bool)
    return (image, image_size, label, center, fname, pose, shape, gt3d,
            has_smpl3d)
Exemplo n.º 19
0
def knn_affinity(input_x,
                 n_nbrs,
                 scale=None,
                 scale_nbr=None,
                 local_scale=None,
                 verbose=False):
  """Calculates Gaussian affinity matrix.

  Calculates the symmetrized Gaussian affinity matrix with k1 nonzero
  affinities for each point, scaled by
  1) a provided scale,
  2) the median distance of the k2-th neighbor of each point in X, or
  3) a covariance matrix S where S_ii is the distance of the k2-th
  neighbor of each point i, and S_ij = 0 for all i != j
  Here, k1 = n_nbrs, k2 = scale_nbr

  Args:
    input_x: input dataset of size n
    n_nbrs: k1. A Python or NumPy float is truncated to int; a tf.Variable
      whose dtype is not int32 is cast to int32.
    scale: provided scale
    scale_nbr: k2, used if scale not provided. When None it is set to 0, so
      the index `scale_nbr - 1` selects the last retrieved neighbor.
    local_scale: if True, then we use the aforementioned option 3), else we
      use option 2)
    verbose: extra printouts

  Returns:
    n x n affinity matrix

  Raises:
    AssertionError: if scale is None and scale_nbr is given but is not in
      (0, n_nbrs].
  """
  # `np.float` was only an alias of the builtin `float` and no longer exists
  # in NumPy >= 1.24, so test against the real scalar types instead.
  if isinstance(n_nbrs, (float, np.floating)):
    n_nbrs = int(n_nbrs)
  elif isinstance(n_nbrs,
                  tf.Variable) and n_nbrs.dtype.as_numpy_dtype != np.int32:
    n_nbrs = tf.cast(n_nbrs, np.int32)
  # get squared distance
  dist_x = squared_distance(input_x)
  # calculate the top k closest neighbors (top_k of the negated distances)
  nn = tf.nn.top_k(-dist_x, n_nbrs, sorted=True)

  # vals holds the negated distances of the selected neighbors
  vals = nn[0]
  # apply scale
  if scale is None:
    # if scale not provided, use local scale
    if scale_nbr is None:
      scale_nbr = 0
    else:
      assert scale_nbr > 0 and scale_nbr <= n_nbrs
    if local_scale:
      # one scale per point, repeated for each of its n_nbrs neighbors
      scale = -nn[0][:, scale_nbr - 1]
      scale = tf.reshape(scale, [-1, 1])
      scale = tf.tile(scale, [1, n_nbrs])
      scale = tf.reshape(scale, [-1, 1])
      vals = tf.reshape(vals, [-1, 1])
      if verbose:
        vals = tf.Print(vals, [tf.shape(vals), tf.shape(scale)],
                        'vals, scale shape')
      vals = vals / (2 * scale)
      vals = tf.reshape(vals, [-1, n_nbrs])
    else:

      def get_median(scales, m):
        """Returns the m-th largest entry of `scales` and the top-m values."""
        with tf.device('/cpu:0'):
          scales = tf.nn.top_k(scales, m)[0]
        scale = scales[m - 1]
        return scale, scales

      scales = -vals[:, scale_nbr - 1]
      const = tf.shape(input_x)[0] // 2
      scale, scales = get_median(scales, const)
      vals = vals / (2 * scale)
  else:
    # otherwise, use provided value for global scale
    vals = vals / (2 * scale**2)

  # get the affinity
  aff_vals = tf.exp(vals)
  # flatten this into a single vector of values to shove in a sparse matrix
  aff_vals = tf.reshape(aff_vals, [-1])
  # get the matrix of indices corresponding to each rank
  # with 1 in the first column and k in the kth column
  nn_ind = nn[1]
  # get the j index for the sparse matrix
  j_index = tf.reshape(nn_ind, [-1, 1])
  # the i index is just sequential to the j matrix
  i_index = tf.range(tf.shape(nn_ind)[0])
  i_index = tf.reshape(i_index, [-1, 1])
  i_index = tf.tile(i_index, [1, tf.shape(nn_ind)[1]])
  i_index = tf.reshape(i_index, [-1, 1])
  # concatenate the indices to build the sparse matrix
  indices = tf.concat((i_index, j_index), axis=1)
  # assemble the sparse weight matrix
  weight_mat = tf.SparseTensor(
      indices=tf.cast(indices, dtype='int64'),
      values=aff_vals,
      dense_shape=tf.cast(tf.shape(dist_x), dtype='int64'))
  # fix the ordering of the indices
  weight_mat = tf.sparse_reorder(weight_mat)
  # convert to dense tensor
  weight_mat = tf.sparse_tensor_to_dense(weight_mat)
  # symmetrize
  weight_mat = (weight_mat + tf.transpose(weight_mat)) / 2.0

  return weight_mat
Exemplo n.º 20
0
  def train(self, sess):
    """Main training function/loop.

    Runs `FLAGS.num_epochs` epochs over `self.train_examples`, reshuffling
    the examples at the start of every epoch and consuming them in
    consecutive, non-overlapping minibatches of `self.batch_size`. Every
    `FLAGS.log_frequency` batches it additionally fetches the property loss,
    regularization and GPU memory ops, logs progress, and runs a partial
    evaluation. A checkpoint is written at the end when `self.ckpt_dir` is
    set.

    Args:
      sess: a tf session object
    """
    # For debugging/pushing limits of model: one Print op per logical GPU
    # that reports its byte limit and peak usage, both divided by gpu_mb.
    gpu_mb = tf.constant(1024*1024, dtype=tf.int64)
    gpus = tf.config.experimental.list_logical_devices("GPU")
    memory_footprints = []
    for gpu in gpus:
      with tf.device(gpu.name):
        memory_footprint = tf.Print(
            tf.constant(0), [
                contrib_memory_stats.BytesLimit() / gpu_mb,
                contrib_memory_stats.MaxBytesInUse() / gpu_mb
            ],
            message=gpu.name)
      memory_footprints.append(memory_footprint)

    epochs = FLAGS.num_epochs
    prints = FLAGS.log_frequency

    training_start_time = time.time()
    epochs_start_time = time.time()

    # Trailing examples that do not fill a whole batch are dropped; at least
    # one batch is always run.
    num_batches = max(int(len(self.train_examples)/self.batch_size), 1)
    tf.logging.info("Num batches per epoch: {}".format(num_batches))

    # Additional logging
    losses = np.zeros((epochs * num_batches))
    accuracies = np.zeros((epochs * num_batches))

    for epoch in range(epochs):
      random.shuffle(self.train_examples)
      for batch in range(num_batches):
        batch_no = epoch * num_batches + batch
        should_sample = (batch_no % prints == 0)

        train_ops_to_run = {
            "train_step": self.train_step,
            "loss": self.model.loss,
            "accuracy": self.model.accuracy,
            "accuracy_per_example": self.model.accuracy_per_ex,
            "output_relations": self.model.log_decoded_relations,
        }
        if should_sample:
          train_ops_to_run["props"] = self.model.property_loss
          train_ops_to_run["regularization"] = self.model.regularization
          for i, memory_footprint in enumerate(memory_footprints):
            train_ops_to_run["memory_footprint_{}".format(i)] = memory_footprint

        # Stride by batch_size so consecutive batches do not overlap. Using
        # the bare batch index as the offset would shift the window by one
        # example per step and leave most of the data unvisited.
        batch_start = batch * self.batch_size
        batch_examples = self.train_examples[batch_start:
                                             batch_start + self.batch_size]
        feed_dict = self._compute_feed_dict(batch_examples)
        train_output = sess.run(train_ops_to_run, feed_dict)
        losses[batch_no] = train_output["loss"]
        accuracies[batch_no] = train_output["accuracy"]

        if should_sample:
          # Timing info
          epochs_end_time = time.time()
          epochs_time_str = str(datetime.timedelta(
              seconds=epochs_end_time - epochs_start_time))
          epochs_start_time = epochs_end_time
          precision, recall = self._evaluate_sample(sess,
                                                    train_output,
                                                    feed_dict,
                                                    batch_examples,
                                                    full_log=True)
          if precision and recall:
            pr_string = "\tPrecision: {:.3f}\tRecall {:.3f}".format(
                np.mean(precision), np.mean(recall))
          else:
            pr_string = ""
          tf.logging.info(
              ("[{}] Epoch: {}.{}\tLoss: {:.3f}|{:.3f}|{:.3f}\t" +
               "Accuracy: {:.3f}{}\n").format(
                   epochs_time_str,
                   epoch, batch,
                   train_output["loss"],
                   train_output["props"],
                   train_output["regularization"],
                   train_output["accuracy"],
                   pr_string))

          # Do a dev run, it doesn't take that long
          self.evaluate(sess, full=False)

    training_end_time = time.time()
    tf.logging.info("Training took: %s" % str(datetime.timedelta(
        seconds=training_end_time - training_start_time)))
    if self.ckpt_dir is not None:
      save_path = self.saver.save(sess,
                                  os.path.join(self.ckpt_dir, "model.ckpt"))
      tf.logging.info("Saved model at {}".format(save_path))
Exemplo n.º 21
0
        def _body(i, posterior, center, wx, activation_biases, sigma_biases,
                  input_activation, tile_filter):
            """Body of EM while loop.

            Performs one routing iteration: recomputes the centers and
            variances from the current posterior, derives the activation
            logit, and renormalizes the posterior.

            Args:
                i: iteration counter; also drives the `beta` schedule.
                posterior: current routing posterior.
                center: centers from the previous iteration, blended into
                    the new ones with weight 0.1.
                wx: votes.
                activation_biases: bias subtracted inside the logit.
                sigma_biases: bias entering the `win` term through
                    log(sigma_biases^2 + min_var).
                input_activation: activations multiplied into the posterior.
                tile_filter: filter for the transposed convolution that sums
                    the priors.

            Returns:
                Tuple (i + 1, posterior, logit, center, masses).
            """
            tf.logging.info('  Wx: %s', wx)

            # beta rises from 0.05 * final_beta towards final_beta as i grows.
            beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32)))

            # Debug print of the posterior's range on every evaluation.
            posterior = tf.Print(posterior, [
                layer_name, i, h, ih,
                tf.reduce_min(posterior),
                tf.reduce_max(posterior)
            ],
                                 message='posterior')
            # route: [outdim, height?, width?, batch, indim]
            with tf.name_scope('vote_conf'):
                vote_conf = posterior * input_activation
                vote_conf = tf.maximum(vote_conf, 0.0)

            # masses: [batch, 1, outdim, 1, height, width, 1, 1]
            with tf.name_scope('masses'):
                # The small constant keeps the divisions by masses finite.
                masses = tf.reduce_sum(vote_conf,
                                       axis=[1, -1, -2],
                                       keepdims=True,
                                       name='masses_calculation') + 0.0000001
            with tf.name_scope('preactivate_unrolled'):
                preactivate_unrolled = vote_conf * wx

            # center: [batch, 1, outdim, outatom, height, width]
            with tf.name_scope('center'):
                center = .9 * tf.reduce_sum(
                    preactivate_unrolled, axis=[1, -1, -2],
                    keepdims=True) / masses + .1 * center

            # Rematerialization to save GPU memory. (+22ms/-1.6GB)
            # @tf.contrib.layers.recompute_grad
            def compute_noise_and_variance(wx, center, vote_conf, masses):
                """Returns the squared deviations and the weighted variance."""
                noise = tf.squared_difference(wx, center)
                variance = min_var + tf.reduce_sum(
                    vote_conf * noise,
                    axis=[1, -1, -2],
                    keepdims=True,
                    name='variance_calculation') / masses
                return noise, variance

            with tf.name_scope('compute_noise_and_variance'):
                noise, variance = compute_noise_and_variance(
                    wx, center, vote_conf, masses)

            with tf.name_scope('win'):
                log_variance = tf.log(variance)
                p_i = -1 * tf.reduce_sum(log_variance, axis=3, keepdims=True)
                log_2pi = tf.log(2 * math.pi)
                sigma_b = tf.log(sigma_biases * sigma_biases + min_var)
                win = masses * (p_i - num_out_atoms *
                                (sigma_b + log_2pi + 1.0))
            with tf.name_scope('logit'):
                logit = beta * (win - activation_biases * 50 * num_out_atoms)
            with tf.name_scope('activation_update'):
                # min(0, x) - log(1 + exp(-|x|)) is a numerically stable form
                # of log(sigmoid(x)).
                activation_update = tf.minimum(
                    0.0, logit) - tf.log(1 + tf.exp(-tf.abs(logit)))
            with tf.name_scope('sigma_update'):
                log_det_sigma = -1 * p_i
                sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
            with tf.name_scope('exp_update'):
                # `keepdims` replaces the deprecated `keep_dims` spelling and
                # matches every other reduction in this function.
                exp_update = tf.reduce_sum(noise / (2 * variance),
                                           axis=3,
                                           keepdims=True)
            prior_update = tf.subtract(activation_update - sigma_update,
                                       exp_update,
                                       name='prior_update_sub')
            # Subtract the maximum before exponentiating for stability.
            max_prior_update = tf.reduce_max(prior_update,
                                             axis=[2, 3, 4, 5, 6, 7],
                                             keepdims=True,
                                             name='max_prior_opdate')
            prior_normal = tf.add(prior_update, -1 * max_prior_update)
            prior_exp = tf.exp(prior_normal)
            prior_exp_out = tf.reduce_sum(prior_exp,
                                          axis=2,
                                          keepdims=True,
                                          name='prior_exp_out')
            prior_exp_reshape = tf.reshape(prior_exp_out, [-1, h, h, k * k],
                                           name='prior_exp_reshape')

            # Sum the priors back onto the input grid, then re-tile them into
            # patches so they can normalize prior_exp.
            sum_prior = tf.nn.conv2d_transpose(prior_exp_reshape,
                                               tile_filter,
                                               output_shape=[b * c, ih, ih, 1],
                                               strides=[1, s, s, 1],
                                               padding='VALID')
            sum_prior = tf.maximum(1e-6, sum_prior)

            sum_prior_patch = utils.kernel_tile(sum_prior,
                                                k,
                                                s,
                                                1,
                                                name='sum_prior_patch')

            with utils.maybe_jit_scope(), tf.name_scope('posterior'):
                sum_prior_reshape = tf.reshape(
                    sum_prior_patch, [-1, input_dim, 1, 1, h, h, k, k])
                posterior = prior_exp / sum_prior_reshape

            return (i + 1, posterior, logit, center, masses)
Exemplo n.º 22
0
    def call(self, x):
        """Compute the detection loss and print running statistics.

        Args:
            x: sequence unpacked as (input_image, y_pred, y_true, true_boxes).

        Returns:
            `loss * self.grid_scale`, where `loss` is the sum of the xy, wh,
            confidence and class terms, each reduced over axes 1-4. The loss
            tensor is chained through `tf.Print` ops that print the online
            statistics and the per-term sums when it is evaluated.
        """
        input_image, y_pred, y_true, true_boxes = x

        # adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class]
        y_pred = tf.reshape(
            y_pred,
            tf.concat([tf.shape(input=y_pred)[:3],
                       tf.constant([3, -1])],
                      axis=0))

        # initialize the masks
        object_mask = tf.expand_dims(y_true[..., 4], 4)

        # the variable to keep track of number of batches processed
        batch_seen = tf.Variable(0.)

        # compute grid factor and net factor
        grid_h = tf.shape(input=y_true)[1]
        grid_w = tf.shape(input=y_true)[2]
        grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32),
                                 [1, 1, 1, 1, 2])

        net_h = tf.shape(input=input_image)[1]
        net_w = tf.shape(input=input_image)[2]
        net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32),
                                [1, 1, 1, 1, 2])
        """
        Adjust prediction
        """
        pred_box_xy = (self.cell_grid[:, :grid_h, :grid_w, :, :] +
                       tf.sigmoid(y_pred[..., :2]))  # sigma(t_xy) + c_xy
        pred_box_wh = y_pred[..., 2:4]  # t_wh
        pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]),
                                       4)  # adjust confidence
        pred_box_class = y_pred[..., 5:]  # adjust class probabilities
        """
        Adjust ground truth
        """
        true_box_xy = y_true[..., 0:2]  # (sigma(t_xy) + c_xy)
        true_box_wh = y_true[..., 2:4]  # t_wh
        true_box_conf = tf.expand_dims(y_true[..., 4], 4)
        true_box_class = tf.argmax(input=y_true[..., 5:], axis=-1)
        """
        Compare each predicted box to all true boxes
        """
        # initially, drag all objectness of all boxes to 0
        conf_delta = pred_box_conf - 0

        # then, ignore the boxes which have good overlap with some true box
        true_xy = true_boxes[..., 0:2] / grid_factor
        true_wh = true_boxes[..., 2:4] / net_factor

        # corner coordinates (min / max) of the true boxes
        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        # predictions get an extra axis 4 so they broadcast against true_boxes
        pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4)
        pred_wh = tf.expand_dims(
            tf.exp(pred_box_wh) * self.anchors / net_factor, 4)

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        # intersection rectangle, clamped at zero when boxes do not overlap
        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)

        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)

        # keep the no-object confidence penalty only where the best IoU over
        # axis 4 stays below the ignore threshold
        best_ious = tf.reduce_max(input_tensor=iou_scores, axis=4)
        conf_delta *= tf.expand_dims(
            tf.cast(best_ious < self.ignore_thresh, dtype=tf.float32), 4)
        """
        Compute some online statistics
        """
        # IoU between each prediction and the ground truth in y_true at the
        # same position (element-wise, no extra broadcast axis here)
        true_xy = true_box_xy / grid_factor
        true_wh = tf.exp(true_box_wh) * self.anchors / net_factor

        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        pred_xy = pred_box_xy / grid_factor
        pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)
        iou_scores = object_mask * tf.expand_dims(iou_scores, 4)

        # the 1e-3 added to the denominators below avoids division by zero
        # when a count is zero
        count = tf.reduce_sum(input_tensor=object_mask)
        count_noobj = tf.reduce_sum(input_tensor=1 - object_mask)
        detect_mask = tf.cast((pred_box_conf * object_mask) >= 0.5,
                              dtype=tf.float32)
        class_mask = tf.expand_dims(
            tf.cast(tf.equal(tf.argmax(input=pred_box_class, axis=-1),
                             true_box_class),
                    dtype=tf.float32), 4)
        recall50 = tf.reduce_sum(
            input_tensor=tf.cast(iou_scores >= 0.5, dtype=tf.float32) *
            detect_mask * class_mask) / (count + 1e-3)
        recall75 = tf.reduce_sum(
            input_tensor=tf.cast(iou_scores >= 0.75, dtype=tf.float32) *
            detect_mask * class_mask) / (count + 1e-3)
        avg_iou = tf.reduce_sum(input_tensor=iou_scores) / (count + 1e-3)
        avg_obj = tf.reduce_sum(input_tensor=pred_box_conf *
                                object_mask) / (count + 1e-3)
        avg_noobj = tf.reduce_sum(input_tensor=pred_box_conf *
                                  (1 - object_mask)) / (count_noobj + 1e-3)
        avg_cat = tf.reduce_sum(input_tensor=object_mask *
                                class_mask) / (count + 1e-3)
        """
        Warm-up training
        """
        batch_seen = tf.assign_add(batch_seen, 1.)

        # while batch_seen < warmup_batches + 1: positions without an object
        # get a target xy of cell-grid offset + 0.5, and the xy/wh mask is all
        # ones; afterwards the targets and object_mask are used unchanged
        true_box_xy, true_box_wh, xywh_mask = tf.cond(
            pred=tf.less(batch_seen, self.warmup_batches + 1),
            true_fn=lambda: [
                true_box_xy +
                (0.5 + self.cell_grid[:, :grid_h, :grid_w, :, :]) *
                (1 - object_mask), true_box_wh + tf.zeros_like(true_box_wh) *
                (1 - object_mask),
                tf.ones_like(object_mask)
            ],
            false_fn=lambda: [true_box_xy, true_box_wh, object_mask])
        """
        Compare each true box to all anchor boxes
        """
        wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor
        wh_scale = tf.expand_dims(
            2 - wh_scale[..., 0] * wh_scale[..., 1],
            axis=4)  # the smaller the box, the bigger the scale

        xy_delta = xywh_mask * (pred_box_xy -
                                true_box_xy) * wh_scale * self.xywh_scale
        wh_delta = xywh_mask * (pred_box_wh -
                                true_box_wh) * wh_scale * self.xywh_scale
        conf_delta = object_mask * (
            pred_box_conf - true_box_conf) * self.obj_scale + (
                1 - object_mask) * conf_delta * self.noobj_scale
        class_delta = object_mask * \
                      tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \
                      self.class_scale

        # squared-error terms (and the class cross-entropy) summed over every
        # axis except the first
        loss_xy = tf.reduce_sum(input_tensor=tf.square(xy_delta),
                                axis=list(range(1, 5)))
        loss_wh = tf.reduce_sum(input_tensor=tf.square(wh_delta),
                                axis=list(range(1, 5)))
        loss_conf = tf.reduce_sum(input_tensor=tf.square(conf_delta),
                                  axis=list(range(1, 5)))
        loss_class = tf.reduce_sum(input_tensor=class_delta,
                                   axis=list(range(1, 5)))

        loss = loss_xy + loss_wh + loss_conf + loss_class

        # chain Print ops through the loss so each statistic is printed,
        # prefixed by grid_h, whenever the loss is evaluated
        loss = tf.Print(loss, [grid_h, avg_obj],
                        message='avg_obj \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, avg_noobj],
                        message='avg_noobj \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, avg_iou],
                        message='avg_iou \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, avg_cat],
                        message='avg_cat \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, recall50],
                        message='recall50 \t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, recall75],
                        message='recall75 \t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, count],
                        message='count \t',
                        summarize=1000)
        loss = tf.Print(loss, [
            grid_h,
            tf.reduce_sum(input_tensor=loss_xy),
            tf.reduce_sum(input_tensor=loss_wh),
            tf.reduce_sum(input_tensor=loss_conf),
            tf.reduce_sum(input_tensor=loss_class)
        ],
                        message='loss xy, wh, conf, class: \t',
                        summarize=1000)

        return loss * self.grid_scale
Exemplo n.º 23
0
def main(unused_argv):
    """Build the evaluation graph and run the slim evaluation loop.

    Reads a batch of samples from the input function, runs single- or
    multi-scale prediction depending on ``FLAGS.eval_scales``, registers
    streaming metrics and image summaries for every model output, and finally
    calls ``contrib_slim.evaluation.evaluation_loop``.

    Args:
      unused_argv: Not used by this function.
    """
    # Force every keep-probability flag to 1.0 before the model is built
    # (presumably so that no dropout is applied during evaluation).
    FLAGS.comb_dropout_keep_prob = 1.0
    FLAGS.image_keep_prob = 1.0
    FLAGS.elements_keep_prob = 1.0

    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.split)

    with tf.Graph().as_default():
        samples = model_input.get_input_fn(FLAGS)()

        # Get dataset-dependent information.
        num_classes = model_input.dataset_descriptors[
            FLAGS.dataset].num_classes
        output_to_num_classes = model.get_output_to_num_classes(FLAGS)

        # Keyword arguments that the single-scale and the multi-scale
        # predictors have in common; only the scale-specific ones are passed
        # explicitly at each call site below.
        shared_predict_kwargs = dict(
            outputs_to_num_classes=output_to_num_classes,
            merge_method=FLAGS.merge_method,
            atrous_rates=FLAGS.atrous_rates,
            add_image_level_feature=FLAGS.add_image_level_feature,
            aspp_with_batch_norm=FLAGS.aspp_with_batch_norm,
            aspp_with_separable_conv=FLAGS.aspp_with_separable_conv,
            multi_grid=FLAGS.multi_grid,
            depth_multiplier=FLAGS.depth_multiplier,
            output_stride=FLAGS.output_stride,
            decoder_output_stride=FLAGS.decoder_output_stride,
            decoder_use_separable_conv=FLAGS.decoder_use_separable_conv,
            crop_size=[FLAGS.image_size, FLAGS.image_size],
            logits_kernel_size=FLAGS.logits_kernel_size,
            model_variant=FLAGS.model_variant)

        # Get model segmentation predictions.
        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            predictions, probs = model.predict_labels(
                samples['image'],
                samples,
                FLAGS,
                image_pyramid=FLAGS.image_pyramid,
                **shared_predict_kwargs)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions, probs = model.predict_labels_multi_scale(
                samples['image'],
                samples,
                FLAGS,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images,
                **shared_predict_kwargs)

        metric_map = {}
        for output in output_to_num_classes:
            output_predictions = predictions[output]
            output_probs = probs[output]
            if output == 'segment':
                output_predictions = tf.expand_dims(output_predictions, 3)
                if num_classes == 2:
                    # Two-class case: score foreground IoU on both the hard
                    # predictions and the class-1 probabilities.
                    labels = samples['label']

                    iou, weights = model.foreground_iou(
                        labels, output_predictions, FLAGS)
                    soft_iou, _ = model.foreground_iou(
                        labels, output_probs[:, :, :, 1:2], FLAGS)

                    metric_map['mIOU'] = tf.metrics.mean(iou)
                    metric_map['soft_mIOU'] = tf.metrics.mean(soft_iou)

                    high_prob_overlaps = calc_high_prob_overlaps(
                        labels, output_probs, weights)
                    metric_map['highestOverlaps'] = tf.metrics.mean(
                        high_prob_overlaps)

                    # Only affects the 'logits' image summary emitted below.
                    output_probs *= weights

                else:
                    output_predictions = tf.reshape(output_predictions,
                                                    shape=[-1])
                    labels = tf.reshape(samples['label'], shape=[-1])
                    weights = tf.to_float(
                        tf.not_equal(
                            labels, model_input.dataset_descriptors[
                                FLAGS.dataset].ignore_label))

                    # Set ignore_label regions to label 0, because metrics.mean_iou
                    # requires range of labels=[0, dataset.num_classes).
                    # Note the ignore_label regions are not evaluated since
                    # the corresponding regions contain weights=0.
                    labels = tf.where(
                        tf.equal(
                            labels, model_input.dataset_descriptors[
                                FLAGS.dataset].ignore_label),
                        tf.zeros_like(labels), labels)

                    # The metric name encodes every evaluation scale and
                    # whether flipped images were added.
                    predictions_tag = 'mIOU'
                    for eval_scale in FLAGS.eval_scales:
                        predictions_tag += '_' + str(eval_scale)
                    if FLAGS.add_flipped_images:
                        predictions_tag += '_flipped'

                    # Define the evaluation metric.
                    metric_map[
                        predictions_tag] = contrib_slim.metrics.mean_iou(
                            output_predictions,
                            labels,
                            num_classes,
                            weights=weights)

                def label_summary(labels, weights, name):
                    """Emit `labels` as a grayscale image summary.

                    Label values are scaled by 255 / num_classes, cast to
                    uint8 and multiplied by `weights` (also cast to uint8)
                    before being reshaped to square single-channel images.
                    """
                    tf.summary.image(
                        name,
                        tf.reshape(
                            tf.cast(
                                tf.to_float(labels * 255) /
                                tf.to_float(num_classes), tf.uint8) *
                            tf.cast(weights, tf.uint8),
                            [-1, FLAGS.image_size, FLAGS.image_size, 1]), 8)

                label_summary(labels, weights, 'label')
                label_summary(output_predictions, weights,
                              'output_predictions')
                tf.summary.image('logits',
                                 tf.expand_dims(output_probs[:, :, :, 1], 3))

            elif output == 'regression':
                labels = samples['label']
                ignore_mask = model.get_ignore_mask(labels, FLAGS)

                accurate = calc_accuracy_in_box(labels, output_probs,
                                                ignore_mask)
                metric_map['inBoxAccuracy'] = tf.metrics.mean(accurate)

        tf.summary.image('image', samples['image'], 8)

        metrics_to_values, metrics_to_updates = contrib_slim.metrics.aggregate_metric_map(
            metric_map)

        # `dict.iteritems()` only exists on Python 2; `items()` iterates the
        # same pairs on both Python 2 and Python 3.
        for metric_name, metric_value in metrics_to_values.items():
            metric_value = tf.Print(metric_value, [metric_value], metric_name)
            tf.summary.scalar(metric_name, metric_value)

        num_batches = int(
            math.ceil(FLAGS.num_samples / float(FLAGS.batch_size)))

        tf.logging.info('Eval num images %d', FLAGS.num_samples)
        tf.logging.info('Eval batch size %d and num batch %d',
                        FLAGS.batch_size, num_batches)

        contrib_slim.evaluation.evaluation_loop(
            master='',
            checkpoint_dir=FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_logdir,
            num_evals=num_batches,
            # Pass a real list: on Python 3 `dict.values()` is a view object,
            # not a list of ops.
            eval_op=list(metrics_to_updates.values()),
            summary_op=tf.summary.merge_all(),
            max_number_of_evaluations=None,
            eval_interval_secs=FLAGS.eval_interval_secs)
Exemplo n.º 24
0
def main(unused_argv):
    """Evaluate segmentation (and optionally classification) checkpoints.

    Builds the evaluation input pipeline and the single-scale prediction
    graph, registers streaming metrics (overall mIoU, per-class IoU and, when
    ``FLAGS.weakly`` is set, multi-label classification accuracy, precision
    and recall), prints a tfprof model analysis, and then calls
    ``contrib_training.evaluate_repeatedly``.

    Args:
      unused_argv: Not used by this function.

    Raises:
      NotImplementedError: If ``FLAGS.eval_scales`` is anything other than
        ``(1.0,)``.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    dataset = data_generator.Dataset(
        dataset_name=FLAGS.dataset,
        split_name=FLAGS.eval_split,
        dataset_dir=FLAGS.dataset_dir,
        batch_size=FLAGS.eval_batch_size,
        crop_size=list(map(int, FLAGS.eval_crop_size)),
        min_resize_value=FLAGS.min_resize_value,
        max_resize_value=FLAGS.max_resize_value,
        resize_factor=FLAGS.resize_factor,
        model_variant=FLAGS.model_variant,
        num_readers=2,
        is_training=False,
        should_shuffle=False,
        should_repeat=False,
        with_cls=True,
        cls_only=False,
        output_valid=True)

    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    with tf.Graph().as_default():
        samples = dataset.get_one_shot_iterator().get_next()
        images = samples[common.IMAGE]

        model_options = common.ModelOptions(
            outputs_to_num_classes={
                common.OUTPUT_TYPE: dataset.num_of_classes
            },
            crop_size=list(map(int, FLAGS.eval_crop_size)),
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        # Set shape in order for tf.contrib.tfprof.model_analyzer to work properly.
        static_image_shape = [
            FLAGS.eval_batch_size,
            int(FLAGS.eval_crop_size[0]),
            int(FLAGS.eval_crop_size[1]),
            3,
        ]
        images.set_shape(static_image_shape)

        # Only the single-scale path is implemented; bail out early otherwise.
        if tuple(FLAGS.eval_scales) != (1.0, ):
            tf.logging.info('Performing multi-scale test.')
            raise NotImplementedError('Multi-scale is not supported yet!')

        tf.logging.info('Performing single-scale test.')
        predictions = model.predict_labels(images,
                                           model_options,
                                           image_pyramid=FLAGS.image_pyramid)

        metric_map = {}

        # Classification branch: extract the cls logits and score them.
        if FLAGS.weakly:
            num_cls_outputs = dataset.num_of_classes - 1
            _, end_points = feature_extractor.extract_features(
                images,
                output_stride=model_options.output_stride,
                multi_grid=model_options.multi_grid,
                model_variant=model_options.model_variant,
                depth_multiplier=model_options.depth_multiplier,
                divisible_by=model_options.divisible_by,
                reuse=tf.AUTO_REUSE,
                is_training=False,
                preprocessed_images_dtype=(
                    model_options.preprocessed_images_dtype),
                global_pool=True,
                num_classes=num_cls_outputs)

            # ResNet beta version has an additional suffix in
            # FLAGS.model_variant, but it shares the same variable names with
            # the original version, so the suffix is stripped from the key.
            logits_key = '{}/logits'.format(FLAGS.model_variant).replace(
                '_beta', '')
            cls_logits = tf.reshape(end_points[logits_key],
                                    [-1, num_cls_outputs])
            cls_probs = tf.sigmoid(cls_logits)

            # Multi-label classification: threshold each probability at 0.5.
            cls_label = samples['cls_label']
            cls_pred = tf.cast(tf.greater_equal(cls_probs, 0.5), tf.int32)

            classification_metrics = (
                ('eval/cls_overall', tf.metrics.accuracy),
                ('eval/cls_precision', tf.metrics.precision),
                ('eval/cls_recall', tf.metrics.recall),
            )
            for metric_key, metric_fn in classification_metrics:
                metric_map[metric_key] = metric_fn(labels=cls_label,
                                                   predictions=cls_pred)

        # Segmentation branch: flatten predictions and labels to 1-D.
        flat_predictions = tf.reshape(predictions[common.OUTPUT_TYPE],
                                      shape=[-1])
        raw_labels = tf.reshape(samples[common.LABEL], shape=[-1])
        weights = tf.to_float(tf.not_equal(raw_labels, dataset.ignore_label))

        # Set ignore_label regions to label 0, because metrics.mean_iou requires
        # range of labels = [0, dataset.num_classes). Note the ignore_label regions
        # are not evaluated since the corresponding regions contain weights = 0.
        is_ignored = tf.equal(raw_labels, dataset.ignore_label)
        flat_labels = tf.where(is_ignored, tf.zeros_like(raw_labels),
                               raw_labels)

        predictions_tag = 'miou'
        num_classes = dataset.num_of_classes

        # Overall mean IoU.
        metric_map['eval/%s_overall' % predictions_tag] = tf.metrics.mean_iou(
            labels=flat_labels,
            predictions=flat_predictions,
            num_classes=num_classes,
            weights=weights)

        # IoU for each class, from streaming TP / FP / FN counts over the
        # one-hot encoded predictions and labels.
        one_hot_predictions = tf.reshape(
            tf.one_hot(flat_predictions, num_classes), [-1, num_classes])
        one_hot_labels = tf.reshape(tf.one_hot(flat_labels, num_classes),
                                    [-1, num_classes])

        def _class_count(metric_fn, class_id):
            """Apply a streaming count metric to one class column."""
            return metric_fn(labels=one_hot_labels[:, class_id],
                             predictions=one_hot_predictions[:, class_id],
                             weights=weights)

        for class_id in range(num_classes):
            class_tag = '%s_class_%d' % (predictions_tag, class_id)
            tp, tp_op = _class_count(tf.metrics.true_positives, class_id)
            fp, fp_op = _class_count(tf.metrics.false_positives, class_id)
            fn, fn_op = _class_count(tf.metrics.false_negatives, class_id)
            update_counts = tf.group(tp_op, fp_op, fn_op)
            # Report NaN for classes with no ground-truth pixels so far.
            class_is_present = tf.greater(tp + fn, 0.0)
            class_iou = tf.where(class_is_present, tp / (tp + fn + fp),
                                 tf.constant(np.NaN))
            metric_map['eval/%s' % class_tag] = (class_iou, update_counts)

        (metrics_to_values,
         metrics_to_updates) = contrib_metrics.aggregate_metric_map(metric_map)

        # One scalar summary per metric, each wrapped in a Print op.
        summary_ops = [
            tf.Print(tf.summary.scalar(metric_name, metric_value),
                     [metric_value], metric_name)
            for metric_name, metric_value in six.iteritems(metrics_to_values)
        ]
        summary_hook = contrib_training.SummaryAtEndHook(
            log_dir=FLAGS.eval_logdir,
            summary_op=tf.summary.merge(summary_ops))

        # A non-positive flag value leaves the evaluation count unbounded.
        num_eval_iters = (FLAGS.max_number_of_evaluations
                          if FLAGS.max_number_of_evaluations > 0 else None)

        if FLAGS.quantize_delay_step >= 0:
            contrib_quantize.create_eval_graph()

        analyzer = contrib_tfprof.model_analyzer
        for analysis_options in (analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS,
                                 analyzer.FLOAT_OPS_OPTIONS):
            analyzer.print_model_analysis(tf.get_default_graph(),
                                          tfprof_options=analysis_options)

        contrib_training.evaluate_repeatedly(
            checkpoint_dir=FLAGS.checkpoint_dir,
            master=FLAGS.master,
            eval_ops=list(metrics_to_updates.values()),
            max_number_of_evaluations=num_eval_iters,
            hooks=[summary_hook],
            eval_interval_secs=FLAGS.eval_interval_secs)
Exemplo n.º 25
0
    def my_model_fn(features, labels, mode, params=None, config=None):
        """Estimator model function.
        Args:
          features: dictionary where keys are strings like "inputs" and "targets"
            and the values are the actual values of "inputs". See TPUEstimator's
            docs for more information
          labels: ignored argument
          mode: a tf.estimator.ModeKeys
          params: dictionary containing the key "context"
          config: ignored argument
        Returns:
          a TPUEstimatorSpec
        """
        del labels, config
        global_step = tf.train.get_global_step()
        if use_tpu and "context" in params:
            ctx = params["context"]
            num_hosts = ctx.num_hosts
            host_placement_fn = ctx.tpu_host_placement_function
            device_list = [
                host_placement_fn(host_id=t) for t in range(num_hosts)
            ]
            # TODO(ylc): Better estimation of replica cache size?
            replica_cache_size = 300 * 1000000  # 300M per replica
            # Worker 0 caches all the TPU binaries.
            worker0_mem = replica_cache_size * ctx.num_replicas
            devices_memeory_usage = [worker0_mem] + [0] * (num_hosts - 1)
            var_placer = mtf.utils.BalancedVariablePlacer(
                device_list, devices_memeory_usage)
            # deprecated mesh_devices = [""] * mesh_shape.size
            physical_shape = list(
                params["context"].device_assignment.topology.mesh_shape)
            logical_to_physical = mtf.simd_mesh_impl.auto_logical_to_physical_tpu(
                mesh_shape.to_integer_list, physical_shape)
            mesh_impl = mtf.simd_mesh_impl.SimdMeshImpl(
                mesh_shape,
                layout_rules,
                mesh_devices,
                ctx.device_assignment,
                logical_to_physical=logical_to_physical)
        else:
            var_placer = None
            # deprecated mesh_devices = [""] * mesh_shape.size
            mesh_impl = mtf.placement_mesh_impl.PlacementMeshImpl(
                mesh_shape, layout_rules, mesh_devices)

        graph = mtf.Graph()
        mesh = mtf.Mesh(graph, "my_mesh", var_placer)

        mtf_features = {}
        for key, x in features.items():
            outer_batch_dim = mtf.Dimension("outer_batch", outer_batch_size)
            batch_dim = mtf.Dimension("batch", batch_size // outer_batch_size)
            # Some auxiliary features may have been generated in packing.
            # The names of these new features are of the form
            #   "<original_feature_name>_<suffix>", e.g. "inputs_segmentation".
            #   We look up the lengths based on the original feature name, without
            #   the "_<suffix>".
            feature_length = sequence_length[key.split("_")[0]]
            length_dim = mtf.Dimension("length", feature_length)
            ensemble_dims = ([mtf.Dimension("ensemble", ensemble_inputs)]
                             if ensemble_inputs else [])
            feature_shape = mtf.Shape(ensemble_dims +
                                      [outer_batch_dim, batch_dim, length_dim])
            x = tf.cast(features[key], tf.int32)
            x = tf.reshape(x, feature_shape.to_integer_list)
            if not use_tpu:
                tf.logging.info("feature %s : %s" % (key, x))
                x = tf.Print(x, [x],
                             "import feature %s" % key,
                             summarize=1000,
                             first_n=10)
            mtf_features[key] = mtf.import_fully_replicated(mesh,
                                                            x,
                                                            feature_shape,
                                                            name=key)
            if key == "targets" or key == "codeprefixedtargets" or key == "controlcode":
                anon_targets = mtf.anonymize(mtf_features[key])

        if mode == tf.estimator.ModeKeys.PREDICT:

            def _feature_shape(key):
                feature_length = sequence_length[key.split("_")[0]]
                return mtf.Shape([
                    mtf.Dimension("batch", batch_size),
                    mtf.Dimension("length", feature_length)
                ])

            mtf_features = {
                k: mtf.reshape(v, _feature_shape(k))
                for k, v in six.iteritems(mtf_features)
            }
            inputs = mtf_features["inputs"]

            if attribute_embedding:
                attributes = mtf_features["attribute"]
            else:
                attributes = None

            if has_partial_sequences:
                controlcodes = mtf_features["controlcode"]
            else:
                controlcodes = None

            if predict_fn:
                mtf_samples = predict_fn(model=transformer_model,
                                         features=mtf_features,
                                         variable_dtype=get_variable_dtype())
            elif isinstance(transformer_model, transformer.Unitransformer):
                # pad so that there is enough room for the targets
                inputs = mtf.pad(inputs, [0, sequence_length["targets"]],
                                 length_dim.name)
                mtf_samples = transformer_model.sample_autoregressive(
                    inputs,
                    variable_dtype=get_variable_dtype(),
                    remove_partial_sequences=True)
            elif isinstance(transformer_model, Bitransformer_ll):
                mtf_samples = transformer_model.decode(
                    inputs,
                    attributes=attributes,
                    controlcodes=controlcodes,
                    has_partial_sequences=has_partial_sequences,
                    remove_partial_sequences=remove_partial_sequences,
                    variable_dtype=get_variable_dtype())  #
            elif isinstance(
                    transformer_model,
                (transformer.Bitransformer, transformer.StudentTeacher)):
                mtf_samples = transformer_model.decode(
                    inputs, variable_dtype=get_variable_dtype())
            else:
                raise ValueError("unrecognized class")
            mtf_samples = mtf.anonymize(mtf_samples)
            inputs = mtf.anonymize(inputs)
            lowering = mtf.Lowering(graph, {mesh: mesh_impl},
                                    autostack=autostack)
            inputs = lowering.export_to_tf_tensor(inputs)
            outputs = lowering.export_to_tf_tensor(mtf_samples)
            predictions = {"inputs": inputs, "outputs": outputs}

            # When exporting a model, we need to communicate to TF-Serving that
            # master variables need to be copied to their slave slice variables.
            # Estimator uses a Scaffold's "local_init_op" for this purpose, so we
            # augment the default "local_init_op" here.
            #
            # The "ready_op" is also constructed here to ensure the variables
            # initialized by "local_init_op" are the same ones checked by "ready_op".
            #
            # WARNING: Any variables created outside of this model_fn()
            # (e.g. tpu_estimator/iterations_per_loop) will NOT be initialized nor
            # checked by these ops.
            def scaffold_fn():
                return tf.train.Scaffold(
                    local_init_op=tf.group(
                        tf.train.Scaffold.default_local_init_op(),
                        lowering.copy_masters_to_slices(),
                        name="mtf_local_init_op"),
                    ready_op=tf.concat([
                        tf.report_uninitialized_variables(),
                        resources.report_uninitialized_resources()
                    ],
                                       axis=0,
                                       name="mtf_ready_op"))

            return tpu_estimator.TPUEstimatorSpec(
                mode=tf.estimator.ModeKeys.PREDICT,
                predictions=predictions,
                scaffold_fn=scaffold_fn,
                prediction_hooks=[mtf.MtfRestoreHook(lowering)])

        assert (mode == tf.estimator.ModeKeys.TRAIN
                or mode == tf.estimator.ModeKeys.EVAL)

        def logits_and_loss(mtf_features):
            """Compute logits and loss.
            Args:
              mtf_features: a dictionary
            Returns:
              logits: a mtf.Tensor
              loss: a mtf.Tensor
            """
            if model_type == "lm":  # TOTRY Adapt that to our case
                if "inputs" in mtf_features:
                    mtf_features = _dynamic_text2self(mtf_features)
                _, _, length_dim = mtf_features["targets"].shape
                inputs = mtf.shift(mtf_features["targets"],
                                   offset=1,
                                   dim=length_dim,
                                   wrap=False)
            else:
                inputs = mtf_features["inputs"]

            if attribute_embedding:
                attributes = mtf_features["attribute"]
            else:
                attributes = None

            if control_codes:
                codeprefixedtargets = mtf_features["codeprefixedtargets"]
            else:
                codeprefixedtargets = None

            if isinstance(transformer_model, transformer.Unitransformer):
                position_kwargs = dict(
                    sequence_id=mtf_features.get("targets_segmentation", None),
                    position=mtf_features.get("targets_position", None),
                )
            elif isinstance(transformer_model, transformer.Bitransformer
                            ) or model_type == "bi_student_teacher":
                if control_codes:
                    position_kwargs = dict(
                        encoder_sequence_id=mtf_features.get(
                            "inputs_segmentation", None),
                        decoder_sequence_id=mtf_features.get(
                            "codeprefixedtargets_segmentation", None),
                        decoder_subsequence_id=mtf_features.get(
                            "codeprefixedtargets_subsegmentation", None),
                        encoder_position=mtf_features.get(
                            "inputs_position", None),
                        decoder_position=mtf_features.get(
                            "codeprefixedtargets_position", None),
                    )
                else:
                    position_kwargs = dict(
                        encoder_sequence_id=mtf_features.get(
                            "inputs_segmentation", None),
                        decoder_sequence_id=mtf_features.get(
                            "targets_segmentation", None),
                        decoder_subsequence_id=mtf_features.get(
                            "targets_subsegmentation", None),
                        encoder_position=mtf_features.get(
                            "inputs_position", None),
                        decoder_position=mtf_features.get(
                            "targets_position", None),
                    )
            else:
                raise ValueError("unrecognized class")

            if isinstance(transformer_model, Bitransformer_ll):
                if cycle_consistency_loss:
                    logits_ae, l_ae = transformer_model.call_simple(
                        inputs=inputs,
                        targets=mtf_features["targets"],
                        compute_loss=True,
                        attributes=attributes,
                        codeprefixedtargets=codeprefixedtargets,
                        mode=mode,
                        variable_dtype=get_variable_dtype(),
                        **position_kwargs)

                    if has_partial_sequences:
                        controlcodes = mtf_features["controlcode"]
                    else:
                        controlcodes = None

                    with gin.config_scope('training'):
                        mtf_samples = transformer_model.decode(
                            inputs,
                            attributes=attributes,
                            controlcodes=controlcodes,
                            has_partial_sequences=has_partial_sequences,
                            remove_partial_sequences=remove_partial_sequences,
                            variable_dtype=get_variable_dtype())
                        # mtf_samples = mtf.anonymize(mtf_samples)
                    outputs = mtf_samples

                    logits_cycle, l_cycle = transformer_model.call_simple(
                        inputs=outputs,
                        targets=mtf_features["targets"],
                        compute_loss=True,
                        attributes=attributes,
                        codeprefixedtargets=codeprefixedtargets,
                        mode=mode,
                        variable_dtype=get_variable_dtype(),
                        **position_kwargs)

                    loss_ae_cycle = lambda_ae * l_ae + lambda_cycle * l_cycle
                    return logits_cycle, loss_ae_cycle
                else:
                    return transformer_model.call_simple(
                        inputs=inputs,
                        targets=mtf_features["targets"],
                        compute_loss=True,
                        attributes=attributes,
                        codeprefixedtargets=codeprefixedtargets,
                        mode=mode,
                        variable_dtype=get_variable_dtype(),
                        **position_kwargs)
            else:
                return transformer_model.call_simple(
                    inputs=inputs,
                    targets=mtf_features["targets"],
                    compute_loss=True,
                    mode=mode,
                    variable_dtype=get_variable_dtype(),
                    num_microbatches=num_microbatches,
                    **position_kwargs)

        if mode == tf.estimator.ModeKeys.TRAIN:
            num_microbatches = serialize_num_microbatches(
                batch_dim, sequence_length, mesh_shape, layout_rules)
            if num_microbatches > 1:

                def serialized_fn(mtf_features):
                    return {
                        "loss":
                        (logits_and_loss(mtf_features)[1] / num_microbatches)
                    }

                var_grads, loss_dict = mtf.serialize_training_step(
                    mtf_features, serialized_fn, batch_dim, num_microbatches)
                loss = loss_dict["loss"]
            else:
                loss = logits_and_loss(mtf_features)[1]
                var_grads = mtf.gradients(
                    [loss], [v.outputs[0] for v in graph.trainable_variables])

            if tpu_summaries:
                mtf.scalar_summary("loss", loss)

            if callable(learning_rate_schedule):
                # the following happens on CPU since TPU can't handle summaries.
                with mtf.utils.outside_all_rewrites():
                    learning_rate = learning_rate_schedule(
                        step=tf.train.get_global_step())
                    tf.summary.scalar("learning_rate", learning_rate)
            else:
                learning_rate = learning_rate_schedule

            if isinstance(variable_filter, str):
                pattern = re.compile(variable_filter)
                variable_filter_fn = lambda v: pattern.search(v.name)
            elif variable_filter is None:
                variable_filter_fn = lambda v: True
            elif callable(variable_filter):
                variable_filter_fn = variable_filter
            else:
                raise ValueError(
                    "variable_filter must be None, a string, or a callable function"
                )
            trainable_vars = [
                v for v in graph.trainable_variables if variable_filter_fn(v)
            ]
            trainable_var_grads = [
                g for g, v in zip(var_grads, graph.trainable_variables)
                if variable_filter_fn(v)
            ]
            if len(trainable_vars) != len(graph.trainable_variables):
                tf.logging.info("Variables being trained:")
                tf.logging.info([v.name for v in trainable_vars])
                tf.logging.info("Variables not being trained:")
                tf.logging.info([
                    v.name for v in graph.trainable_variables
                    if not variable_filter_fn(v)
                ])

            update_ops = optimizer(learning_rate=learning_rate).apply_grads(
                trainable_var_grads, trainable_vars)

            lowering = mtf.Lowering(graph, {mesh: mesh_impl},
                                    autostack=autostack)

            tf_loss = lowering.export_to_tf_tensor(loss)
            tf_loss = tf.cast(tf_loss, tf.float32)
            if not use_tpu:
                tf_loss = tf.Print(
                    tf_loss, [tf_loss, tf.train.get_global_step()],
                    "step, tf_loss")

            tf_update_ops = [
                lowering.lowered_operation(op) for op in update_ops
            ]
            tf_update_ops.append(tf.assign_add(global_step, 1))
            train_op = tf.group(tf_update_ops)

            if hasattr(transformer_model, "initialize"):
                with mtf.utils.outside_all_rewrites():
                    transformer_model.initialize()

            if tpu_summaries:
                # has to be outside of
                # with mtf.utils.outside_all_rewrites()
                host_call = mtf.utils.create_host_call(model_dir)
                mtf.utils.remove_summaries()
            else:
                host_call = None

            with mtf.utils.outside_all_rewrites():

                if init_checkpoint:
                    ckpt_vars = {
                        v
                        for v, _ in tf.train.list_variables(init_checkpoint)
                    }
                    global_vars = {v.op.name for v in tf.global_variables()}
                    restore_vars = ckpt_vars.intersection(global_vars)
                    tf.logging.info("Initializing variables from %s:",
                                    init_checkpoint)
                    tf.logging.debug("\n".join(sorted(restore_vars)))
                    tf.logging.info("Variables in %s but not in graph:",
                                    init_checkpoint)
                    tf.logging.info("\n".join(sorted(ckpt_vars - global_vars)))
                    tf.logging.info("Variables in graph but not in %s:",
                                    init_checkpoint)
                    tf.logging.info("\n".join(sorted(global_vars - ckpt_vars)))
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  {v: v
                                                   for v in restore_vars})

                # Copy master variables to slices. Must be called first.
                restore_hook = mtf.MtfRestoreHook(lowering)
                saver = tf.train.Saver(tf.global_variables(),
                                       sharded=True,
                                       max_to_keep=keep_checkpoint_max,
                                       keep_checkpoint_every_n_hours=2,
                                       defer_build=False,
                                       save_relative_paths=True)
                tf.add_to_collection(tf.GraphKeys.SAVERS, saver)
                saver_listener = mtf.MtfCheckpointSaverListener(lowering)
                saver_hook = tf.train.CheckpointSaverHook(
                    model_dir,
                    save_steps=save_checkpoints_steps,
                    saver=saver,
                    listeners=[saver_listener])
                gin_config_saver_hook = gin.tf.GinConfigSaverHook(
                    model_dir,
                    summarize_config=True,
                    include_step_in_filename=False)

                if use_tpu:
                    return tpu_estimator.TPUEstimatorSpec(
                        mode=tf.estimator.ModeKeys.TRAIN,
                        loss=tf_loss,
                        train_op=train_op,
                        host_call=host_call,
                        training_hooks=[
                            restore_hook,
                            saver_hook,
                            gin_config_saver_hook,
                        ])
                else:
                    return tf.estimator.EstimatorSpec(
                        tf.estimator.ModeKeys.TRAIN,
                        loss=tf_loss,
                        train_op=train_op,
                        training_chief_hooks=[
                            restore_hook,
                            saver_hook,
                            gin_config_saver_hook,
                        ])
        elif mode == tf.estimator.ModeKeys.EVAL:
            logits, loss = logits_and_loss(mtf_features)
            anon_logits = mtf.anonymize(logits)
            lowering = mtf.Lowering(graph, {mesh: mesh_impl},
                                    autostack=autostack)
            tf_loss = tf.cast(lowering.export_to_tf_tensor(loss), tf.float32)
            tf_loss = tf.cast(tf_loss, tf.float32)
            tf_logits = tf.cast(lowering.export_to_tf_tensor(anon_logits),
                                tf.float32)

            def simple_metrics(logits, labels):
                """Build streaming metrics for teacher-forced evaluation.

                Positions whose label equals 0 receive zero weight. A sequence
                counts as correct when the number of correctly predicted
                weighted tokens equals the number of weighted tokens, and
                sequences with no weighted tokens are excluded from the
                sequence accuracy.

                Args:
                    logits: Logits tensor; the arg-max is taken over its last
                        axis.
                    labels: Integer label tensor.

                Returns:
                    Dict mapping metric name to the result of
                    `tf.metrics.mean`.
                """
                weights = tf.cast(tf.not_equal(labels, 0), tf.float32)
                xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=labels, logits=logits)

                # Per-token hits, masked by the label weights.
                predicted_ids = tf.cast(
                    tf.argmax(logits, axis=-1), labels.dtype)
                is_hit = tf.cast(tf.equal(predicted_ids, labels), tf.float32)
                token_correct = is_hit * weights

                # Per-sequence correctness: all weighted tokens must be hits.
                hits_per_sequence = tf.reduce_sum(token_correct, -1)
                weight_per_sequence = tf.reduce_sum(weights, -1)
                sequence_correct = tf.to_float(
                    tf.equal(hits_per_sequence, weight_per_sequence))

                # Only sequences with at least one weighted token count.
                total_weight = tf.reduce_sum(weights, -1)
                sequence_weights = tf.to_float(tf.not_equal(total_weight, 0))

                metrics = {}
                metrics["neg_log_perplexity"] = tf.metrics.mean(-xent, weights)
                metrics["token_accuracy"] = tf.metrics.mean(
                    token_correct, weights)
                metrics["sequence_accuracy"] = tf.metrics.mean(
                    sequence_correct, sequence_weights)
                return metrics

            labels = lowering.export_to_tf_tensor(anon_targets)
            eval_metrics = (simple_metrics, [tf_logits, labels])
            with mtf.utils.outside_all_rewrites():
                restore_hook = mtf.MtfRestoreHook(lowering)
            return tpu_estimator.TPUEstimatorSpec(
                tf.estimator.ModeKeys.EVAL,
                evaluation_hooks=[restore_hook],
                loss=tf_loss,
                eval_metrics=eval_metrics)
def main(_):
    """Run a single evaluation pass of the Inception-v3 FCN over a dataset.

    Builds the input pipeline and the network in a fresh graph, defines
    streaming accuracy and recall@2 metrics, and evaluates the checkpoint
    selected through `FLAGS.checkpoint_path` exactly once.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If `--dataset_dir` was not supplied.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # The global step is required when creating the session.
        slim.get_or_create_global_step()

        # --- Dataset -------------------------------------------------------
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        # --- Network configuration -----------------------------------------
        network_fn = inception_v3_fcn.get_inception_v3_fcn_network_fn(
            network_params.InceptionV3FCNParams(
                receptive_field_size=FLAGS.receptive_field_size,
                prelogit_dropout_keep_prob=0.8,
                depth_multiplier=0.1,
                min_depth=16,
                inception_fcn_stride=0,
            ),
            network_params.ConvScopeParams(
                dropout=False,
                dropout_keep_prob=0.8,
                batch_norm=True,
                batch_norm_decay=0.99,
                l2_weight_decay=4e-05,
            ),
            num_classes=dataset.num_classes,
            is_training=False,
        )

        # --- Data provider (unshuffled, reads straight from the dataset) ---
        batch_size = FLAGS.batch_size
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * batch_size,
            common_queue_min=batch_size)
        image, label = provider.get(['image', 'label'])

        # --- Preprocessing and batching ------------------------------------
        preprocess = preprocessing_factory.get_preprocessing(
            'inception_v3', is_training=False)
        side = FLAGS.receptive_field_size
        image = preprocess(image, side, side)
        images, labels = tf.train.batch(
            [image, label],
            batch_size=batch_size,
            num_threads=PREPROCESSING_THREADS,
            capacity=5 * batch_size)

        # --- Model ---------------------------------------------------------
        logits, _unused = network_fn(images)

        variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # --- Metrics -------------------------------------------------------
        metric_map = {
            'Accuracy':
            slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_2':
            slim.metrics.streaming_recall_at_k(logits, labels, 2),
        }
        names_to_values, names_to_updates = (
            slim.metrics.aggregate_metric_map(metric_map))

        # Each metric is both written as a summary and printed to screen.
        for metric_name, metric_value in names_to_values.items():
            tag = 'eval/%s' % metric_name
            summary_op = tf.summary.scalar(tag, metric_value, collections=[])
            summary_op = tf.Print(summary_op, [metric_value], tag)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, summary_op)

        # Enough batches for exactly one pass over all of the data.
        num_batches = math.ceil(dataset.num_samples / float(batch_size))

        # A directory means "use its most recent checkpoint".
        checkpoint_path = FLAGS.checkpoint_path
        if tf.gfile.IsDirectory(checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)

        tf.logging.info('Evaluating %s', checkpoint_path)

        slim.evaluation.evaluate_once(
            master='',
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            session_config=tf.ConfigProto(allow_soft_placement=True),
            variables_to_restore=variables_to_restore)
Exemplo n.º 27
0
def get_sent_reps_masks_normal_loop(sent_index,
                                    input_sent_reps_doc,
                                    input_mask_doc_level,
                                    masked_lm_loss_doc,
                                    masked_lm_example_loss_doc,
                                    masked_lm_weights_doc,
                                    dual_encoder_config,
                                    is_training,
                                    train_mode,
                                    input_ids,
                                    input_mask,
                                    masked_lm_positions,
                                    masked_lm_ids,
                                    masked_lm_weights,
                                    use_one_hot_embeddings,
                                    debugging=False):
  """Encodes one sentence of the document and accumulates its outputs.

  The sentence at `sent_index` is sliced out of the flattened token inputs,
  encoded with BERT, and its representation and sentence-level mask are
  appended to the running per-document lists. In the pretrain and joint-train
  modes the masked word LM loss of the sentence is computed and accumulated
  as well.

  Args:
      sent_index: The index of the current looped sentence.
      input_sent_reps_doc: List collecting the representations of all
        sentences in the doc learned by BERT. Appended to in place.
      input_mask_doc_level: List collecting the document level input masks,
        which indicate whether a sentence is a real sentence or a padded
        sentence. Appended to in place.
      masked_lm_loss_doc: The running sum of the masked word LM loss.
      masked_lm_example_loss_doc: List collecting the per example masked word
        LM loss. Appended to in place.
      masked_lm_weights_doc: List collecting the weights of the masked LM
        words. A weight is 1.0 for a real masked word and 0 for a padded mask.
        Appended to in place.
      dual_encoder_config: The config of the dual encoder.
      is_training: Whether it is in the training mode.
      train_mode: string. The train mode, which can be finetune, joint_train,
        or pretrain.
      input_ids: The ids of the input tokens.
      input_mask: The mask of the input tokens.
      masked_lm_positions: The positions of the masked words in the language
        model training.
      masked_lm_ids: The ids of the masked words in LM model training.
      masked_lm_weights: The weights of the masked words in LM model training.
      use_one_hot_embeddings: Whether to use one hot embeddings. It should be
        true for the runs on TPUs.
      debugging: bool. Whether it is in the debugging mode.

  Returns:
    A tuple (input_sent_reps_doc, input_mask_doc_level, masked_lm_loss_doc,
    masked_lm_example_loss_doc, masked_lm_weights_doc) holding the updated
    accumulators.
  """
  encoder_config = dual_encoder_config.encoder_config
  bert_config = modeling.BertConfig.from_json_file(
      encoder_config.bert_config_file)
  sent_length = encoder_config.max_sent_length_by_word
  sent_bert_trainable = encoder_config.sent_bert_trainable
  predictions_per_sent = encoder_config.max_predictions_per_seq

  def _column_window(tensor, start, width):
    """Returns every row of `tensor`, columns [start, start + width)."""
    return tf.slice(tensor, [0, start], [-1, width])

  # Token information of the current sentence, each of shape
  # [batch, max_sent_length_by_word].
  token_offset = sent_index * sent_length
  sent_ids = _column_window(input_ids, token_offset, sent_length)
  sent_mask = _column_window(input_mask, token_offset, sent_length)

  # Sentence-level mask of shape [batch]; the accumulated list has shape
  # [loop_sent_number_per_doc, batch].
  input_mask_doc_level.append(tf.reduce_max(sent_mask, 1))

  if debugging:
    sent_ids = tf.Print(
        sent_ids, [sent_ids, sent_mask],
        message="input_ids_cur_sent in get_sent_reps_masks_lm_loss",
        summarize=20)

  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=sent_ids,
      input_mask=sent_mask,
      use_one_hot_embeddings=use_one_hot_embeddings,
      sent_bert_trainable=sent_bert_trainable)
  with tf.variable_scope("seq_rep_from_bert_sent_dense", reuse=tf.AUTO_REUSE):
    sent_rep = get_seq_rep_from_bert(model)
  input_sent_reps_doc.append(sent_rep)

  needs_word_lm_loss = (
      train_mode == constants.TRAIN_MODE_PRETRAIN or
      train_mode == constants.TRAIN_MODE_JOINT_TRAIN)
  if not needs_word_lm_loss:
    return (input_sent_reps_doc, input_mask_doc_level, masked_lm_loss_doc,
            masked_lm_example_loss_doc, masked_lm_weights_doc)

  # Masked token information of the current sentence, each of shape
  # [batch, max_predictions_per_seq].
  lm_offset = sent_index * predictions_per_sent
  lm_positions = _column_window(masked_lm_positions, lm_offset,
                                predictions_per_sent)
  lm_ids = _column_window(masked_lm_ids, lm_offset, predictions_per_sent)
  lm_weights = _column_window(masked_lm_weights, lm_offset,
                              predictions_per_sent)

  # The stored masked LM positions are global indices counted from the first
  # token of the whole sequence; convert them into 0-based positions local to
  # the current sentence:
  # local_index = global_index mod max_sent_length_by_word.
  lm_positions = tf.mod(lm_positions, sent_length)

  # sent_lm_loss has shape [1]; sent_lm_example_loss has shape
  # [batch, max_predictions_per_seq].
  sent_lm_loss, sent_lm_example_loss, _ = get_masked_lm_output(
      bert_config, model.get_sequence_output(), model.get_embedding_table(),
      lm_positions, lm_ids, lm_weights)

  # Shape [1].
  masked_lm_loss_doc += sent_lm_loss
  # Shape [loop_sent_number_per_doc, batch * max_predictions_per_seq].
  masked_lm_example_loss_doc.append(sent_lm_example_loss)
  # Shape [loop_sent_number_per_doc, batch, max_predictions_per_seq].
  masked_lm_weights_doc.append(lm_weights)
  return (input_sent_reps_doc, input_mask_doc_level, masked_lm_loss_doc,
          masked_lm_example_loss_doc, masked_lm_weights_doc)
Exemplo n.º 28
0
def setup(act_fun):
    """Build the energy-model graph, create a session and optionally restore.

    Selects a network class from FLAGS, creates the input placeholders, and
    for each of `FLAGS.num_gpus` shards builds: the energy of the real images,
    a `tf.while_loop` that refines the noise images (noisy gradient steps or
    HMC), the energies of the refined images, and (when `FLAGS.train`) the
    training loss and its gradients. Afterwards it creates the session and
    saver, initializes variables, optionally restores a checkpoint on
    Horovod rank 0, and broadcasts variables from rank 0.

    Args:
        act_fun: Forwarded to the model constructor as `act_fun`.

    Returns:
        A tuple `(target_vars, saver, sess, resume_itr)`: a dict of graph
        tensors/ops keyed by name, the `tf.train.Saver`, the `tf.Session`,
        and the iteration that was resumed from (0 when nothing was restored).
    """
    channel_num = 3
    # Pick the network architecture; the first matching flag wins.
    if FLAGS.mnist_model:
        print("------------------Using MNIST model------------")
        model = MnistNet(
            num_channels=channel_num,
            num_filters=128,
            act_fun=act_fun)
    elif FLAGS.large_model:
        print("------------------Using ResNet32Large model------------")
        model = ResNet32Large(
            num_channels=channel_num,
            num_filters=128,
            train=True,
            act_fun=act_fun)
    elif FLAGS.larger_model:
        print("------------------Using ResNet32Larger model------------")
        model = ResNet32Larger(
            num_channels=channel_num,
            num_filters=128,
            act_fun=act_fun)
    elif FLAGS.wider_model:
        print("------------------Using ResNet32Wider model------------")
        model = ResNet32Wider(
            num_channels=channel_num,
            num_filters=192,
            act_fun=act_fun)
    else:
        print("------------------Using ResNet32 model------------")
        model = ResNet32(
            num_channels=channel_num,
            num_filters=128,
            act_fun=act_fun)

    # NOTE(review): `batch_size` is not used again in this function.
    batch_size = FLAGS.batch_size
    # A single set of weights, shared by every tower below via weights[0].
    weights = [model.construct_weights('context_0')]

    # `shape=(None)` is plain `None` (not a 1-tuple), i.e. an unspecified shape.
    Y = tf.placeholder(shape=(None), dtype=tf.int32)
    # Overwritten by the placeholder assigned to LABEL two lines below.
    LABEL = None
    X_NOISE = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32)
    X = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32)
    LABEL = tf.placeholder(shape=(None, 10), dtype=tf.float32)
    LABEL_POS = tf.placeholder(shape=(None, 10), dtype=tf.float32)
    # Variables to run in training: every input is split into
    # FLAGS.num_gpus equal shards, one per tower.
    X_SPLIT = tf.split(X, FLAGS.num_gpus)
    X_NOISE_SPLIT = tf.split(X_NOISE, FLAGS.num_gpus)
    LABEL_SPLIT = tf.split(LABEL, FLAGS.num_gpus)
    LABEL_POS_SPLIT = tf.split(LABEL_POS, FLAGS.num_gpus)
    # Copy of the original label shards; LABEL_SPLIT[j] may be replaced below.
    LABEL_SPLIT_INIT = list(LABEL_SPLIT)
    tower_grads = []
    # NOTE(review): tower_gen_grads and x_mod_list are never filled here.
    tower_gen_grads = []
    x_mod_list = []

    optimizer = AdamOptimizer(FLAGS.lr, beta1=0.0, beta2=0.999)
    optimizer = hvd.DistributedOptimizer(optimizer)

    # Build one tower per shard.
    for j in range(FLAGS.num_gpus):
        if FLAGS.model_cclass:
            # The model picks the label itself: score every image against all
            # 10 one-hot labels and sample a label from the resulting energies.
            ind_batch_size = FLAGS.batch_size // FLAGS.num_gpus
            # NOTE(review): label_tensor has FLAGS.batch_size * 10 rows while
            # x_split below has ind_batch_size * 10 rows; these only agree
            # when FLAGS.num_gpus == 1 - confirm this is intended.
            label_tensor = tf.Variable(
                tf.convert_to_tensor(
                    np.reshape(
                        np.tile(np.eye(10), (FLAGS.batch_size, 1, 1)),
                        (FLAGS.batch_size * 10, 10)),
                    dtype=tf.float32),
                trainable=False,
                dtype=tf.float32)
            # Repeat every image 10 times, once per candidate label.
            x_split = tf.tile(
                tf.reshape(
                    X_SPLIT[j], (ind_batch_size, 1, 32, 32, 3)), (1, 10, 1, 1, 1))
            x_split = tf.reshape(x_split, (ind_batch_size * 10, 32, 32, 3))
            energy_pos = model.forward(
                x_split,
                weights[0],
                label=label_tensor,
                stop_at_grad=False)

            # One row per image, one column per candidate label.
            energy_pos_full = tf.reshape(energy_pos, (ind_batch_size, 10))
            energy_partition_est = tf.reduce_logsumexp(
                energy_pos_full, axis=1, keepdims=True)
            uniform = tf.random_uniform(tf.shape(energy_pos_full))
            # Arg-max of negative energy plus -log(-log(u)) noise, which
            # randomly samples one label per image. energy_partition_est is
            # constant along axis 1 and so does not affect the arg-max.
            label_tensor = tf.argmax(-energy_pos_full -
                                     tf.log(-tf.log(uniform)) - energy_partition_est, axis=1)
            label = tf.one_hot(label_tensor, 10, dtype=tf.float32)
            label = tf.Print(label, [label_tensor, energy_pos_full])
            # The sampled labels replace the fed labels for this shard.
            LABEL_SPLIT[j] = label
            energy_pos = tf.concat(energy_pos, axis=0)
        else:
            # Energy of the real images under their positive labels.
            energy_pos = [
                model.forward(
                    X_SPLIT[j],
                    weights[0],
                    label=LABEL_POS_SPLIT[j],
                    stop_at_grad=False)]
            energy_pos = tf.concat(energy_pos, axis=0)

        print("Building graph...")
        # Sampling starts from the noise images of this shard.
        x_mod = x_orig = X_NOISE_SPLIT[j]

        x_grads = []

        energy_negs = []
        loss_energys = []

        # Energy of the starting samples (exported below as 'energy_start').
        energy_negs.extend([model.forward(tf.stop_gradient(
            x_mod), weights[0], label=LABEL_SPLIT[j], stop_at_grad=False, reuse=True)])
        eps_begin = tf.zeros(1)

        steps = tf.constant(0)
        # Loop condition: run the sampler for FLAGS.num_steps iterations.
        c = lambda i, x: tf.less(i, FLAGS.num_steps)

        def langevin_step(counter, x_mod):
            """Run one sampling step on `x_mod`.

            Adds Gaussian noise to `x_mod`, then either takes a gradient step
            on the energy or runs `hmc`, and clips the result to
            [0, FLAGS.rescale].

            Returns:
                The incremented counter and the updated samples.
            """
            x_mod = x_mod + tf.random_normal(tf.shape(x_mod),
                                             mean=0.0,
                                             stddev=0.005 * FLAGS.rescale * FLAGS.noise_scale)

            energy_noise = energy_start = tf.concat(
                [model.forward(
                        x_mod,
                        weights[0],
                        label=LABEL_SPLIT[j],
                        reuse=True,
                        stop_at_grad=False,
                        stop_batch=True)],
                axis=0)

            # Gradient of the temperature-scaled energy w.r.t. the samples and
            # the labels; only x_grad is used below.
            x_grad, label_grad = tf.gradients(
                FLAGS.temperature * energy_noise, [x_mod, LABEL_SPLIT[j]])
            energy_noise_old = energy_noise

            lr = FLAGS.step_lr

            # Optionally clip the sample gradient, by L2 norm ('l2') or
            # elementwise to [-proj_norm, proj_norm] ('li').
            if FLAGS.proj_norm != 0.0:
                if FLAGS.proj_norm_type == 'l2':
                    x_grad = tf.clip_by_norm(x_grad, FLAGS.proj_norm)
                elif FLAGS.proj_norm_type == 'li':
                    x_grad = tf.clip_by_value(
                        x_grad, -FLAGS.proj_norm, FLAGS.proj_norm)
                else:
                    print("Other types of projection are not supported!!!")
                    assert False

            # Choose the update rule: HMC or a plain gradient step.
            if FLAGS.hmc:
                # Step size should be tuned to get around 65% acceptance
                def energy(x):
                    """Temperature-scaled energy of `x` for this shard's labels."""
                    return FLAGS.temperature * \
                        model.forward(x, weights[0], label=LABEL_SPLIT[j], reuse=True)

                x_last = hmc(x_mod, 15., 10, energy)
            else:
                x_last = x_mod - (lr) * x_grad

            x_mod = x_last
            # Keep the samples inside [0, FLAGS.rescale].
            x_mod = tf.clip_by_value(x_mod, 0, FLAGS.rescale)

            counter = counter + 1

            return counter, x_mod

        steps, x_mod = tf.while_loop(c, langevin_step, (steps, x_mod))

        # Gradient of the energy at the final samples (exported as 'x_grad').
        energy_eval = model.forward(x_mod, weights[0], label=LABEL_SPLIT[j],
                                    stop_at_grad=False, reuse=True)
        x_grad = tf.gradients(FLAGS.temperature * energy_eval, [x_mod])[0]
        x_grads.append(x_grad)

        # Energy of the final samples, with gradients into the samples blocked.
        energy_negs.append(
            model.forward(
                tf.stop_gradient(x_mod),
                weights[0],
                label=LABEL_SPLIT[j],
                stop_at_grad=False,
                reuse=True))

        test_x_mod = x_mod

        temp = FLAGS.temperature

        energy_neg = energy_negs[-1]
        # Mean absolute difference between the final samples and the real
        # images of this shard.
        x_off = tf.reduce_mean(
            tf.abs(x_mod[:tf.shape(X_SPLIT[j])[0]] - X_SPLIT[j]))

        # NOTE(review): this call passes the full LABEL placeholder rather
        # than LABEL_SPLIT[j] - confirm this is intended.
        loss_energy = model.forward(
            x_mod,
            weights[0],
            reuse=True,
            label=LABEL,
            stop_grad=True)

        print("Finished processing loop construction ...")

        # NOTE(review): target_vars is re-created on every loop iteration, so
        # after the loop it only holds the entries of the last tower.
        target_vars = {}

        if FLAGS.cclass or FLAGS.model_cclass:
            # Entropy of the label distribution of the first shard.
            label_sum = tf.reduce_sum(LABEL_SPLIT[0], axis=0)
            label_prob = label_sum / tf.reduce_sum(label_sum)
            label_ent = -tf.reduce_sum(label_prob *
                                       tf.math.log(label_prob + 1e-7))
        else:
            label_ent = tf.zeros(1)

        target_vars['label_ent'] = label_ent

        if FLAGS.train:

            # NOTE(review): loss_ml is only assigned for the three objectives
            # below; any other FLAGS.objective leaves it undefined.
            if FLAGS.objective == 'logsumexp':
                pos_term = temp * energy_pos
                # Shift by the minimum before exponentiating.
                energy_neg_reduced = (energy_neg - tf.reduce_min(energy_neg))
                coeff = tf.stop_gradient(tf.exp(-temp * energy_neg_reduced))
                norm_constant = tf.stop_gradient(tf.reduce_sum(coeff)) + 1e-4
                pos_loss = tf.reduce_mean(temp * energy_pos)
                neg_loss = coeff * (-1 * temp * energy_neg) / norm_constant
                loss_ml = FLAGS.ml_coeff * (pos_loss + tf.reduce_sum(neg_loss))
            elif FLAGS.objective == 'cd':
                # Mean positive energy minus mean negative energy.
                pos_loss = tf.reduce_mean(temp * energy_pos)
                neg_loss = -tf.reduce_mean(temp * energy_neg)
                loss_ml = FLAGS.ml_coeff * (pos_loss + tf.reduce_sum(neg_loss))
            elif FLAGS.objective == 'softplus':
                loss_ml = FLAGS.ml_coeff * \
                    tf.nn.softplus(temp * (energy_pos - energy_neg))

            loss_total = tf.reduce_mean(loss_ml)

            if not FLAGS.zero_kl:
                loss_total = loss_total + tf.reduce_mean(loss_energy)

            # L2 penalty on the magnitudes of both energies.
            loss_total = loss_total + \
                FLAGS.l2_coeff * (tf.reduce_mean(tf.square(energy_pos)) + tf.reduce_mean(tf.square((energy_neg))))

            print("Started gradient computation...")
            gvs = optimizer.compute_gradients(loss_total)
            # Drop the pairs whose first element (the gradient) is None.
            gvs = [(k, v) for (k, v) in gvs if k is not None]

            print("Applying gradients...")

            # Gradients are only collected here; they are averaged and applied
            # once, after the tower loop.
            tower_grads.append(gvs)

            print("Finished applying gradients.")

            target_vars['loss_ml'] = loss_ml
            target_vars['total_loss'] = loss_total
            target_vars['loss_energy'] = loss_energy
            target_vars['weights'] = weights
            target_vars['gvs'] = gvs

        # Expose placeholders and tensors of this tower by name.
        target_vars['X'] = X
        target_vars['Y'] = Y
        target_vars['LABEL'] = LABEL
        target_vars['LABEL_POS'] = LABEL_POS
        target_vars['X_NOISE'] = X_NOISE
        target_vars['energy_pos'] = energy_pos
        target_vars['energy_start'] = energy_negs[0]

        if len(x_grads) >= 1:
            target_vars['x_grad'] = x_grads[-1]
            target_vars['x_grad_first'] = x_grads[0]
        else:
            target_vars['x_grad'] = tf.zeros(1)
            target_vars['x_grad_first'] = tf.zeros(1)

        target_vars['x_mod'] = x_mod
        target_vars['x_off'] = x_off
        target_vars['temp'] = temp
        target_vars['energy_neg'] = energy_neg
        target_vars['test_x_mod'] = test_x_mod
        target_vars['eps_begin'] = eps_begin

    if FLAGS.train:
        # Combine the per-tower gradients and build a single train op.
        grads = average_gradients(tower_grads)
        train_op = optimizer.apply_gradients(grads)
        target_vars['train_op'] = train_op

    config = tf.ConfigProto()

    if hvd.size() > 1:
        # Restrict each process to the GPU matching its local Horovod rank.
        config.gpu_options.visible_device_list = str(hvd.local_rank())

    sess = tf.Session(config=config)
    saver = loader = tf.train.Saver(max_to_keep=30, keep_checkpoint_every_n_hours=6)

    # Count trainable parameters as the sum of the products of variable shapes.
    total_parameters = 0
    for variable in tf.trainable_variables():
        # shape is an array of tf.Dimension
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    print("Model has a total of {} parameters".format(total_parameters))

    sess.run(tf.global_variables_initializer())

    resume_itr = 0

    # Only rank 0 restores from disk; the broadcast below shares the values.
    if (FLAGS.resume_iter != -1 or not FLAGS.train) and hvd.rank() == 0:
        # NOTE(review): `logdir` is not defined in this function; presumably
        # a module-level name - verify.
        model_file = osp.join(logdir, 'model_{}'.format(FLAGS.resume_iter))
        resume_itr = FLAGS.resume_iter
        # saver.restore(sess, model_file)
        optimistic_restore(sess, model_file)

    sess.run(hvd.broadcast_global_variables(0))
    return target_vars, saver, sess, resume_itr
 def my_fn(x):
     """Wrap every tensor in the dict `x` in a `tf.Print` labelled by its key.

     Args:
         x: Dict mapping string keys to tensors.

     Returns:
         A new dict with the same keys, where each value is passed through
         `tf.Print` using the message "<key>: ".
     """
     printed = {}
     for name, tensor in x.items():
         printed[name] = tf.Print(tensor, [tensor], name + ": ")
     return printed
Exemplo n.º 30
0
def model_fn(features, labels, mode, params):
  """Mobilenet v1 model using Estimator API.

  Builds a Keras MobileNet classifier and wires it into the
  prediction / evaluation / training paths expected by a (TPU)Estimator.

  Args:
    features: The input image tensor, or a dict holding that tensor under the
      key 'feature'.
    labels: Class-index labels; they are one-hot encoded over
      `params['num_classes']` classes before the loss is computed. Not used
      in PREDICT mode.
    mode: One of the `tf.estimator.ModeKeys` values.
    params: Dict of hyper-parameters. Keys read by this function (several of
      them only in a particular mode): 'num_classes', 'input_perm',
      'use_tpu', 'eval_batch_size', 'learning_rate', 'train_batch_size',
      'num_train_images', 'learning_rate_decay_epochs', 'learning_rate_decay',
      'optimizer' ('sgd', 'momentum' or 'RMS'), 'moving_average' and
      'use_logits'.

  Returns:
    A `tf.estimator.EstimatorSpec` in PREDICT mode, otherwise a
    `tf.estimator.tpu.TPUEstimatorSpec` carrying the loss, the train op
    (TRAIN mode only) and the eval metrics (EVAL mode only).
  """
  num_classes = params['num_classes']
  training_active = (mode == tf.estimator.ModeKeys.TRAIN)
  eval_active = (mode == tf.estimator.ModeKeys.EVAL)

  if isinstance(features, dict):
    features = features['feature']

  features = supervised_images.tensor_transform_fn(
      features, params['input_perm'])

  model = tf.keras.applications.MobileNet(
      input_tensor=features,
      include_top=True,
      weights=None,
      classes=num_classes)

  logits = model(features, training=training_active)

  predictions = {
      'classes': tf.argmax(input=logits, axis=1),
      'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
  }

  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        export_outputs={
            'classify': tf.estimator.export.PredictOutput(predictions)
        })

  # Optional debugging output: print predicted classes and labels during
  # evaluation. Skipped when running on TPU.
  if eval_active and FLAGS.display_tensors and (not params['use_tpu']):
    with tf.control_dependencies([
        tf.Print(
            predictions['classes'], [predictions['classes']],
            summarize=params['eval_batch_size'],
            message='prediction: ')
    ]):
      # Rebinding `labels` makes the print ops part of the loss/metric graph.
      labels = tf.Print(
          labels, [labels],
          summarize=params['eval_batch_size'], message='label: ')

  one_hot_labels = tf.one_hot(labels, num_classes, dtype=tf.int32)

  # The loss is registered in the losses collection and picked up, together
  # with any regularization losses, by get_total_loss() below.
  tf.losses.softmax_cross_entropy(
      onehot_labels=one_hot_labels,
      logits=logits,
      weights=1.0,
      label_smoothing=0.1)
  loss = tf.losses.get_total_loss(add_regularization_losses=True)

  # Scale the base learning rate linearly with the batch size (relative to
  # a batch of 256) and never decay below 1e-4 of that initial value.
  initial_learning_rate = params['learning_rate'] * params['train_batch_size'] / 256   # pylint: disable=line-too-long
  final_learning_rate = 0.0001 * initial_learning_rate

  train_op = None
  if training_active:
    batches_per_epoch = params['num_train_images'] // params['train_batch_size']
    global_step = tf.train.get_or_create_global_step()

    learning_rate = tf.train.exponential_decay(
        learning_rate=initial_learning_rate,
        global_step=global_step,
        decay_steps=params['learning_rate_decay_epochs'] * batches_per_epoch,
        decay_rate=params['learning_rate_decay'],
        staircase=True)

    # Set a minimum boundary for the learning rate.
    learning_rate = tf.maximum(
        learning_rate, final_learning_rate, name='learning_rate')

    if params['optimizer'] == 'sgd':
      absl.logging.info('Using SGD optimizer')
      optimizer = tf.train.GradientDescentOptimizer(
          learning_rate=learning_rate)
    elif params['optimizer'] == 'momentum':
      absl.logging.info('Using Momentum optimizer')
      optimizer = tf.train.MomentumOptimizer(
          learning_rate=learning_rate, momentum=0.9)
    elif params['optimizer'] == 'RMS':
      absl.logging.info('Using RMS optimizer')
      optimizer = tf.train.RMSPropOptimizer(
          learning_rate,
          RMSPROP_DECAY,
          momentum=RMSPROP_MOMENTUM,
          epsilon=RMSPROP_EPSILON)
    else:
      # The message needs a %s placeholder; without it the extra argument
      # breaks log formatting and the optimizer name is never reported.
      absl.logging.fatal('Unknown optimizer: %s', params['optimizer'])

    if params['use_tpu']:
      optimizer = tf.tpu.CrossShardOptimizer(optimizer)

    # Run the Keras model's update ops together with the training step.
    update_ops = model.updates
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss, global_step=global_step)
    if params['moving_average']:
      ema = tf.train.ExponentialMovingAverage(
          decay=MOVING_AVERAGE_DECAY, num_updates=global_step)
      variables_to_average = (tf.trainable_variables() +
                              tf.moving_average_variables())
      # The averages are updated only after the optimizer step has run.
      with tf.control_dependencies([train_op]), tf.name_scope('moving_average'):
        train_op = ema.apply(variables_to_average)

  eval_metrics = None
  if eval_active:
    def metric_fn(labels, predictions):
      """Computes top-1 accuracy from per-class scores."""
      accuracy = tf.metrics.accuracy(labels, tf.argmax(
          input=predictions, axis=1))
      return {'accuracy': accuracy}

    if params['use_logits']:
      eval_predictions = logits
    else:
      # Previously left unassigned on this path, which raised
      # UnboundLocalError below. Softmax preserves the argmax, so the
      # probabilities yield the same accuracy as the logits.
      eval_predictions = predictions['probabilities']

    eval_metrics = (metric_fn, [labels, eval_predictions])

  return tf.estimator.tpu.TPUEstimatorSpec(
      mode=mode, loss=loss, train_op=train_op, eval_metrics=eval_metrics)