Example #1
def train(flags):
    """Training entry point."""
    log_dir = flags.log_dir
    flags.pretrained_model_dir = log_dir
    log_dir = os.path.join(log_dir, 'train')
    flags.eval_interval_secs = 0
    with tf.Graph().as_default():
        global_step = tf.Variable(0,
                                  trainable=False,
                                  name='global_step',
                                  dtype=tf.int64)
        global_step_confidence = tf.Variable(0,
                                             trainable=False,
                                             name='global_step_confidence',
                                             dtype=tf.int64)

        model = build_model(flags)
        images_query_pl, labels_query_pl, \
        images_support_pl, labels_support_pl = \
          build_episode_placeholder(flags)

        # Augments the input.
        if flags.dataset == 'cifar10' or flags.dataset == 'cifar100':
            images_query_pl_aug = data_loader.augment_cifar(images_query_pl,
                                                            is_training=True)
            images_support_pl_aug = data_loader.augment_cifar(
                images_support_pl, is_training=True)
        elif flags.dataset == 'tinyimagenet':
            images_query_pl_aug = data_loader.augment_tinyimagenet(
                images_query_pl, is_training=True)
            images_support_pl_aug = data_loader.augment_tinyimagenet(
                images_support_pl, is_training=True)

        logits, logits_z = build_proto_train_graph(
            images_query=images_query_pl_aug,
            images_support=images_support_pl_aug,
            flags=flags,
            is_training=True,
            model=model)
        # Losses and optimizer
        ## Classification loss
        loss_classification = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits,
                labels=tf.one_hot(labels_query_pl, flags.num_classes_train)))

        # Confidence loss
        _, top_k_indices = tf.nn.top_k(logits, k=1)
        pred = tf.squeeze(top_k_indices)
        incorrect_mask = tf.math.logical_not(
            tf.math.equal(pred, labels_query_pl))
        incorrect_logits_z = tf.boolean_mask(logits_z, incorrect_mask)
        incorrect_labels_z = tf.boolean_mask(labels_query_pl, incorrect_mask)
        signal_variance = tf.math.reduce_sum(tf.cast(incorrect_mask, tf.int32))
        loss_variance_incorrect = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=incorrect_logits_z,
                labels=tf.one_hot(incorrect_labels_z,
                                  flags.num_classes_train)))
        loss_variance_zero = 0.0
        loss_confidence = tf.cond(tf.greater(signal_variance, 0),
                                  lambda: loss_variance_incorrect,
                                  lambda: loss_variance_zero)

        regu_losses = tf.losses.get_regularization_losses()
        loss = tf.add_n([loss_classification] + regu_losses)

        # Learning rate
        if flags.lr_anneal == 'const':
            learning_rate = flags.init_learning_rate
        elif flags.lr_anneal == 'pwc':
            learning_rate = get_pwc_learning_rate(global_step, flags)
        elif flags.lr_anneal == 'exp':
            lr_decay_step = flags.number_of_steps // flags.n_lr_decay
            learning_rate = tf.train.exponential_decay(
                flags.init_learning_rate,
                global_step,
                lr_decay_step,
                1.0 / flags.lr_decay_rate,
                staircase=True)
        else:
            raise Exception('Not implemented')

        # Optimizer
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=0.9)
        optimizer_confidence = tf.train.MomentumOptimizer(
            learning_rate=learning_rate, momentum=0.9)

        train_op = contrib_slim.learning.create_train_op(
            total_loss=loss,
            optimizer=optimizer,
            global_step=global_step,
            clip_gradient_norm=flags.clip_gradient_norm)
        variable_variance = []
        for v in tf.trainable_variables():
            if 'fc_variance' in v.name:
                variable_variance.append(v)
        train_op_confidence = contrib_slim.learning.create_train_op(
            total_loss=loss_confidence,
            optimizer=optimizer_confidence,
            global_step=global_step_confidence,
            clip_gradient_norm=flags.clip_gradient_norm,
            variables_to_train=variable_variance)

        tf.summary.scalar('loss', loss)
        tf.summary.scalar('loss_classification', loss_classification)
        tf.summary.scalar('loss_variance', loss_confidence)
        tf.summary.scalar('regu_loss', tf.add_n(regu_losses))
        tf.summary.scalar('learning_rate', learning_rate)
        # Merges all summaries except for pretrain
        summary = tf.summary.merge(
            tf.get_collection('summaries', scope='(?!pretrain).*'))

        # Gets datasets
        few_shot_data_train, test_dataset, train_dataset = get_train_datasets(
            flags)
        # Defines session and logging
        summary_writer_train = tf.summary.FileWriter(log_dir, flush_secs=1)
        saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True)
        print(saver.saver_def.filename_tensor_name)
        print(saver.saver_def.restore_op_name)
        # pylint: disable=unused-variable
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        supervisor = tf.train.Supervisor(
            logdir=log_dir,
            init_feed_dict=None,
            summary_op=None,
            init_op=tf.global_variables_initializer(),
            summary_writer=summary_writer_train,
            saver=saver,
            global_step=global_step,
            save_summaries_secs=flags.save_summaries_secs,
            save_model_secs=0)

        with supervisor.managed_session() as sess:
            checkpoint_step = sess.run(global_step)
            if checkpoint_step > 0:
                checkpoint_step += 1
            eval_interval_steps = flags.eval_interval_steps
            for step in range(checkpoint_step, flags.number_of_steps):
                # Samples a few-shot batch and times the data loading.
                t_batch = time.time()
                images_query, labels_query,\
                images_support, labels_support = \
                  few_shot_data_train.next_few_shot_batch(
                      query_batch_size_per_task=flags.train_batch_size,
                      num_classes_per_task=flags.num_classes_train,
                      num_supports_per_class=flags.num_shots_train,
                      num_tasks=flags.num_tasks_per_batch)
                dt_batch = time.time() - t_batch

                feed_dict = {
                    images_query_pl: images_query.astype(dtype=np.float32),
                    labels_query_pl: labels_query,
                    images_support_pl: images_support.astype(dtype=np.float32),
                    labels_support_pl: labels_support
                }

                t_train = time.time()
                loss, loss_confidence = sess.run(
                    [train_op, train_op_confidence], feed_dict=feed_dict)
                dt_train = time.time() - t_train

                if step % 100 == 0:
                    summary_str = sess.run(summary, feed_dict=feed_dict)
                    summary_writer_train.add_summary(summary_str, step)
                    summary_writer_train.flush()
                    logging.info(
                        'step %d, loss : %.4g, dt: %.3gs, dt_batch: %.3gs',
                        step, loss, dt_train, dt_batch)

                if float(step) / flags.number_of_steps > 0.5:
                    eval_interval_steps = flags.eval_interval_fine_steps

                if eval_interval_steps > 0 and step % eval_interval_steps == 0:
                    saver.save(sess,
                               os.path.join(log_dir, 'model'),
                               global_step=step)
                    eval(flags=flags,
                         train_dataset=train_dataset,
                         test_dataset=test_dataset)

                if (step > 0.5 * flags.number_of_steps +
                        flags.number_of_steps_to_early_stop):
                    break
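
A minimal sketch of the staircase exponential learning-rate schedule that the 'exp' branch above configures (tf.train.exponential_decay with decay factor 1/flags.lr_decay_rate every number_of_steps // n_lr_decay steps); the flag values below are illustrative, not from the original code:

def staircase_exp_lr(step, init_lr, number_of_steps, n_lr_decay, lr_decay_rate):
    """Mirrors the staircase exponential decay configured above."""
    decay_step = number_of_steps // n_lr_decay
    return init_lr * (1.0 / lr_decay_rate) ** (step // decay_step)

# Illustrative values: 30000 steps, 3 decays of 10x starting from 0.1.
print([staircase_exp_lr(s, 0.1, 30000, 3, 10.0) for s in (0, 10000, 20000)])
# decays 10x every 10000 steps: roughly [0.1, 0.01, 0.001]
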
Example #2
    def build(self):
        self.lr = tf.placeholder(tf.float32, shape=None, name='learning_rate')

        # Inputs
        self.s = tf.placeholder(tf.float32,
                                shape=[None] + self.state_dim,
                                name='state')
        self.a = tf.placeholder(tf.int32, shape=(None, ), name='action')
        self.returns = tf.placeholder(tf.float32,
                                      shape=(None, ),
                                      name='return')

        # Build network
        self.pi = dense_nn(self.s,
                           self.layer_sizes + [self.act_size],
                           name='pi_network')
        self.sampled_actions = tf.squeeze(tf.multinomial(self.pi, 1))
        self.pi_vars = self.scope_vars('pi_network')

        if self.baseline:
            # State value estimation as the baseline
            self.v = dense_nn(self.s, self.layer_sizes + [1], name='v_network')
            self.target = self.returns - self.v  # advantage

            with tf.variable_scope('v_optimize'):
                self.loss_v = tf.reduce_mean(
                    tf.squared_difference(self.v, self.returns))
                self.optim_v = tf.train.AdamOptimizer(self.lr).minimize(
                    self.loss_v, name='adam_optim_v')
        else:
            self.target = tf.identity(self.returns)

        with tf.variable_scope('pi_optimize'):
            self.loss_pi = tf.reduce_mean(
                tf.stop_gradient(self.target) *
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.pi,
                                                               labels=self.a),
                name='loss_pi')
            # self.optim_pi = tf.train.AdamOptimizer(self.lr)
            # self.grads_pi = self.optim_pi.compute_gradients(self.loss_pi, self.pi_vars)
            # self.train_pi_op = self.optim_pi.apply_gradients(self.grads_pi)
            self.optim_pi = tf.train.AdamOptimizer(self.lr).minimize(
                self.loss_pi, name='adam_optim_pi')

        with tf.variable_scope('summary'):
            self.loss_pi_summ = tf.summary.scalar('loss_pi', self.loss_pi)

            self.ep_reward = tf.placeholder(tf.float32, name='episode_reward')
            self.ep_reward_summ = tf.summary.scalar('episode_reward',
                                                    self.ep_reward)
            summ_list = [self.loss_pi_summ, self.ep_reward_summ]

            if self.baseline:
                self.loss_v_summ = tf.summary.scalar('loss_v', self.loss_v)
                summ_list.append(self.loss_v_summ)

            self.merged_summary = tf.summary.merge(summ_list)

        if self.baseline:
            self.train_ops = [self.optim_pi, self.optim_v]
        else:
            self.train_ops = [self.optim_pi]

        self.sess.run(tf.global_variables_initializer())
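
The `returns` placeholder above is fed with per-episode discounted returns computed outside the graph. A small standalone sketch of the usual backward recursion G_t = r_t + gamma * G_{t+1} (not part of the class; reward values are made up):

import numpy as np

def discount_returns(rewards, gamma=0.99):
    """Discounted returns for one episode, computed backwards."""
    returns = np.zeros(len(rewards), dtype=np.float32)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

print(discount_returns([1.0, 0.0, 1.0], gamma=0.9))  # ~[1.81, 0.9, 1.0]
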
Example #3
def transformer_ffn_layer(x,
                          hparams,
                          pad_remover=None,
                          conv_padding="LEFT",
                          nonpadding_mask=None,
                          losses=None,
                          cache=None,
                          decode_loop_step=None,
                          readout_d_ff=0,
                          layer_collection=None):
    """Feed-forward layer in the transformer.

  Args:
    x: a Tensor of shape [batch_size, length, hparams.model_d]
    hparams: hyperparameters for model
    pad_remover: an expert_utils.PadRemover object tracking the padding
      positions. If provided, when using convolutional settings, the padding
      is removed before applying the convolution, and restored afterward. This
      can give a significant speedup.
    conv_padding: a string - either "LEFT" or "SAME".
    nonpadding_mask: an optional Tensor with shape [batch_size, length].
      needed for convolutional layers with "SAME" padding.
      Contains 1.0 in positions corresponding to nonpadding.
    losses: optional list onto which to append extra training losses
    cache: dict, containing tensors which are the results of previous
        attentions, used for fast decoding.
    decode_loop_step: An integer, step number of the decoding loop.
        Only used for inference on TPU.
    readout_d_ff: if it's greater than 0, then it will be used instead of
      d_ff
    layer_collection: A tensorflow_kfac.LayerCollection. Only used by the
      KFAC optimizer. Default is None.


  Returns:
    a Tensor of shape [batch_size, length, hparams.model_d]

  Raises:
    ValueError: If losses arg is None, but layer generates extra losses.
  """
    ffn_layer = hparams.ffn_layer
    relu_dropout_broadcast_dims = (
        common_layers.comma_separated_string_to_integer_list(
            getattr(hparams, "relu_dropout_broadcast_dims", "")))
    if ffn_layer == "conv_hidden_relu":
        # Backwards compatibility
        ffn_layer = "dense_relu_dense"
    if ffn_layer == "dense_relu_dense":
        # In simple convolution mode, use `pad_remover` to speed up processing.
        mlperf_log.transformer_print(key=mlperf_log.MODEL_HP_FFN_FILTER_DENSE,
                                     value={
                                         "d_ff": hparams.d_ff,
                                         "use_bias": "True",
                                         "activation": mlperf_log.RELU
                                     })
        mlperf_log.transformer_print(key=mlperf_log.MODEL_HP_FFN_OUTPUT_DENSE,
                                     value={
                                         "model_d": hparams.model_d,
                                         "use_bias": "True",
                                     })
        mlperf_log.transformer_print(key=mlperf_log.MODEL_HP_RELU_DROPOUT,
                                     value=hparams.relu_dropout)
        if pad_remover:
            original_shape = common_layers.shape_list(x)
            # Collapse `x` across examples, and remove padding positions.
            x = tf.reshape(x, tf.concat([[-1], original_shape[2:]], axis=0))
            x = tf.expand_dims(pad_remover.remove(x), axis=0)
        conv_output = common_layers.dense_relu_dense(
            x,
            hparams.d_ff,
            hparams.model_d,
            dropout=hparams.relu_dropout,
            dropout_broadcast_dims=relu_dropout_broadcast_dims,
            layer_collection=layer_collection)
        if pad_remover:
            # Restore `conv_output` to the original shape of `x`, including padding.
            conv_output = tf.reshape(
                pad_remover.restore(tf.squeeze(conv_output, axis=0)),
                original_shape)
        return conv_output
    elif ffn_layer == "conv_relu_conv":
        return common_layers.conv_relu_conv(
            x,
            readout_d_ff or hparams.d_ff,
            hparams.model_d,
            first_kernel_size=hparams.conv_first_kernel,
            second_kernel_size=1,
            padding=conv_padding,
            nonpadding_mask=nonpadding_mask,
            dropout=hparams.relu_dropout,
            cache=cache,
            decode_loop_step=decode_loop_step)
    elif ffn_layer == "parameter_attention":
        return common_attention.parameter_attention(
            x, hparams.parameter_attention_key_channels or hparams.model_d,
            hparams.parameter_attention_value_channels or hparams.model_d,
            hparams.model_d, readout_d_ff or hparams.d_ff, hparams.num_heads,
            hparams.attention_dropout)
    elif ffn_layer == "conv_hidden_relu_with_sepconv":
        return common_layers.conv_hidden_relu(x,
                                              readout_d_ff or hparams.d_ff,
                                              hparams.model_d,
                                              kernel_size=(3, 1),
                                              second_kernel_size=(31, 1),
                                              padding="LEFT",
                                              dropout=hparams.relu_dropout)
    elif ffn_layer == "sru":
        return common_layers.sru(x)
    elif ffn_layer == "local_moe_tpu":
        overhead = hparams.moe_overhead_eval
        if hparams.mode == tf.estimator.ModeKeys.TRAIN:
            overhead = hparams.moe_overhead_train
        ret, loss = expert_utils.local_moe_tpu(x,
                                               hparams.d_ff // 2,
                                               hparams.model_d,
                                               hparams.moe_num_experts,
                                               overhead=overhead,
                                               loss_coef=hparams.moe_loss_coef)
        losses.append(loss)
        return ret
    elif ffn_layer == "local_moe":
        overhead = hparams.moe_overhead_eval
        if hparams.mode == tf.estimator.ModeKeys.TRAIN:
            overhead = hparams.moe_overhead_train
        ret, loss = expert_utils.local_moe(x,
                                           True,
                                           expert_utils.ffn_expert_fn(
                                               hparams.model_d, [hparams.d_ff],
                                               hparams.model_d),
                                           hparams.moe_num_experts,
                                           k=hparams.moe_k,
                                           hparams=hparams)
        losses.append(loss)
        return ret
    else:
        assert ffn_layer == "none"
        return x
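
For reference, the "dense_relu_dense" branch above is the standard position-wise feed-forward block: expand to d_ff with ReLU, then project back to model_d. A minimal tf.keras sketch of that shape contract under TF 2.x (an illustration only, not the common_layers.dense_relu_dense implementation):

import tensorflow as tf

def ffn_sketch(x, d_ff, model_d, relu_dropout=0.0):
    """[batch, length, model_d] -> [batch, length, model_d]."""
    h = tf.keras.layers.Dense(d_ff, activation="relu")(x)
    h = tf.keras.layers.Dropout(relu_dropout)(h)
    return tf.keras.layers.Dense(model_d)(h)

print(ffn_sketch(tf.zeros([2, 7, 64]), d_ff=256, model_d=64).shape)  # (2, 7, 64)
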
Example #4
 def resize(x):
     x["user_id"] = tf.squeeze(x["user_id"], axis=[-1])
     x["item_id"] = tf.squeeze(x["item_id"], axis=[-1])
     return x
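
A mapper like `resize` above is normally applied element-wise via tf.data.Dataset.map. A self-contained sketch under TF 2.x eager execution, with made-up id values carrying the trailing singleton dimension the squeeze removes:

import tensorflow as tf

def resize(x):
    x["user_id"] = tf.squeeze(x["user_id"], axis=[-1])
    x["item_id"] = tf.squeeze(x["item_id"], axis=[-1])
    return x

ds = tf.data.Dataset.from_tensor_slices({
    "user_id": [[1], [2]],   # shape [1] per element before the squeeze
    "item_id": [[10], [20]],
}).map(resize)
for row in ds:
    print(row["user_id"].shape, row["item_id"].shape)  # () ()
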
Example #5
def crop_mask_in_target_box(masks, boxes, target_boxes, output_size):
  """Crop masks in target boxes.

  Args:
    masks: A tensor with a shape of [batch_size, num_masks, height, width].
    boxes: a float tensor representing box coordinates that tightly enclose
      masks with a shape of [batch_size, num_masks, 4] in un-normalized
      coordinates. A box is represented by [ymin, xmin, ymax, xmax].
    target_boxes: a float tensor representing target box coordinates for
      masks with a shape of [batch_size, num_masks, 4] in un-normalized
      coordinates. A box is represented by [ymin, xmin, ymax, xmax].
    output_size: A scalar to indicate the output crop size. Currently only
      square output crops are supported.

  Returns:
    A 4-D tensor representing feature crop of shape
    [batch_size, num_boxes, output_size, output_size].
  """
  with tf.name_scope('crop_mask_in_target_box'):
    batch_size, num_masks, height, width = masks.get_shape().as_list()
    masks = tf.reshape(masks, [batch_size*num_masks, height, width, 1])
    # Pad zeros on the boundary of masks.
    masks = tf.image.pad_to_bounding_box(masks, 2, 2, height + 4, width + 4)
    masks = tf.reshape(masks, [batch_size, num_masks, height+4, width+4, 1])

    # Projects target box locations and sizes to corresponding cropped
    # mask coordinates.
    gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(
        value=boxes, num_or_size_splits=4, axis=2)
    bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(
        value=target_boxes, num_or_size_splits=4, axis=2)
    y_transform = (bb_y_min - gt_y_min) * height / (
        gt_y_max - gt_y_min + _EPSILON) + 2
    x_transform = (bb_x_min - gt_x_min) * height / (
        gt_x_max - gt_x_min + _EPSILON) + 2
    h_transform = (bb_y_max - bb_y_min) * width / (
        gt_y_max - gt_y_min + _EPSILON)
    w_transform = (bb_x_max - bb_x_min) * width / (
        gt_x_max - gt_x_min + _EPSILON)

    boundaries = tf.concat(
        [tf.to_float(tf.ones_like(y_transform) * ((height + 4) - 1)),
         tf.to_float(tf.ones_like(x_transform) * ((width + 4) - 1))],
        axis=-1)

    # Reshape tensors to have the right shape for selective_crop_and_resize.
    transformed_boxes = tf.concat(
        [y_transform, x_transform, h_transform, w_transform], -1)
    levels = tf.tile(tf.reshape(tf.range(num_masks), [1, num_masks]),
                     [batch_size, 1])

    cropped_masks = selective_crop_and_resize(
        masks,
        transformed_boxes,
        levels,
        boundaries,
        output_size,
        sample_offset=0)
    cropped_masks = tf.squeeze(cropped_masks, axis=-1)

  return cropped_masks
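
The y/x/h/w transform above maps each target box into the pixel frame of its zero-padded (by 2 pixels) source mask. A numpy sketch of the same arithmetic for a single box pair; _EPSILON is assumed to be a small constant, and the masks here are square crops, so height and width play interchangeable roles in the ratios:

import numpy as np

_EPSILON = 1e-8  # assumed small constant, as in the module above

def target_box_in_mask_coords(box, target_box, height, width):
    gt_y_min, gt_x_min, gt_y_max, gt_x_max = box
    bb_y_min, bb_x_min, bb_y_max, bb_x_max = target_box
    y = (bb_y_min - gt_y_min) * height / (gt_y_max - gt_y_min + _EPSILON) + 2
    x = (bb_x_min - gt_x_min) * height / (gt_x_max - gt_x_min + _EPSILON) + 2
    h = (bb_y_max - bb_y_min) * width / (gt_y_max - gt_y_min + _EPSILON)
    w = (bb_x_max - bb_x_min) * width / (gt_x_max - gt_x_min + _EPSILON)
    return y, x, h, w

# A target box identical to the source box maps to offset (2, 2) at full mask size.
print(target_box_in_mask_coords([0, 0, 10, 10], [0, 0, 10, 10], height=28, width=28))
# -> approximately (2.0, 2.0, 28.0, 28.0)
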
Example #6
    def _update_block_mask(self, weights, threshold, mask):
        """Performs block-granular masking of the weights.

    Block pruning occurs only if the block_height or block_width is > 1 and
    if the weight tensor, when squeezed, has ndims = 2. Otherwise, elementwise
    pruning occurs.
    Args:
      weights: The weight tensor that needs to be masked.
      threshold: The current threshold value. The function will compute a new
        threshold and return the exponential moving average using the current
        value of threshold
      mask: The mask from the previous pruning update.

    Returns:
      new_threshold: The new value of the threshold based on weights, and
        sparsity at the current global_step
      new_mask: A numpy array of the same size and shape as weights containing
        0 or 1 to indicate which of the values in weights falls below
        the threshold

    Raises:
      ValueError: if block pooling function is not AVG or MAX
    """
        squeezed_weights = tf.squeeze(weights)
        if squeezed_weights.get_shape().ndims != 2 or self._block_dim == [
                1, 1
        ]:
            if self._pruning_method == 'threshold':
                return self._update_mask(weights, threshold)
            # random_cumulative removes at random, taking into account previous
            # random modifications. random_independent simply removes at random.
            elif self._pruning_method in [
                    'random_independent', 'random_cumulative'
            ]:
                return self._update_random_mask(weights, mask)
            else:
                raise ValueError('Unknown pruning method: %s' %
                                 self._pruning_method)

        if self._block_pooling_function not in ['AVG', 'MAX']:
            raise ValueError(
                'Unknown pooling function for block sparsity: %s' %
                self._block_pooling_function)

        with tf.name_scope(weights.op.name + '_pruning_ops'):
            abs_weights = tf.abs(squeezed_weights)

            pool_window = [self._block_dim[0], self._block_dim[1]]
            pool_fn = pruning_utils.factorized_pool

            if not self._use_tpu:
                pool_fn = tf.pool
                abs_weights = tf.reshape(abs_weights, [
                    1,
                    abs_weights.get_shape()[0],
                    abs_weights.get_shape()[1], 1
                ])

            pooled_weights = pool_fn(abs_weights,
                                     window_shape=pool_window,
                                     pooling_type=self._block_pooling_function,
                                     strides=pool_window,
                                     padding='SAME',
                                     name=weights.op.name + '_pooled')

            if pooled_weights.get_shape().ndims != 2:
                pooled_weights = tf.squeeze(pooled_weights)

            if self._pruning_method == 'threshold':
                smoothed_threshold, new_mask = self._update_mask(
                    pooled_weights, threshold)
            elif self._pruning_method in [
                    'random_independent', 'random_cumulative'
            ]:
                smoothed_threshold, new_mask = self._update_random_mask(
                    pooled_weights, mask)
            else:
                raise ValueError('Unknown pruning method: %s' %
                                 self._pruning_method)

            ## this is the process that updates the mask.
            updated_mask = pruning_utils.kronecker_product(
                new_mask, tf.ones(self._block_dim))
            sliced_mask = tf.slice(updated_mask, [0, 0], [
                squeezed_weights.get_shape()[0],
                squeezed_weights.get_shape()[1]
            ])

        return smoothed_threshold, tf.reshape(sliced_mask, tf.shape(weights))
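
The kronecker_product step above expands the per-block keep/drop decision back to element resolution. A numpy sketch of the same idea for a 2x2 block size (values are illustrative):

import numpy as np

block_dim = (2, 2)
pooled_mask = np.array([[1., 0.],
                        [0., 1.]])          # keep/drop decided per pooled block
elementwise_mask = np.kron(pooled_mask, np.ones(block_dim))
print(elementwise_mask)
# [[1. 1. 0. 0.]
#  [1. 1. 0. 0.]
#  [0. 0. 1. 1.]
#  [0. 0. 1. 1.]]
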
Example #7
    def call(self,
             inputs,
             training=True,
             features_only=None,
             pooled_features_only=False):
        """Implementation of call().

    Args:
      inputs: input tensors.
      training: boolean, whether the model is constructed for training.
      features_only: build the base feature network only.
      pooled_features_only: build the base network for features extraction
        (after 1x1 conv layer and global pooling, but before dropout and fc
        head).

    Returns:
      output tensors.
    """
        outputs = None
        self.endpoints = {}
        reduction_idx = 0
        # Calls Stem layers
        with tf.name_scope('stem'):
            outputs = self._relu_fn(
                self._bn0(self._conv_stem(inputs), training=training))
        logging.info('Built stem layers with output shape: %s', outputs.shape)
        self.endpoints['stem'] = outputs

        # Calls blocks.
        for idx, block in enumerate(self._blocks):
            is_reduction = False  # reduction flag for blocks after the stem layer
            # If the first block has a super-pixel (space-to-depth) layer, then
            # the stem is the first reduction point.
            if (block.block_args().super_pixel == 1 and idx == 0):
                reduction_idx += 1
                self.endpoints['reduction_%s' % reduction_idx] = outputs

            elif ((idx == len(self._blocks) - 1)
                  or self._blocks[idx + 1].block_args().strides[0] > 1):
                is_reduction = True
                reduction_idx += 1

            with tf.name_scope('blocks_%s' % idx):
                survival_prob = self._global_params.survival_prob
                if survival_prob:
                    drop_rate = 1.0 - survival_prob
                    survival_prob = 1.0 - drop_rate * float(idx) / len(
                        self._blocks)
                    logging.info('block_%s survival_prob: %s', idx,
                                 survival_prob)
                outputs = block.call(outputs,
                                     training=training,
                                     survival_prob=survival_prob)
                self.endpoints['block_%s' % idx] = outputs
                if is_reduction:
                    self.endpoints['reduction_%s' % reduction_idx] = outputs
                if block.endpoints:
                    for k, v in six.iteritems(block.endpoints):
                        self.endpoints['block_%s/%s' % (idx, k)] = v
                        if is_reduction:
                            self.endpoints['reduction_%s/%s' %
                                           (reduction_idx, k)] = v
        self.endpoints['features'] = outputs

        if not features_only:
            # Calls final layers and returns logits.
            with tf.name_scope('head'):
                outputs = self._relu_fn(
                    self._bn1(self._conv_head(outputs), training=training))
                self.endpoints['head_1x1'] = outputs

                if self._global_params.local_pooling:
                    shape = outputs.get_shape().as_list()
                    kernel_size = [
                        1, shape[self._spatial_dims[0]],
                        shape[self._spatial_dims[1]], 1
                    ]
                    outputs = tf.nn.avg_pool(outputs,
                                             ksize=kernel_size,
                                             strides=[1, 1, 1, 1],
                                             padding='VALID')
                    self.endpoints['pooled_features'] = outputs
                    if not pooled_features_only:
                        if self._dropout:
                            outputs = self._dropout(outputs, training=training)
                        self.endpoints['global_pool'] = outputs
                        if self._fc:
                            outputs = tf.squeeze(outputs, self._spatial_dims)
                            outputs = self._fc(outputs)
                        self.endpoints['head'] = outputs
                else:
                    outputs = self._avg_pooling(outputs)
                    self.endpoints['pooled_features'] = outputs
                    if not pooled_features_only:
                        if self._dropout:
                            outputs = self._dropout(outputs, training=training)
                        self.endpoints['global_pool'] = outputs
                        if self._fc:
                            outputs = self._fc(outputs)
                        self.endpoints['head'] = outputs
        return outputs
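
The survival_prob adjustment inside the block loop above is the usual stochastic-depth schedule: the drop rate grows linearly with block index, so later blocks are dropped more often. A standalone sketch of that schedule (the base survival probability and block count are illustrative):

def block_survival_prob(base_survival_prob, idx, num_blocks):
    """Linear stochastic-depth schedule, as computed per block above."""
    drop_rate = 1.0 - base_survival_prob
    return 1.0 - drop_rate * float(idx) / num_blocks

print([block_survival_prob(0.8, i, 16) for i in (0, 8, 15)])  # [1.0, 0.9, 0.8125]
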
Example #8
def position_sensitive_crop_regions(image,
                                    boxes,
                                    crop_size,
                                    num_spatial_bins,
                                    global_pool):
  """Position-sensitive crop and pool rectangular regions from a feature grid.

  The output crops are split into `spatial_bins_y` vertical bins
  and `spatial_bins_x` horizontal bins. For each intersection of a vertical
  and a horizontal bin the output values are gathered by performing
  `tf.image.crop_and_resize` (bilinear resampling) on a separate subset of
  channels of the image. This reduces `depth` by a factor of
  `(spatial_bins_y * spatial_bins_x)`.

  When global_pool is True, this function implements a differentiable version
  of position-sensitive RoI pooling used in
  [R-FCN detection system](https://arxiv.org/abs/1605.06409).

  When global_pool is False, this function implements a differentiable version
  of position-sensitive assembling operation used in
  [instance FCN](https://arxiv.org/abs/1603.08678).

  Args:
    image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
      A 3-D tensor of shape `[image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A `Tensor` of type `float32`.
      A 2-D tensor of shape `[num_boxes, 4]`. Each box is specified in
      normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value
      of `y` is mapped to the image coordinate at `y * (image_height - 1)`, so
      the `[0, 1]` interval of normalized image height is mapped to
      `[0, image_height - 1]` in image height coordinates. We do allow y1 > y2,
      in which case the sampled crop is an up-down flipped version of the
      original image. The width dimension is treated similarly.
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width` need
      to be positive.
    num_spatial_bins: A list of two integers `[spatial_bins_y, spatial_bins_x]`.
      Represents the number of position-sensitive bins in y and x directions.
      Both values should be >= 1. `crop_height` should be divisible by
      `spatial_bins_y`, and similarly for width.
      The number of image channels should be divisible by
      (spatial_bins_y * spatial_bins_x).
      Suggested value from R-FCN paper: [3, 3].
    global_pool: A boolean variable.
      If True, we perform average global pooling on the features assembled from
        the position-sensitive score maps.
      If False, we keep the position-pooled features without global pooling
        over the spatial coordinates.
      Note that using global_pool=True is equivalent to but more efficient than
        running the function with global_pool=False and then performing global
        average pooling.

  Returns:
    position_sensitive_features: A 4-D tensor of shape
      `[num_boxes, K, K, crop_channels]`,
      where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`,
      where K = 1 when global_pool is True (Average-pooled cropped regions),
      and K = crop_size when global_pool is False.
  Raises:
    ValueError: Raised in four situations:
      `num_spatial_bins` is not >= 1;
      `num_spatial_bins` does not divide `crop_size`;
      `(spatial_bins_y*spatial_bins_x)` does not divide `depth`;
      `bin_crop_size` is not square when global_pool=False due to the
        constraint in function space_to_depth.
  """
  total_bins = 1
  bin_crop_size = []

  for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
    if num_bins < 1:
      raise ValueError('num_spatial_bins should be >= 1')

    if crop_dim % num_bins != 0:
      raise ValueError('crop_size should be divisible by num_spatial_bins')

    total_bins *= num_bins
    bin_crop_size.append(crop_dim // num_bins)

  if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
    raise ValueError('Only support square bin crop size for now.')

  ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
  spatial_bins_y, spatial_bins_x = num_spatial_bins

  # Split each box into spatial_bins_y * spatial_bins_x bins.
  position_sensitive_boxes = []
  for bin_y in range(spatial_bins_y):
    step_y = (ymax - ymin) / spatial_bins_y
    for bin_x in range(spatial_bins_x):
      step_x = (xmax - xmin) / spatial_bins_x
      box_coordinates = [ymin + bin_y * step_y,
                         xmin + bin_x * step_x,
                         ymin + (bin_y + 1) * step_y,
                         xmin + (bin_x + 1) * step_x,
                        ]
      position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))

  image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=2)

  image_crops = []
  for (split, box) in zip(image_splits, position_sensitive_boxes):
    if split.shape.is_fully_defined() and box.shape.is_fully_defined():
      crop = tf.squeeze(
          matmul_crop_and_resize(
              tf.expand_dims(split, axis=0), tf.expand_dims(box, axis=0),
              bin_crop_size),
          axis=0)
    else:
      crop = tf.image.crop_and_resize(
          tf.expand_dims(split, 0), box,
          tf.zeros(tf.shape(boxes)[0], dtype=tf.int32), bin_crop_size)
    image_crops.append(crop)

  if global_pool:
    # Average over all bins.
    position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
    # Then average over spatial positions within the bins.
    position_sensitive_features = tf.reduce_mean(
        position_sensitive_features, [1, 2], keepdims=True)
  else:
    # Reorder height/width to depth channel.
    block_size = bin_crop_size[0]
    if block_size >= 2:
      image_crops = [tf.space_to_depth(
          crop, block_size=block_size) for crop in image_crops]

    # Pack image_crops so that first dimension is for position-sensitive boxes.
    position_sensitive_features = tf.stack(image_crops, axis=0)

    # Unroll the position-sensitive boxes to spatial positions.
    position_sensitive_features = tf.squeeze(
        tf.batch_to_space_nd(position_sensitive_features,
                             block_shape=[1] + num_spatial_bins,
                             crops=tf.zeros((3, 2), dtype=tf.int32)),
        axis=[0])

    # Reorder back the depth channel.
    if block_size >= 2:
      position_sensitive_features = tf.depth_to_space(
          position_sensitive_features, block_size=block_size)

  return position_sensitive_features
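
The bin bookkeeping at the top of the function requires crop_size to be divisible by num_spatial_bins and the channel depth by spatial_bins_y * spatial_bins_x. A quick sketch of that arithmetic with the R-FCN-style bins suggested in the docstring (the depth value is illustrative):

crop_size = [9, 9]
num_spatial_bins = [3, 3]          # suggested R-FCN value
depth = 9 * 10                     # e.g. 9 bins x 10 crop channels per bin

total_bins = num_spatial_bins[0] * num_spatial_bins[1]
bin_crop_size = [c // b for c, b in zip(crop_size, num_spatial_bins)]
crop_channels = depth // total_bins
print(total_bins, bin_crop_size, crop_channels)  # 9 [3, 3] 10
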
Example #9
    def __init__(self,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 attention_mask=None,
                 token_weights=None,
                 custom_attention_layer=None,
                 token_type_ids=None,
                 extra_embeddings=None,
                 use_position_embeddings=True,
                 reset_position_index_per_cell=False,
                 scope=None):
        """Constructor for BertModel.

    Args:
      config: `BertConfig` instance.
      is_training: bool. true for training model, false for eval model. Controls
        whether dropout will be applied.
      input_ids: int32 Tensor of shape [batch_size, seq_length].
      input_mask: (optional) int32 Tensor of shape [batch_size, seq_length].
      attention_mask: (optional) float32 Tensor of shape
        [batch_size, seq_length, seq_length].
      token_weights: (optional) float32 Tensor of shape
        [batch_size, seq_length] in [0,1].
      custom_attention_layer: (optional) function with the same signature as
        `attention_layer` in order to replace it for sparse alternatives.
      token_type_ids: (optional) nested structure of int32 Tensors of shape
        [batch_size, seq_length].
      extra_embeddings: (optional) float32 Tensor of shape [batch_size, seq_len,
        embedding_dim]. Additional embeddings concatenated with all the other
        embeddings.
      use_position_embeddings: (optional) bool. Whether to use position
        embeddings.
      reset_position_index_per_cell: bool. Whether to restart position index
        when a new cell starts.
      scope: (optional) variable scope. Defaults to "bert".

    Raises:
      ValueError: The config is invalid or one of the input tensor shapes
        is invalid.
    """
        config = copy.deepcopy(config)
        if not is_training:
            config.hidden_dropout_prob = 0.0
            config.attention_probs_dropout_prob = 0.0

        input_shape = get_shape_list(input_ids, expected_rank=2)
        batch_size = input_shape[0]
        seq_length = input_shape[1]

        if input_mask is None:
            input_mask = tf.ones(shape=[batch_size, seq_length],
                                 dtype=tf.int32)
        if token_weights is not None:
            input_mask = token_weights * tf.cast(input_mask, dtype=tf.float32)

        if token_type_ids is None:
            token_type_ids = tf.zeros(shape=[batch_size, seq_length],
                                      dtype=tf.int32)

        with tf.variable_scope(scope, default_name="bert"):
            with tf.variable_scope("embeddings"):
                # Perform embedding lookup on the word ids.
                (self.embedding_output,
                 self.embedding_table) = embedding_lookup(
                     input_ids=input_ids,
                     vocab_size=config.vocab_size,
                     embedding_size=config.hidden_size,
                     initializer_range=config.initializer_range,
                     word_embedding_name="word_embeddings")

                # Add positional embeddings and token type embeddings, then layer
                # normalize and perform dropout.
                self.embedding_output = embedding_postprocessor(
                    input_tensor=self.embedding_output,
                    use_token_type=True,
                    token_type_ids=token_type_ids,
                    token_type_vocab_size=config.type_vocab_size,
                    token_type_embedding_name="token_type_embeddings",
                    use_position_embeddings=use_position_embeddings,
                    reset_position_index_per_cell=reset_position_index_per_cell,
                    position_embedding_name="position_embeddings",
                    initializer_range=config.initializer_range,
                    max_position_embeddings=config.max_position_embeddings,
                    extra_embeddings=extra_embeddings,
                    dropout_prob=config.hidden_dropout_prob)

            with tf.variable_scope("encoder"):
                # This converts a 2D mask of shape [batch_size, seq_length] to a 3D
                # mask of shape [batch_size, seq_length, seq_length] which is used
                # for the attention scores.
                if attention_mask is None:
                    attention_mask = create_attention_mask_from_input_mask(
                        input_ids, input_mask)

                # Run the stacked transformer.
                # `sequence_output` shape = [batch_size, seq_length, hidden_size].
                self.all_encoder_layers, self.all_attention_probs = transformer_model(
                    input_tensor=self.embedding_output,
                    attention_mask=attention_mask,
                    custom_attention_layer=custom_attention_layer,
                    hidden_size=config.hidden_size,
                    num_hidden_layers=config.num_hidden_layers,
                    num_attention_heads=config.num_attention_heads,
                    intermediate_size=config.intermediate_size,
                    intermediate_act_fn=get_activation(config.hidden_act),
                    hidden_dropout_prob=config.hidden_dropout_prob,
                    attention_probs_dropout_prob=config.
                    attention_probs_dropout_prob,
                    initializer_range=config.initializer_range,
                    do_return_all_layers=True,
                    do_return_attention_probs=True,
                    softmax_temperature=config.softmax_temperature)

            self.sequence_output = self.all_encoder_layers[-1]
            # The "pooler" converts the encoded sequence tensor of shape
            # [batch_size, seq_length, hidden_size] to a tensor of shape
            # [batch_size, hidden_size]. This is necessary for segment-level
            # (or segment-pair-level) classification tasks where we need a fixed
            # dimensional representation of the segment.
            with tf.variable_scope("pooler"):
                # We "pool" the model by simply taking the hidden state corresponding
                # to the first token. We assume that this has been pre-trained
                first_token_tensor = tf.squeeze(self.sequence_output[:,
                                                                     0:1, :],
                                                axis=1)
                self.pooled_output = tf.layers.dense(
                    first_token_tensor,
                    config.hidden_size,
                    activation=tf.tanh,
                    kernel_initializer=create_initializer(
                        config.initializer_range))
Example #10
 def call(self, inputs):
   out = tf.squeeze(self.dense(inputs), axis=-1)
   return out
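
The squeeze above turns a Dense(1) output of shape [batch, 1] into a flat [batch] vector. A self-contained sketch of such a layer under TF 2.x, assuming self.dense is a Dense(1) head as in this example:

import tensorflow as tf

class ScalarHead(tf.keras.layers.Layer):
    """Dense(1) followed by the squeeze used in call() above."""

    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(1)

    def call(self, inputs):
        return tf.squeeze(self.dense(inputs), axis=-1)

print(ScalarHead()(tf.zeros([4, 8])).shape)  # (4,) rather than (4, 1)
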
Example #11
def direction_net_translation(src_img,
                              trt_img,
                              rotation_gt,
                              translation_gt,
                              fov_gt,
                              rotation_pred,
                              derotate_both=False):
    """Build the computation graph to train the DirectionNet-T.

  Args:
    src_img: [BATCH, HEIGHT, WIDTH, 3] input source images.
    trt_img: [BATCH, HEIGHT, WIDTH, 3] input target images.
    rotation_gt: [BATCH, 3, 3] ground truth rotation matrices.
    translation_gt: [BATCH, 3] ground truth translation directions.
    fov_gt: [BATCH] the ground truth field of view (degrees) of input images.
    rotation_pred: [BATCH, 3, 3] estimated rotations from DirectionNet-R.
    derotate_both: (bool) transform both input images to a middle frame by half
      the relative rotation between them to cancel out the rotation if true.
      Otherwise, only derotate the target image to the source image's frame.

  Returns:
    A collection of tensors including training ops, loss, and global step count.
  """
    net = model.DirectionNet(1)
    global_step = tf.train.get_or_create_global_step()
    perturbed_rotation = tf.cond(
        tf.less(tf.random_uniform([], 0, 1.0), 0.5),
        lambda: util.perturb_rotation(rotation_gt, [10., 5., 10.]),
        lambda: rotation_pred)

    (transformed_src, transformed_trt) = util.derotation(
        src_img, trt_img, perturbed_rotation, fov_gt, FLAGS.transformed_fov,
        [FLAGS.transformed_height, FLAGS.transformed_width], derotate_both)

    (transformed_src_gt, transformed_trt_gt) = util.derotation(
        src_img, trt_img, rotation_gt, fov_gt, FLAGS.transformed_fov,
        [FLAGS.transformed_height, FLAGS.transformed_width], derotate_both)

    half_derotation = util.half_rotation(perturbed_rotation)
    translation_gt = tf.squeeze(
        tf.matmul(half_derotation,
                  tf.expand_dims(translation_gt, -1),
                  transpose_a=True), -1)
    translation_gt = tf.expand_dims(translation_gt, 1)
    distribution_gt = util.spherical_normalization(util.von_mises_fisher(
        translation_gt, tf.constant(FLAGS.kappa, tf.float32),
        [FLAGS.distribution_height, FLAGS.distribution_width]),
                                                   rectify=False)

    pred = net(transformed_src, transformed_trt, training=True)
    directions, expectation, distribution_pred = util.distributions_to_directions(
        pred)

    direction_loss = losses.direction_loss(directions, translation_gt)
    distribution_loss = tf.constant(FLAGS.alpha,
                                    tf.float32) * losses.distribution_loss(
                                        distribution_pred, distribution_gt)
    spread_loss = tf.cast(FLAGS.beta,
                          tf.float32) * losses.spread_loss(expectation)
    direction_error = tf.reduce_mean(
        tf.acos(
            tf.clip_by_value(tf.reduce_sum(directions * translation_gt, -1),
                             -1., 1.)))

    loss = direction_loss + distribution_loss + spread_loss

    tf.summary.scalar('loss', loss)
    tf.summary.scalar('distribution_loss', distribution_loss)
    tf.summary.scalar('spread_loss', spread_loss)
    tf.summary.scalar('direction_error',
                      util.radians_to_degrees(direction_error))

    tf.summary.image('distribution/translation/ground_truth',
                     distribution_gt,
                     max_outputs=4)
    tf.summary.image('distribution/translation/prediction',
                     distribution_pred,
                     max_outputs=4)

    tf.summary.image('source_image', src_img, max_outputs=4)
    tf.summary.image('target_image', trt_img, max_outputs=4)
    tf.summary.image('transformed_source_image',
                     transformed_src,
                     max_outputs=4)
    tf.summary.image('transformed_target_image',
                     transformed_trt,
                     max_outputs=4)
    tf.summary.image('transformed_source_image_gt',
                     transformed_src_gt,
                     max_outputs=4)
    tf.summary.image('transformed_target_image_gt',
                     transformed_trt_gt,
                     max_outputs=4)

    optimizer = tf.train.GradientDescentOptimizer(FLAGS.lr)
    train_op = optimizer.minimize(loss, global_step=global_step, name='train')
    update_op = net.updates
    return Computation(tf.group([train_op, update_op]), loss, global_step)
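
direction_error above is the mean angle between predicted and ground-truth unit direction vectors, i.e. acos of their clipped dot product. A numpy sketch of the same quantity in degrees, with illustrative vectors:

import numpy as np

def direction_error_deg(pred, gt):
    cos = np.clip(np.sum(pred * gt, axis=-1), -1.0, 1.0)
    return np.degrees(np.mean(np.arccos(cos)))

pred = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
gt = np.array([[1.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
print(direction_error_deg(pred, gt))  # 45.0: one exact match plus one 90-degree miss
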
Example #12
  def train_step(self):

    def step_fn(inputs):
      """Step functon.

      Args:
        inputs: inputs from data iterator

      Returns:
        a set of variables want to observe in Tensorboard
      """

      net = self.net
      (all_images, labels), (self.probe_images, self.probe_labels) = inputs
      assert len(all_images.shape) == 5
      images, self.aug_images = all_images[:, 0], all_images[:, 1]

      self.images, self.labels = images, labels
      batch_size = int(self.batch_size / self.strategy.num_replicas_in_sync)

      logits = net(images, name='model', reuse=tf.AUTO_REUSE, training=True)
      self.logits = logits

      # other losses
      # initialized first to use self.guessed_label for meta step
      xe_loss, cs_loss = self.unsupervised_loss()

      # meta optimization
      weight, eps, meta_loss, meta_acc = self.meta_optimize()

      ## losses w.r.t new weight and loss
      onehot_labels = tf.one_hot(labels, self.dataset.num_classes)
      onehot_labels = tf.cast(onehot_labels, tf.float32)
      eps_k = tf.reshape(eps, [batch_size, 1])

      mixed_labels = tf.math.add(
          eps_k * onehot_labels, (1 - eps_k) * self.guessed_label,
          name='mixed_labels')
      net_cost = tf.losses.softmax_cross_entropy(
          mixed_labels, logits, reduction=tf.losses.Reduction.NONE)
      # loss with initial weight
      net_loss1 = tf.reduce_mean(net_cost)

      # loss with initial eps
      init_eps = tf.constant(
          [FLAGS.grad_eps_init] * batch_size, dtype=tf.float32)
      init_eps = tf.reshape(init_eps, (-1, 1))
      init_mixed_labels = tf.math.add(
          init_eps * onehot_labels, (1 - init_eps) * self.guessed_label,
          name='init_mixed_labels')

      net_cost2 = tf.losses.softmax_cross_entropy(
          init_mixed_labels, logits, reduction=tf.losses.Reduction.NONE)
      net_loss2 = tf.reduce_sum(tf.math.multiply(net_cost2, weight))

      net_loss = (net_loss1 + net_loss2) / 2

      net_loss = net_loss + tf.add_n([xe_loss, cs_loss])
      net_loss += net.regularization_loss
      # Rescale the loss by the number of replicas (GPUs).
      net_loss /= self.strategy.num_replicas_in_sync

      with tf.control_dependencies(net.updates):
        net_grads = tf.gradients(net_loss, net.trainable_variables)
        minimizer_op = self.optimizer.apply_gradients(
            zip(net_grads, net.trainable_variables),
            global_step=self.global_step)

      with tf.control_dependencies([minimizer_op]):
        train_op = self.ema.apply(net.trainable_variables)

      acc_op, acc_update_op = self.acc_func(labels, tf.argmax(logits, axis=1))

      with tf.control_dependencies([train_op, acc_update_op]):
        return (tf.identity(net_loss), tf.identity(xe_loss),
                tf.identity(cs_loss), tf.identity(meta_loss),
                tf.identity(meta_acc), tf.identity(acc_op), tf.identity(weight),
                tf.identity(labels))

    # end of parallel
    (pr_net_loss, pr_xe_loss, pr_cs_loss, pr_metaloss, pr_metaacc, pr_acc,
     pr_weight, pr_labels) = self.strategy.experimental_run_v2(
         step_fn,
         args=((next(self.train_input_iterator),
                next(self.probe_input_iterator)),))
    # collect device variables
    weights = self.strategy.unwrap(pr_weight)
    weights = tf.concat(weights, axis=0)
    labels = self.strategy.unwrap(pr_labels)
    labels = tf.concat(labels, axis=0)

    mean_acc = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_acc)
    mean_metaacc = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_metaacc)
    net_loss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_net_loss)
    xe_loss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_xe_loss)
    cs_loss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_cs_loss)
    meta_loss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_metaloss)

    # The following adds variables for TensorBoard visualization.
    merges = []
    merges.append(tf.summary.scalar('acc/train', mean_acc))
    merges.append(tf.summary.scalar('loss/xemin', xe_loss))
    merges.append(tf.summary.scalar('loss/consistency', cs_loss))
    merges.append(tf.summary.scalar('loss/net', net_loss))
    merges.append(tf.summary.scalar('loss/meta', meta_loss))
    merges.append(tf.summary.scalar('acc/meta', mean_metaacc))

    zw_inds = tf.squeeze(
        tf.where(tf.less_equal(weights, 0), name='zero_weight_index'))
    merges.append(
        tf.summary.scalar(
            'weights/zeroratio',
            tf.math.divide(
                tf.cast(tf.size(zw_inds), tf.float32),
                tf.cast(tf.size(weights), tf.float32))))

    self.epoch_var = tf.cast(
        self.global_step / self.iter_epoch, tf.float32, name='epoch')
    merges.append(tf.summary.scalar('epoch', self.epoch_var))
    merges.append(tf.summary.scalar('learningrate', self.learning_rate))
    summary = tf.summary.merge(merges)

    return [
        net_loss, meta_loss, xe_loss, cs_loss, mean_acc, mean_metaacc, summary,
        weights
    ]
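
mixed_labels above is a convex combination of the (possibly noisy) one-hot label and the model's guessed label distribution, weighted by the learned eps. A tiny numpy sketch with made-up values:

import numpy as np

eps = 0.7                                   # illustrative per-example weight
onehot_label = np.array([0.0, 1.0, 0.0])
guessed_label = np.array([0.2, 0.5, 0.3])   # hypothetical guessed distribution
mixed = eps * onehot_label + (1.0 - eps) * guessed_label
print(mixed)  # [0.06 0.85 0.09] -- still a valid distribution
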
Example #13
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = GroverModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            pad_token_id=config.pad_token_id,
            chop_off_last_token=True,
        )

        total_loss = model.lm_loss()

        if is_training:
            train_op, train_metrics = optimization_adafactor.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps,
                use_tpu)
            tvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        else:
            train_op = None
            train_metrics = {}
            tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map,
             initialized_variable_names) = get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            if use_tpu:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    train_op=train_op,
                    host_call=construct_scalar_host_call(
                        metric_dict=train_metrics,
                        model_dir=params['model_dir'],
                        prefix='training/'),
                    scaffold_fn=scaffold_fn)
            else:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    train_op=train_op,
                    training_hooks=[
                        tf.train.LoggingTensorHook(
                            {'loss': tf.metrics.mean(total_loss)[1]},
                            every_n_iter=100)
                    ],
                    scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(total_loss):
                loss = tf.metrics.mean(values=total_loss)
                return {
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [total_loss])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            gt_logprobs = tf.squeeze(tf.batch_gather(
                model.log_probs, model.target_ids[:, :, None]),
                                     axis=2)

            # Probability mass of tokens ranked above the ground truth, i.e. the
            # minimum top-p needed for top-p sampling to include it.
            better_than_gt = model.log_probs > gt_logprobs[:, :, None]
            top_p_required = tf.reduce_sum(
                tf.cast(better_than_gt, tf.float32) * tf.exp(model.log_probs),
                axis=2)

            # No top-p sampling for now, since this seems to be too slow on TPUs
            if use_tpu:
                predictions = tf.reshape(
                    tf.random.categorical(logits=model.logits_flat,
                                          num_samples=1),
                    get_shape_list(model.target_ids),
                )
            else:
                # Argmax
                # predictions = tf.math.argmax(model.log_probs, axis=-1, output_type=tf.int32)
                predictions = tf.reshape(
                    _top_p_sample(model.logits_flat, num_samples=1,
                                  p=0.99)['sample'],
                    get_shape_list(model.target_ids),
                )
            pred_logprobs = tf.squeeze(tf.batch_gather(model.log_probs,
                                                       predictions[:, :,
                                                                   None]),
                                       axis=2)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={
                    'gt_logprobs': gt_logprobs,
                    'top_p_required': top_p_required,
                    'predictions': predictions,
                    'pred_logprobs': pred_logprobs,
                    'labels': input_ids
                },
                scaffold_fn=scaffold_fn)
        return output_spec
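# --- Usage sketch (added for clarity; not part of the original example) ---
# A minimal, hedged illustration of how a model_fn that returns a
# TPUEstimatorSpec, like the one above, is typically handed to
# tf.contrib.tpu.TPUEstimator (TF 1.x). The names `my_model_fn`, `model_dir`,
# `tpu_cluster` and the batch sizes are placeholders, not from the original code.
import tensorflow as tf

def make_estimator(my_model_fn, model_dir, use_tpu, tpu_cluster=None):
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster,          # a TPUClusterResolver, or None for local runs
        model_dir=model_dir,
        tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=100))
    return tf.contrib.tpu.TPUEstimator(
        use_tpu=use_tpu,
        model_fn=my_model_fn,         # returns a tf.contrib.tpu.TPUEstimatorSpec
        config=run_config,
        train_batch_size=64,
        eval_batch_size=64,
        predict_batch_size=8,
        params={'model_dir': model_dir})  # read by the host_call in the model_fn above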
    def _slow_greedy_infer_guess_and_check(self, features, decode_length):
        assert self._hparams.block_size > 0
        assert self._hparams.force_full_predict
        assert self._hparams.sampling_method == "argmax"
        assert self._decode_hparams.batch_size == 1
        assert self._decode_hparams.block_size > 0
        assert self._decode_hparams.block_size <= self._hparams.block_size
        assert self._decode_hparams.guess_and_check_top_k > 0

        inputs_old = features["inputs"]
        assert "targets" not in features

        assert len(features["inputs"].shape) in [3, 4]
        if len(features["inputs"].shape) < 4:
            features["inputs"] = tf.expand_dims(features["inputs"], 2)

        block_size = self._decode_hparams.block_size
        decode_length += tf.shape(features["inputs"])[1]

        def while_exit_cond(result, length):  # pylint: disable=unused-argument
            return tf.logical_and(
                length < decode_length,
                tf.reduce_all(
                    tf.not_equal(result[:, :length, :, :],
                                 text_encoder.EOS_ID)))

        def infer_step(result, length):
            """Inference step."""
            def print_info(result, length, new_length):
                vocab = self.problem_hparams.vocabulary["targets"]
                tf.logging.info(
                    "length=%s new_length=%s length_diff=%s new_suffix=%s",
                    length,
                    new_length,
                    new_length - length,
                    str([
                        vocab._subtoken_id_to_subtoken_string(index)  # pylint: disable=protected-access
                        for index in result[0, -block_size:, 0,
                                            0][:new_length - length]
                    ]).decode("unicode-escape"),
                )

            features["targets"] = tf.pad(result,
                                         [[0, 0], [0, 1], [0, 0], [0, 0]])
            samples, logits, losses = self.sample(features)  # pylint: disable=unused-variable

            _, top_k_indices = tf.nn.top_k(
                logits[:, :-1, :1, :, :],
                k=self._decode_hparams.guess_and_check_top_k)
            in_top_k = tf.reduce_any(tf.equal(tf.to_int64(top_k_indices),
                                              tf.expand_dims(result, 4)),
                                     axis=4)

            eos_cumsum = tf.cumsum(tf.to_int32(
                tf.equal(result, text_encoder.EOS_ID)),
                                   axis=1)
            after_eos = tf.greater(common_layers.shift_right(eos_cumsum), 0)

            correct = tf.logical_and(in_top_k, tf.logical_not(after_eos))
            correct_cumsum = tf.cumsum(tf.to_int32(correct), axis=1)
            perfect_cumsum = 1 + tf.range(tf.shape(correct)[1])
            for axis in [0, 2, 3]:
                perfect_cumsum = tf.expand_dims(perfect_cumsum, axis=axis)

            new_length = tf.reduce_sum(tf.to_int32(
                tf.equal(correct_cumsum, perfect_cumsum)),
                                       axis=1)
            new_length = tf.squeeze(new_length, axis=[0, 1, 2])
            new_length = tf.minimum(new_length, decode_length)

            new_result = tf.concat([
                result[:, :new_length, :, :],
                tf.reshape(samples[:, new_length, :block_size, :],
                           [1, block_size, 1, 1])
            ],
                                   axis=1)

            with tf.control_dependencies(
                [tf.py_func(print_info, [result, length, new_length], [])]):
                new_result = tf.identity(new_result)

            return new_result, new_length

        result = tf.zeros((1, 0, 1, 1), dtype=tf.int64)
        length = tf.squeeze(tf.zeros(1, dtype=tf.int32))

        result, length = tf.while_loop(while_exit_cond,
                                       infer_step, [result, length],
                                       shape_invariants=[
                                           tf.TensorShape([1, None, 1, 1]),
                                           tf.TensorShape([]),
                                       ],
                                       back_prop=False,
                                       parallel_iterations=1)

        result = result[:, :length, :, :]

        features["inputs"] = inputs_old

        return {
            "outputs": result,
            "scores": None,
        }
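# --- Illustration (added for clarity; not part of the original example) ---
# The guess-and-check step above accepts the longest prefix of the proposed
# block whose tokens are all in the model's top-k. A small NumPy sketch of the
# cumsum == perfect_cumsum trick used to find that prefix length:
import numpy as np

correct = np.array([True, True, False, True])          # per-position "token is in top-k" flags
correct_cumsum = np.cumsum(correct.astype(np.int32))   # [1, 2, 2, 3]
perfect_cumsum = 1 + np.arange(correct.shape[0])       # [1, 2, 3, 4] if every token were correct
accepted_prefix_len = int(np.sum(correct_cumsum == perfect_cumsum))
print(accepted_prefix_len)                             # -> 2: only the first two tokens are kept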
Example #15
0
    def mtf_model_fn(self, features, mesh):
        features = copy.copy(features)
        tf.logging.info("features = %s" % features)
        hparams = self._hparams
        activation_dtype = self.set_activation_type()
        is_training = hparams.mode == tf.estimator.ModeKeys.TRAIN

        # Declare all the dimensions
        batch_dim = mtf.Dimension("batch", hparams.batch_size)
        hidden_dim = mtf.Dimension("hidden", hparams.hidden_size)
        filter_dim = mtf.Dimension("filters", hparams.filter_sizes[0])
        rows_dim = mtf.Dimension("rows_size", hparams.rows_size)
        cols_dim = mtf.Dimension("cols_size", hparams.cols_size)
        row_blocks_dim = mtf.Dimension("row_blocks", hparams.row_blocks)
        col_blocks_dim = mtf.Dimension("col_blocks", hparams.col_blocks)
        classes_dim = mtf.Dimension("classes", 10)
        channels_dim = mtf.Dimension("channels", 3)
        one_channel_dim = mtf.Dimension("one_channel", 1)

        inputs = features["inputs"]
        x = mtf.import_tf_tensor(
            mesh,
            tf.reshape(inputs, [
                hparams.batch_size, hparams.row_blocks,
                hparams.rows_size // hparams.row_blocks, hparams.col_blocks,
                hparams.num_channels * hparams.cols_size // hparams.col_blocks,
                hparams.num_channels
            ]),
            mtf.Shape([
                batch_dim, row_blocks_dim, rows_dim, col_blocks_dim, cols_dim,
                channels_dim
            ]))
        x = mtf.transpose(x, [
            batch_dim, row_blocks_dim, col_blocks_dim, rows_dim, cols_dim,
            channels_dim
        ])

        x = mtf.to_float(x)
        x = mtf.layers.conv2d_with_blocks(x,
                                          filter_dim,
                                          filter_size=[3, 3],
                                          strides=[1, 1],
                                          padding="SAME",
                                          h_blocks_dim=None,
                                          w_blocks_dim=col_blocks_dim,
                                          name="initial_filter")

        x = batch_norm_relu(x, is_training)

        # Conv blocks
        # [block - strided block layer - strided block layer] x n
        for layer in range(hparams.num_layers):
            layer_name = "block_layer_%d" % layer
            with tf.variable_scope(layer_name):
                # Residual block layer
                x = block_layer(inputs=x,
                                filters=hparams.filter_sizes[0],
                                blocks=hparams.layer_sizes[0],
                                strides=[1, 1],
                                is_training=is_training,
                                name="block_layer1",
                                row_blocks_dim=None,
                                col_blocks_dim=None)
                x = block_layer(inputs=x,
                                filters=hparams.filter_sizes[1],
                                blocks=hparams.layer_sizes[1],
                                strides=[1, 1],
                                is_training=is_training,
                                name="block_layer2",
                                row_blocks_dim=None,
                                col_blocks_dim=None)
                x = block_layer(inputs=x,
                                filters=hparams.filter_sizes[2],
                                blocks=hparams.layer_sizes[2],
                                strides=[1, 1],
                                is_training=is_training,
                                name="block_layer3",
                                row_blocks_dim=None,
                                col_blocks_dim=None)

        # Calculate the logits and loss.
        out = x
        outputs = mtf.layers.dense(out,
                                   hidden_dim,
                                   reduced_dims=out.shape.dims[-5:],
                                   activation=mtf.relu,
                                   name="dense")

        # We assume fixed vocab size for targets
        labels = tf.squeeze(tf.to_int32(features["targets"]), [2, 3])
        labels = mtf.import_tf_tensor(mesh,
                                      tf.reshape(labels, [hparams.batch_size]),
                                      mtf.Shape([batch_dim]))

        logits = mtf.layers.dense(outputs, classes_dim, name="logits")
        soft_targets = mtf.one_hot(labels, classes_dim, dtype=activation_dtype)
        loss = mtf.layers.softmax_cross_entropy_with_logits(
            logits, soft_targets, classes_dim)

        # Reshape logits so it doesn't break inside t2t.
        logits = mtf.reshape(
            logits, mtf.Shape([batch_dim, one_channel_dim, classes_dim]))
        loss = mtf.reduce_mean(loss)
        return logits, loss
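# --- Illustration (added for clarity; not part of the original example) ---
# A minimal, hedged sketch of the Mesh TensorFlow idiom used above: every
# tensor carries named mtf.Dimension objects, and plain TF tensors enter the
# mesh via mtf.import_tf_tensor. Lowering and execution of the mtf graph are
# omitted here.
import tensorflow as tf
import mesh_tensorflow as mtf

graph = mtf.Graph()
mesh = mtf.Mesh(graph, "example_mesh")

batch_dim = mtf.Dimension("batch", 8)
classes_dim = mtf.Dimension("classes", 10)

tf_labels = tf.zeros([8], dtype=tf.int32)
labels = mtf.import_tf_tensor(mesh, tf_labels, mtf.Shape([batch_dim]))
soft_targets = mtf.one_hot(labels, classes_dim, dtype=tf.float32)  # mtf shape [batch, classes]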
Example #16
0
  def _plot(self, data, res, name=None):

    img = self._img(data)
    label = self._label(data)
    if label is not None:
      label_one_hot = tf.one_hot(label, depth=self._n_classes)

    _render_activations = functools.partial(  # pylint:disable=invalid-name
        plot.render_activations,
        height=int(img.shape[1]),
        pixels_per_caps=3,
        cmap='viridis')

    mass_explained_by_capsule = tf.reduce_sum(res.posterior_mixing_probs, 1)
    normalized_mass_explained_by_capsule = mass_explained_by_capsule / tf.reduce_max(
        mass_explained_by_capsule, -1, keepdims=True)

    posterior_caps_activation = _render_activations(
        normalized_mass_explained_by_capsule)
    prior_caps_activation = _render_activations(res.caps_presence_prob)

    is_from_capsule = snt.BatchApply(_render_activations)(
        res.posterior_mixing_probs)

    green = res.top_down_rec
    rec_red = res.rec_mode
    rec_green = green.pdf.mode()

    flat_per_caps_rec = res.top_down_per_caps_rec.pdf.mode()
    shape = res.vote.shape[:2].concatenate(flat_per_caps_rec.shape[1:])
    per_caps_rec = tf.reshape(flat_per_caps_rec, shape)
    per_caps_rec = plot.concat_images(
        tf.unstack(per_caps_rec, axis=1), 1, vertical=False)
    one_image = tf.reduce_mean(
        self._img(data, self._prep), axis=-1, keepdims=True)
    one_rec = tf.reduce_mean(rec_red, axis=-1, keepdims=True)
    diff = tf.concat([one_image, one_rec, tf.zeros_like(one_image)], -1)

    used_templates = tf.reduce_mean(res.used_templates, axis=-1, keepdims=True)
    green_templates = tf.reduce_mean(
        green.transformed_templates, axis=-1, keepdims=True)
    templates = tf.concat(
        [used_templates, green_templates,
         tf.zeros_like(used_templates)], -1)

    templates = tf.concat(
        [templates,
         tf.ones_like(templates[:, :, :, :1]), is_from_capsule], 3)

    all_imgs = [
        img, rec_red, rec_green, diff, prior_caps_activation,
        tf.zeros_like(rec_red[:, :, :1]), posterior_caps_activation,
        per_caps_rec
    ] + list(tf.unstack(templates, axis=1))

    for i, img in enumerate(all_imgs):
      if img.shape[-1] == 1:
        all_imgs[i] = tf.image.grayscale_to_rgb(img)

    img_with_templates = plot.concat_images(all_imgs, 1, vertical=False)

    def render_corr(x, y):
      corr = abs(plot.correlation(x, y))
      rendered_corr = tf.expand_dims(_render_activations(corr), 0)
      return plot.concat_images(
          tf.unstack(rendered_corr, axis=1), 3, vertical=False)

    if label is not None:

      posterior_label_corr = render_corr(normalized_mass_explained_by_capsule,
                                         label_one_hot)
      prior_label_corr = render_corr(res.caps_presence_prob, label_one_hot)
      label_corr = plot.concat_images([prior_label_corr, posterior_label_corr],
                                      3,
                                      vertical=True)
    else:
      label_corr = tf.zeros_like(img)

    n_examples = min(int(shape[0]), 16)
    plot_params = dict(
        img_with_templates=dict(
            grid_height=n_examples,
            zoom=3.,
        ))

    templates = res.templates
    if len(templates.shape) == 5:
      if templates.shape[0] == 1:
        templates = tf.squeeze(templates, 0)

      else:
        templates = templates[:n_examples]
        templates = plot.concat_images(
            tf.unstack(templates, axis=1), 1, vertical=False)
        plot_params['templates'] = dict(grid_height=n_examples)

    plot_dict = dict(
        templates=templates,
        img_with_templates=img_with_templates[:n_examples],
        label_corr=label_corr,
    )

    return plot_dict, plot_params

def pc_encoder(point_cloud, nsamples, is_training, bn_decay=None):
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    point_dim = point_cloud.get_shape()[2].value
    with tf.variable_scope('transform_net1') as sc:
        transform = input_transform_net(point_cloud, is_training, bn_decay, K=3)
    point_cloud_transformed = tf.matmul(point_cloud, transform)

    point_cloud_transformed = tf.expand_dims(point_cloud_transformed, -1)
    nn_dis, idx_batch = tf_util.get_knn(point_cloud, 12)

    # Encoder
    net = tf_util.conv2d(point_cloud_transformed, 64, [1, point_dim],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv1', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 64, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv2', bn_decay=bn_decay)
    point_feat_1 = tf_util.conv2d(net, 128, [1, 1],
                                  padding='VALID', stride=[1, 1],
                                  bn=True, is_training=is_training,
                                  scope='conv3', bn_decay=bn_decay)

    print('------------ convPN_1 ------------')
    point_feat = tf_util.conv2d(point_feat_1, 256, [1, 1],
                                padding='VALID', stride=[1, 1],
                                bn=True, is_training=is_training,
                                scope='conv4', bn_decay=bn_decay)
    point_feat = tf_util.conv2d(point_feat, 256, [1, 1],
                                padding='VALID', stride=[1, 1],
                                bn=True, is_training=is_training,
                                scope='conv5', bn_decay=bn_decay)
    feature = tf.squeeze(point_feat, squeeze_dims=2)
    knn_feat = tf_util.cuda_maxpooling(feature, idx_batch)
    knn_feat = tf.expand_dims(knn_feat, axis=2)
    point_feat_2 = tf.concat([point_feat, knn_feat], axis=-1)  # 32 256 1 256

    print('------------ convPN_2 ------------')
    point_feat = tf_util.conv2d(point_feat_2, 256, [1, 1],
                                padding='VALID', stride=[1, 1],
                                bn=True, is_training=is_training,
                                scope='conv6', bn_decay=bn_decay)
    point_feat = tf_util.conv2d(point_feat, 256, [1, 1],
                                padding='VALID', stride=[1, 1],
                                bn=True, is_training=is_training,
                                scope='conv7', bn_decay=bn_decay)
    feature = tf.squeeze(point_feat, squeeze_dims=2)
    knn_feat = tf_util.cuda_maxpooling(feature, idx_batch)
    knn_feat = tf.expand_dims(knn_feat, axis=2)
    point_feat_3 = tf.concat([point_feat, knn_feat], axis=-1)  # 32 256 1 512
    mix_feature = tf.concat([point_feat_1, point_feat_2, point_feat_3], axis=-1)

    # ----------- maxpooling--------------
    global_feature = tf_util.max_pool2d(mix_feature, [num_point, 1], padding='VALID', scope='maxpool_1')
    net = tf.reshape(global_feature, [batch_size, -1])
    net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training, scope='fc00', bn_decay=bn_decay)
    net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training, scope='fc01', bn_decay=bn_decay)
    net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training, scope='fc02', bn_decay=bn_decay)
    net = tf.reshape(net, [batch_size, -1])
    return net
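# --- Illustration (added for clarity; not part of the original example) ---
# A hedged, pure-TF sketch of per-point neighborhood max pooling over kNN
# indices, which is what tf_util.cuda_maxpooling (a custom CUDA op in the
# original code) is assumed to compute. Shapes and names are illustrative.
import tensorflow as tf

def knn_max_pool(features, knn_idx):
    """features: [B, N, C] per-point features; knn_idx: [B, N, K] neighbor indices."""
    # Uses the batch_dims argument of tf.gather (available in TF >= 1.14).
    neighbor_feats = tf.gather(features, knn_idx, batch_dims=1)   # [B, N, K, C]
    return tf.reduce_max(neighbor_feats, axis=2)                  # [B, N, C]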
Example #18
0
  def body(self, features):
    # Remove dropout if not training
    hparams = self._hparams
    ps_devices = self._ps_devices
    assert hparams.num_model_shards % len(ps_devices) == 0
    shards_per_device = hparams.num_model_shards // len(ps_devices)
    model_devices = [ps_devices[i // shards_per_device]
                     for i in range(hparams.num_model_shards)]
    print("model_devices = %s" % model_devices)
    mp = expert_utils.Parallelism(model_devices, reuse=False)
    vocab_size = self._problem_hparams.vocabulary["targets"].vocab_size
    # squeeze out channels, heights
    targets = features["targets_raw"]
    targets = tf.squeeze(targets, 3)
    targets = tf.squeeze(targets, 2)
    shifted_targets = common_layers.shift_right_2d(targets)
    # Bypass the symbol modality and use a different embedding on each shard.
    decoder_input = mp(
        common_layers.embedding, shifted_targets, vocab_size,
        hparams.hidden_size,
        multiplier=hparams.hidden_size**0.5,
        symbol_dropout_rate=hparams.symbol_dropout)
    decoder_self_attention_bias = mp(
        common_attention.attention_bias_lower_triangle,
        tf.shape(targets)[1])
    if "targets_segmentation" in features:
      # "Packed" dataset - keep the examples from seeing each other.
      targets_segmentation = features["targets_segmentation"]
      targets_position = features["targets_position"]
      decoder_self_attention_bias = mp(
          tf.add, decoder_self_attention_bias,
          mp(common_attention.attention_bias_same_segment,
             targets_segmentation, targets_segmentation))
    else:
      targets_position = None

    if hparams.pos == "timing":
      if targets_position is None:
        decoder_input = mp(common_attention.add_timing_signal_1d, decoder_input)
      else:
        decoder_input = mp(
            common_attention.add_timing_signal_1d_given_position,
            decoder_input, targets_position)

    decoder_input = mp(
        tf.nn.dropout, decoder_input,
        1.0 - hparams.layer_prepostprocess_dropout)
    decoder_output, extra_loss = _super_stack(
        decoder_input, decoder_self_attention_bias, hparams, mp)
    # Bypass the symbol modality and compute logits directly.
    # We compute a different set of logits on each shard, and sum them.
    logits = mp(tf.layers.dense, decoder_output, vocab_size, name="logits")
    logits = expert_utils.all_reduce_ring(logits, mp)
    logits = mp(tf.multiply, logits, mp.n ** -0.5)
    # We now have identical logits on all shards.
    # Shard 0 gets returned to the estimator.
    logits_shard_0 = logits[0]
    logits_shard_0 = tf.expand_dims(logits_shard_0, 2)
    logits_shard_0 = tf.expand_dims(logits_shard_0, 3)
    # On each device, we compute the loss for a part of the batch.
    # This is faster than computing the whole loss on one shard.
    mp, logits = expert_utils.reduce_by_device(mp, logits, lambda l: l[0])
    def _loss_for_shard(logits, targets, shard):
      if mp.n > 1:
        logits = common_layers.approximate_split(logits, mp.n, 0)[shard]
        targets = common_layers.approximate_split(targets, mp.n, 0)[shard]
      return common_layers.padded_cross_entropy(
          logits, targets, hparams.label_smoothing)
    num, denom = mp(_loss_for_shard, logits, targets, range(mp.n))
    # override training loss so that it is not computed externally.
    losses = {"training": tf.add_n(num) / tf.add_n(denom)}
    if extra_loss is not None:
      losses["extra"] = extra_loss
    return logits_shard_0, losses
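# --- Illustration (added for clarity; not part of the original example) ---
# A hedged sketch of the "sum the per-shard logits, then rescale" step in
# body() above, written with plain TF ops instead of
# expert_utils.all_reduce_ring.
import tensorflow as tf

shard_logits = [tf.random.normal([4, 100]) for _ in range(2)]  # one logits tensor per shard
summed = tf.add_n(shard_logits)                                # all-reduce (sum) across shards
logits = summed * (len(shard_logits) ** -0.5)                  # scale by n ** -0.5, as above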
Example #19
0
    def build():
        """Builds the Tensorflow graph."""
        inputs, labels, lengths = None, None, None

        if mode in ('train', 'eval'):
            if isinstance(no_event_label, numbers.Number):
                label_shape = []
            else:
                label_shape = [len(no_event_label)]
            inputs, labels, lengths = magenta.common.get_padded_batch(
                sequence_example_file_paths,
                hparams.batch_size,
                input_size,
                label_shape=label_shape,
                shuffle=mode == 'train')

        elif mode == 'generate':
            inputs = tf.placeholder(tf.float32,
                                    [hparams.batch_size, None, input_size])

        if isinstance(encoder_decoder,
                      magenta.music.OneHotIndexEventSequenceEncoderDecoder):
            expanded_inputs = tf.one_hot(
                tf.cast(tf.squeeze(inputs, axis=-1), tf.int64),
                encoder_decoder.input_depth)
        else:
            expanded_inputs = inputs

        dropout_keep_prob = 1.0 if mode == 'generate' else hparams.dropout_keep_prob

        cell = make_rnn_cell(hparams.rnn_layer_sizes,
                             dropout_keep_prob=dropout_keep_prob,
                             attn_length=hparams.attn_length,
                             residual_connections=hparams.residual_connections)

        initial_state = cell.zero_state(hparams.batch_size, tf.float32)

        outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                 expanded_inputs,
                                                 sequence_length=lengths,
                                                 initial_state=initial_state,
                                                 swap_memory=True)

        outputs_flat = magenta.common.flatten_maybe_padded_sequences(
            outputs, lengths)
        if isinstance(num_classes, numbers.Number):
            num_logits = num_classes
        else:
            num_logits = sum(num_classes)
        logits_flat = tf_slim.layers.linear(outputs_flat, num_logits)

        if mode in ('train', 'eval'):
            labels_flat = magenta.common.flatten_maybe_padded_sequences(
                labels, lengths)

            if isinstance(num_classes, numbers.Number):
                softmax_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=labels_flat, logits=logits_flat)
                predictions_flat = tf.argmax(logits_flat, axis=1)
            else:
                logits_offsets = np.cumsum([0] + num_classes)
                softmax_cross_entropy = []
                predictions = []
                for i in range(len(num_classes)):
                    softmax_cross_entropy.append(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            labels=labels_flat[:, i],
                            logits=logits_flat[:, logits_offsets[i]:
                                               logits_offsets[i + 1]]))
                    predictions.append(
                        tf.argmax(
                            logits_flat[:,
                                        logits_offsets[i]:logits_offsets[i +
                                                                         1]],
                            axis=1))
                predictions_flat = tf.stack(predictions, 1)

            correct_predictions = tf.to_float(
                tf.equal(labels_flat, predictions_flat))
            event_positions = tf.to_float(
                tf.not_equal(labels_flat, no_event_label))
            no_event_positions = tf.to_float(
                tf.equal(labels_flat, no_event_label))

            # Compute the total number of time steps across all sequences in the
            # batch. For some models this will be different from the number of RNN
            # steps.
            def batch_labels_to_num_steps(batch_labels, lengths):
                num_steps = 0
                for labels, length in zip(batch_labels, lengths):
                    num_steps += encoder_decoder.labels_to_num_steps(
                        labels[:length])
                return np.float32(num_steps)

            num_steps = tf.py_func(batch_labels_to_num_steps,
                                   [labels, lengths], tf.float32)

            if mode == 'train':
                loss = tf.reduce_mean(softmax_cross_entropy)
                perplexity = tf.exp(loss)
                accuracy = tf.reduce_mean(correct_predictions)
                event_accuracy = (
                    tf.reduce_sum(correct_predictions * event_positions) /
                    tf.reduce_sum(event_positions))
                no_event_accuracy = (
                    tf.reduce_sum(correct_predictions * no_event_positions) /
                    tf.reduce_sum(no_event_positions))

                loss_per_step = tf.reduce_sum(
                    softmax_cross_entropy) / num_steps
                perplexity_per_step = tf.exp(loss_per_step)

                optimizer = tf.train.AdamOptimizer(
                    learning_rate=hparams.learning_rate)

                train_op = tf_slim.learning.create_train_op(
                    loss, optimizer, clip_gradient_norm=hparams.clip_norm)
                tf.add_to_collection('train_op', train_op)

                vars_to_summarize = {
                    'loss': loss,
                    'metrics/perplexity': perplexity,
                    'metrics/accuracy': accuracy,
                    'metrics/event_accuracy': event_accuracy,
                    'metrics/no_event_accuracy': no_event_accuracy,
                    'metrics/loss_per_step': loss_per_step,
                    'metrics/perplexity_per_step': perplexity_per_step,
                }
            elif mode == 'eval':
                vars_to_summarize, update_ops = tf_slim.metrics.aggregate_metric_map(
                    {
                        'loss':
                        tf.metrics.mean(softmax_cross_entropy),
                        'metrics/accuracy':
                        tf.metrics.accuracy(labels_flat, predictions_flat),
                        'metrics/per_class_accuracy':
                        tf.metrics.mean_per_class_accuracy(
                            labels_flat, predictions_flat, num_classes),
                        'metrics/event_accuracy':
                        tf.metrics.recall(event_positions,
                                          correct_predictions),
                        'metrics/no_event_accuracy':
                        tf.metrics.recall(no_event_positions,
                                          correct_predictions),
                        'metrics/loss_per_step':
                        tf.metrics.mean(tf.reduce_sum(softmax_cross_entropy) /
                                        num_steps,
                                        weights=num_steps),
                    })
                for updates_op in update_ops.values():
                    tf.add_to_collection('eval_ops', updates_op)

                # Perplexity is just exp(loss) and doesn't need its own update op.
                vars_to_summarize['metrics/perplexity'] = tf.exp(
                    vars_to_summarize['loss'])
                vars_to_summarize['metrics/perplexity_per_step'] = tf.exp(
                    vars_to_summarize['metrics/loss_per_step'])

            for var_name, var_value in vars_to_summarize.items():
                tf.summary.scalar(var_name, var_value)
                tf.add_to_collection(var_name, var_value)

        elif mode == 'generate':
            temperature = tf.placeholder(tf.float32, [])
            if isinstance(num_classes, numbers.Number):
                softmax_flat = tf.nn.softmax(
                    tf.div(logits_flat, tf.fill([num_classes], temperature)))
                softmax = tf.reshape(softmax_flat,
                                     [hparams.batch_size, -1, num_classes])
            else:
                logits_offsets = np.cumsum([0] + num_classes)
                softmax = []
                for i in range(len(num_classes)):
                    sm = tf.nn.softmax(
                        tf.div(
                            logits_flat[:,
                                        logits_offsets[i]:logits_offsets[i +
                                                                         1]],
                            tf.fill([num_classes[i]], temperature)))
                    sm = tf.reshape(sm,
                                    [hparams.batch_size, -1, num_classes[i]])
                    softmax.append(sm)

            tf.add_to_collection('inputs', inputs)
            tf.add_to_collection('temperature', temperature)
            tf.add_to_collection('softmax', softmax)
            # Flatten state tuples for metagraph compatibility.
            for state in tf.nest.flatten(initial_state):
                tf.add_to_collection('initial_state', state)
            for state in tf.nest.flatten(final_state):
                tf.add_to_collection('final_state', state)
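# --- Illustration (added for clarity; not part of the original example) ---
# A hedged sketch of the temperature trick used in 'generate' mode above:
# dividing the logits by a temperature before the softmax sharpens (T < 1) or
# flattens (T > 1) the sampling distribution.
import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.1]])
temperature = 0.5                                     # fed through a placeholder in the original
softmax = tf.nn.softmax(logits / temperature)         # sharper than tf.nn.softmax(logits)
sample = tf.random.categorical(tf.math.log(softmax), num_samples=1)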
Example #20
0
 def build_cnn18(self):
     x = self.placeholders['img_inp']
     x = tf.expand_dims(x, 0)
     #224 224
     x = tflearn.layers.conv.conv_2d(x,
                                     16, (3, 3),
                                     strides=1,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     x = tflearn.layers.conv.conv_2d(x,
                                     16, (3, 3),
                                     strides=1,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     x0 = x
     x = tflearn.layers.conv.conv_2d(x,
                                     32, (3, 3),
                                     strides=2,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     #112 112
     x = tflearn.layers.conv.conv_2d(x,
                                     32, (3, 3),
                                     strides=1,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     x = tflearn.layers.conv.conv_2d(x,
                                     32, (3, 3),
                                     strides=1,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     x1 = x
     x = tflearn.layers.conv.conv_2d(x,
                                     64, (3, 3),
                                     strides=2,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     #56 56
     x = tflearn.layers.conv.conv_2d(x,
                                     64, (3, 3),
                                     strides=1,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     x = tflearn.layers.conv.conv_2d(x,
                                     64, (3, 3),
                                     strides=1,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     x2 = x
     x = tflearn.layers.conv.conv_2d(x,
                                     128, (3, 3),
                                     strides=2,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     #28 28
     x = tflearn.layers.conv.conv_2d(x,
                                     128, (3, 3),
                                     strides=1,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     x = tflearn.layers.conv.conv_2d(x,
                                     128, (3, 3),
                                     strides=1,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     x3 = x
     x = tflearn.layers.conv.conv_2d(x,
                                     256, (5, 5),
                                     strides=2,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     #14 14
     x = tflearn.layers.conv.conv_2d(x,
                                     256, (3, 3),
                                     strides=1,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     x = tflearn.layers.conv.conv_2d(x,
                                     256, (3, 3),
                                     strides=1,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     x4 = x
     x = tflearn.layers.conv.conv_2d(x,
                                     512, (5, 5),
                                     strides=2,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     #7 7
     x = tflearn.layers.conv.conv_2d(x,
                                     512, (3, 3),
                                     strides=1,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     x = tflearn.layers.conv.conv_2d(x,
                                     512, (3, 3),
                                     strides=1,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     x = tflearn.layers.conv.conv_2d(x,
                                     512, (3, 3),
                                     strides=1,
                                     activation='relu',
                                     weight_decay=1e-5,
                                     regularizer='L2')
     x5 = x
      # Update the image features.
     self.placeholders.update({
         'img_feat':
         [tf.squeeze(x2),
          tf.squeeze(x3),
          tf.squeeze(x4),
          tf.squeeze(x5)]
     })
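# --- Note (added for clarity; not part of the original example) ---
# With the 224x224 input implied by the comments in build_cnn18 and the stated
# strides, the squeezed feature maps stored in 'img_feat' should be roughly:
#   x2: 56x56x64,  x3: 28x28x128,  x4: 14x14x256,  x5: 7x7x512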
def attention(query, attend_in, single_dot_in, elements_mask, do_softmax,
              attention_method, flags):
    """Returns the attention mask using the method described by attention_method.

  Args:
    query: Query vector. Shape: [batch_size, query_size]
    attend_in: Values for each item to use for attention. [batch_size *
      elements_per_query, attend_size]
    single_dot_in: Values for each item to use for attention in single-dot mode.
      Shape: [batch_size * elements_per_query, single_dot_attend_size];
      single_dot_attend_size must be greater than query_size.
    elements_mask: Mask indicating which element items exist in the input.
    do_softmax: Whether to put the output through softmax.
    attention_method: The attention method to use.
    flags: The input Flags. (Currently unused)

  Returns:
    The attention mask.
  """
    del flags

    elements_item_size = attend_in.shape[1]
    # Use different weights for the DNNs on top of the Ref Exp and Elements.
    if 'sepDotAtten' == attention_method:
        elements_enc_attend = tf.layers.dense(attend_in, elements_item_size)
        query_attend = tf.layers.dense(query, elements_item_size)

        attention_mask = atten_metric(elements_enc_attend, query_attend,
                                      elements_mask, do_softmax)

    # Use the same weights for the DNN on top of the Ref Exp and Elements.
    if 'singDotAtten' == attention_method:
        elements_enc_attend = single_dot_in

        query_attend = tf.concat([
            query,
            tf.zeros([
                tf.shape(query)[0],
                tf.shape(single_dot_in)[1] - tf.shape(query)[1]
            ])
        ], 1)

        # Concat along batch dim, so same weights used for each.
        all_attend = tf.concat([elements_enc_attend, query_attend], 0)
        all_attend = tf.layers.dense(all_attend, elements_item_size,
                                     tf.nn.relu)
        all_attend = tf.layers.dense(all_attend, elements_item_size)

        elements_enc_attend, query_attend = tf.split(
            all_attend,
            [tf.shape(elements_enc_attend)[0],
             tf.shape(query_attend)[0]])

        attention_mask = atten_metric(elements_enc_attend, query_attend,
                                      elements_mask, do_softmax)

    # Combine Ref Exp, and Elements before input to DNN
    if 'combAtten' == attention_method:
        query_tile = tile_ref_enc_to_elements(query, elements_mask)
        attention_mask = tf.concat([attend_in, query_tile], 1)
        attention_mask = tf.layers.dense(attention_mask, elements_item_size,
                                         tf.nn.relu)
        attention_mask = tf.layers.dense(attention_mask, 1)
        attention_mask = tf.squeeze(attention_mask, 1)

        if do_softmax:
            attention_mask = atten_softmax(attention_mask, elements_mask)

    tf.summary.histogram('attention_mask', attention_mask)

    return attention_mask
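# --- Illustration (added for clarity; not part of the original example) ---
# A hedged sketch of a dot-product attention score with a mask-aware softmax,
# roughly what the atten_metric / atten_softmax helpers used above are assumed
# to do (their real definitions live elsewhere in the original codebase).
import tensorflow as tf

def masked_dot_attention(element_feats, query, elements_mask):
    """element_feats: [B, E, D]; query: [B, D]; elements_mask: [B, E], 1.0 for valid items."""
    scores = tf.reduce_sum(element_feats * query[:, None, :], axis=-1)  # [B, E]
    scores += (1.0 - elements_mask) * -1e9                              # push padded items to ~0 prob
    return tf.nn.softmax(scores, axis=-1)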
Example #22
0
    def test_squeeze(self):
        input = tf.placeholder(shape=(4, 32, 32, 1), dtype=tf.float32)
        output = tf.squeeze(input, axis=[3])

        self._test_conversion('squeeze', [input], [output])
    def mtf_model_fn(self, features, mesh):
        features = copy.copy(features)
        tf.logging.info("features = %s" % features)
        hparams = self._hparams
        activation_dtype = self.activation_type

        # We assume fixed vocab size for targets
        targets = tf.to_int32(features["targets"])

        # Image preprocessing, reshape into a 1D sequence and shift right.
        length = hparams.img_len * hparams.img_len * hparams.num_channels
        targets = tf.reshape(targets, [hparams.batch_size, length])
        shifted_targets = common_layers.shift_right_2d(targets)

        # Declare all the dimensions
        batch_dim = mtf.Dimension("batch", hparams.batch_size)

        def import_to_batch_by_length(x, name):
            return mtf.import_tf_tensor(mesh,
                                        x,
                                        mtf.Shape([batch_dim,
                                                   self.length_dim]),
                                        name=name)

        targets = import_to_batch_by_length(targets, "targets")
        shifted_targets = import_to_batch_by_length(shifted_targets,
                                                    "shifted_targets")

        extra_losses = []

        # Create targets content and position embeddings.
        # Create embedding var for targets and positions and do a gather.
        targets_embedding_var = mtf.get_variable(
            mesh,
            "targets_embedding",
            mtf.Shape([self.targets_vocab_dim, self.model_dim]),
            initializer=tf.random_normal_initializer(),
            activation_dtype=activation_dtype)

        x = mtf.gather(targets_embedding_var, shifted_targets,
                       self.targets_vocab_dim)

        # Add positional embeddings
        x += mtf.reshape(self.create_positional_emb_2d(targets),
                         [self.length_dim, self.model_dim])

        # If conditional and input is given, add the input embedding to the target.
        # TODO(nikip): Verify conditional.
        if self.has_input and not hparams.unconditional:
            inputs = tf.squeeze(tf.to_int32(features["inputs"]), [2, 3])
            inputs = import_to_batch_by_length(inputs, "inputs")

            # Input embeddings
            inputs_embedding_var = mtf.layers.embedding(
                mesh,
                "input_embedding",
                mtf.Shape([self.inputs_vocab_dim, self.model_dim]),
                activation_dtype=activation_dtype)
            inputs_emb = mtf.gather(inputs_embedding_var, inputs,
                                    self.inputs_vocab_dim)
            x += inputs_emb

        # Image Transformer Decoder
        # [ self attention - ffn - residual + dropout] x n
        if hparams.attention_type == "local1d_spatial":
            decoder_output = local_attention1d_spatial_decoder(
                x, self.kv_dim, self.heads_dim, self.feedforward_dim, hparams)
        elif hparams.attention_type == "local2d_spatial":
            decoder_output = local_attention2d_spatial_decoder(
                x, self.kv_dim, self.heads_dim, self.feedforward_dim, hparams)
        elif hparams.attention_type == "local1d":
            decoder_output = local_attention1d_masked_decoder(
                x, self.kv_dim, self.heads_dim, self.feedforward_dim, hparams)
        else:
            raise ValueError("Invalid attention type.")

        # Calculate the logits and loss.
        logits = mtf.layers.dense(decoder_output,
                                  self.outputs_vocab_dim,
                                  name="logits")
        # Need a reshape for logits
        logits = mtf.reshape(
            logits,
            mtf.Shape([batch_dim, self.length_dim, self.outputs_vocab_dim]))
        soft_targets = mtf.one_hot(targets,
                                   self.outputs_vocab_dim,
                                   dtype=activation_dtype)
        loss = mtf.layers.softmax_cross_entropy_with_logits(
            logits, soft_targets, self.outputs_vocab_dim)
        loss = mtf.reduce_mean(loss)
        for l in extra_losses:
            loss += l

        # Reshape logits to original target shape.
        logits = mtf.reshape(
            logits,
            mtf.Shape([
                batch_dim, self.rows_dim, self.orig_cols_dim,
                self.channels_dim, self.outputs_vocab_dim
            ]))

        return logits, loss
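# --- Illustration (added for clarity; not part of the original example) ---
# A hedged sketch of the "shift targets right" step above, using plain TF ops.
# common_layers.shift_right_2d is assumed to prepend a zero token and drop the
# last position, so the decoder predicts token t from tokens < t.
import tensorflow as tf

targets = tf.constant([[11, 12, 13, 14]], dtype=tf.int32)    # [batch, length]
shifted_targets = tf.pad(targets, [[0, 0], [1, 0]])[:, :-1]  # -> [[0, 11, 12, 13]]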
Example #24
0
def _generate_detections_tf(cls_outputs,
                            box_outputs,
                            anchor_boxes,
                            indices,
                            classes,
                            image_id,
                            image_scale,
                            num_classes,
                            min_score_thresh=0.2,
                            max_boxes_to_draw=50,
                            soft_nms_sigma=0.0,
                            iou_threshold=0.5,
                            use_native_nms=False):
    """Generates detections with model outputs and anchors.

  Args:
    cls_outputs: a numpy array with shape [N, 1], which has the highest class
      scores on all feature levels. The N is the number of selected
      top-K total anchors on all levels.  (k being MAX_DETECTION_POINTS)
    box_outputs: a numpy array with shape [N, 4], which stacks box regression
      outputs on all feature levels. The N is the number of selected top-k
      total anchors on all levels. (k being MAX_DETECTION_POINTS)
    anchor_boxes: a numpy array with shape [N, 4], which stacks anchors on all
      feature levels. The N is the number of selected top-k total anchors on
      all levels.
    indices: a numpy array with shape [N], which is the indices from top-k
      selection.
    classes: a numpy array with shape [N], which represents the class
      prediction on all selected anchors from top-k selection.
    image_id: an integer number to specify the image id.
    image_scale: a float tensor representing the scale between original image
      and input image for the detector. It is used to rescale detections for
      evaluating with the original groundtruth annotations.
    num_classes: an integer that indicates the number of classes.
    min_score_thresh: A float representing the threshold for deciding when to
      remove boxes based on score.
    max_boxes_to_draw: Max number of boxes to draw.
    soft_nms_sigma: A scalar float representing the Soft NMS sigma parameter;
      see Bodla et al., https://arxiv.org/abs/1704.04503. When
      `soft_nms_sigma=0.0` (the default), we fall back to standard (hard) NMS.
    iou_threshold: A float representing the threshold for deciding whether boxes
      overlap too much with respect to IOU.
    use_native_nms: a bool that indicates whether to use native nms.

  Returns:
    detections: detection results in a tensor with each row representing
      [image_id, y, x, height, width, score, class]
  """
    anchor_boxes = tf.gather(anchor_boxes, indices)

    scores = tf.math.sigmoid(cls_outputs)
    # apply bounding box regression to anchors
    boxes = decode_box_outputs_tf(tf.transpose(box_outputs, [1, 0]),
                                  tf.transpose(anchor_boxes, [1, 0]))

    def _else(detections, class_id, indices):
        """Else branch for generating detections."""
        boxes_cls = tf.gather(boxes, indices)
        scores_cls = tf.gather(scores, indices)
        # Select top-scoring boxes in each class and apply non-maximum suppression
        # (nms) for boxes in the same class. The selected boxes from each class are
        # then concatenated for the final detection outputs.

        if use_native_nms:
            top_detection_idx, scores_cls = tf.image.non_max_suppression_with_scores(
                boxes_cls,
                scores_cls,
                max_boxes_to_draw,
                iou_threshold=iou_threshold,
                score_threshold=min_score_thresh,
                soft_nms_sigma=soft_nms_sigma)
            scores_cls = tf.expand_dims(scores_cls, axis=1)
            boxes_cls = tf.gather(boxes_cls, top_detection_idx)
            top_detections_cls = tf.concat([boxes_cls, scores_cls], axis=1)
        else:
            scores_cls = tf.expand_dims(scores_cls, axis=1)
            all_detections_cls = tf.concat([boxes_cls, scores_cls], axis=1)
            top_detection_idx = nms_tf(all_detections_cls, iou_threshold)
            top_detections_cls = tf.gather(all_detections_cls,
                                           top_detection_idx)
        height = top_detections_cls[:, 2] - top_detections_cls[:, 0]
        width = top_detections_cls[:, 3] - top_detections_cls[:, 1]
        top_detections_cls = tf.stack([
            top_detections_cls[:, 0] * image_scale,
            top_detections_cls[:, 1] * image_scale, height * image_scale,
            width * image_scale, top_detections_cls[:, 4]
        ],
                                      axis=-1)

        top_detections_cls = tf.stack([
            tf.cast(tf.repeat(image_id, tf.size(top_detection_idx)),
                    tf.float32), *tf.unstack(top_detections_cls, 5, axis=1),
            tf.repeat(class_id + 1.0, tf.size(top_detection_idx))
        ],
                                      axis=1)

        detections = tf.concat([detections, top_detections_cls], axis=0)

        return detections

    detections = tf.constant([], tf.float32, [0, 7])
    for c in range(num_classes):
        indices_cls = tf.squeeze(tf.where_v2(tf.equal(classes, c)), axis=-1)
        detections = tf.cond(
            tf.equal(tf.size(indices_cls), 0),
            lambda: detections,
            lambda id=c, id_cls=indices_cls: _else(detections, id, id_cls))
    indices_final = tf.argsort(detections[:, -2], direction='DESCENDING')
    detections = tf.gather(detections,
                           indices_final[:max_boxes_to_draw],
                           name='detection')
    return detections
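# --- Usage sketch (added for clarity; not part of the original example) ---
# A minimal, hedged example of tf.image.non_max_suppression_with_scores, the
# native-NMS path used above (available in TF >= 1.15). Boxes are
# [ymin, xmin, ymax, xmax]; soft_nms_sigma=0.0 gives plain hard NMS.
import tensorflow as tf

boxes = tf.constant([[0.0, 0.0, 1.0, 1.0],
                     [0.0, 0.0, 0.9, 0.9],
                     [0.5, 0.5, 1.0, 1.0]])
scores = tf.constant([0.9, 0.8, 0.3])
selected_idx, selected_scores = tf.image.non_max_suppression_with_scores(
    boxes,
    scores,
    max_output_size=2,
    iou_threshold=0.5,
    score_threshold=0.2,
    soft_nms_sigma=0.0)
kept_boxes = tf.gather(boxes, selected_idx)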