示例#1
0
def rpn_cls_loss(rpn_cls_score,rpn_labels):
    '''
    Compute the Region Proposal Network "objectness" classification loss.

    The scores are rearranged into a two-column matrix of logits, the labels
    are flattened, entries labelled -1 are dropped, and the mean sparse softmax
    cross-entropy over the remaining entries is returned.

    Args:
        rpn_cls_score: 4-D score tensor whose last axis is split into two
            classes (presumably [batch, height, width, 2 * num_anchors] --
            TODO confirm against the caller).
        rpn_labels: integer label tensor; it is flattened here. The value -1
            marks entries that are neither object nor background.

    Returns:
        Scalar tensor: mean cross-entropy over entries whose label is not -1.
    '''
    with tf.variable_scope('rpn_cls_loss'):
        # Dynamic shape of the incoming score tensor
        shape = tf.shape(rpn_cls_score)

        # Rearrange the scores so that the two class logits end up side by
        # side in the last axis, then flatten to [num_entries, 2]
        rpn_cls_score = tf.transpose(rpn_cls_score,[0,3,1,2])
        rpn_cls_score = tf.reshape(rpn_cls_score,[shape[0],2,shape[3]//2*shape[1],shape[2]])
        rpn_cls_score = tf.transpose(rpn_cls_score,[0,2,3,1])
        rpn_cls_score = tf.reshape(rpn_cls_score,[-1,2])

        # Flatten labels to line up with the rows of the logits matrix
        rpn_labels = tf.reshape(rpn_labels,[-1])

        # Build the "keep" index once and reuse it for both logits and labels
        # (label == -1 means the entry is ignored)
        keep = tf.where(tf.not_equal(rpn_labels,-1))
        rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score,keep),[-1,2])
        rpn_labels = tf.reshape(tf.gather(rpn_labels,keep),[-1])

        # Cross entropy on the retained logits
        rpn_cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score, labels=rpn_labels))

    return rpn_cross_entropy
示例#2
0
def print_mask_parameter_counts():
    """Print how many entries of each indicator matrix are non-zero.

    Reads the module-level ``sess`` and ``indicator_matrix1`` ..
    ``indicator_matrix3``; every count is evaluated with ``sess.run`` and
    written to stdout.
    """
    def _nonzero_count(matrix):
        # Non-zero entries become 1.0, zeros become 0.0; their sum is the count.
        is_nonzero = tf.not_equal(matrix, tf.zeros_like(matrix))
        return sess.run(tf.reduce_sum(tf.to_float(is_nonzero)))

    print("# Mask Parameter Counts")
    print("  - Mask1: {0}".format(_nonzero_count(indicator_matrix1)))
    print("  - Mask2: {0}".format(_nonzero_count(indicator_matrix2)))
    print("  - Mask3: {0}".format(_nonzero_count(indicator_matrix3)))
示例#3
0
def retrieve_seq_length_op3(data, pad_val=0):
    """Count, per batch row, the steps that are not padding.

    For rank-2 input every element different from ``pad_val`` counts as one
    step. For rank-3 input a step counts when at least one of its features
    differs from ``pad_val``. With ``tf.string`` data and ``pad_val=''`` this
    yields the length of a right-padded string sequence.

    Parameters
    -----------
    data : tensor
        [batch_size, n_step(max)] or [batch_size, n_step(max), n_features],
        expected to be padded with ``pad_val`` on the right hand side.
    pad_val :
        Padding value, 0 by default. Use the empty string ('') for tf.string data.

    Returns
    -----------
    int32 tensor obtained by summing the non-padding indicators over axis 1.

    Raises
    -----------
    ValueError
        If ``data`` has rank 1, or any rank other than 2 or 3.

    Examples
    -----------
    >>> data = [[[1],[2],[0],[0],[0]],
    >>>         [[1],[2],[3],[0],[0]],
    >>>         [[1],[2],[6],[1],[0]]]
    >>> data = tf.convert_to_tensor(data, dtype=tf.float32)
    >>> length = tl.layers.retrieve_seq_length_op3(data)
    [2, 3, 4]
    >>> data = [[[1,2],[2,2],[1,2],[1,2],[0,0]],
    >>>         [[2,3],[2,4],[3,2],[0,0],[0,0]],
    >>>         [[3,3],[2,2],[5,3],[1,2],[0,0]]]
    >>> data = tf.convert_to_tensor(data, dtype=tf.float32)
    >>> length = tl.layers.retrieve_seq_length_op3(data)
    [4, 3, 4]
    >>> data = [[1,2,0,0,0],
    >>>         [1,2,3,0,0],
    >>>         [1,2,6,1,0]]
    >>> data = tf.convert_to_tensor(data, dtype=tf.float32)
    >>> length = tl.layers.retrieve_seq_length_op3(data)
    [2, 3, 4]
    >>> data = [['hello','world','','',''],
    >>>         ['hello','world','tensorlayer','',''],
    >>>         ['hello','world','tensorlayer','2.0','']]
    >>> data = tf.convert_to_tensor(data, dtype=tf.string)
    >>> length = tl.layers.retrieve_seq_length_op3(data, pad_val='')
    [2, 3, 4]

    """
    rank = data.get_shape().ndims

    # Reject unsupported ranks up front so the happy path below stays flat.
    if rank == 1:
        raise ValueError("retrieve_seq_length_op3: data has wrong shape! Shape got ", data.get_shape().as_list())
    if rank not in (2, 3):
        raise ValueError(
            "retrieve_seq_length_op3: handling data with num of dims %s hasn't been implemented!" % (rank)
        )

    is_real = tf.not_equal(data, pad_val)
    if rank == 3:
        # A step is real as soon as any of its features is not padding.
        is_real = tf.reduce_any(input_tensor=is_real, axis=2)
    step_flags = tf.cast(is_real, dtype=tf.int32)
    return tf.reduce_sum(input_tensor=step_flags, axis=1)
示例#4
0
def padded_sequence_accuracy(logits, labels):
  """Per-sequence exact-match indicator, ignoring positions whose label is 0.

  Args:
    logits: prediction scores; the arg-max over the last axis is the
      predicted id.
    labels: integer labels; positions equal to 0 are treated as padding.

  Returns:
    A pair ``(correct_seq, weight)``. ``correct_seq`` holds 1.0 for each
    sequence that has no wrong prediction at a non-zero label position and
    0.0 otherwise; ``weight`` is the constant 1.0.
  """
  with tf.variable_scope("padded_sequence_accuracy", values=[logits, labels]):
    logits, labels = _pad_tensors_to_same_length(logits, labels)
    nonpad = tf.to_float(tf.not_equal(labels, 0))
    predicted_ids = tf.to_int32(tf.argmax(logits, axis=-1))
    label_ids = tf.to_int32(labels)
    mismatches = tf.to_float(tf.not_equal(predicted_ids, label_ids)) * nonpad
    # Reduce over every axis except the batch axis.
    reduce_axes = list(range(1, len(predicted_ids.get_shape())))
    errors_per_seq = tf.reduce_sum(mismatches, axis=reduce_axes)
    # Any positive error count clamps to 1.0, turning the result into 0.0.
    sequence_correct = 1.0 - tf.minimum(1.0, errors_per_seq)
    return sequence_correct, tf.constant(1.0)
示例#5
0
def target_mask_op(data, pad_val=0):
    """Build an int32 mask that is 1 where ``data`` is not padding and 0 elsewhere.

    For rank-2 input the mask is element-wise. For rank-3 input a position is
    marked 1 when any value along axis 2 differs from ``pad_val``. Pass
    ``pad_val=''`` when ``data`` is a ``tf.string`` tensor.

    Raises:
        ValueError: if ``data`` has rank 1, or any rank other than 2 or 3.
    """
    rank = data.get_shape().ndims

    if rank == 1:
        raise ValueError("target_mask_op: data has wrong shape!")
    if rank not in (2, 3):
        raise ValueError("target_mask_op: handling data_shape_size %s hasn't been implemented!" % (rank))

    not_pad = tf.not_equal(data, pad_val)
    if rank == 3:
        # Collapse the feature axis: one non-pad feature is enough.
        not_pad = tf.reduce_any(input_tensor=not_pad, axis=2)
    return tf.cast(not_pad, dtype=tf.int32)
示例#6
0
文件: model.py 项目: Hukongtao/models
 def compute_error(self):
   """Build the mask tensors on ``self`` and run batch processing.

   Every intermediate tensor is stored as an attribute so that other methods
   can read it. The final error and correctness tensors come from
   ``self.batch_process()``.

   Returns:
     ``self.final_error`` as produced by ``self.batch_process()``.
   """
   flags = self.utility.FLAGS
   dummy_token = self.utility.dummy_token_id
   mask_dtype = self.data_type

   self.batch_gold_select = self.batch_print_answer > 0.0

   # Number columns first, word columns second, joined along axis 1.
   self.full_column_mask = tf.concat(
       axis=1,
       values=[self.batch_number_column_mask, self.batch_word_column_mask])
   self.full_processed_column = tf.concat(
       axis=1,
       values=[self.batch_processed_number_column,
               self.batch_processed_word_column])
   self.full_processed_sorted_index_column = tf.concat(
       axis=1,
       values=[self.batch_processed_sorted_index_number_column,
               self.batch_processed_sorted_index_word_column])

   # 1 where the processed entry is neither padding nor a "bad number".
   not_padding = tf.not_equal(self.full_processed_column, flags.pad_int)
   not_bad_number = tf.not_equal(self.full_processed_column,
                                 flags.bad_number_pre_process)
   self.select_bad_number_mask = tf.cast(
       tf.logical_and(not_padding, not_bad_number), mask_dtype)

   # 1 where the number / word entry is not padding.
   number_is_pad = tf.equal(self.batch_number_column, flags.pad_int)
   self.select_mask = tf.cast(tf.logical_not(number_is_pad), mask_dtype)
   word_is_dummy = tf.equal(self.batch_word_column_entry_mask, dummy_token)
   self.select_word_mask = tf.cast(tf.logical_not(word_is_dummy), mask_dtype)
   self.select_full_mask = tf.concat(
       axis=1, values=[self.select_mask, self.select_word_mask])

   # Take index 0 along axis 1 of each mask and combine them element-wise.
   first_slice_size = [self.batch_size, 1, self.max_elements]
   flat_shape = [self.batch_size, self.max_elements]
   first_number_mask = tf.reshape(
       tf.slice(self.select_mask, [0, 0, 0], first_slice_size), flat_shape)
   first_word_mask = tf.reshape(
       tf.slice(self.select_word_mask, [0, 0, 0], first_slice_size),
       flat_shape)
   self.select_whole_mask = tf.maximum(first_number_mask, first_word_mask)

   # Complement of select_full_mask: 1 where the entry is padding.
   padded_numbers = tf.equal(self.batch_number_column, flags.pad_int)
   padded_words = tf.equal(self.batch_word_column_entry_mask, dummy_token)
   self.invert_select_full_mask = tf.cast(
       tf.concat(axis=1, values=[padded_numbers, padded_words]), mask_dtype)

   self.batch_lookup_answer = tf.zeros(tf.shape(self.batch_gold_select))
   self.reset_select = self.select_whole_mask
   self.rows = tf.reduce_sum(self.select_whole_mask, 1)
   entries_per_example = tf.reduce_sum(
       tf.reduce_sum(self.select_full_mask, 1), 1)
   self.num_entries = tf.reshape(entries_per_example, [self.batch_size])

   self.final_error, self.final_correct = self.batch_process()
   return self.final_error
示例#7
0
    def add_embedding(self):
        """Embed ``self.input`` and zero out the vectors of padded positions.

        Ids equal to -1 are treated as padding. They are mapped to index 0 for
        the lookup, and the looked-up vector is then multiplied by 0.

        Returns:
            Embedded input with one extra trailing axis for the embedding.
        """
        with tf.variable_scope("Embed",regularizer=None):
            table=tf.get_variable(
                'embedding',
                [self.num_emb,self.emb_dim],
                initializer=tf.random_uniform_initializer(-0.05,0.05),
                trainable=True,
                regularizer=None)
            # Multiplying by the 0/1 indicator turns every -1 id into 0.
            safe_ids=tf.to_int32(tf.not_equal(self.input,-1))*self.input
            looked_up=tf.nn.embedding_lookup(table,safe_ids)
            keep=tf.expand_dims(tf.to_float(tf.not_equal(self.input,-1)),2)
            masked=looked_up*keep

            return masked
示例#8
0
    def add_placeholders(self):
        """Create the input placeholders and the derived per-example counts.

        Placeholders (all shaped with ``config.batch_size`` x
        ``config.maxnodesize`` where applicable):
            input:     int32 [batch, maxnodesize]
            treestr:   int32 [batch, maxnodesize, 2]
            labels:    int32 [batch, maxnodesize]
            dropout:   float32, shape unspecified
            batch_len: int32, shape unspecified

        Derived tensors:
            n_inodes:   per example, half the number of ``treestr`` entries
                        that are not -1.
            num_leaves: per example, the number of ``input`` entries that are
                        not -1.
        """
        dim2=self.config.maxnodesize
        dim1=self.config.batch_size
        self.input = tf.placeholder(tf.int32,[dim1,dim2],name='input')
        self.treestr = tf.placeholder(tf.int32,[dim1,dim2,2],name='tree')
        self.labels = tf.placeholder(tf.int32,[dim1,dim2],name='labels')
        self.dropout = tf.placeholder(tf.float32,name='dropout')

        self.n_inodes = tf.reduce_sum(tf.to_int32(tf.not_equal(self.treestr,-1)),[1,2])
        # Integer division keeps the count an int32 tensor. A plain "/" turns
        # into true division (float result) when run under Python 3.
        self.n_inodes = self.n_inodes//2

        self.num_leaves = tf.reduce_sum(tf.to_int32(tf.not_equal(self.input,-1)),[1])
        self.batch_len = tf.placeholder(tf.int32,name="batch_len")
示例#9
0
    def add_embedding(self):
        """Embed ``self.input`` on the CPU and zero the vectors of padded ids.

        Ids equal to -1 are treated as padding: they are looked up as index 0
        and the result is multiplied by 0 afterwards. The embedding variable
        carries an L2 regularizer with scale 0.0.

        Returns:
            Embedded input with one extra trailing axis for the embedding.
        """
        with tf.device('/cpu:0'):
            with tf.variable_scope("Embed"):
                table=tf.get_variable(
                    'embedding',
                    [self.num_emb,self.emb_dim],
                    initializer=tf.random_uniform_initializer(-0.05,0.05),
                    trainable=True,
                    regularizer=tf.contrib.layers.l2_regularizer(0.0))
                # The 0/1 indicator maps every -1 id onto index 0.
                safe_ids=tf.to_int32(tf.not_equal(self.input,-1))*self.input
                looked_up=tf.nn.embedding_lookup(table,safe_ids)
                # Indicator gets a trailing axis so it multiplies per vector.
                keep=tf.to_float(tf.not_equal(tf.expand_dims(self.input,2),-1))
                masked=looked_up*keep
                return masked
示例#10
0
    def _add_rpn_losses(self, sigma_rpn=3.0):
        """Build the RPN classification and box-regression losses.

        The results are stored in ``self._losses`` under 'rpn_cross_entropy',
        'rpn_loss_box' and 'rpn_loss', and ``self._event_summaries`` is
        updated with the whole loss dictionary.

        Args:
            sigma_rpn: forwarded as ``sigma`` to ``self._smooth_l1_loss``.

        Returns:
            The combined RPN loss (box loss + cross-entropy).
        """
        with tf.variable_scope('loss_' + self._tag) as scope:
            # Classification: drop entries labelled -1, then average the
            # cross-entropy over what is left.
            flat_scores = tf.reshape(self._predictions['rpn_cls_score_reshape'], [-1, 2])
            flat_labels = tf.reshape(self._anchor_targets['rpn_labels'], [-1])
            keep = tf.where(tf.not_equal(flat_labels, -1))
            kept_scores = tf.reshape(tf.gather(flat_scores, keep), [-1, 2])
            kept_labels = tf.reshape(tf.gather(flat_labels, keep), [-1])
            per_entry_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=kept_scores, labels=kept_labels)
            rpn_cross_entropy = tf.reduce_mean(per_entry_ce)

            # Box regression: smooth L1 between predictions and targets.
            targets = self._anchor_targets
            rpn_loss_box = self._smooth_l1_loss(
                self._predictions['rpn_bbox_pred'],
                targets['rpn_bbox_targets'],
                targets['rpn_bbox_inside_weights'],
                targets['rpn_bbox_outside_weights'],
                sigma=sigma_rpn,
                dim=[1, 2, 3])

            self._losses['rpn_cross_entropy'] = rpn_cross_entropy
            self._losses['rpn_loss_box'] = rpn_loss_box
            self._losses['rpn_loss'] = rpn_loss_box + rpn_cross_entropy

            self._event_summaries.update(self._losses)

        return self._losses['rpn_loss']
示例#11
0
def char_accuracy(predictions, targets, rej_char, streaming=False):
  """Character-level accuracy, ignoring positions marked with ``rej_char``.

  For each example the accuracy is the number of matching characters at
  non-``rej_char`` target positions divided by the number of such positions.
  The per-example values are then averaged.

  Args:
    predictions: predicted character ids; the shape must be compatible with
      ``targets`` ([batch_size x seq_length]).
    targets: ground truth character ids.
    rej_char: the character id used to mark an empty element (end of
      sequence).
    streaming: if True, the mean is computed with
      ``tf.contrib.metrics.streaming_mean``.

  Returns:
    With ``streaming=True``, whatever ``streaming_mean`` returns; otherwise a
    tensor holding the mean accuracy over the batch.
  """
  with tf.variable_scope('CharAccuracy'):
    predictions.get_shape().assert_is_compatible_with(targets.get_shape())

    targets = tf.to_int32(targets)
    rej_fill = tf.constant(rej_char, shape=targets.get_shape())
    is_real_char = tf.to_float(tf.not_equal(targets, rej_fill))
    is_hit = tf.to_float(tf.equal(predictions, targets))
    hits_per_example = tf.reduce_sum(tf.multiply(is_hit, is_real_char), 1)
    chars_per_example = tf.reduce_sum(is_real_char, 1)
    accuracy_per_example = tf.div(hits_per_example, chars_per_example)

    if not streaming:
      return tf.reduce_mean(accuracy_per_example)
    return tf.contrib.metrics.streaming_mean(accuracy_per_example)
示例#12
0
    def build_loss(self, ohem=False):
        """Build the RPN training loss (classification + box regression).

        Args:
            ohem: accepted for interface compatibility; not used in this body.

        Returns:
            Tuple ``(total_loss, model_loss, rpn_cross_entropy, rpn_loss_box)``.
            ``model_loss`` is the sum of the two RPN terms; ``total_loss`` adds
            the losses registered in ``tf.GraphKeys.REGULARIZATION_LOSSES``.
        """
        # classification loss
        rpn_cls_score = tf.reshape(self.get_output('rpn_cls_score_reshape'), [-1, 2])
        rpn_label = tf.reshape(self.get_output('rpn-data')[0], [-1])
        # ignore_label(-1)
        fg_keep = tf.equal(rpn_label, 1)
        # NOTE(review): tf.where on a 1-D condition yields an index tensor with
        # a trailing axis of size 1, so the gathered tensors below presumably
        # carry an extra singleton axis -- confirm before relying on shapes.
        rpn_keep = tf.where(tf.not_equal(rpn_label, -1))
        rpn_cls_score = tf.gather(rpn_cls_score, rpn_keep)
        rpn_label = tf.gather(rpn_label, rpn_keep)
        rpn_cross_entropy_n = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=rpn_label,logits=rpn_cls_score)

        # box loss
        rpn_bbox_pred = self.get_output('rpn_bbox_pred')
        rpn_bbox_targets = self.get_output('rpn-data')[1]
        rpn_bbox_inside_weights = self.get_output('rpn-data')[2]
        rpn_bbox_outside_weights = self.get_output('rpn-data')[3]
        rpn_bbox_pred = tf.gather(tf.reshape(rpn_bbox_pred, [-1, 4]), rpn_keep)
        rpn_bbox_targets = tf.gather(tf.reshape(rpn_bbox_targets, [-1, 4]), rpn_keep)
        rpn_bbox_inside_weights = tf.gather(tf.reshape(rpn_bbox_inside_weights, [-1, 4]), rpn_keep)
        rpn_bbox_outside_weights = tf.gather(tf.reshape(rpn_bbox_outside_weights, [-1, 4]), rpn_keep)

        rpn_loss_box_n = tf.reduce_sum(rpn_bbox_outside_weights * self.smooth_l1_dist(
            rpn_bbox_inside_weights * (rpn_bbox_pred - rpn_bbox_targets)), reduction_indices=[1])

        # Normalise by the number of foreground entries; the +1 avoids a
        # division by zero when there is none.
        rpn_loss_box = tf.reduce_sum(rpn_loss_box_n) / (tf.reduce_sum(tf.cast(fg_keep, tf.float32)) + 1)
        rpn_cross_entropy = tf.reduce_mean(rpn_cross_entropy_n)

        model_loss = rpn_cross_entropy +  rpn_loss_box

        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        # tf.add_n rejects an empty list, so only add the regularization term
        # when at least one regularizer has been registered.
        if regularization_losses:
            total_loss = tf.add_n(regularization_losses) + model_loss
        else:
            total_loss = model_loss

        return total_loss,model_loss, rpn_cross_entropy, rpn_loss_box
示例#13
0
  def dynamic_decode_and_search(self,
                                embedding,
                                start_tokens,
                                end_token,
                                vocab_size,
                                initial_state=None,
                                beam_width=5,
                                length_penalty=0.0,
                                maximum_iterations=250,
                                mode=tf.estimator.ModeKeys.PREDICT,
                                memory=None,
                                memory_sequence_length=None,
                                dtype=None):
    """Decode with beam search and report the length of every hypothesis.

    ``initial_state`` and ``dtype`` are accepted but not used in this body.

    Returns:
      Tuple ``(outputs, None, lengths, log_probs)``. ``outputs`` are the beam
      search ids with the first position of the last axis removed (the start
      token), ``lengths`` counts the non-zero ids along the last axis, and
      ``log_probs`` is returned by ``beam_search`` unchanged.
    """
    cache = self._init_cache(memory, memory_sequence_length=memory_sequence_length)
    symbols_to_logits_fn = self._symbols_to_logits_fn(embedding, vocab_size, mode)

    decoded_ids, log_probs = beam_search(
        symbols_to_logits_fn,
        start_tokens,
        beam_width,
        maximum_iterations,
        vocab_size,
        length_penalty,
        states=cache,
        eos_id=end_token)

    # Drop the leading <s> from every hypothesis.
    decoded_ids = tf.slice(decoded_ids, [0, 0, 1], [-1, -1, -1])

    # Id 0 is treated as padding: the length is the count of non-zero ids.
    is_token = tf.cast(tf.not_equal(decoded_ids, 0), tf.int32)
    lengths = tf.reduce_sum(is_token, axis=-1)

    return (decoded_ids, None, lengths, log_probs)
示例#14
0
def loss(logits, labels):
  """Per-pixel cross-entropy loss, summed with the 'losses' collection.

  The mean sparse softmax cross-entropy over all pixels whose label is not -1
  is added to the 'losses' collection. The returned value is the sum of
  everything in that collection.

  Args:
    logits: Logits from inference(); reshaped here using a fully defined
            static 4-D shape, with the class scores in the last axis.
    labels: Labels from distorted_inputs or inputs(). 3-D tensor
            of shape [batch_size,IMAGE_SIZE,IMAGE_SIZE]. The value -1 marks
            pixels that must not contribute to the loss.

  Returns:
    Loss tensor of type float.
  """
  labels = tf.cast(labels, tf.int64)
  label_shape = labels.get_shape().as_list()
  reshaped_labels = tf.reshape(labels,
                              [label_shape[0]*label_shape[1]*label_shape[2]])
  print(reshaped_labels.get_shape())
  logits_shape = logits.get_shape().as_list()
  reshaped_logits = tf.reshape(logits,
                              [logits_shape[0]*logits_shape[1]*logits_shape[2],
                              logits_shape[3]])

  no_loss_mask = tf.not_equal(reshaped_labels, -1)

  # The cross-entropy op needs labels inside [0, num_classes). Ignored pixels
  # get a placeholder class 0 here and are dropped again by the mask below.
  safe_labels = tf.where(no_loss_mask, reshaped_labels,
                         tf.zeros_like(reshaped_labels))

  # Keyword arguments: TF >= 1.0 rejects positional logits/labels.
  cross_entropy_per_pixel = tf.nn.sparse_softmax_cross_entropy_with_logits(
                                  logits=reshaped_logits, labels=safe_labels,
                                  name='cross_entropy_per_pixel')

  filtered_cross_entropy = tf.boolean_mask(cross_entropy_per_pixel,
                                           no_loss_mask,
                                           name='no_loss_mask')
  cross_entropy_mean = tf.reduce_mean(filtered_cross_entropy, name='cross_entropy')
  tf.add_to_collection('losses', cross_entropy_mean)

  return tf.add_n(tf.get_collection('losses'), name='total_loss')
示例#15
0
    def while_exit_cond(result, logits, loss):  # pylint: disable=unused-argument
      """Loop condition: continue while below decode_length and, optionally, before EOS.

      The EOS check only takes part in the decision when
      ``stop_at_eos`` is set on the problem hparams and ``batch_size`` equals 1.
      """
      current_length = common_layers.shape_list(result)[1]
      within_limit = current_length < decode_length

      if not self._problem_hparams.stop_at_eos:
        return within_limit

      def last_token_is_not_eos():
        # Compare the most recently produced element against the EOS id.
        last_step = tf.squeeze(result[:, -1, :, :])
        return tf.not_equal(last_step, text_encoder.EOS_ID)

      # With nothing decoded yet there is no last element to inspect, so the
      # EOS check is skipped and treated as "not EOS".
      not_eos = tf.cond(
          tf.not_equal(current_length, 0),
          last_token_is_not_eos,
          lambda: True,
      )

      # Early stopping on EOS is applied for batch_size == 1 only; larger
      # batches always run until the length limit.
      return tf.cond(
          tf.equal(batch_size, 1),
          lambda: tf.logical_and(within_limit, not_eos),
          lambda: within_limit)
示例#16
0
    def compute_loss(self,emb_batch,curr_batch_size=None):
        """Compute the loss of every example in the batch, one example at a time.

        For each batch index the states and logits are built, and the labels
        are cut down to the leading entries (as many as there are labels
        different from -1). The softmax row at index ``count - 1`` is kept as
        that example's prediction. The stacked predictions are stored in
        ``self.pred``.

        Args:
            emb_batch: embedded batch, forwarded to ``self.compute_states``.
            curr_batch_size: accepted but not used in this body.

        Returns:
            Tensor stacking the per-example losses.
        """
        per_example_loss=[]
        last_node_predictions=[]
        for example_idx in range(self.config.batch_size):
            tree_states=self.compute_states(emb_batch,example_idx)
            logits=self.create_output(tree_states)

            padded_labels=tf.gather(self.labels,example_idx)
            num_labels=tf.reduce_sum(tf.to_int32(tf.not_equal(padded_labels,-1)))
            labels=tf.gather(padded_labels,tf.range(num_labels))
            example_loss=self.calc_loss(logits,labels)

            probabilities=tf.nn.softmax(logits)
            # Row of the last labelled entry (presumably the tree root --
            # TODO confirm against the data layout).
            last_node_pred=tf.gather(probabilities,num_labels-1)

            last_node_predictions.append(last_node_pred)
            per_example_loss.append(example_loss)

        batch_loss=tf.pack(per_example_loss)
        self.pred=tf.pack(last_node_predictions)

        return batch_loss
def get_mask(gt, num_classes, ignore_label):
    """Return the indices of ground-truth entries that should be used.

    An entry is used when its value is at most ``num_classes - 1`` and is
    different from ``ignore_label``.

    Returns:
        Index tensor from ``tf.where`` with axis 1 squeezed away.
    """
    in_class_range = tf.less_equal(gt, num_classes-1)
    not_ignored = tf.not_equal(gt, ignore_label)
    usable = tf.logical_and(in_class_range, not_ignored)
    return tf.squeeze(tf.where(usable), 1)
示例#18
0
def measure():
    """Return the mean energy, the mean cost and the classification error rate.

    Reads the module-level ``layers`` and ``y``. The prediction is the arg-max
    of the last layer along axis 1; the error is the fraction of predictions
    that differ from ``y``.
    """
    mean_energy = tf.reduce_mean(energy(layers))
    mean_cost = tf.reduce_mean(cost(layers))
    predicted_class = tf.argmax(layers[-1], 1)
    is_wrong = tf.not_equal(predicted_class, tf.cast(y, tf.int64))
    error_rate = tf.reduce_mean(tf.cast(is_wrong, tf.float32))

    return mean_energy, mean_cost, error_rate
示例#19
0
def padded_sequence_accuracy(predictions,
                             labels,
                             weights_fn=common_layers.weights_nonzero):
  """Per-sequence exact-match indicator over the weighted label positions.

  When the last dimension of ``predictions`` is 1 the call is delegated to
  ``rounding_sequence_accuracy``. Otherwise predictions and labels are padded
  to a common length and flattened per example, and the arg-max predictions
  are compared with the labels.

  Returns:
    A pair ``(correct_seq, weight)``. ``correct_seq`` is 1.0 minus the
    weighted mismatch count clipped at 1.0, per sequence; ``weight`` is the
    constant 1.0.
  """
  # If the last dimension is 1 then we're using L1/L2 loss.
  uses_regression_loss = common_layers.shape_list(predictions)[-1] == 1
  if uses_regression_loss:
    return rounding_sequence_accuracy(
        predictions, labels, weights_fn=weights_fn)

  with tf.variable_scope(
      "padded_sequence_accuracy", values=[predictions, labels]):
    padded_predictions, padded_labels = common_layers.pad_with_zeros(
        predictions, labels)
    weights = weights_fn(padded_labels)

    # Flatten, keeping batch dim (and num_classes dim for predictions)
    # TPU argmax can only deal with a limited number of dimensions
    pred_shape = common_layers.shape_list(padded_predictions)
    batch_size, num_classes = pred_shape[0], pred_shape[-1]
    label_positions = common_layers.list_product(
        common_layers.shape_list(padded_labels)[1:])
    pred_positions = common_layers.list_product(pred_shape[1:-1])
    padded_predictions = tf.reshape(
        padded_predictions, [batch_size, pred_positions, num_classes])
    padded_labels = tf.reshape(padded_labels, [batch_size, label_positions])
    weights = tf.reshape(weights, [batch_size, label_positions])

    predicted_ids = tf.to_int32(tf.argmax(padded_predictions, axis=-1))
    padded_labels = tf.to_int32(padded_labels)
    mismatches = tf.to_float(
        tf.not_equal(predicted_ids, padded_labels)) * weights
    # Reduce over every axis except the batch axis.
    reduce_axes = list(range(1, len(predicted_ids.get_shape())))
    errors_per_seq = tf.reduce_sum(mismatches, axis=reduce_axes)
    sequence_correct = 1.0 - tf.minimum(1.0, errors_per_seq)
    return sequence_correct, tf.constant(1.0)
 def error(self):
   """Return the fraction of examples whose predicted class differs from the label.

   With more than one output (``self.num_out != 1``) the label is the arg-max
   of ``self.y`` along axis 1; otherwise ``self.y`` is used directly. The
   prediction is always the arg-max of ``self.prediction`` along axis 1.
   """
   if self.num_out!=1:
     max_labels=tf.argmax(self.y, 1)
   else:
     max_labels=self.y
   predicted=tf.argmax(self.prediction, 1)
   # tf.argmax returns integers while the single-output labels may be floats;
   # both sides are cast to float so the comparison always sees one dtype.
   mistakes=tf.not_equal(tf.to_float(max_labels), tf.to_float(predicted))
   return tf.reduce_mean(tf.cast(mistakes, tf.float32))
示例#21
0
def classification_costs(logits, labels, name=None):
    """Return the mean classification cost and the per-sample costs.

    Examples with label -1 are unlabeled and contribute a cost of 0. The mean
    divides the summed cost by the total number of examples, labeled or not.

    Returns:
        ``(mean, per_sample)``: the scalar mean cost and the per-example cost
        tensor (0 for unlabeled examples).
    """
    with tf.name_scope(name, "classification_costs") as scope:
        is_labeled = tf.not_equal(labels, -1)

        # Swap the -1 markers for class 0 so the cross-entropy is computable;
        # those rows are zeroed out again right after.
        safe_labels = tf.where(is_labeled, labels, tf.zeros_like(labels))
        raw_costs = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=safe_labels)
        per_sample = tf.where(is_labeled, raw_costs, tf.zeros_like(raw_costs))

        # Denominator is the full batch size, not the labeled count.
        labeled_sum = tf.reduce_sum(per_sample)
        total_count = tf.to_float(tf.shape(per_sample)[0])
        mean = tf.div(labeled_sum, total_count, name=scope)

        return mean, per_sample
示例#22
0
    def rmse(self, vp):
        """Error between ``self.v`` and ``vp`` measured on the rated items only.

        An item counts as rated when its entry in ``self.v`` is non-zero. For
        every row the squared differences on rated items are summed and
        divided by the number of rated items. The result is the square root of
        half the mean of those per-row values.

        Args:
            vp (tensor, float32): inferred output (network prediction).

        Returns:
            err (tensor, float32): the error value described above.
        """
        with tf.name_scope("re"):
            rated = tf.not_equal(self.v, 0)
            n_values = tf.reduce_sum(tf.cast(rated, "float32"), axis=1)

            # Squared error where rated, zero everywhere else.
            squared_error = tf.squared_difference(self.v, vp)
            e = tf.where(rated, x=squared_error, y=tf.zeros_like(self.v))

            per_row = tf.div(tf.reduce_sum(e, axis=1), n_values)
            err = tf.sqrt(tf.reduce_mean(per_row) / 2)

        return err
示例#23
0
  def call(self, x):
    """Return the scaled token embeddings of ``x``.

    Positions where ``x`` is 0 are treated as padding and get an all-zero
    embedding. All embeddings are multiplied by ``hidden_size ** 0.5``.

    Args:
      x: An int64 tensor with shape [batch_size, length]
    Returns:
      embeddings: tensor with shape [batch_size, length, embedding_size]
    """
    with tf.name_scope("embedding"):
      # 1.0 for real tokens, 0.0 for padding; shape [batch_size, length].
      not_padding = tf.to_float(tf.not_equal(x, 0))

      use_gather = self.method == "gather"
      if use_gather:
        embeddings = tf.gather(self.shared_weights, x)
      else:  # matmul
        embeddings = tpu_utils.embedding_matmul(
            embedding_table=self.shared_weights,
            values=tf.cast(x, dtype=tf.int32),
            mask=not_padding
        )

      # Zero the padding positions, then scale by sqrt(hidden_size).
      embeddings *= tf.expand_dims(not_padding, -1)
      embeddings *= self.hidden_size ** 0.5

      return embeddings
示例#24
0
def train_speech_to_text_network():
    """Build the CTC training graph and train for 16 epochs.

    Relies on module-level names: ``X``, ``Y``, ``sequence_len``, ``n_batch``,
    ``batch_size``, ``get_next_batches``, ``speech_to_text_network`` and
    ``MaxPropOptimizer``. The global ``pointer`` is reset to 0 at the start of
    each epoch. The learning rate decays by a factor of 0.97 per epoch, and a
    checkpoint named 'speech.module' is written every fifth epoch.
    """
    global pointer

    logit = speech_to_text_network()

    # CTC loss: the non-zero entries of Y become the sparse target, with every
    # value shifted down by one.
    nonzero_positions = tf.where(tf.not_equal(tf.cast(Y, tf.float32), 0.))
    shifted_values = tf.gather_nd(Y, nonzero_positions) - 1
    target = tf.SparseTensor(indices=nonzero_positions, values=shifted_values, shape=tf.cast(tf.shape(Y), tf.int64))
    loss = tf.nn.ctc_loss(logit, target, sequence_len, time_major=False)

    # optimizer
    lr = tf.Variable(0.001, dtype=tf.float32, trainable=False)
    optimizer = MaxPropOptimizer(learning_rate=lr, beta2=0.99)
    var_list = list(tf.trainable_variables())
    gradient = optimizer.compute_gradients(loss, var_list=var_list)
    optimizer_op = optimizer.apply_gradients(gradient)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(tf.global_variables())

        for epoch in range(16):
            decayed_lr = 0.001 * (0.97 ** epoch)
            sess.run(tf.assign(lr, decayed_lr))

            pointer = 0
            for batch in range(n_batch):
                batches_wavs, batches_labels = get_next_batches(batch_size)
                feed = {X: batches_wavs, Y: batches_labels}
                train_loss, _ = sess.run([loss, optimizer_op], feed_dict=feed)
                print(epoch, batch, train_loss)

            if epoch % 5 == 0:
                saver.save(sess, 'speech.module', global_step=epoch)
    def fast_rcnn_minibatch(self, reference_boxes):
        """Sample a shuffled minibatch of positive and negative reference boxes.

        Positives are entries whose ``object_mask`` is non-zero, negatives the
        ones where it equals zero. At most
        ``fast_rcnn_minibatch_size * fast_rcnn_positives_ratio`` positives are
        drawn; negatives fill the remaining slots as far as available.

        Returns:
            Tuple ``(minibatch_indices, matched gt boxes, object_mask,
            one-hot labels)`` for the sampled entries. The one-hot depth is
            ``self.num_classes + 1``.
        """
        with tf.variable_scope('fast_rcnn_minibatch'):

            matched_gtboxes, object_mask, label = \
                self.fast_rcnn_find_positive_negative_samples(reference_boxes)

            # Positives: shuffle, then keep the first num_pos.
            pos_idx = tf.reshape(tf.where(tf.not_equal(object_mask, 0.)), [-1])
            pos_available = tf.shape(pos_idx)[0]
            pos_quota = tf.cast(self.fast_rcnn_minibatch_size*self.fast_rcnn_positives_ratio, tf.int32)
            num_pos = tf.minimum(pos_available, pos_quota)
            pos_idx = tf.random_shuffle(pos_idx)
            pos_idx = tf.slice(pos_idx, begin=[0], size=[num_pos])

            # Negatives: fill whatever room the positives left.
            neg_idx = tf.reshape(tf.where(tf.equal(object_mask, 0.)), [-1])
            neg_available = tf.shape(neg_idx)[0]
            neg_quota = self.fast_rcnn_minibatch_size - num_pos
            num_neg = tf.minimum(neg_available, neg_quota)
            neg_idx = tf.random_shuffle(neg_idx)
            neg_idx = tf.slice(neg_idx, begin=[0], size=[num_neg])

            # Mix both groups so the sample order carries no class information.
            minibatch_indices = tf.concat([pos_idx, neg_idx], axis=0)
            minibatch_indices = tf.random_shuffle(minibatch_indices)

            minibatch_gtboxes = tf.gather(matched_gtboxes, minibatch_indices)
            object_mask = tf.gather(object_mask, minibatch_indices)
            label = tf.gather(label, minibatch_indices)
            label_one_hot = tf.one_hot(label, self.num_classes + 1)

            return minibatch_indices, minibatch_gtboxes, object_mask, label_one_hot
    def loss(self, logits, labels, regularization):
        """Build the training loss and its moving average.

        The loss is the sum of three terms:
          * var_loss: variance of the logits with label 1 plus variance of
            the remaining logits;
          * mean_loss: ``self.lamda`` times the positive part of
            ``self.mu - (same_mean - diff_mean)``;
          * regularization: the given factor times the sum of
            ``self.regularizers``.

        Returns:
            ``(loss, loss_average)``, where ``loss_average`` is the exponential
            moving average (decay 0.9) of the total loss.
        """
        def _margin_shortfall():
            # How far the gap between the two means falls short of self.mu.
            return self.mu - (same_mean - diff_mean)

        with tf.name_scope('loss'):
            with tf.name_scope('var_loss'):
                labels = tf.cast(labels, tf.float32)
                shape = labels.get_shape()

                is_same = tf.equal(labels, tf.ones(shape))
                same_class = tf.boolean_mask(logits, is_same)
                is_diff = tf.not_equal(labels, tf.ones(shape))
                diff_class = tf.boolean_mask(logits, is_diff)
                same_mean, same_var = tf.nn.moments(same_class, [0])
                diff_mean, diff_var = tf.nn.moments(diff_class, [0])
                var_loss = same_var + diff_var

            with tf.name_scope('mean_loss'):
                shortfall_is_positive = tf.greater(_margin_shortfall(), 0)
                hinge = tf.where(shortfall_is_positive, _margin_shortfall(), 0)
                mean_loss = self.lamda * hinge

            with tf.name_scope('regularization'):
                regularization *= tf.add_n(self.regularizers)

            loss = var_loss + mean_loss + regularization

            # Summaries for TensorBoard.
            tf.summary.scalar('loss/total', loss)
            with tf.name_scope('averages'):
                averages = tf.train.ExponentialMovingAverage(0.9)
                tracked = [var_loss, mean_loss, regularization, loss]
                op_averages = averages.apply(tracked)
                tf.summary.scalar('loss/avg/var_loss', averages.average(var_loss))
                tf.summary.scalar('loss/avg/mean_loss', averages.average(mean_loss))
                tf.summary.scalar('loss/avg/regularization', averages.average(regularization))
                tf.summary.scalar('loss/avg/total', averages.average(loss))
                # Reading the average forces the update op to run first.
                with tf.control_dependencies([op_averages]):
                    loss_average = tf.identity(averages.average(loss), name='control')
            return loss, loss_average
示例#27
0
  def _add_losses_ohem_nms(self, sigma_rpn=3.0):
    """Build the RPN and RCNN losses, selecting RCNN examples via NMS.

    The per-ROI classification and box losses are summed and used as the
    scores for ``tf.image.non_max_suppression``. Only the ROIs it returns
    contribute to the RCNN loss terms. All terms are stored in
    ``self._losses`` and ``self._event_summaries`` is updated with them.

    Args:
      sigma_rpn: forwarded as ``sigma`` to the RPN smooth-L1 loss.

    Returns:
      The total loss: RCNN cross-entropy + RCNN box loss + RPN cross-entropy
      + RPN box loss.
    """
    with tf.variable_scope('loss_' + self._tag) as scope:
      # RPN, class loss: entries labelled -1 are dropped first.
      flat_rpn_scores = tf.reshape(self._predictions['rpn_cls_score_reshape'], [-1, 2])
      flat_rpn_labels = tf.reshape(self._anchor_targets['rpn_labels'], [-1])
      rpn_keep = tf.where(tf.not_equal(flat_rpn_labels, -1))
      kept_rpn_scores = tf.reshape(tf.gather(flat_rpn_scores, rpn_keep), [-1, 2])
      kept_rpn_labels = tf.reshape(tf.gather(flat_rpn_labels, rpn_keep), [-1])
      rpn_cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=kept_rpn_scores, labels=kept_rpn_labels))

      # RPN, bbox loss
      anchor_targets = self._anchor_targets
      rpn_loss_box = self._smooth_l1_loss(
          self._predictions['rpn_bbox_pred'],
          anchor_targets['rpn_bbox_targets'],
          anchor_targets['rpn_bbox_inside_weights'],
          anchor_targets['rpn_bbox_outside_weights'],
          sigma=sigma_rpn,
          dim=[1, 2, 3])

      # RCNN, class loss (one value per ROI, not yet averaged)
      cls_score = self._predictions["cls_score"]
      roi_labels = tf.reshape(self._proposal_targets["labels"], [-1])
      per_roi_cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=tf.reshape(cls_score, [-1, self._num_classes]), labels=roi_labels)

      # RCNN, bbox loss (one value per ROI)
      proposal_targets = self._proposal_targets
      per_roi_box_loss = self._smooth_l1_loss_vector(
          self._predictions['bbox_pred'],
          proposal_targets['bbox_targets'],
          proposal_targets['bbox_inside_weights'],
          proposal_targets['bbox_outside_weights'])

      # ohem: NMS over the ROIs, scored by their combined loss
      rois_boxes = self._proposal_targets['rois']
      loss_before_nms = per_roi_cls_loss + per_roi_box_loss
      ohem_indexes = tf.image.non_max_suppression(
          rois_boxes[:, 1:5], loss_before_nms, cfg.TRAIN.OHEM_B, cfg.TRAIN.OHEM_NMS_THRESH)

      selected_cls_loss = tf.gather(per_roi_cls_loss, ohem_indexes)
      selected_box_loss = tf.gather(per_roi_box_loss, ohem_indexes)

      cross_entropy = tf.reduce_mean(selected_cls_loss)
      loss_box = tf.reduce_mean(selected_box_loss)

      self._losses['cross_entropy'] = cross_entropy
      self._losses['loss_box'] = loss_box
      self._losses['rpn_cross_entropy'] = rpn_cross_entropy
      self._losses['rpn_loss_box'] = rpn_loss_box

      self._losses['rpn_loss'] = rpn_loss_box + rpn_cross_entropy
      self._losses['class_loss'] = cross_entropy + loss_box
      loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
      self._losses['total_loss'] = loss

      self._losses['ohem_indexes_counts'] = ohem_indexes
      self._event_summaries.update(self._losses)

    return loss
示例#28
0
def padded_accuracy(logits, labels):
  """Per-position prediction hits, with positions labelled 0 weighted out.

  Both tensors are first passed through `_pad_tensors_to_same_length`.

  Args:
    logits: Score tensor; the prediction is the argmax over its last axis.
    labels: Label tensor; entries equal to 0 receive weight 0.

  Returns:
    A pair `(hits, weights)` of float tensors. `hits` is 1.0 where the
    predicted id equals the label and 0.0 elsewhere; `weights` is 1.0 where
    the label is non-zero and 0.0 elsewhere.
  """
  with tf.variable_scope("padded_accuracy", values=[logits, labels]):
    logits, labels = _pad_tensors_to_same_length(logits, labels)
    non_padding = tf.to_float(tf.not_equal(labels, 0))
    predicted_ids = tf.to_int32(tf.argmax(logits, axis=-1))
    target_ids = tf.to_int32(labels)
    hits = tf.to_float(tf.equal(predicted_ids, target_ids))
    return hits, non_padding
示例#29
0
 def errors(self, y):
     """Return the mean misclassification rate of `self.y_pred` against `y`.

     Args:
         y: Target tensor of dtype int32, int16 or int8 whose rank matches
             that of `self.y_pred`.

     Returns:
         A scalar float32 tensor: the fraction of positions at which
         `self.y_pred` differs from `y`.

     Raises:
         TypeError: If `y` and `self.y_pred` have different ranks.
         NotImplementedError: If `y` has any other dtype.
     """
     if len(y.get_shape()) != len(self.y_pred.get_shape()):
         # TensorFlow tensors expose `dtype`; the former `.type` lookup raised
         # AttributeError before the intended TypeError could be built.
         raise TypeError('y should have the same shape as self.y_pred',
             ('y', y.dtype, 'y_pred', self.y_pred.dtype))
     if y.dtype in [tf.int32, tf.int16, tf.int8]:
         # tf.reduce_mean needs a numeric input, so the boolean mismatch
         # mask is cast to float before averaging.
         mismatches = tf.cast(tf.not_equal(self.y_pred, y), tf.float32)
         return tf.reduce_mean(mismatches)
     else:
         raise NotImplementedError()
示例#30
0
def errors(logits, labels, name=None):
    """Compute the mean error and the per-example error flags.

    Examples whose label equals -1 are dropped before anything is computed,
    so the result covers only the remaining examples. The prediction for an
    example is the argmax over the last axis of its logits.
    The mean is NaN when no example remains after the filtering.

    Args:
        logits: Score tensor, aligned with `labels` on the leading axes.
        labels: Integer label tensor; -1 marks an example to be skipped.
        name: Optional name scope; defaults to "errors".

    Returns:
        A pair `(mean, per_sample)`: `per_sample` is a float tensor holding
        1.0 for every kept example that was misclassified and 0.0 otherwise;
        `mean` is its average.
    """
    with tf.name_scope(name, "errors") as scope:
        has_label = tf.not_equal(labels, -1)
        kept_labels = tf.boolean_mask(labels, has_label)
        kept_logits = tf.boolean_mask(logits, has_label)
        predicted = tf.argmax(kept_logits, -1)
        # tf.argmax yields int64 here, so the labels are cast to match.
        kept_labels = tf.cast(kept_labels, tf.int64)
        per_sample = tf.to_float(tf.not_equal(predicted, kept_labels))
        return tf.reduce_mean(per_sample, name=scope), per_sample
示例#31
0
 def _append_eow(self, sequences):
     """Place an EOW id directly after the non-zero prefix of every row.

     Each row is reversed over its count of non-zero entries, one EOW id is
     padded in front, and the row is reversed again over the new count of
     non-zero entries. The result has one more column than `sequences`.

     NOTE(review): this lands EOW right after the content only if zeros occur
     solely as trailing padding and EOW itself is non-zero -- confirm.
     """
     def count_nonzero(batch):
         # Number of non-zero ids in each row.
         return tf.reduce_sum(tf.cast(tf.not_equal(batch, 0), tf.int32), axis=1)

     flipped = tf.reverse_sequence(sequences, count_nonzero(sequences), 1)
     flipped_with_eow = tf.pad(
         flipped, [[0, 0], [1, 0]], constant_values=MorphoDataset.Factor.EOW)
     return tf.reverse_sequence(
         flipped_with_eow, count_nonzero(flipped_with_eow), 1)
示例#32
0
    def step(self, time, inputs, state, name=None):
        """Perform a decoding step.

        Runs the wrapped cell on the merged batch/beam inputs, optionally
        adjusts the cell outputs (attention-score bonus for beams about to
        reach `end_token`, label-based masking when `self.hie` is set), then
        delegates to `_beam_search_step` and records the chosen ids as the
        next `last_choice` in the cell state.

        Args:
          time: scalar `int32` tensor.
          inputs: A (structure of) input tensors.
          state: A (structure of) state tensors and TensorArrays.
          name: Name scope for any created operations.

        Returns:
          `(outputs, next_state, next_inputs, finished)`.
        """
        batch_size = self._batch_size
        beam_width = self._beam_width
        end_token = self._end_token
        length_penalty_weight = self._length_penalty_weight

        with ops.name_scope(name, "BeamSearchDecoderStep",
                            (time, inputs, state)):
            cell_state = state.cell_state
            # Reshape inputs and cell state for the cell call; the helpers are
            # defined elsewhere (presumably they fold the beam axis into the
            # batch axis -- confirm).
            inputs = nest.map_structure(
                lambda inp: self._merge_batch_beams(inp, s=inp.shape[2:]),
                inputs)
            cell_state = nest.map_structure(self._maybe_merge_batch_beams,
                                            cell_state, self._cell.state_size)
            cell_outputs, next_cell_state = self._cell(inputs, cell_state)

            # finished = tf.Print(state.finished, [state.finished, 'finished', time], summarize=100)
            # not_finished = tf.Print(not_finished, [not_finished, 'not_finished', time], summarize=100)
            # cell_state.last_choice shape = [batch_size * beam_width]
            # Look up the table row for each beam's last choice, then the row
            # for the first entry of that row (a two-step look-ahead).
            next_choices = gen_array_ops.gather_v2(self.lookup_table,
                                                   cell_state.last_choice,
                                                   axis=0)
            not_finished = tf.not_equal(next_choices[:, 0], end_token)
            next_next_choices = gen_array_ops.gather_v2(self.lookup_table,
                                                        next_choices[:, 0],
                                                        axis=0)
            # True where the first look-up is not end_token but the second
            # look-up is.
            will_finish = tf.logical_and(
                not_finished, tf.equal(next_next_choices[:, 0], end_token))

            def move(will_finish, last_choice, cell_outputs):
                # Adds self._step_method(last_choice) to the cell outputs, but
                # only for the rows selected by will_finish.
                # cell_outputs = tf.Print(cell_outputs, [cell_outputs, 'cell_outputs', time], summarize=1000)
                # will_finish = tf.Print(will_finish, [will_finish, 'will_finish', time], summarize=100)
                attention_score = self._step_method(last_choice)
                attention_score = attention_score + cell_outputs
                # final = tf.Print(final, [final, 'finalll', time], summarize=1000)
                return tf.where(will_finish, attention_score, cell_outputs)

            # The will_finish adjustment is applied only when an output layer
            # is configured.
            if self._output_layer is not None:
                cell_outputs = self._output_layer(cell_outputs)
                # will_finish = tf.Print(will_finish, [will_finish, 'will_finish, beam_search', time], summarize=100)
                # Skip the extra computation entirely when no row is about to
                # finish.
                cell_outputs = tf.cond(
                    tf.reduce_any(will_finish),
                    false_fn=lambda: cell_outputs,
                    true_fn=lambda: move(will_finish, cell_state.last_choice,
                                         cell_outputs))

            if self.hie:
                cell_outputs = self._mask_outputs_by_lable(
                    cell_outputs, cell_state.last_choice)

                # cell_state.last_choice shape = [batch_size*beam_width,]

            # Undo the earlier merge so the beam-search step receives the
            # layout it expects.
            cell_outputs = nest.map_structure(
                lambda out: self._split_batch_beams(out, out.shape[1:]),
                cell_outputs)

            next_cell_state = nest.map_structure(self._maybe_split_batch_beams,
                                                 next_cell_state,
                                                 self._cell.state_size)

            beam_search_output, beam_search_state = _beam_search_step(
                time=time,
                logits=cell_outputs,
                next_cell_state=next_cell_state,
                beam_state=state,
                batch_size=batch_size,
                beam_width=beam_width,
                end_token=end_token,
                length_penalty_weight=length_penalty_weight)

            finished = beam_search_state.finished

            # replace the father ids
            # The ids predicted in this step become `last_choice` for the
            # next step's table look-ups.
            sample_ids = beam_search_output.predicted_ids
            next_cell_state = beam_search_state.cell_state
            next_cell_state = next_cell_state._replace(last_choice=sample_ids)
            beam_search_state = beam_search_state._replace(
                cell_state=next_cell_state)

            # sample_ids shape = [batch_size, beam_width]
            # Once every beam is finished the start inputs are fed instead of
            # embedding the sampled ids.
            next_inputs = control_flow_ops.cond(
                math_ops.reduce_all(finished), lambda: self._start_inputs,
                lambda: self._embedding_fn(sample_ids))

        return (beam_search_output, beam_search_state, next_inputs, finished)
示例#33
0
def main(unused_argv):
    """Build the segmentation evaluation graph and run the slim eval loop.

    Reads every setting from `FLAGS`: loads the requested dataset split,
    predicts labels (single-scale or multi-scale depending on
    `FLAGS.eval_scales`), registers a mean-IoU metric that gives zero weight
    to pixels carrying the dataset's ignore label, and hands the metric update
    ops to `slim.evaluation.evaluation_loop`.

    Args:
        unused_argv: Ignored.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    # Dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(
        FLAGS.dataset, FLAGS.eval_split, dataset_dir=FLAGS.dataset_dir)

    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    with tf.Graph().as_default():
        samples = input_generator.get(
            dataset,
            FLAGS.eval_crop_size,
            FLAGS.eval_batch_size,
            min_resize_value=FLAGS.min_resize_value,
            max_resize_value=FLAGS.max_resize_value,
            resize_factor=FLAGS.resize_factor,
            dataset_split=FLAGS.eval_split,
            is_training=False,
            model_variant=FLAGS.model_variant)

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.eval_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        single_scale = tuple(FLAGS.eval_scales) == (1.0, )
        if single_scale:
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(
                samples[common.IMAGE],
                model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)

        flat_predictions = tf.reshape(
            predictions[common.OUTPUT_TYPE], shape=[-1])
        flat_labels = tf.reshape(samples[common.LABEL], shape=[-1])
        pixel_weights = tf.to_float(
            tf.not_equal(flat_labels, dataset.ignore_label))

        # metrics.mean_iou needs labels in [0, dataset.num_classes), so the
        # ignore_label pixels are remapped to 0. They still do not count,
        # because their weight computed above is 0.
        flat_labels = tf.where(
            tf.equal(flat_labels, dataset.ignore_label),
            tf.zeros_like(flat_labels),
            flat_labels)

        # Metric tag: 'miou' followed by one '_<scale>' per evaluation scale
        # and an optional '_flipped' suffix.
        predictions_tag = 'miou' + ''.join(
            '_' + str(eval_scale) for eval_scale in FLAGS.eval_scales)
        if FLAGS.add_flipped_images:
            predictions_tag += '_flipped'

        # Define the evaluation metric.
        metric_map = {
            predictions_tag: tf.metrics.mean_iou(
                flat_predictions,
                flat_labels,
                dataset.num_classes,
                weights=pixel_weights),
        }

        metrics_to_values, metrics_to_updates = (
            tf.contrib.metrics.aggregate_metric_map(metric_map))

        for metric_name, metric_value in six.iteritems(metrics_to_values):
            slim.summaries.add_scalar_summary(
                metric_value, metric_name, print_summary=True)

        # Enough batches to cover every sample once (last batch may be
        # partial).
        num_batches = int(
            math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))

        tf.logging.info('Eval num images %d', dataset.num_samples)
        tf.logging.info('Eval batch size %d and num batch %d',
                        FLAGS.eval_batch_size, num_batches)

        # A non-positive flag value leaves the limit unset (None).
        if FLAGS.max_number_of_evaluations > 0:
            num_eval_iters = FLAGS.max_number_of_evaluations
        else:
            num_eval_iters = None

        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_logdir,
            num_evals=num_batches,
            eval_op=list(metrics_to_updates.values()),
            max_number_of_evaluations=num_eval_iters,
            eval_interval_secs=FLAGS.eval_interval_secs)
示例#34
0
  def call(self, inputs):
    """Calculate target logits or inferred target sequences.

    The encoder always runs. When targets are supplied the decoder is run
    once over the right-shifted targets and logits are returned; otherwise
    beam search is used to generate the output ids.

    Args:
      inputs: input tensor list of size 1 or 2.
        First item, inputs: int tensor with shape [batch_size, input_length].
        Second item (optional), targets: None or int tensor with shape
          [batch_size, target_length].

    Returns:
      If targets is defined, then return logits for each word in the target
      sequence. float tensor with shape [batch_size, target_length, vocab_size]
      If target is none, then generate output sequence one token at a time.
        returns a dictionary {
          outputs: [batch_size, decoded length]
          scores: [batch_size, float]}
      Even when float16 is used, the output tensor(s) are always float32.

    Raises:
      NotImplementedError: If padded decoding is requested while
        `self._num_replicas` is unset (reported as "CPU/GPUs").
    """
    if len(inputs) == 2:
      inputs, targets = inputs[0], inputs[1]
    else:
      # Decoding path.
      inputs, targets = inputs[0], None

      # TODO(hongkuny): The check is not necessary. Fix this part.
      if self._padded_decode:
        if not self._num_replicas:
          raise NotImplementedError(
              "Padded decoding on CPU/GPUs is not supported.")
        # Padded decoding pins the input to a static per-replica shape.
        decode_batch_size = int(self._decode_batch_size / self._num_replicas)
        inputs.set_shape([decode_batch_size, self._decode_max_length])

    with tf.name_scope("Transformer"):
      # Only used on the decoding (targets is None) path below.
      attention_bias = model_utils.get_padding_bias(inputs)
      attention_bias = tf.cast(attention_bias, self._dtype)
      with tf.name_scope("encode"):
        # Prepare inputs to the layer stack by adding positional encodings and
        # applying dropout.
        embedded_inputs = self.embedding_lookup(inputs)
        # Zero out the embeddings of positions whose input id is 0.
        embedding_mask = tf.cast(
            tf.not_equal(inputs, 0), self.embedding_lookup.embeddings.dtype)
        embedded_inputs *= tf.expand_dims(embedding_mask, -1)
        embedded_inputs = tf.cast(embedded_inputs, self._dtype)

        # Attention_mask generation.
        # The [batch, 1, length] non-zero mask is broadcast against a
        # [batch, length, 1] tensor of ones, giving [batch, length, length].
        input_shape = tf_utils.get_shape_list(inputs, expected_rank=2)
        attention_mask = tf.cast(
            tf.reshape(
                tf.not_equal(inputs, 0), [input_shape[0], 1, input_shape[1]]),
            dtype=inputs.dtype)
        broadcast_ones = tf.ones(
            shape=[input_shape[0], input_shape[1], 1], dtype=inputs.dtype)
        attention_mask = broadcast_ones * attention_mask

        with tf.name_scope("add_pos_encoding"):
          pos_encoding = self.position_embedding(inputs=embedded_inputs)
          pos_encoding = tf.cast(pos_encoding, self._dtype)
          encoder_inputs = embedded_inputs + pos_encoding

        encoder_inputs = self.encoder_dropout(encoder_inputs)

        encoder_outputs = self.encoder_layer(
            encoder_inputs, attention_mask=attention_mask)

      if targets is None:
        # Inference: generate ids with beam search.
        encoder_decoder_attention_bias = attention_bias
        encoder_outputs = tf.cast(encoder_outputs, self._dtype)
        # Padded decoding uses static Python dimensions; otherwise the
        # dimensions are read dynamically from the tensor.
        if self._padded_decode:
          batch_size = encoder_outputs.shape.as_list()[0]
          input_length = encoder_outputs.shape.as_list()[1]
        else:
          batch_size = tf.shape(encoder_outputs)[0]
          input_length = tf.shape(encoder_outputs)[1]
        max_decode_length = input_length + self._extra_decode_length
        # attention_bias was already cast to self._dtype above; this cast
        # repeats that conversion.
        encoder_decoder_attention_bias = tf.cast(encoder_decoder_attention_bias,
                                                 self._dtype)

        symbols_to_logits_fn = self._get_symbols_to_logits_fn(max_decode_length)

        # Create initial set of IDs that will be passed to symbols_to_logits_fn.
        initial_ids = tf.zeros([batch_size], dtype=tf.int32)

        # Create cache storing decoder attention values for each layer.
        # pylint: disable=g-complex-comprehension
        # With padded decoding the cache is allocated at full length up
        # front; otherwise it starts with a zero-length time axis.
        init_decode_length = (max_decode_length if self._padded_decode else 0)
        num_heads = self._num_heads
        dim_per_head = self._hidden_size // num_heads

        cache = {
            str(layer): {
                "key":
                    tf.zeros([
                        batch_size, init_decode_length, num_heads, dim_per_head
                    ],
                             dtype=self._dtype),
                "value":
                    tf.zeros([
                        batch_size, init_decode_length, num_heads, dim_per_head
                    ],
                             dtype=self._dtype)
            } for layer in range(self._num_layers)
        }

        # pylint: enable=g-complex-comprehension

        # Add encoder output and attention bias to the cache.
        cache["encoder_outputs"] = encoder_outputs
        cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias

        # Use beam search to find the top beam_size sequences and scores.
        decoded_ids, scores = beam_search.sequence_beam_search(
            symbols_to_logits_fn=symbols_to_logits_fn,
            initial_ids=initial_ids,
            initial_cache=cache,
            vocab_size=self._vocab_size,
            beam_size=self._beam_size,
            alpha=self._alpha,
            max_decode_length=max_decode_length,
            eos_id=EOS_ID,
            padded_decode=self._padded_decode,
            dtype=self._dtype)

        # Get the top sequence for each batch element
        # Index 0 on the second axis selects one beam; the slice 1: drops the
        # first position of the decoded sequence.
        top_decoded_ids = decoded_ids[:, 0, 1:]
        top_scores = scores[:, 0]

        return {"outputs": top_decoded_ids, "scores": top_scores}

      else:
        # Training / teacher forcing: decode the whole target in one pass.
        with tf.name_scope("decode"):
          decoder_inputs = self.embedding_lookup(targets)
          # Zero out the embeddings of positions whose target id is 0.
          embedding_mask = tf.cast(
              tf.not_equal(targets, 0), self.embedding_lookup.embeddings.dtype)
          decoder_inputs *= tf.expand_dims(embedding_mask, -1)
          decoder_inputs = tf.cast(decoder_inputs, self._dtype)
          with tf.name_scope("shift_targets"):
            # Shift targets to the right, and remove the last element
            decoder_inputs = tf.pad(decoder_inputs,
                                    [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
          with tf.name_scope("add_pos_encoding"):
            # `length` is reused further down to size the triangular mask.
            length = tf.shape(decoder_inputs)[1]
            pos_encoding = self.position_embedding(decoder_inputs)
            pos_encoding = tf.cast(pos_encoding, self._dtype)
            decoder_inputs += pos_encoding

          decoder_inputs = self.decoder_dropout(decoder_inputs)

          decoder_shape = tf_utils.get_shape_list(
              decoder_inputs, expected_rank=3)
          batch_size = decoder_shape[0]
          decoder_length = decoder_shape[1]

          # Lower-triangular [length, length] matrix of ones, tiled to
          # [batch_size, length, length].
          self_attention_mask = tf.linalg.band_part(
              tf.ones([length, length], dtype=tf.float32), -1, 0)
          self_attention_mask = tf.reshape(self_attention_mask,
                                           [1, length, length])
          self_attention_mask = tf.tile(self_attention_mask, [batch_size, 1, 1])

          # Non-zero mask of the encoder inputs, repeated for every decoder
          # position: [batch_size, decoder_length, input_length].
          attention_mask = tf.cast(
              tf.expand_dims(tf.not_equal(inputs, 0), axis=1),
              dtype=inputs.dtype)
          attention_mask = tf.tile(attention_mask, [1, decoder_length, 1])

          # NOTE(review): the triangular mask is passed as `memory_mask` and
          # the encoder-input mask as `target_mask`; confirm this matches the
          # parameter semantics of `self.decoder_layer`.
          outputs = self.decoder_layer(
              decoder_inputs,
              encoder_outputs,
              memory_mask=self_attention_mask,
              target_mask=attention_mask)
          logits = embedding_linear(self.embedding_lookup.embeddings, outputs)
          logits = tf.cast(logits, tf.float32)

        return logits
示例#35
0
    def build_loss(self, ohem=False):
        """Assemble the RPN and R-CNN losses from the network's named outputs.

        Args:
            ohem: When True, the RPN classification loss is rebuilt from all
                positive anchors plus at most 300 of the highest-loss negative
                anchors, and the RPN box loss is restricted to positive
                anchors. The R-CNN losses are not affected (their hard-example
                code is commented out).

        Returns:
            Tuple `(loss, cross_entropy, loss_box, rpn_cross_entropy,
            rpn_loss_box)`. `loss` is the sum of the other four, plus the
            collected regularization losses when `cfg.TRAIN.WEIGHT_DECAY > 0`.
        """
        # RPN
        # classification loss
        rpn_cls_score = tf.reshape(self.get_output('rpn_cls_score_reshape'), [-1, 2])  # shape (HxWxA, 2)
        rpn_label = tf.reshape(self.get_output('rpn-data')[0], [-1])  # shape (HxWxA)
        # ignore_label(-1)
        # fg_keep is taken from the unfiltered labels; only its sum is used,
        # as the normaliser of the RPN box loss further down.
        fg_keep = tf.equal(rpn_label, 1)
        rpn_keep = tf.where(tf.not_equal(rpn_label, -1))
        rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, rpn_keep), [-1, 2]) # shape (N, 2)
        rpn_label = tf.reshape(tf.gather(rpn_label, rpn_keep), [-1])
        rpn_cross_entropy_n = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score, labels=rpn_label)
        rpn_cross_entropy = tf.reduce_mean(rpn_cross_entropy_n)

        # box loss
        rpn_bbox_pred = self.get_output('rpn_bbox_pred') # shape (1, H, W, Ax4)
        rpn_bbox_targets = self.get_output('rpn-data')[1]
        rpn_bbox_inside_weights = self.get_output('rpn-data')[2]
        rpn_bbox_outside_weights = self.get_output('rpn-data')[3]
        # Keep only the rows of the anchors that survived the label filter.
        rpn_bbox_pred = tf.reshape(tf.gather(tf.reshape(rpn_bbox_pred, [-1, 4]), rpn_keep), [-1, 4]) # shape (N, 4)
        rpn_bbox_targets = tf.reshape(tf.gather(tf.reshape(rpn_bbox_targets, [-1, 4]), rpn_keep), [-1, 4])
        rpn_bbox_inside_weights = tf.reshape(tf.gather(tf.reshape(rpn_bbox_inside_weights, [-1, 4]), rpn_keep), [-1, 4])
        rpn_bbox_outside_weights = tf.reshape(tf.gather(tf.reshape(rpn_bbox_outside_weights, [-1, 4]), rpn_keep), [-1, 4])

        # NOTE(review): rpn_bbox_outside_weights is gathered above but does
        # not enter this loss (the R-CNN box loss below does multiply by its
        # outside weights) -- confirm this is intentional.
        rpn_loss_box_n = tf.reduce_sum(self.smooth_l1_dist(
            rpn_bbox_inside_weights * (rpn_bbox_pred - rpn_bbox_targets)), axis=[1])

        # rpn_loss_n = tf.reshape(rpn_cross_entropy_n + rpn_loss_box_n * 5, [-1])

        if ohem:
            # k = tf.minimum(tf.shape(rpn_cross_entropy_n)[0] / 2, 300)
            # # k = tf.shape(rpn_loss_n)[0] / 2
            # rpn_loss_n, top_k_indices = tf.nn.top_k(rpn_cross_entropy_n, k=k, sorted=False)
            # rpn_cross_entropy_n = tf.gather(rpn_cross_entropy_n, top_k_indices)
            # rpn_loss_box_n = tf.gather(rpn_loss_box_n, top_k_indices)

            # strategy: keeps all the positive samples
            fg_ = tf.equal(rpn_label, 1)
            bg_ = tf.equal(rpn_label, 0)
            pos_inds = tf.where(fg_)
            neg_inds = tf.where(bg_)
            rpn_cross_entropy_n_pos = tf.reshape(tf.gather(rpn_cross_entropy_n, pos_inds), [-1])
            rpn_cross_entropy_n_neg = tf.reshape(tf.gather(rpn_cross_entropy_n, neg_inds), [-1])
            # Keep at most 300 negatives, chosen by largest cross-entropy.
            top_k = tf.cast(tf.minimum(tf.shape(rpn_cross_entropy_n_neg)[0], 300), tf.int32)
            rpn_cross_entropy_n_neg, _ = tf.nn.top_k(rpn_cross_entropy_n_neg, k=top_k)
            # Each sum is divided by (count + 1.0). The negative term is
            # normalised by the count of all background anchors, not by the
            # number of negatives kept by top_k.
            rpn_cross_entropy = tf.reduce_sum(rpn_cross_entropy_n_neg) / (tf.reduce_sum(tf.cast(bg_, tf.float32)) + 1.0) \
                                + tf.reduce_sum(rpn_cross_entropy_n_pos) / (tf.reduce_sum(tf.cast(fg_, tf.float32)) + 1.0)

            rpn_loss_box_n = tf.reshape(tf.gather(rpn_loss_box_n, pos_inds), [-1])
            # rpn_cross_entropy_n = tf.concat(0, (rpn_cross_entropy_n_pos, rpn_cross_entropy_n_neg))

        # rpn_loss_box = 1 * tf.reduce_mean(rpn_loss_box_n)
        # Normalised by the number of foreground anchors, plus 1.0.
        rpn_loss_box = tf.reduce_sum(rpn_loss_box_n) / (tf.reduce_sum(tf.cast(fg_keep, tf.float32)) + 1.0)

        # R-CNN
        # classification loss
        cls_score = self.get_output('cls_score') # (R, C+1)
        label = tf.reshape(self.get_output('roi-data')[1], [-1]) # (R)
        cross_entropy_n = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score, labels=label)

        # bounding box regression L1 loss
        bbox_pred = self.get_output('bbox_pred') # (R, (C+1)x4)
        bbox_targets = self.get_output('roi-data')[2] # (R, (C+1)x4)
        # each element is {0, 1}, represents background (0), objects (1)
        bbox_inside_weights = self.get_output('roi-data')[3] # (R, (C+1)x4)
        bbox_outside_weights = self.get_output('roi-data')[4] # (R, (C+1)x4)

        loss_box_n = tf.reduce_sum( \
            bbox_outside_weights * self.smooth_l1_dist(bbox_inside_weights * (bbox_pred - bbox_targets)), \
            axis=[1])

        # loss_n is consumed only by the commented-out hard-example code
        # below; it does not contribute to the returned values.
        loss_n = loss_box_n + cross_entropy_n
        loss_n = tf.reshape(loss_n, [-1])

        # if ohem:
        #     # top_k = 100
        #     top_k = tf.minimum(tf.shape(loss_n)[0] / 2, 500)
        #     loss_n, top_k_indices = tf.nn.top_k(loss_n, k=top_k, sorted=False)
        #     loss_box_n = tf.gather(loss_box_n, top_k_indices)
        #     cross_entropy_n = tf.gather(cross_entropy_n, top_k_indices)

        loss_box = tf.reduce_mean(loss_box_n)
        cross_entropy = tf.reduce_mean(cross_entropy_n)

        loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box

        # add regularizer
        # NOTE(review): tf.add_n presumably requires a non-empty list, so this
        # relies on at least one regularization loss having been registered
        # whenever weight decay is enabled -- confirm.
        if cfg.TRAIN.WEIGHT_DECAY > 0:
            regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            loss = tf.add_n(regularization_losses) + loss

        return loss, cross_entropy, loss_box, rpn_cross_entropy, rpn_loss_box
示例#36
0
            label = features['label']
            index = features['index']
            value = features['value']

            dense_feature = tf.sparse_to_dense(
                tf.sparse_tensor_to_dense(index),
                [
                    num_features,
                ],
                #                               tf.constant([33762578, 1], dtype=tf.int64),
                tf.sparse_tensor_to_dense(value))

            dense_feature = tf.reshape(dense_feature, [num_features, 1])
            dotProduct = tf.matmul(tf.transpose(w), dense_feature)
            y = tf.cast(label, tf.float32)
            error = tf.not_equal(tf.sign(dotProduct), y)

    ################### TEST ENDS #############################################
        with tf.Session("grpc://vm-32-%d:2222" %
                        (FLAGS.task_index + 1)) as sess:
            # only one client initializes the variable
            if FLAGS.task_index == 0:
                sess.run(tf.initialize_all_variables())

    # start queue runners
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            num_examples = 1000
            for i in xrange(0, 10000):
                sess.run(w)
                if ((i % 1000) == 0):
示例#37
0
 def error(self):
     """Return the fraction of rows where prediction and target disagree.

     A row counts as a mistake when the argmax over axis 1 of `self.target`
     differs from the argmax over axis 1 of `self.prediction`.
     """
     target_ids = tf.argmax(self.target, 1)
     predicted_ids = tf.argmax(self.prediction, 1)
     is_wrong = tf.cast(tf.not_equal(target_ids, predicted_ids), tf.float32)
     return tf.reduce_mean(is_wrong)
示例#38
0
def add_softmax_cross_entropy_loss_for_each_scale(
        scales_to_logits,
        labels,
        num_classes,
        dataset,
        loss_weight=1.0,
        upsample_logits=True,
        scope=None,
        enable_class_balancing=False):
    """Adds a softmax cross entropy loss for the logits of each scale.

    One `tf.losses.softmax_cross_entropy` call is made per entry of
    `scales_to_logits`; nothing is returned.

  Args:
    scales_to_logits: A map from logits names for different scales to logits.
      The logits have shape [batch, logits_height, logits_width, num_classes].
    labels: Groundtruth labels with shape [batch, image_height, image_width, 1].
    num_classes: Integer, number of target classes.
    dataset: Dataset descriptor. Its `ignore_label` is read when class
      balancing is off; `cls_to_percentage`, `labels_to_class`,
      `get_class_weights` and `name` are read when it is on.
    loss_weight: Float, loss weight. Applied only when class balancing is off.
    upsample_logits: Boolean, upsample logits or not.
    scope: String, the scope for the loss.
    enable_class_balancing: Boolean. When True, each pixel is weighted by the
      class weight of its label instead of by the ignore-label mask.

  Raises:
    ValueError: If `labels` is None, or if class balancing is requested but
      `dataset.cls_to_percentage` is None.
  """
    if labels is None:
        raise ValueError('No label for softmax cross entropy loss.')

    for scale, logits in six.iteritems(scales_to_logits):
        loss_scope = '%s_%s' % (scope, scale) if scope else None

        if upsample_logits:
            # Labels keep their resolution; the logits are upsampled to it.
            logits = tf.image.resize_bilinear(
                logits, tf.shape(labels)[1:3], align_corners=True)
            scaled_labels = labels
        else:
            # Labels are downsampled to the resolution of the logits.
            scaled_labels = tf.image.resize_nearest_neighbor(
                labels, tf.shape(logits)[1:3], align_corners=True)

        flat_labels = tf.reshape(scaled_labels, shape=[-1])
        one_hot_labels = slim.one_hot_encoding(
            flat_labels, num_classes, on_value=1.0, off_value=0.0)

        if not enable_class_balancing:
            # Weight 0 for ignore-label pixels, `loss_weight` for the rest.
            not_ignored = tf.to_float(
                tf.not_equal(flat_labels, dataset.ignore_label))
            weights = not_ignored * loss_weight
        else:
            tf.logging.info('Using class balancing for loss function.')
            if dataset.cls_to_percentage is None:
                raise ValueError(
                    'Class balancing for {} currently not supported'.format(
                        dataset.name))

            class_weights = tf.constant(
                dataset.get_class_weights(dataset.labels_to_class,
                                          dataset.cls_to_percentage))
            # Select each pixel's class weight through its one-hot row.
            weights = tf.reduce_sum(
                tf.multiply(one_hot_labels, class_weights), 1)

        flat_logits = tf.reshape(logits, shape=[-1, num_classes])
        tf.losses.softmax_cross_entropy(
            one_hot_labels, flat_logits, weights=weights, scope=loss_scope)
示例#39
0
    def compute_mask(self, inputs, mask=None):
        """Return a boolean mask of the non-zero entries of `inputs`.

        Args:
            inputs: Tensor compared element-wise against 0.
            mask: Accepted but not used.

        Returns:
            `tf.not_equal(inputs, 0)` when `self.mask_zero` is truthy,
            otherwise None.
        """
        if self.mask_zero:
            return tf.not_equal(inputs, 0)
        return None
示例#40
0
 def decode_sparse(self, include_stop_tokens=True):
     """Decode densely, then keep only the positions selected by a stop mask.

     Args:
         include_stop_tokens: When True, the keep-mask is shifted one step to
             the right along axis 1 and its first column is forced to True,
             so a position is kept when its predecessor was not the stop
             token.

     Returns:
         A pair `(sparse_symbols, logprobs)`: the result of
         `sparse_boolean_mask` applied to the dense symbols, and the
         log-probabilities returned by `self.decode_dense()` unchanged.
     """
     dense_symbols, logprobs = self.decode_dense()
     keep = tf.not_equal(dense_symbols, self.stop_token)
     if include_stop_tokens:
         first_column = tf.ones_like(keep[:, :1])
         shifted = keep[:, :-1]
         # NOTE(review): the axis-first argument order looks like the
         # pre-1.0 TensorFlow `tf.concat` signature -- confirm against the
         # TensorFlow version this code targets.
         keep = tf.concat(1, [first_column, shifted])
     return sparse_boolean_mask(dense_symbols, keep), logprobs
 def error(self):
     """Return the mistake rate and record it as the "error" scalar summary.

     A row counts as a mistake when the argmax over axis 1 of `self.target`
     differs from the argmax over axis 1 of `self.prediction`.
     """
     target_ids = tf.argmax(self.target, 1)
     predicted_ids = tf.argmax(self.prediction, 1)
     mismatch = tf.not_equal(target_ids, predicted_ids)
     error_rate = tf.reduce_mean(tf.cast(mismatch, tf.float32))
     tf.summary.scalar("error", error_rate)
     return error_rate
示例#42
0
  def _parse_train_data(self, data):
    """Parse data for ShapeMask training."""
    classes = data['groundtruth_classes']
    boxes = data['groundtruth_boxes']
    masks = data['groundtruth_instance_masks']
    is_crowds = data['groundtruth_is_crowd']
    # Skips annotations with `is_crowd` = True.
    if self._skip_crowd_during_training and self._is_training:
      num_groundtrtuhs = tf.shape(classes)[0]
      with tf.control_dependencies([num_groundtrtuhs, is_crowds]):
        indices = tf.cond(
            tf.greater(tf.size(is_crowds), 0),
            lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
            lambda: tf.cast(tf.range(num_groundtrtuhs), tf.int64))
      classes = tf.gather(classes, indices)
      boxes = tf.gather(boxes, indices)
      masks = tf.gather(masks, indices)

    # Gets original image and its size.
    image = data['image']
    image_shape = tf.shape(image)[0:2]

    # If not using category, makes all categories with id = 0.
    if not self._use_category:
      classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

    # Normalizes image with mean and std pixel values.
    image = input_utils.normalize_image(image)

    # Flips image randomly during training.
    if self._aug_rand_hflip:
      image, boxes, masks = input_utils.random_horizontal_flip(
          image, boxes, masks)

    # Converts boxes from normalized coordinates to pixel coordinates.
    boxes = box_utils.denormalize_boxes(boxes, image_shape)

    # Resizes and crops image.
    image, image_info = input_utils.resize_and_crop_image(
        image,
        self._output_size,
        self._output_size,
        aug_scale_min=self._aug_scale_min,
        aug_scale_max=self._aug_scale_max)
    image_scale = image_info[2, :]
    offset = image_info[3, :]

    # Resizes and crops boxes and masks.
    boxes = input_utils.resize_and_crop_boxes(
        boxes, image_scale, self._output_size, offset)
    masks = input_utils.resize_and_crop_masks(
        tf.expand_dims(masks, axis=-1), image_scale, self._output_size, offset)
    masks = tf.squeeze(masks, axis=-1)

    # Filters out ground truth boxes that are all zeros.
    indices = input_utils.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)
    masks = tf.gather(masks, indices)

    # Assigns anchors.
    input_anchor = anchor.Anchor(
        self._min_level, self._max_level, self._num_scales,
        self._aspect_ratios, self._anchor_size, self._output_size)
    anchor_labeler = anchor.AnchorLabeler(
        input_anchor, self._match_threshold, self._unmatched_threshold)
    (cls_targets,
     box_targets,
     num_positives) = anchor_labeler.label_anchors(
         boxes,
         tf.cast(tf.expand_dims(classes, axis=1), tf.float32))

    # Sample groundtruth masks/boxes/classes for mask branch.
    num_masks = tf.shape(masks)[0]
    mask_shape = tf.shape(masks)[1:3]

    # Pad sampled boxes/masks/classes to a constant batch size.
    padded_boxes = input_utils.pad_to_fixed_size(boxes, self._num_sampled_masks)
    padded_classes = input_utils.pad_to_fixed_size(
        classes, self._num_sampled_masks)
    padded_masks = input_utils.pad_to_fixed_size(masks, self._num_sampled_masks)

    # Randomly sample groundtruth masks for mask branch training. For the image
    # without groundtruth masks, it will sample the dummy padded tensors.
    rand_indices = tf.random.uniform(
        [self._num_sampled_masks],
        minval=0,
        maxval=tf.maximum(num_masks, 1),
        dtype=tf.dtypes.int32)
    sampled_boxes = tf.gather(padded_boxes, rand_indices)
    sampled_classes = tf.gather(padded_classes, rand_indices)
    sampled_masks = tf.gather(padded_masks, rand_indices)
    # Jitter the sampled boxes to mimic the noisy detections.
    sampled_boxes = box_utils.jitter_boxes(
        sampled_boxes, noise_scale=self._box_jitter_scale)

    # Compute mask targets in feature crop. A feature crop fully contains a
    # sampled box.
    mask_outer_boxes = box_utils.compute_outer_boxes(
        sampled_boxes, mask_shape, scale=self._outer_box_scale)
    norm_mask_outer_boxes = box_utils.normalize_boxes(
        mask_outer_boxes, mask_shape)

    # Set sampled_masks shape to [batch_size, height, width, 1].
    sampled_masks = tf.expand_dims(sampled_masks, axis=-1)
    mask_targets = tf.image.crop_and_resize(
        sampled_masks,
        norm_mask_outer_boxes,
        box_ind=tf.range(self._num_sampled_masks),
        crop_size=[self._mask_crop_size, self._mask_crop_size],
        method='bilinear',
        extrapolation_value=0,
        name='train_mask_targets')
    mask_targets = tf.where(tf.greater_equal(mask_targets, 0.5),
                            tf.ones_like(mask_targets),
                            tf.zeros_like(mask_targets))
    mask_targets = tf.squeeze(mask_targets, axis=-1)

    # If bfloat16 is used, casts input image to tf.bfloat16.
    if self._use_bfloat16:
      image = tf.cast(image, dtype=tf.bfloat16)

    # Packs labels for model_fn outputs.
    labels = {
        'cls_targets': cls_targets,
        'box_targets': box_targets,
        'anchor_boxes': input_anchor.multilevel_boxes,
        'num_positives': num_positives,
        'image_info': image_info,
        # For ShapeMask.
        'mask_boxes': sampled_boxes,
        'mask_outer_boxes': mask_outer_boxes,
        'mask_targets': mask_targets,
        'mask_classes': sampled_classes,
        'mask_is_valid': tf.cast(tf.not_equal(num_masks, 0), tf.int32)
    }
    return image, labels
示例#43
0
    def compute_loss(self, y_true1, y_pred):
        '''
        Compute the two-stage (ARM + ODM) multitask loss of the SSD model
        prediction against the ground truth.

        The first stage ("ARM" in the original comments) is scored as a binary
        object/background classifier plus box regression against the anchors.
        Its predicted offsets are then decoded into refined boxes, the ground
        truth boxes are re-matched against those refined boxes inside the graph,
        and the second stage ("ODM") is scored against the re-encoded targets.

        Arguments:
            y_true1 (tensor): Ground truth of shape
                `(batch_size, #boxes, #classes + 12 + 5)`. As read by this
                function, the last axis contains, in order:
                `#classes` one-hot class entries (index 0 is background),
                4 encoded ground truth box offsets, 8 entries that are not read,
                and 5 trailing entries `(label, xmin, ymin, xmax, ymax)`.
                The trailing 5 entries are indexed along the *boxes* axis: row
                `i` (for `i < self.gt_num_max`) is used as the `i`-th ground
                truth box of the image. Their coordinates are compared with
                decoded boxes scaled by 320, so they are presumably absolute
                pixel coordinates of a 320x320 input (TODO confirm).
                Boxes that should be ignored by the first stage need an
                all-zero one-hot class vector.
            y_pred (tensor): The model prediction of shape
                `(batch_size, #boxes, 14 + #classes + 12)`. As read by this
                function, the last axis contains, in order:
                `[0:2]` first-stage background/object scores,
                `[2:6]` first-stage box offsets `(cx, cy, w, h)`,
                `[6:10]` anchor boxes `(cx, cy, w, h)`,
                `[10:14]` the four variances,
                `[14:-12]` second-stage class scores,
                `[-12:-8]` second-stage box offsets,
                and 8 trailing entries that are not read here.

        Returns:
            A float tensor with one loss value per batch item (shape
            `(batch_size,)`, given that `self.log_loss` / `self.smooth_L1_loss`
            return `(batch_size, #boxes)` tensors): the sum of both stage
            losses, each normalised by its number of positive boxes in the
            batch, multiplied by the batch size.
        '''
        # Graph constants are kept in locals. Writing them back to `self` (as
        # this method used to do) replaced the Python numbers with tensors, so
        # a second call handed a symbolic Tensor to `tf.constant` and the
        # instance stayed tied to the graph of the first call.
        neg_pos_ratio = tf.constant(self.neg_pos_ratio)
        n_neg_min = tf.constant(self.n_neg_min)
        alpha = tf.constant(self.alpha)
        n_class = self.n_class
        gt_num_max = self.gt_num_max

        # Factor between normalised box coordinates and the coordinate system of
        # the ground truth boxes; presumably the input image size (TODO confirm).
        image_size = 320.0
        # Scaling applied when re-encoding the matched ground truth boxes.
        prior_scaling = [0.1, 0.1, 0.2, 0.2]

        batch_size = tf.shape(y_pred)[0]  # Output dtype: tf.int32
        # Total number of boxes per image (not the number of boxes per cell).
        n_boxes = tf.shape(y_pred)[1]  # Output dtype: tf.int32

        def hard_negative_class_loss(class_loss, negatives, n_positive):
            '''Sum, per batch item, the classification loss of the hardest negatives.'''
            # Classification loss of all negative boxes, shape (batch_size, n_boxes).
            neg_class_loss_all = class_loss * negatives
            # `tf.nn.top_k` returns k entries no matter what, even if all
            # losses are zero, in which case it could return indices of
            # positive boxes. `k` is therefore capped by the number of negative
            # boxes that actually have a non-zero loss.
            n_neg_losses = tf.count_nonzero(neg_class_loss_all,
                                            dtype=tf.int32)
            # Keep at most `neg_pos_ratio` times the number of positives, but at
            # least `n_neg_min` (unless `n_neg_losses` is smaller).
            n_negative_keep = tf.minimum(
                tf.maximum(neg_pos_ratio * tf.to_int32(n_positive),
                           n_neg_min), n_neg_losses)

            def no_negatives():
                # Either there are no negative boxes at all or all of their
                # classification losses are zero.
                return tf.zeros([batch_size])

            def top_k_negatives():
                # Flatten to 1D, pick the `n_negative_keep` highest losses
                # across the whole batch (order is irrelevant) ...
                neg_class_loss_all_1d = tf.reshape(neg_class_loss_all, [-1])
                _, indices = tf.nn.top_k(neg_class_loss_all_1d,
                                         k=n_negative_keep,
                                         sorted=False)
                # ... turn the indices into a 0/1 mask of shape
                # (batch_size, n_boxes) ...
                negatives_keep = tf.scatter_nd(
                    indices=tf.expand_dims(indices, axis=1),
                    updates=tf.ones_like(indices, dtype=tf.int32),
                    shape=tf.shape(neg_class_loss_all_1d))
                negatives_keep = tf.to_float(
                    tf.reshape(negatives_keep, [batch_size, n_boxes]))
                # ... and keep only the losses of the selected boxes.
                return tf.reduce_sum(class_loss * negatives_keep, axis=-1)

            return tf.cond(tf.equal(n_neg_losses, tf.constant(0)),
                           no_negatives, top_k_negatives)

        ######################## ARM loss ########################
        # Drop the trailing ground truth box list; what remains has the layout
        # `[classes one-hot, 4 offsets, 8 unused entries]`.
        y_true = y_true1[:, :, :-5]

        # Collapse the one-hot classes into a binary (background, object) target.
        positives_arm = tf.reduce_sum(y_true[:, :, 1:n_class],
                                      axis=2,
                                      keepdims=True)
        y_true_arm = tf.concat([y_true[:, :, 0:1], positives_arm], axis=-1)

        # 1: Compute the losses for class and box predictions for every box.
        arm_class_loss_all = tf.to_float(
            self.log_loss(y_true_arm,
                          y_pred[:, :, 0:2]))  # Shape: (batch_size, n_boxes)
        arm_loc_loss_all = tf.to_float(
            self.smooth_L1_loss(
                y_true[:, :, -12:-8],
                y_pred[:, :, 2:6]))  # Shape: (batch_size, n_boxes)

        # 2: Masks for the negative and positive ground truth classes.
        arm_negatives = y_true_arm[:, :, 0]  # Shape: (batch_size, n_boxes)
        arm_positives = y_true_arm[:, :, 1]  # Shape: (batch_size, n_boxes)

        # Number of positive boxes across the whole batch.
        n_positive_arm = tf.reduce_sum(arm_positives)

        # Losses are summed PER batch item because Keras loss functions must
        # output one value per batch item rather than one scalar per batch.
        arm_pos_class_loss = tf.reduce_sum(arm_class_loss_all * arm_positives,
                                           axis=-1)
        arm_neg_class_loss = hard_negative_class_loss(arm_class_loss_all,
                                                      arm_negatives,
                                                      n_positive_arm)
        class_loss_arm = arm_pos_class_loss + arm_neg_class_loss

        # 3: Localization loss, for positive boxes only (negative boxes have no
        #    ground truth box they would correspond to).
        loc_loss_arm = tf.reduce_sum(arm_loc_loss_all * arm_positives,
                                     axis=-1)
        total_loss_arm = (class_loss_arm +
                          alpha * loc_loss_arm) / tf.maximum(
                              1.0, n_positive_arm)  # In case of no positives

        ######################## ODM loss ########################
        # Decode the first-stage offsets into refined boxes; these become the
        # anchors of the second stage.
        arm_loc = y_pred[:, :, 2:6]  # predicted (cx, cy, w, h) offsets
        anchors = y_pred[:, :, 6:10]  # anchor (cx, cy, w, h)
        variances = y_pred[:, :, 10:14]  # variances for (cx, cy, w, h)

        anchor_cx = anchors[:, :, 0:1]
        anchor_cy = anchors[:, :, 1:2]
        anchor_w = anchors[:, :, 2:3]
        anchor_h = anchors[:, :, 3:4]

        # exp(offset * variance) == size(pred) / size(anchor)
        hpref = tf.exp(arm_loc[:, :, 3:4] * variances[:, :, 3:4]) * anchor_h
        wpref = tf.exp(arm_loc[:, :, 2:3] * variances[:, :, 2:3]) * anchor_w
        cypref = (arm_loc[:, :, 1:2] * variances[:, :, 1:2] * anchor_h +
                  anchor_cy)
        cxpref = (arm_loc[:, :, 0:1] * variances[:, :, 0:1] * anchor_w +
                  anchor_cx)

        # Corner representation of the refined boxes, scaled by `image_size`.
        xmin_a = (cxpref - wpref / 2.0) * image_size
        ymin_a = (cypref - hpref / 2.0) * image_size
        xmax_a = (cxpref + wpref / 2.0) * image_size
        ymax_a = (cypref + hpref / 2.0) * image_size
        vol_anchors = (xmax_a - xmin_a) * (ymax_a - ymin_a)
        refined_boxes = tf.concat([xmin_a, ymin_a, xmax_a, ymax_a], axis=-1)

        # Re-match all ground truth boxes against the refined boxes. The result
        # is encoded relative to the refined boxes and used as the ground truth
        # of the second stage.
        # `gt_bboxes[:, i]` holds (label, xmin, ymin, xmax, ymax) of the i-th
        # ground truth box of every image in the batch.
        gt_bboxes = y_true1[:, :, n_class + 12:]
        gt_labels = y_true1[:, :, n_class + 12:n_class + 13]

        def float_state():
            # Fresh float32 zeros of shape (batch_size, n_boxes, 1).
            return tf.zeros_like(y_true1[:, :, 0:1])

        def int_state():
            # Fresh int32 zeros of shape (batch_size, n_boxes, 1).
            return tf.cast(tf.zeros_like(y_true1[:, :, 0:1]), tf.int32)

        feat_labels = int_state()  # label of the matched ground truth box
        feat_scores = float_state()  # IoU with the matched ground truth box
        feat_matched = int_state()  # match marker used for later filtering
        feat_gtnum = int_state()  # index of the matched ground truth box
        feat_ymin = float_state()  # coordinates of the matched ground truth box
        feat_xmin = float_state()
        feat_ymax = float_state()
        feat_xmax = float_state()

        # Boxes that are background in the ground truth and that the first stage
        # already scores >= 0.99 at index 0 (presumably the background
        # confidence) take no part in matching or in the second-stage loss.
        arm_easy_negative = tf.logical_and(
            tf.equal(y_true_arm[:, :, 0:1], 1),
            tf.greater_equal(y_pred[:, :, 0:1], 0.99))
        matchable = tf.logical_not(arm_easy_negative)

        def jaccard_with_anchors(label, bbox):
            '''IoU between one ground truth box per image and all refined boxes.'''
            int_xmin = tf.maximum(label[:, :, 0:1], bbox[:, :, 0:1])
            int_ymin = tf.maximum(label[:, :, 1:2], bbox[:, :, 1:2])
            int_xmax = tf.minimum(label[:, :, 2:3], bbox[:, :, 2:3])
            int_ymax = tf.minimum(label[:, :, 3:4], bbox[:, :, 3:4])

            h = tf.maximum(int_ymax - int_ymin, 0.)
            w = tf.maximum(int_xmax - int_xmin, 0.)
            inter_vol = h * w

            union_vol = vol_anchors - inter_vol + (
                label[:, :, 2:3] - label[:, :, 0:1]) * (label[:, :, 3:4] -
                                                        label[:, :, 1:2])
            return tf.div(inter_vol, union_vol)

        def gt_index_in_range(i, *unused_state):
            # Loop over ground truth slots 0 .. gt_num_max - 1.
            return tf.less(tf.cast(i, dtype=tf.float32), gt_num_max)

        def assign_best_gt(i, feat_labels, feat_scores, feat_gtnum, feat_xmin,
                           feat_ymin, feat_xmax, feat_ymax):
            '''
            Compare the i-th ground truth box with every refined box and, where
            its IoU beats the best IoU seen so far, store the new IoU together
            with the label, index and coordinates of that ground truth box.
            '''
            label = gt_bboxes[:, i:i + 1, 1:5]  # (xmin, ymin, xmax, ymax)
            jaccard = jaccard_with_anchors(label, refined_boxes)

            # Update only where the IoU improves and the box is matchable.
            mask = tf.logical_and(tf.greater(jaccard, feat_scores), matchable)
            imask = tf.cast(mask, tf.int32)
            fmask = tf.cast(mask, tf.float32)

            # Where the mask is 1 take the new value, elsewhere keep the old one.
            feat_labels = imask * tf.cast(
                gt_labels[:, i:i + 1, 0:1],
                tf.int32) + (1 - imask) * feat_labels
            feat_gtnum = imask * tf.cast(i,
                                         tf.int32) + (1 - imask) * feat_gtnum
            feat_scores = tf.where(mask, jaccard, feat_scores)

            feat_xmin = fmask * label[:, :, 0:1] + (1 - fmask) * feat_xmin
            feat_ymin = fmask * label[:, :, 1:2] + (1 - fmask) * feat_ymin
            feat_xmax = fmask * label[:, :, 2:3] + (1 - fmask) * feat_xmax
            feat_ymax = fmask * label[:, :, 3:4] + (1 - fmask) * feat_ymax

            return [
                i + 1, feat_labels, feat_scores, feat_gtnum, feat_xmin,
                feat_ymin, feat_xmax, feat_ymax
            ]

        [
            _, feat_labels, feat_scores, feat_gtnum, feat_xmin, feat_ymin,
            feat_xmax, feat_ymax
        ] = tf.while_loop(gt_index_in_range, assign_best_gt, [
            0, feat_labels, feat_scores, feat_gtnum, feat_xmin, feat_ymin,
            feat_xmax, feat_ymax
        ])

        def mark_best_box(i, feat_matched):
            '''
            Mark, for the i-th ground truth box of every image, the refined
            box(es) with the highest IoU among those assigned to it, so that the
            threshold filter below cannot drop them.
            '''
            # IoUs of the boxes assigned to ground truth box i, zero elsewhere.
            assigned = tf.equal(feat_gtnum, i)
            tmp = tf.where(assigned, feat_scores, tf.zeros_like(feat_scores))
            # Best IoU PER IMAGE. Reducing over the whole batch would mark the
            # best box in only one image of the batch, because slot i holds a
            # different ground truth box in every image.
            max_score = tf.reduce_max(tmp, axis=[1, 2], keepdims=True)
            mask = tf.logical_and(tf.equal(tmp, max_score),
                                  tf.greater(tmp, 0))
            # Do not mark a box twice.
            mask = tf.logical_and(mask, tf.not_equal(feat_matched, 1))
            feat_matched = tf.cast(mask, tf.int32) * tf.cast(
                1, tf.int32) + feat_matched
            return [i + 1, feat_matched]

        [_, feat_matched] = tf.while_loop(gt_index_in_range, mark_best_box,
                                          [0, feat_matched])

        # A refined box is a second-stage positive if it is the best box of some
        # ground truth box or if its IoU is at least 0.5.
        mask = tf.logical_or(tf.equal(feat_matched, 1),
                             tf.greater_equal(feat_scores, 0.5))
        feat_labels = tf.where(mask, feat_labels, tf.zeros_like(feat_labels))

        feat_xmin = tf.where(mask, feat_xmin, tf.zeros_like(feat_xmin))
        feat_ymin = tf.where(mask, feat_ymin, tf.zeros_like(feat_ymin))
        feat_xmax = tf.where(mask, feat_xmax, tf.zeros_like(feat_xmax))
        feat_ymax = tf.where(mask, feat_ymax, tf.zeros_like(feat_ymax))

        # Positives are marked with 2 from here on.
        feat_matched = tf.where(mask, 2 * tf.ones_like(feat_matched),
                                feat_matched)

        # Convert the matched boxes back to normalised center / size form.
        feat_cy = (feat_ymax + feat_ymin) / 2. / image_size
        feat_cx = (feat_xmax + feat_xmin) / 2. / image_size
        feat_h = (feat_ymax - feat_ymin) / image_size
        feat_w = (feat_xmax - feat_xmin) / image_size

        # Encode the matched boxes relative to the refined boxes: center
        # deviation, then log size ratio (clipped to avoid log(0)).
        feat_cx = (feat_cx - cxpref) / (wpref * prior_scaling[0])
        feat_cy = (feat_cy - cypref) / (hpref * prior_scaling[1])
        feat_w = tf.log(tf.maximum(feat_w / wpref, 1e-15)) / prior_scaling[2]
        feat_h = tf.log(tf.maximum(feat_h / hpref, 1e-15)) / prior_scaling[3]

        # Second-stage targets. The one-hot of a (batch, n_boxes, 1) tensor has
        # an extra axis, which the reshape folds away again.
        feat_labels1 = tf.cast(tf.one_hot(feat_labels, n_class, axis=-1),
                               dtype=tf.int32)
        feat_labels_reshape = tf.reshape(
            feat_labels1,
            shape=[tf.shape(feat_labels1)[0],
                   tf.shape(feat_labels1)[1], -1])
        odm_true_classes = tf.cast(feat_labels_reshape, dtype=tf.float32)
        odm_true_loc = tf.concat([feat_cx, feat_cy, feat_w, feat_h], axis=-1)

        # 1: Compute the losses for class and box predictions for every box.
        odm_class_loss_all = tf.to_float(
            self.log_loss(odm_true_classes,
                          y_pred[:, :, 14:-12]))  # Shape: (batch_size, n_boxes)
        odm_loc_loss_all = tf.to_float(
            self.smooth_L1_loss(
                odm_true_loc,
                y_pred[:, :, -12:-8]))  # Shape: (batch_size, n_boxes)

        # 2: Masks for the negative and positive ground truth classes.
        odm_negatives = odm_true_classes[:, :, 0]
        odm_positives = tf.to_float(
            tf.reduce_max(odm_true_classes[:, :, 1:n_class], axis=-1))

        # Exclude from the loss (a) the easy negatives filtered by the first
        # stage and (b) non-positive boxes whose IoU is still >= 0.3.
        ambiguous = tf.logical_and(
            tf.not_equal(feat_matched[:, :, 0], 2),
            tf.greater_equal(feat_scores[:, :, 0], 0.3))
        ignore = tf.logical_or(arm_easy_negative[:, :, 0], ambiguous)
        odm_positives = tf.where(ignore, tf.zeros_like(odm_positives),
                                 odm_positives)
        odm_negatives = tf.where(ignore, tf.zeros_like(odm_negatives),
                                 odm_negatives)

        n_positive_odm = tf.reduce_sum(odm_positives)

        odm_pos_class_loss = tf.reduce_sum(odm_class_loss_all * odm_positives,
                                           axis=-1)
        odm_neg_class_loss = hard_negative_class_loss(odm_class_loss_all,
                                                      odm_negatives,
                                                      n_positive_odm)
        class_loss_odm = odm_pos_class_loss + odm_neg_class_loss

        # 3: Localization loss, for positive boxes only.
        loc_loss_odm = tf.reduce_sum(odm_loc_loss_all * odm_positives,
                                     axis=-1)
        # 4: Total second-stage loss.
        total_loss_odm = (class_loss_odm +
                          alpha * loc_loss_odm) / tf.maximum(
                              1.0, n_positive_odm)  # In case of no positives

        # Keras divides the loss by the batch size, but the relevant criterion
        # to average over is the number of positive boxes in the batch (already
        # applied above). Multiplying by the batch size reverts Keras' averaging.
        total_loss = (total_loss_odm +
                      total_loss_arm) * tf.to_float(batch_size)

        return total_loss
示例#44
0
    def build():
        """Builds the Tensorflow graph.

        Reads `mode`, `hparams`, `encoder_decoder`, `input_size`,
        `num_classes`, `no_event_label` and `sequence_example_file_paths`
        from the enclosing scope. Nothing is returned; the tensors and ops
        that callers need are registered in graph collections:

        * 'train': 'train_op', plus one collection and one scalar summary
          per entry of the metric map.
        * 'eval': metric update ops under 'eval_ops', plus one collection
          and one scalar summary per entry of the metric map.
        * 'generate': 'inputs', 'temperature', 'softmax', 'initial_state'
          and 'final_state'.
        """
        inputs, labels, lengths = None, None, None

        if mode in ('train', 'eval'):
            # A numeric `no_event_label` means a single label per step;
            # otherwise there is one label per entry of `no_event_label`.
            if isinstance(no_event_label, numbers.Number):
                label_shape = []
            else:
                label_shape = [len(no_event_label)]
            inputs, labels, lengths = magenta.common.get_padded_batch(
                sequence_example_file_paths,
                hparams.batch_size,
                input_size,
                label_shape=label_shape,
                shuffle=mode == 'train')

        elif mode == 'generate':
            inputs = tf.placeholder(tf.float32,
                                    [hparams.batch_size, None, input_size])

        # Index-encoded inputs are expanded to one-hot vectors before they
        # reach the RNN; every other encoding is used as-is.
        if isinstance(encoder_decoder,
                      magenta.music.OneHotIndexEventSequenceEncoderDecoder):
            expanded_inputs = tf.one_hot(
                tf.cast(tf.squeeze(inputs, axis=-1), tf.int64),
                encoder_decoder.input_depth)
        else:
            expanded_inputs = inputs

        # Dropout is disabled while generating.
        dropout_keep_prob = 1.0 if mode == 'generate' else hparams.dropout_keep_prob

        if hparams.use_cudnn:
            outputs, initial_state, final_state = make_cudnn(
                expanded_inputs,
                hparams.rnn_layer_sizes,
                hparams.batch_size,
                mode,
                dropout_keep_prob=dropout_keep_prob,
                residual_connections=hparams.residual_connections)

        else:
            cell = make_rnn_cell(
                hparams.rnn_layer_sizes,
                dropout_keep_prob=dropout_keep_prob,
                attn_length=hparams.attn_length,
                residual_connections=hparams.residual_connections)

            initial_state = cell.zero_state(hparams.batch_size, tf.float32)

            # Feed the expanded inputs here as well, so that both RNN
            # implementations see the same representation. Passing the raw
            # `inputs` would hand index-encoded events to the cell as scalar
            # floats instead of one-hot vectors.
            outputs, final_state = tf.nn.dynamic_rnn(
                cell,
                expanded_inputs,
                sequence_length=lengths,
                initial_state=initial_state,
                swap_memory=True)

        outputs_flat = magenta.common.flatten_maybe_padded_sequences(
            outputs, lengths)
        # With several label groups the logits of all groups are produced by
        # one linear layer and sliced apart below.
        if isinstance(num_classes, numbers.Number):
            num_logits = num_classes
        else:
            num_logits = sum(num_classes)
        logits_flat = contrib_layers.linear(outputs_flat, num_logits)

        if mode in ('train', 'eval'):
            labels_flat = magenta.common.flatten_maybe_padded_sequences(
                labels, lengths)

            if isinstance(num_classes, numbers.Number):
                softmax_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=labels_flat, logits=logits_flat)
                predictions_flat = tf.argmax(logits_flat, axis=1)
            else:
                # logits_offsets[i]:logits_offsets[i + 1] is the column range
                # of `logits_flat` that belongs to label group i.
                logits_offsets = np.cumsum([0] + num_classes)
                softmax_cross_entropy = []
                predictions = []
                for i in range(len(num_classes)):
                    softmax_cross_entropy.append(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            labels=labels_flat[:, i],
                            logits=logits_flat[:, logits_offsets[i]:
                                               logits_offsets[i + 1]]))
                    predictions.append(
                        tf.argmax(
                            logits_flat[:,
                                        logits_offsets[i]:logits_offsets[i +
                                                                         1]],
                            axis=1))
                predictions_flat = tf.stack(predictions, 1)

            # 0/1 indicator tensors used to split accuracy into "event" and
            # "no event" positions.
            correct_predictions = tf.to_float(
                tf.equal(labels_flat, predictions_flat))
            event_positions = tf.to_float(
                tf.not_equal(labels_flat, no_event_label))
            no_event_positions = tf.to_float(
                tf.equal(labels_flat, no_event_label))

            # Compute the total number of time steps across all sequences in the
            # batch. For some models this will be different from the number of RNN
            # steps.
            def batch_labels_to_num_steps(batch_labels, lengths):
                num_steps = 0
                for labels, length in zip(batch_labels, lengths):
                    num_steps += encoder_decoder.labels_to_num_steps(
                        labels[:length])
                return np.float32(num_steps)

            # The step count is computed in Python, so it is wrapped as an op.
            num_steps = tf.py_func(batch_labels_to_num_steps,
                                   [labels, lengths], tf.float32)

            if mode == 'train':
                loss = tf.reduce_mean(softmax_cross_entropy)
                perplexity = tf.exp(loss)
                accuracy = tf.reduce_mean(correct_predictions)
                event_accuracy = (
                    tf.reduce_sum(correct_predictions * event_positions) /
                    tf.reduce_sum(event_positions))
                no_event_accuracy = (
                    tf.reduce_sum(correct_predictions * no_event_positions) /
                    tf.reduce_sum(no_event_positions))

                loss_per_step = tf.reduce_sum(
                    softmax_cross_entropy) / num_steps
                perplexity_per_step = tf.exp(loss_per_step)

                optimizer = tf.train.AdamOptimizer(
                    learning_rate=hparams.learning_rate)

                train_op = contrib_slim.learning.create_train_op(
                    loss, optimizer, clip_gradient_norm=hparams.clip_norm)
                tf.add_to_collection('train_op', train_op)

                vars_to_summarize = {
                    'loss': loss,
                    'metrics/perplexity': perplexity,
                    'metrics/accuracy': accuracy,
                    'metrics/event_accuracy': event_accuracy,
                    'metrics/no_event_accuracy': no_event_accuracy,
                    'metrics/loss_per_step': loss_per_step,
                    'metrics/perplexity_per_step': perplexity_per_step,
                }
            elif mode == 'eval':
                # In eval mode every metric is a (value, update_op) pair;
                # the update ops are collected under 'eval_ops' below.
                vars_to_summarize, update_ops = contrib_metrics.aggregate_metric_map(
                    {
                        'loss':
                        tf.metrics.mean(softmax_cross_entropy),
                        'metrics/accuracy':
                        tf.metrics.accuracy(labels_flat, predictions_flat),
                        'metrics/per_class_accuracy':
                        tf.metrics.mean_per_class_accuracy(
                            labels_flat, predictions_flat, num_classes),
                        'metrics/event_accuracy':
                        tf.metrics.recall(event_positions,
                                          correct_predictions),
                        'metrics/no_event_accuracy':
                        tf.metrics.recall(no_event_positions,
                                          correct_predictions),
                        'metrics/loss_per_step':
                        tf.metrics.mean(tf.reduce_sum(softmax_cross_entropy) /
                                        num_steps,
                                        weights=num_steps),
                    })
                for updates_op in update_ops.values():
                    tf.add_to_collection('eval_ops', updates_op)

                # Perplexity is just exp(loss) and doesn't need its own update op.
                vars_to_summarize['metrics/perplexity'] = tf.exp(
                    vars_to_summarize['loss'])
                vars_to_summarize['metrics/perplexity_per_step'] = tf.exp(
                    vars_to_summarize['metrics/loss_per_step'])

            # Expose every metric both as a summary and as a collection
            # keyed by the metric name.
            for var_name, var_value in six.iteritems(vars_to_summarize):
                tf.summary.scalar(var_name, var_value)
                tf.add_to_collection(var_name, var_value)

        elif mode == 'generate':
            # The logits are divided by a scalar temperature fed at run time.
            temperature = tf.placeholder(tf.float32, [])
            if isinstance(num_classes, numbers.Number):
                softmax_flat = tf.nn.softmax(
                    tf.div(logits_flat, tf.fill([num_classes], temperature)))
                softmax = tf.reshape(softmax_flat,
                                     [hparams.batch_size, -1, num_classes])
            else:
                # One softmax per label group; `softmax` is then a list.
                logits_offsets = np.cumsum([0] + num_classes)
                softmax = []
                for i in range(len(num_classes)):
                    sm = tf.nn.softmax(
                        tf.div(
                            logits_flat[:,
                                        logits_offsets[i]:logits_offsets[i +
                                                                         1]],
                            tf.fill([num_classes[i]], temperature)))
                    sm = tf.reshape(sm,
                                    [hparams.batch_size, -1, num_classes[i]])
                    softmax.append(sm)

            tf.add_to_collection('inputs', inputs)
            tf.add_to_collection('temperature', temperature)
            tf.add_to_collection('softmax', softmax)
            # Flatten state tuples for metagraph compatibility.
            for state in tf_nest.flatten(initial_state):
                tf.add_to_collection('initial_state', state)
            for state in tf_nest.flatten(final_state):
                tf.add_to_collection('final_state', state)
示例#45
0
 def cond(i, base_state, high_states, prev_y, prev_emb, y_array):
     """Loop predicate: keep iterating while decoding should continue.

     True only when the step counter `i` is below `self.translation_maxlen`
     and at least one entry of `prev_y` is non-zero. The remaining arguments
     are accepted but not inspected.
     """
     below_max_length = tf.less(i, self.translation_maxlen)
     any_nonzero_prev = tf.reduce_any(tf.not_equal(prev_y, 0))
     return tf.logical_and(below_max_length, any_nonzero_prev)
示例#46
0
    def model_fn(features, targets, mode):
        """Creates the prediction, loss, and train ops.

        Besides its arguments, this function reads `hparams`, `model`,
        `FLAGS`, `initializer` and `learning_rate_decay` as free names from
        the surrounding scope.

        Args:
          features: A dictionary of tensors keyed by the feature name. It is
            modified in place: "targets" is added when `targets` is given.
          targets: A tensor representing the labels (targets), or None.
          mode: The execution mode, as defined in tf.contrib.learn.ModeKeys.

        Returns:
          A 3-tuple whose content depends on `mode`:
            INFER: (dict with "outputs" and optionally "scores", "inputs",
              "targets", None, None).
            EVAL: (run_info, total_loss, None), with the concatenated logits
              stored under run_info["predictions"].
            TRAIN: (run_info, total_loss, train_op).
        """
        # Deep-copy the model hparams between modes to eliminate
        # side-effects caused by abuse of the linked problem_hparams
        # objects which are used to share modality objects between
        # problems.  We do not want to share the modality objects between
        # modes, since the modality objects may decide to do something
        # mode-specific.  A better fix would be to stop abusing the
        # hparams in this way and instead use a separate dictionary to
        # share the modality objects between problems.  This dictionary
        # could be created once per mode and passed to the constructor of
        # t2t_model.
        my_hp = copy.deepcopy(hparams)
        if mode == tf.contrib.learn.ModeKeys.INFER:
            # When decoding, the incoming `features` is first converted to a
            # features dictionary by the matching helper.
            if FLAGS.decode_interactive:
                features = _interactive_input_tensor_to_features_dict(
                    features, my_hp)
            elif FLAGS.decode_from_file:
                features = _decode_input_tensor_to_features_dict(
                    features, my_hp)
        # A dictionary containing:
        #  - problem_choice: A Tensor containing an integer indicating which problem
        #                    was selected for this run.
        #  - predictions: A Tensor containing the model's output predictions.
        run_info = dict()
        run_info["problem_choice"] = features["problem_choice"]

        if targets is not None:
            features["targets"] = targets

        dp = devices.data_parallelism()

        # Add input statistics for incoming features.
        with tf.name_scope("input_stats"):
            for (k, v) in six.iteritems(features):
                if isinstance(v, tf.Tensor) and v.get_shape().ndims > 1:
                    tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // dp.n)
                    tf.summary.scalar("%s_length" % k, tf.shape(v)[1])
                    # Entries equal to 0 are counted as padding.
                    nonpadding = tf.to_float(tf.not_equal(v, 0))
                    tf.summary.scalar("%s_nonpadding_tokens" % k,
                                      tf.reduce_sum(nonpadding))
                    tf.summary.scalar("%s_nonpadding_fraction" % k,
                                      tf.reduce_mean(nonpadding))

        tf.get_variable_scope().set_initializer(initializer())
        train = mode == tf.contrib.learn.ModeKeys.TRAIN

        # Get multi-problem logits and loss based on features["problem_choice"].
        # Names of the per-loss moving-average variables created by
        # `nth_model`; read again below when the summaries are written.
        loss_variable_names = []

        def nth_model(n):
            """Build the model for the n-th problem, plus some added variables.

            In INFER mode returns whatever `model_class.infer` returns.
            Otherwise returns a flat list `[total_loss] + sharded_logits`.
            """
            model_class = registry.model(model)(
                my_hp, mode, my_hp.problems[n], n, dp,
                devices.ps_devices(all_workers=True))
            if mode == tf.contrib.learn.ModeKeys.INFER:
                return model_class.infer(
                    features,
                    beam_size=FLAGS.decode_beam_size,
                    top_beams=(FLAGS.decode_beam_size
                               if FLAGS.decode_return_beams else 1),
                    last_position_only=FLAGS.decode_use_last_position_only,
                    alpha=FLAGS.decode_alpha,
                    decode_length=FLAGS.decode_extra_length)
            # In distributed mode, we build graph for problem=0 and problem=worker_id.
            skipping_is_on = my_hp.problem_choice == "distributed" and train
            problem_worker_id = FLAGS.worker_id % len(my_hp.problems)
            skip_this_one = n != 0 and n % FLAGS.worker_replicas != problem_worker_id
            # On worker 0 also build graph for problems <= 1.
            # TODO(lukaszkaiser): why is this hack needed for variables init? Repair.
            skip_this_one = skip_this_one and (FLAGS.worker_id != 0 or n > 1)
            if (FLAGS.eval_run_autoregressive
                    and mode == tf.contrib.learn.ModeKeys.EVAL):
                sharded_logits, losses_dict = model_class.eval_autoregressive(
                    features)
            else:
                sharded_logits, losses_dict = model_class.model_fn(
                    features, skip=(skipping_is_on and skip_this_one))
            with tf.variable_scope("losses_avg"):
                total_loss, ops = 0.0, []
                for loss_key, loss_value in six.iteritems(losses_dict):
                    loss_name = "problem_%d/%s_loss" % (n, loss_key)
                    loss_moving_avg = tf.get_variable(loss_name,
                                                      initializer=100.0,
                                                      trainable=False)
                    loss_variable_names.append(loss_name)
                    # Exponential moving average: 0.9 * old + 0.1 * new.
                    ops.append(
                        loss_moving_avg.assign(loss_moving_avg * 0.9 +
                                               loss_value * 0.1))
                    total_loss += loss_value
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    # Total loss was already constructed on input.
                    loss_moving_avg = tf.get_variable("problem_%d/total_loss" %
                                                      n)
                ops.append(
                    loss_moving_avg.assign(loss_moving_avg * 0.9 +
                                           total_loss * 0.1))
            with tf.variable_scope(
                    "train_stats"):  # Count steps for this problem.
                problem_steps = tf.get_variable("problem_%d_steps" % n,
                                                initializer=0,
                                                trainable=False)
                ops.append(problem_steps.assign_add(1))
            with tf.control_dependencies(ops):  # Make sure the ops run.
                # Ensure the loss is a scalar here.
                total_loss = tf.reshape(total_loss, [],
                                        name="total_loss_control_id")
            return [total_loss
                    ] + sharded_logits  # Need to flatten for cond later.

        # NOTE(review): presumably builds nth_model(n) for n in
        # [0, len(problems) - 1] and selects the one matching
        # features["problem_choice"] -- confirm against cond_on_index.
        result_list = input_fn_builder.cond_on_index(
            nth_model, features["problem_choice"], 0,
            len(my_hp.problems) - 1)

        if mode == tf.contrib.learn.ModeKeys.INFER:
            # Beam search in sequence model returns both decodes with the key
            # "outputs" and scores with the key "scores". If return list is a
            # dict, we expect that it will have keys "outputs", a tensor of
            # int32 and scores, a tensor of floats. This is useful if we want
            # to return scores from estimator.predict
            if not isinstance(result_list, dict):
                ret = {"outputs": result_list}, None, None
            else:
                ret = {
                    "outputs": result_list["outputs"],
                    "scores": result_list["scores"]
                }, None, None
            if "inputs" in features:
                ret[0]["inputs"] = features["inputs"]
            if "infer_targets" in features:
                ret[0]["targets"] = features["infer_targets"]
            return ret

        # Undo the flattening done at the end of `nth_model`.
        sharded_logits, total_loss = result_list[1:], result_list[0]
        if mode == tf.contrib.learn.ModeKeys.EVAL:
            logits = tf.concat(sharded_logits, 0)
            if FLAGS.eval_print:
                logits = tf.Print(logits, [features["inputs"], logits],
                                  "EVAL PRINT",
                                  summarize=10000)
            # For evaluation, return the logits layer as our predictions.
            run_info["predictions"] = logits
            # NOTE(review): this assignment is never read; the return below
            # passes a literal None.
            train_op = None
            return run_info, total_loss, None

        assert mode == tf.contrib.learn.ModeKeys.TRAIN

        # Some training statistics.
        with tf.name_scope("training_stats"):
            learning_rate = my_hp.learning_rate * learning_rate_decay()
            # Scale the learning rate down by sqrt(number of worker replicas).
            learning_rate /= math.sqrt(float(FLAGS.worker_replicas))
            tf.summary.scalar("learning_rate", learning_rate)
            global_step = tf.to_float(tf.contrib.framework.get_global_step())
            # NOTE(review): `xrange` needs Python 2 or an import such as
            # six.moves.xrange, which is not visible in this block.
            for n in xrange(len(my_hp.problems)):
                names_and_vars = []
                with tf.variable_scope("losses_avg", reuse=True):
                    total_loss_var = tf.get_variable("problem_%d/total_loss" %
                                                     n)
                    names_and_vars.append(("total_loss", total_loss_var))
                with tf.variable_scope("losses_avg", reuse=True):
                    for loss_name in loss_variable_names:
                        if loss_name.startswith("problem_%d/" % n):
                            loss_var = tf.get_variable(loss_name)
                            # Drop the "problem_<n>/" prefix for the summary tag.
                            loss_suffix = loss_name[loss_name.index("/") + 1:]
                            names_and_vars.append((loss_suffix, loss_var))
                for (loss_name, loss_var) in names_and_vars:
                    tf.summary.scalar("loss_avg_%d/%s" % (n, loss_name),
                                      loss_var)
                with tf.variable_scope("train_stats", reuse=True):
                    nth_steps = tf.get_variable("problem_%d_steps" % n,
                                                dtype=tf.int32)
                # Steps taken on problem n divided by (global step + 1).
                tf.summary.scalar("problem_%d_frequency" % n,
                                  tf.to_float(nth_steps) / (global_step + 1.0))

        # Log trainable weights and add decay.
        total_size, weight_decay_loss = 0, 0.0
        all_weights = {v.name: v for v in tf.trainable_variables()}
        for v_name in sorted(list(all_weights)):
            v = all_weights[v_name]
            v_size = int(np.prod(np.array(v.shape.as_list())))
            # v.name[:-2] drops the last two characters of the variable name
            # (presumably the ":0" output suffix).
            tf.logging.info("Weight    %s\tshape    %s\tsize    %d",
                            v.name[:-2].ljust(80),
                            str(v.shape).ljust(20), v_size)
            total_size += v_size
            if my_hp.weight_decay > 0.0 and len(v.shape.as_list()) > 1:
                # Add weight regularization if set and the weight is not a bias (dim>1).
                with tf.device(v._ref().device):  # pylint: disable=protected-access
                    v_loss = tf.nn.l2_loss(v) / v_size
                weight_decay_loss += v_loss
            is_body = len(v_name) > 5 and v_name[:5] == "body/"
            if my_hp.weight_noise > 0.0 and is_body:
                # Add weight noise if set in my_hp.
                with tf.device(v._ref().device):  # pylint: disable=protected-access
                    scale = learning_rate * 0.001
                    noise = tf.truncated_normal(
                        v.shape) * my_hp.weight_noise * scale
                    noise_op = v.assign_add(noise)
                # Tie the noise update to the loss so it runs whenever the
                # loss is evaluated.
                with tf.control_dependencies([noise_op]):
                    total_loss = tf.identity(total_loss)
        tf.logging.info("Total trainable variables size: %d", total_size)
        if my_hp.weight_decay > 0.0:
            total_loss += weight_decay_loss * my_hp.weight_decay
        total_loss = tf.identity(total_loss, name="total_loss")

        # Define the train_op for the TRAIN mode.
        opt = _ConditionalOptimizer(my_hp.optimizer, learning_rate, my_hp)
        tf.logging.info("Computing gradients for global model_fn.")
        opt_summaries = ["learning_rate", "loss"]
        # NOTE(review): this and `grad_noise_scale` below read the original
        # `hparams` rather than the deep-copied `my_hp` -- confirm intended.
        if hparams.summarize_grads:
            opt_summaries.extend(["gradients", "gradient_norm"])
        train_op = tf.contrib.layers.optimize_loss(
            name="training",
            loss=total_loss,
            global_step=tf.contrib.framework.get_global_step(),
            learning_rate=learning_rate,
            clip_gradients=my_hp.clip_grad_norm or None,
            gradient_noise_scale=hparams.grad_noise_scale or None,
            optimizer=opt,
            summaries=opt_summaries,
            colocate_gradients_with_ops=True)

        # Remove summaries that will fail to run because they are in conditionals.
        # TODO(cwhipkey): Test with this code removed, later in 2017.
        summaries = tf.get_collection_ref(tf.GraphKeys.SUMMARIES)
        # Walk the list backwards so deleting by index stays valid.
        for i in range(len(summaries) - 1, -1, -1):
            if summaries[i].name.startswith("cond_"):
                del summaries[i]

        tf.logging.info("Global model_fn finished.")
        return run_info, total_loss, train_op
示例#47
0
def encoding_graph_mt(encoder_output_src, features, mode, params):
    """Encode the `params.sc_num` "mt" sequences stored in `features`.

    The sequences `features["mt_<i>"]` are right-padded with zeros to a
    common length, stacked along the batch axis, embedded and run through
    `transformer_encoder` (scope "mt_encoder") with `encoder_output_src` as
    memory. The results are then regrouped so that, per example, all
    sequences lie side by side on the length axis.

    Args:
        encoder_output_src: source encoder output, passed to the encoder as
            `memory_src`.
        features: dict providing "source", "source_length" and, for each
            i in range(params.sc_num), "mt_<i>" and "mt_length_<i>".
        mode: any value other than "train" zeroes the dropout and
            label-smoothing fields of `params` (mutated in place).
        params: hyper-parameter object.

    Returns:
        A tuple `(mt_seq, encoder_output, enc_attn_bias, similarity)`:
        mt_seq is reshaped to [batch, n * max_len]; encoder_output is the
        last layer's slice of a tensor reshaped to
        [batch, num_encoder_layers, n * max_len, hidden_size];
        enc_attn_bias is the masking bias built from the regrouped mask;
        similarity is reshaped to
        [batch, num_encoder_layers, n * max_len, n * max_len] and zeroed
        between positions of the same sequence and at padded key positions.
    """
    if mode != "train":
        # Switch every regularisation knob off outside training.
        for knob in ("residual_dropout", "attention_dropout",
                     "relu_dropout", "label_smoothing"):
            setattr(params, knob, 0.0)

    num_seqs = params.sc_num
    batch = tf.shape(features["source"])[0]

    scope_dtype = tf.get_variable_scope().dtype
    depth = params.hidden_size
    # Masks fall back to float32 when the scope carries no dtype.
    mask_dtype = scope_dtype or tf.float32

    source_steps = tf.shape(features["source"])[1]
    src_mask = tf.sequence_mask(features["source_length"],
                                maxlen=source_steps,
                                dtype=mask_dtype)
    src_attn_bias = layers.attention.attention_bias(src_mask, "masking",
                                                    dtype=scope_dtype)

    # Longest "mt" sequence over all candidates.
    raw_seqs = [features["mt_%d" % k] for k in range(num_seqs)]
    longest = 0
    for seq in raw_seqs:
        longest = tf.maximum(longest, tf.shape(seq)[1])

    # Right-pad every sequence with zeros up to that length.
    padded_seqs = []
    for seq in raw_seqs:
        missing = longest - tf.shape(seq)[1]
        filler = tf.zeros([batch, missing], dtype=tf.int32)
        padded_seqs.append(tf.concat([seq, filler], axis=1))

    seq_lengths = [features["mt_length_%d" % k] for k in range(num_seqs)]

    # Stack along the batch axis: num_seqs blocks of `batch` rows each.
    mt_seq = tf.concat(padded_seqs, axis=0)
    mt_len = tf.concat(seq_lengths, axis=0)

    mt_mask = tf.sequence_mask(mt_len, maxlen=longest, dtype=mask_dtype)

    vocab_size = len(params.vocabulary["target"])
    embed_shape = [vocab_size, depth]
    embed_init = tf.random_normal_initializer(0.0, params.hidden_size ** -0.5)

    if params.shared_source_target_embedding:
        # Reuse the existing "weights" variable of the current scope.
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            embedding = tf.get_variable("weights", embed_shape,
                                        initializer=embed_init)
    else:
        embedding = tf.get_variable("target_embedding", embed_shape,
                                    initializer=embed_init)

    mt_bias = tf.get_variable("mt_bias", [depth])

    embedded = tf.gather(embedding, mt_seq)
    if params.multiply_embedding_mode == "sqrt_depth":
        embedded = embedded * (depth ** 0.5)
    # Zero the embeddings of padded positions before adding the bias.
    embedded = embedded * tf.expand_dims(mt_mask, -1)

    encoder_input = tf.nn.bias_add(embedded, mt_bias)
    stacked_attn_bias = layers.attention.attention_bias(mt_mask, "masking",
                                                        dtype=scope_dtype)
    if params.position_info_type == 'absolute':
        encoder_input = layers.attention.add_timing_signal(encoder_input)

    if params.residual_dropout:
        encoder_input = tf.nn.dropout(encoder_input,
                                      1.0 - params.residual_dropout)

    per_layer = transformer_encoder(encoder_input, stacked_attn_bias, params,
                                    scope="mt_encoder", get_all_layer=True,
                                    memory_src=encoder_output_src,
                                    mem_bias_src=src_attn_bias)
    # Insert a layer axis at position 1.
    layer_stack = tf.stack(per_layer, axis=1)

    def _regroup(stacked):
        """[num_seqs * batch, max_len] -> [batch, num_seqs * max_len]."""
        by_seq = tf.reshape(stacked, [num_seqs, batch, longest])
        return tf.reshape(tf.transpose(by_seq, [1, 0, 2]),
                          [batch, num_seqs * longest])

    mt_seq = _regroup(mt_seq)

    layer_stack = tf.reshape(
        layer_stack,
        [num_seqs, batch, params.num_encoder_layers, longest, depth])
    layer_stack = tf.transpose(layer_stack, [1, 2, 0, 3, 4])
    layer_stack = tf.reshape(
        layer_stack,
        [batch, params.num_encoder_layers, num_seqs * longest, depth])
    # Output of the last encoder layer.
    encoder_output = layer_stack[:, -1, :, :]

    mt_mask = _regroup(mt_mask)
    enc_attn_bias = layers.attention.attention_bias(mt_mask, "masking",
                                                    dtype=scope_dtype)

    # Scaled dot-product scores between all positions, for every layer.
    scores = tf.matmul(layer_stack, layer_stack, transpose_b=True)
    scores = scores * (depth ** -0.5)
    scores = scores + enc_attn_bias

    # Normalise separately inside each sequence's block of key positions.
    scores = tf.reshape(
        scores,
        [batch, params.num_encoder_layers, num_seqs * longest, num_seqs,
         longest])
    scores = tf.nn.softmax(scores, axis=-1)
    similarity = tf.reshape(
        scores,
        [batch, params.num_encoder_layers, num_seqs * longest,
         num_seqs * longest])

    # Column vector holding, for each position, the index of its sequence.
    tag_blocks = []
    for k in range(num_seqs):
        tag_blocks.append(tf.ones([longest, 1]) * k)
    tags = tf.concat(tag_blocks, axis=0)

    # 1.0 where query and key come from different sequences, else 0.0 ...
    cross_mask = tags - tf.transpose(tags)
    cross_mask = tf.cast(tf.not_equal(cross_mask, 0), tf.float32)
    cross_mask = tf.expand_dims(cross_mask, axis=0)
    # ... and additionally 0.0 wherever the key position is padding.
    cross_mask = cross_mask * tf.expand_dims(mt_mask, axis=1)
    cross_mask = tf.expand_dims(cross_mask, axis=1)

    similarity = similarity * cross_mask

    return mt_seq, encoder_output, enc_attn_bias, similarity
示例#48
0
        tf.gather(logit,
                  tf.where(tf.equal(0, rem))[:, 0]) -
        tf.gather(logit,
                  tf.where(tf.equal(1, rem))[:, 0])) -
                          tf.gather(logit,
                                    tf.where(tf.equal(2, rem))[:, 0]))**2)

    #reg = tf.reduce_mean(tf.abs(tf.abs(tf.gather(logit, tf.where(tf.equal(0, rem))[:, 0]) -
    #tf.gather(logit, tf.where(tf.equal(1, rem))[:, 0])) -
    #tf.gather(logit, tf.where(tf.equal(2, rem))[:, 0])))

    loss = tf.losses.sigmoid_cross_entropy([[1]], logit)
    triplet_loss = tf.losses.sigmoid_cross_entropy(
        [[1]], tf.gather(
            logit,
            tf.where(tf.not_equal(
                2, rem))[:,
                         0]))  #* 1/i_f #+ tf.losses.get_regularization_loss()

# Scalar learning rate supplied through a placeholder; one Adam optimizer is
# shared by all training ops below.
learning_rate = tf.placeholder(tf.float32, [], name="learning_rate")
trainer = tf.train.AdamOptimizer(learning_rate)

# Objective 1: `loss` alone.
grads_and_vars = trainer.compute_gradients(loss)
train_op = trainer.apply_gradients(grads_and_vars)

# Objective 2: `loss` minus `lambda_ * i_f`.
grads_and_vars_if = trainer.compute_gradients(loss - lambda_ * i_f)
train_op_if = trainer.apply_gradients(grads_and_vars_if)

# Objective 3: `loss` plus `lambda_ * (1 - i_f)**2`.
# The op must apply the gradients of this objective; applying
# `grads_and_vars_if` here would make it a duplicate of `train_op_if`.
grads_and_vars_if_sq = trainer.compute_gradients(loss + lambda_ * (1 - i_f)**2)
train_op_if_sq = trainer.apply_gradients(grads_and_vars_if_sq)

grads_and_vars_triplets = trainer.compute_gradients(triplet_loss +
示例#49
0
def predict_all_labels(embedding, num_clusters, kmeans_iterations,
                       prototype_features, prototype_semantic_labels,
                       prototype_instance_labels, k_in_nearest_neighbors,
                       panoptic_label_divisor, class_has_instances_list):
    """Predict panoptic, semantic and instance labels from an embedding.

    The embedding is concatenated with location features, normalized and
    clustered with k-means. One prototype per cluster is then computed from
    the raw embedding and labelled by `predict_semantic_instance_labels`
    against the trained prototypes.

    Args:
      embedding: A 4-D float tensor with shape
        `[batch, height, width, embedding_dim]`. Height and width are read
        from the static shape.
      num_clusters: A list of 2 integers for number of clusters in y and x
        axes.
      kmeans_iterations: Number of iterations for the k-means clustering.
      prototype_features: A 2-D float tensor for trained prototype features
        with shape `[num_prototypes, embedding_dim]`.
      prototype_semantic_labels: A 1-D integer tensor for trained prototype
        semantic labels with length `[num_prototypes]`.
      prototype_instance_labels: A 1-D integer tensor for trained prototype
        instance labels with length `[num_prototypes]`.
      k_in_nearest_neighbors: The number of nearest neighbors to search,
        or k in k-nearest neighbors.
      panoptic_label_divisor: An integer constant to separate semantic and
        instance labels from panoptic labels.
      class_has_instances_list: A list of thing classes, which have
        instances.

    Returns:
      A 4-tuple `(panoptic_predictions, semantic_predictions,
      instance_predictions, cluster_labels)`:
        panoptic_predictions: Integer tensor computed as
          `semantic * panoptic_label_divisor + instance`.
        semantic_predictions: Integer tensor of semantic predictions.
        instance_predictions: Integer tensor of instance predictions, set
          to zero wherever the semantic class is not listed in
          `class_has_instances_list`.
        cluster_labels: The k-means cluster assignment returned by
          `common_utils.kmeans`.
      NOTE(review): earlier documentation called the predictions 1-D, but
      `tf.expand_dims(semantic_predictions, 3)` below needs at least rank
      3 -- confirm the actual shape against
      `predict_semantic_instance_labels`.
    """
    # Append location features to the embedding before clustering.
    static_shape = embedding.get_shape().as_list()
    spatial_size = [static_shape[1], static_shape[2]]
    location_features = tf.expand_dims(
        common_utils.generate_location_features(spatial_size, 'float'), 0)
    embedding_with_location = common_utils.normalize_embedding(
        tf.concat([embedding, location_features], 3))

    # K-means on the location-augmented embedding; prototypes are computed
    # from the original embedding.
    cluster_labels = common_utils.kmeans(
        embedding_with_location, num_clusters, kmeans_iterations)
    test_prototypes = common_utils.calculate_prototypes_from_labels(
        embedding, cluster_labels)

    semantic_predictions, instance_predictions = (
        predict_semantic_instance_labels(
            cluster_labels, test_prototypes, prototype_features,
            prototype_semantic_labels, prototype_instance_labels,
            k_in_nearest_neighbors))

    # A position whose semantic class matches none of the thing classes
    # gets its instance label reset to zero.
    thing_classes = tf.reshape(class_has_instances_list, [1, 1, 1, -1])
    matches_no_thing_class = tf.reduce_all(
        tf.not_equal(tf.expand_dims(semantic_predictions, 3), thing_classes),
        axis=3)
    instance_predictions = tf.where(
        matches_no_thing_class,
        tf.zeros_like(instance_predictions),
        instance_predictions)

    # Pack semantic and instance ids into a single panoptic id.
    panoptic_predictions = (
        semantic_predictions * panoptic_label_divisor + instance_predictions)

    return (panoptic_predictions, semantic_predictions, instance_predictions,
            cluster_labels)
示例#50
0
    def test_top_k_top_p_filtering(self):
        """Check which logits survive `tf_top_k_top_p_filtering`.

        Two rows of 30 logits are filtered with `top_k=10`, `top_p=0.6` and
        `min_tokens_to_keep=4`. The test asserts that the entries not equal
        to `-inf` afterwards are exactly the five highest values of each row,
        both by position and by value.
        """
        logits = tf.convert_to_tensor(
            [
                [
                    8.2220991,  # 3rd highest value; idx. 0
                    -0.5620044,
                    5.23229752,
                    4.0386393,
                    -6.8798378,
                    -0.54785802,
                    -3.2012153,
                    2.92777176,
                    1.88171953,
                    7.35341276,  # 5th highest value; idx. 9
                    8.43207833,  # 2nd highest value; idx. 10
                    -9.85711836,
                    -5.96209236,
                    -1.13039161,
                    -7.1115294,
                    -0.8369633,
                    -5.3186408,
                    7.06427407,
                    0.81369344,
                    -0.82023817,
                    -5.9179796,
                    0.58813443,
                    -6.99778438,
                    4.71551189,
                    -0.18771637,
                    7.44020759,  # 4th highest value; idx. 25
                    9.38450987,  # 1st highest value; idx. 26
                    2.12662941,
                    -9.32562038,
                    2.35652522,
                ],  # cumulative prob of 5 highest values <= 0.6
                [
                    0.58425518,
                    4.53139238,
                    -5.57510464,
                    -6.28030699,
                    -7.19529503,
                    -4.02122551,
                    1.39337037,
                    -6.06707057,
                    1.59480517,
                    -9.643119,
                    0.03907799,
                    0.67231762,
                    -8.88206726,
                    6.27115922,  # 4th highest value; idx. 13
                    2.28520723,
                    4.82767506,
                    4.30421368,
                    8.8275313,  # 2nd highest value; idx. 17
                    5.44029958,  # 5th highest value; idx. 18
                    -4.4735794,
                    7.38579536,  # 3rd highest value; idx. 20
                    -2.91051663,
                    2.61946077,
                    -2.5674762,
                    -9.48959302,
                    -4.02922645,
                    -1.35416918,
                    9.67702323,  # 1st highest value; idx. 27
                    -5.89478553,
                    1.85370467,
                ],  # cumulative prob of 5 highest values <= 0.6
            ],
            dtype=tf.float32,
        )

        # (row, column) positions expected to survive, in row-major order.
        non_inf_expected_idx = tf.convert_to_tensor(
            [[0, 0], [0, 9], [0, 10], [0, 25], [0, 26], [1, 13], [1, 17], [1, 18], [1, 20], [1, 27]], dtype=tf.int32,
        )  # expected non filtered idx as noted above

        # Surviving values listed in the same row-major order as the indices.
        non_inf_expected_output = tf.convert_to_tensor(
            [8.222099, 7.3534126, 8.432078, 7.4402075, 9.38451, 6.271159, 8.827531, 5.4402995, 7.3857956, 9.677023],
            dtype=tf.float32,
        )  # expected non filtered values as noted above

        output = tf_top_k_top_p_filtering(logits, top_k=10, top_p=0.6, min_tokens_to_keep=4)

        # Entries still different from -inf, as values and as positions.
        non_inf_output = output[output != -float("inf")]
        non_inf_idx = tf.cast(
            tf.where(tf.not_equal(output, tf.constant(-float("inf"), dtype=tf.float32))), dtype=tf.int32,
        )

        # rtol is far below float32 precision, so this is effectively an
        # exact comparison of the surviving values.
        tf.debugging.assert_near(non_inf_output, non_inf_expected_output, rtol=1e-12)
        tf.debugging.assert_equal(non_inf_idx, non_inf_expected_idx)
def build_simple_vte_model_relu_hi(premise_input,
                                   hypothesis_input,
                                   img_features_input,
                                   dropout_input,
                                   num_tokens,
                                   num_labels,
                                   embeddings,
                                   embeddings_size,
                                   train_embeddings,
                                   rnn_hidden_size,
                                   multimodal_fusion_hidden_size,
                                   classification_hidden_size):
    """Build the hypothesis + image classifier graph and return its logits.

    The hypothesis is embedded and encoded by an LSTM. The LSTM's final `h`
    state and the L2-normalized image features are each projected to
    `multimodal_fusion_hidden_size` units and fused by element-wise
    multiplication. Three hidden layers of `classification_hidden_size`
    units and a linear output layer follow. Every hidden layer uses the
    default activation of `tf.contrib.layers.fully_connected` followed by
    dropout.

    Args:
        premise_input: Integer token ids of the premise. Only its length is
            computed here; the premise is not fed to the network.
        hypothesis_input: Integer token ids of the hypothesis; id 0 is
            treated as padding when computing sequence lengths.
        img_features_input: Image features, L2-normalized along axis 1.
        dropout_input: Keep probability used for the LSTM wrapper and for
            every dropout layer.
        num_tokens: Number of rows of the embedding matrix.
        num_labels: Number of output units.
        embeddings: Pre-trained embeddings passed to
            `glove_embeddings_initializer`, or None for random
            initialization.
        embeddings_size: Number of columns of the embedding matrix.
        train_embeddings: Whether the embedding matrix is trainable.
        rnn_hidden_size: Number of LSTM units.
        multimodal_fusion_hidden_size: Width of the two fusion projections.
        classification_hidden_size: Width of the three hidden layers.

    Returns:
        The output of the final fully connected layer with no activation
        (`num_labels` units).
    """

    def count_nonzero_tokens(token_ids):
        # Number of ids different from 0 along axis 1.
        is_token = tf.not_equal(
            token_ids, tf.zeros_like(token_ids, dtype=tf.int32))
        return tf.cast(
            tf.reduce_sum(tf.cast(is_token, tf.int64), 1), tf.int32)

    def dense_with_dropout(layer_inputs, units):
        # Fully connected layer followed by dropout.
        return tf.nn.dropout(
            tf.contrib.layers.fully_connected(layer_inputs, units),
            keep_prob=dropout_input)

    # The premise length is not consumed by this variant; it is still
    # computed so the set of graph ops is unchanged.
    premise_length = count_nonzero_tokens(premise_input)
    hypothesis_length = count_nonzero_tokens(hypothesis_input)

    # Embedding matrix: GloVe-initialized when embeddings are supplied,
    # otherwise drawn from a narrow normal distribution.
    use_pretrained = embeddings is not None
    if use_pretrained:
        embedding_initializer = glove_embeddings_initializer(embeddings)
    else:
        embedding_initializer = tf.random_normal_initializer(stddev=0.05)
    embedding_matrix = tf.get_variable(
        "embedding_matrix",
        shape=(num_tokens, embeddings_size),
        initializer=embedding_initializer,
        trainable=train_embeddings)
    if use_pretrained:
        print("Loaded GloVe embeddings!")

    # Encode the hypothesis; only the final LSTM state is used downstream.
    hypothesis_embeddings = tf.nn.embedding_lookup(
        embedding_matrix, hypothesis_input)
    lstm_cell = DropoutWrapper(
        tf.nn.rnn_cell.LSTMCell(rnn_hidden_size),
        input_keep_prob=dropout_input,
        output_keep_prob=dropout_input)
    _, hypothesis_final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=hypothesis_embeddings,
        sequence_length=hypothesis_length,
        dtype=tf.float32)

    # Project both modalities to a common width and fuse them by
    # element-wise multiplication.
    normalized_img_features = tf.nn.l2_normalize(img_features_input, dim=1)
    img_hidden_layer = dense_with_dropout(
        normalized_img_features, multimodal_fusion_hidden_size)
    hypothesis_hidden_layer = dense_with_dropout(
        hypothesis_final_states.h, multimodal_fusion_hidden_size)
    hidden = tf.multiply(hypothesis_hidden_layer, img_hidden_layer)

    # Three identical hidden layers, then a linear output layer.
    for _ in range(3):
        hidden = dense_with_dropout(hidden, classification_hidden_size)

    return tf.contrib.layers.fully_connected(
        hidden, num_labels, activation_fn=None)
示例#52
0
    def train_batch(self, source_charseq_ids, source_charseqs, target_charseq_ids, target_charseqs):
        """Run one teacher-forced training step and return the predictions.

        The source character sequences are embedded and encoded, the encoder
        outputs and the EOW-terminated targets are gathered per batch
        position (keeping only positions whose source id is nonzero), and a
        training decoder is unrolled over the gold targets. The loss is
        masked to nonzero targets, gradients are applied to all model
        variables, and the training metrics are written as summaries.

        Returns:
            `tf.math.argmax` of the decoder output over axis 2.
        """
        print("Train batch called")
        # From here on only the targets with EOW appended are used.
        target_charseqs = self._append_eow(target_charseqs)
        with tf.GradientTape() as tape:
            # Embed and encode the unique source character sequences.
            source_embedded = self._model.source_embeddings(source_charseqs)
            encoded_sources = self._model.source_rnn(source_embedded)

            # Copy the encodings and targets to their batch places, then
            # flatten, dropping every position whose source id is 0.
            keep_positions = tf.not_equal(source_charseq_ids, 0)
            source_states = tf.boolean_mask(
                tf.gather(encoded_sources, source_charseq_ids), keep_positions)
            targets = tf.boolean_mask(
                tf.gather(target_charseqs, target_charseq_ids), keep_positions)

            class DecoderTraining(decoder.BaseDecoder):
                @property
                def batch_size(self):
                    # Dynamic size of the first axis of the source states.
                    return tf.shape(self._source_states)[0]

                @property
                def output_size(self):
                    # Reported as the dynamic size of the targets' second axis.
                    return tf.shape(targets)[1]

                @property
                def output_dtype(self):
                    # Type of the generated logits.
                    return tf.float32

                def initialize(self, layer_inputs, initial_state=None, **kwargs):
                    self._model, self._source_states, self._targets = layer_inputs
                    # Nothing is finished at the start; every sequence starts
                    # from the embedded BOW symbol, and the decoder state is
                    # initialised with the source states.
                    nothing_finished = tf.fill([self.batch_size], False)
                    bow_embedded = self._model.target_embedding(
                        tf.fill([self.batch_size], MorphoDataset.Factor.BOW))
                    return nothing_finished, bow_embedded, self._source_states

                def step(self, time, inputs, states):
                    # One RNN cell step followed by the output projection.
                    cell_outputs, [next_states] = self._model.target_rnn_cell(
                        inputs=inputs, states=[states])
                    step_logits = self._model.target_output_layer(cell_outputs)
                    # Teacher forcing: the next input is the gold character
                    # at this time step; a sequence finishes on gold EOW.
                    gold_chars = self._targets[:, time]
                    next_inputs = self._model.target_embedding(gold_chars)
                    finished = tf.equal(gold_chars, MorphoDataset.Factor.EOW)
                    return step_logits, next_states, next_inputs, finished

            output_layer, _, _ = DecoderTraining()([self._model, source_states, targets])
            # Only nonzero targets contribute to the loss.
            target_mask = tf.not_equal(targets, 0)
            loss = self._loss(targets, output_layer, target_mask)

        model_variables = self._model.variables
        gradients = tape.gradient(loss, model_variables)
        self._optimizer.apply_gradients(zip(gradients, model_variables))

        tf.summary.experimental.set_step(self._optimizer.iterations)
        with self._writer.as_default():
            for name, metric in self._metrics_training.items():
                metric.reset_states()
                if name == "loss":
                    metric(loss)
                else:
                    metric(targets, output_layer, target_mask)
                tf.summary.scalar("train/{}".format(name), metric.result())

        return tf.math.argmax(output_layer, axis=2)
示例#53
0
    def symbols_to_logits_fn(ids, i, cache):
      """Compute next-token logits for the sequences decoded so far.

      Args:
        ids: Current decoded sequences. int tensor with shape [batch_size *
          beam_size, i + 1].
        i: Loop index.
        cache: dictionary of values storing the encoder output, encoder-decoder
          attention bias, and previous decoder attention values.

      Returns:
        Tuple of
          (logits with shape [batch_size * beam_size, vocab_size],
           updated cache values)
      """

      def bias_to_mask(bias):
        # Negative bias entries become 0, every other entry becomes 1.
        return tf.where(bias < 0, tf.zeros_like(bias), tf.ones_like(bias))

      # Only the most recently generated id is fed to the decoder.
      last_ids = ids[:, -1:]

      # Embed it and zero out the embedding wherever the id is 0.
      decoder_input = self.embedding_lookup(last_ids)
      nonzero_id_mask = tf.cast(
          tf.not_equal(last_ids, 0), self.embedding_lookup.embeddings.dtype)
      decoder_input = decoder_input * tf.expand_dims(nonzero_id_mask, -1)

      # Add the timing signal for step i and select the matching row of the
      # decoder self-attention bias. Padded decoding uses tf.slice with
      # statically known sizes; otherwise plain slicing is used.
      if self._padded_decode:
        signal_width = timing_signal.shape.as_list()[1]
        decoder_input = decoder_input + tf.slice(
            timing_signal, [i, 0], [1, signal_width])

        bias_dims = decoder_self_attention_bias.shape.as_list()
        step_self_bias = tf.slice(
            decoder_self_attention_bias, [0, 0, i, 0],
            [bias_dims[0], bias_dims[1], 1, bias_dims[3]])
      else:
        decoder_input = decoder_input + timing_signal[i:i + 1]
        step_self_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1]

      input_dims = tf_utils.get_shape_list(decoder_input, expected_rank=3)
      batch_size = input_dims[0]
      decoder_length = input_dims[1]

      # Encoder-decoder mask: drop axis 1 and repeat over decoder positions.
      attention_mask = tf.tile(
          tf.squeeze(
              bias_to_mask(cache.get("encoder_decoder_attention_bias")),
              axis=[1]),
          [1, decoder_length, 1])

      # Self-attention mask: drop axis 1 and repeat over the batch.
      self_attention_mask = tf.tile(
          tf.squeeze(bias_to_mask(step_self_bias), axis=[1]),
          [batch_size, 1, 1])

      # NOTE(review): the self-attention mask is passed as `memory_mask` and
      # the encoder-decoder mask as `target_mask`. Presumably this matches
      # how `decoder_layer` names its arguments -- confirm before changing.
      decoder_outputs = self.decoder_layer(
          decoder_input,
          cache.get("encoder_outputs"),
          memory_mask=self_attention_mask,
          target_mask=attention_mask,
          cache=cache,
          decode_loop_step=i if self._padded_decode else None)

      # Project onto the embedding table and drop the length-1 axis.
      logits = tf.squeeze(
          embedding_linear(self.embedding_lookup.embeddings, decoder_outputs),
          axis=[1])
      return logits, cache
示例#54
0
    def model_fn(self, features: Dict[str, tf.Tensor],
                 labels: Dict[str, tf.Tensor], mode: tf.estimator.ModeKeys,
                 params: Dict[str, Any]) -> tf.estimator.EstimatorSpec:
        """Model creation function for a GeneNet segmentation network.

        Builds the network from `self.gene_graph`, then returns the
        EstimatorSpec matching `mode`:

        * PREDICT: predictions only (`classes`, `probabilities`, `corner`).
        * TRAIN: weighted per-voxel cross-entropy on the final logits plus a
          `c_lr_2d`-weighted term on the second gene's logits, plus scaled
          regularization, minimized with Adam under exponential decay.
        * EVAL: unweighted summed cross-entropy with accuracy, mean IoU and
          adjusted Rand index metrics.

        Logits are channels-first (class axis 1) and are transposed to
        channels-last before the cross-entropy is computed.

        Args:
            features (Dict[str, tf.Tensor]): Dictionary of input Tensors.
                Reads `'input'`, plus `'corners'` in PREDICT mode and
                `'weight_floor'` in TRAIN mode.
            labels (Dict[str, tf.Tensor]): Dictionary of label Tensors.
                Reads `'label'`, plus `'weight'` in TRAIN mode. Not accessed
                in PREDICT mode.
            mode (tf.estimator.ModeKeys): Estimator mode.
            params (Dict[str, Any]): Additional model hyperparameters.
                Reads `'make_summaries'`, optional `'image_settings'`, and
                in TRAIN mode `'c_lr_2d'`, `'log_learning_rate'`,
                `'log_decay_steps'`, `'exponential_decay_rate'`,
                `'log_alpha1'`, `'log_alpha2'` and `'log_epsilon'`.

        Returns:
            (tf.estimator.EstimatorSpec): GeneNet network EstimatorSpec.
        """

        def channels_last_xentropy(channels_first_logits, suffix=''):
            """Per-voxel sparse softmax cross-entropy against the labels."""
            # Rank-4 logits use the 2D permutation; any other rank is
            # handled with the rank-5 (3D) permutation.
            if channels_first_logits.get_shape().ndims == 4:
                perm = [0, 2, 3, 1]
            else:
                perm = [0, 2, 3, 4, 1]
            return tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels['label'],
                logits=tf.transpose(channels_first_logits,
                                    perm,
                                    name=f'transpose{suffix}'),
                name=f'softmax_xentropy{suffix}')

        logger.debug(f'Creating a model_fn on device {self.device}')
        with tf.device(self.device):
            # Get batch size from input shape
            batch_size = tf.shape(features['input'])[0]

            # Build from the GeneGraph. `tensor_map` maps `self.gene_graph`
            # Genes, as well as the special strings 'input', 'classes',
            # and 'probabilities' to TensorFlow Tensors.
            with tf.variable_scope(self.name):
                tensor_map = self.gene_graph.build(features, mode)

            # Update the trainable parameter count
            self.n_trainable_params = int(
                np.sum([
                    np.prod(v.get_shape().as_list())
                    for v in tf.trainable_variables(self.name)
                ]))

            if params['make_summaries']:
                # If True, attach variable summaries to each ConvolutionGene
                for key in tensor_map:
                    if isinstance(key, ConvolutionGene):
                        variable_summary(tensor_map[key])

            # Get the output from the last gene in the gene graph's `genes`
            # OrderedDict
            output_gene = list(self.gene_graph.genes.values())[-1]
            n_classes = output_gene.n_classes
            # Logits are the output of the final PredictorGene in the GeneGraph
            logits = tensor_map[output_gene]

            logger.debug(f'Received logits with shape {logits.get_shape()}')
            if mode != tf.estimator.ModeKeys.PREDICT:
                logger.debug(f'Received labels with shape '
                             f'{labels["label"].get_shape()}')

            with tf.name_scope('classes'):
                classes = tf.argmax(input=logits, axis=1, name='classes')
                tensor_map['classes'] = classes

            with tf.name_scope('probabilities'):
                probabilities = tf.nn.softmax(logits,
                                              axis=1,
                                              name='probabilities')
                tensor_map['probabilities'] = probabilities

            # Both predictions (for PREDICT and EVAL modes)
            predictions = {'classes': classes, 'probabilities': probabilities}

            # Create summary ops for the tensor associated with each Gene or
            # str in params['image_settings'].
            image_settings: Sequence[ImageSettings] = []
            if 'image_settings' in params:
                image_settings = params['image_settings']
            for key, summary_params in image_settings:
                # Get the tensor associated with each key
                target_tensor = tensor_map[key]
                # Create an image summary
                image_summary(target_tensor, summary_params)

            # For a forward pass, no need to build optimization ops
            if mode == tf.estimator.ModeKeys.PREDICT:

                # Add the corner feature to predictions, for easy reconstruction
                # of large images from patches in PREDICT mode
                predictions['corner'] = features['corners']

                # Create an EstimatorSpec
                spec = tf.estimator.EstimatorSpec(mode=mode,
                                                  predictions=predictions)

                logger.debug('Created PREDICT EstimatorSpec')

                return spec

            # Calculate loss: per-voxel weighted cross-entropy
            with tf.name_scope('loss'):

                if mode == tf.estimator.ModeKeys.TRAIN:
                    # During training, use per-voxel cross-entropy weighting
                    # plus regularization terms

                    c_lr_2d = params['c_lr_2d']

                    # Get the output from 'predictor_2d' Gene
                    predictor_2d_gene = list(self.gene_graph.genes.values())[1]
                    logits_2d = tensor_map[predictor_2d_gene]

                    # Impose a weight floor. Zeroed areas are treated
                    # differently: they must not be included in the loss, so
                    # the floor is added only where the weight is nonzero and
                    # zero-weight voxels keep a weight of exactly zero.
                    # The weights do not depend on the logits, so they are
                    # computed once for both loss terms.
                    weight_floor = features['weight_floor'][0]
                    weight = labels['weight']
                    nonzero_weights = tf.cast(tf.not_equal(weight, 0),
                                              dtype=weight.dtype)
                    weights = tf.add(
                        weight, tf.multiply(nonzero_weights, weight_floor))

                    # Experiment-specific tweak: add a loss term from the 2d
                    # predictor as well
                    losses = []
                    loss_weights = [1, c_lr_2d]

                    for idx, logit in enumerate([logits, logits_2d]):
                        xentropy = channels_last_xentropy(logit, f'_{idx}')
                        weighted_xentropy = tf.multiply(
                            weights, xentropy, name=f'weighted_xentropy_{idx}')
                        # Sum voxel loss values
                        losses.append(loss_weights[idx] * tf.reduce_sum(
                            weighted_xentropy, name=f'sum_xentropy_{idx}'))

                    loss = tf.math.add_n(losses, name='total_xentropy')
                    # Add regularization terms, weighted to ignore zeroed-out
                    # areas
                    frac_nonzero = tf.reduce_mean(nonzero_weights)
                    loss += frac_nonzero * tf.losses.get_regularization_loss()

                else:
                    # For eval, use per-voxel cross entropy of the final
                    # logits only, summed across all voxels
                    xentropy = channels_last_xentropy(logits)
                    loss = tf.reduce_sum(xentropy, name='sum_xentropy')

            # Build training op
            if mode == tf.estimator.ModeKeys.TRAIN:
                # Get training hyperparameters; several are stored as base-10
                # logarithms and converted back here.
                learning_rate = 10**params['log_learning_rate']
                decay_steps = 10**params['log_decay_steps']
                exponential_decay_rate = params['exponential_decay_rate']
                beta1 = 1 - 10**params['log_alpha1']
                beta2 = 1 - 10**params['log_alpha2']
                epsilon = 10**params['log_epsilon']

                with tf.name_scope('train'):
                    lr = tf.train.exponential_decay(
                        learning_rate=learning_rate,
                        global_step=tf.train.get_global_step(),
                        decay_steps=decay_steps,
                        decay_rate=exponential_decay_rate,
                        staircase=True)
                    optimizer = tf.train.AdamOptimizer(learning_rate=lr,
                                                       beta1=beta1,
                                                       beta2=beta2,
                                                       epsilon=epsilon,
                                                       name='adam')
                    train_op = optimizer.minimize(
                        loss=loss, global_step=tf.train.get_global_step())

                    # Create an EstimatorSpec
                    spec = tf.estimator.EstimatorSpec(mode=mode,
                                                      loss=loss,
                                                      train_op=train_op)

                    logger.debug('Created TRAIN EstimatorSpec')

                    # Minimize the loss in TRAIN mode
                    return spec

            # Build evaluation op
            with tf.name_scope('eval'):

                # Add evaluation metrics. The adjusted Rand index metric is
                # fed fully flattened labels and predictions.
                # noinspection PyUnresolvedReferences
                flat_labels = tf.layers.flatten(labels['label'])
                flat_labels = \
                    tf.reshape(
                        tensor=flat_labels,
                        shape=[batch_size*flat_labels.get_shape()[1]])
                flat_predictions = tf.layers.flatten(predictions['classes'])
                flat_predictions = tf.reshape(
                    tensor=flat_predictions,
                    shape=[batch_size * flat_predictions.get_shape()[1]])

                eval_ops = {
                    'accuracy':
                    tf.metrics.accuracy(labels=labels['label'],
                                        predictions=predictions['classes'],
                                        name='accuracy'),
                    'mean_iou':
                    tf.metrics.mean_iou(labels=labels['label'],
                                        predictions=predictions['classes'],
                                        num_classes=n_classes),
                    'adj_rand_idx':
                    _adj_rand_idx_metric_op(flat_labels, flat_predictions)
                }

                # Create an EstimatorSpec
                spec = tf.estimator.EstimatorSpec(mode=mode,
                                                  loss=loss,
                                                  eval_metric_ops=eval_ops)

                logger.debug('Created EVAL EstimatorSpec')

                return spec
    def call(
        self,
        inputs,
        attention_mask=None,
        langs=None,
        token_type_ids=None,
        position_ids=None,
        lengths=None,
        cache=None,
        head_mask=None,
        inputs_embeds=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        training=False,
    ):
        # removed: src_enc=None, src_len=None
        if isinstance(inputs, (tuple, list)):
            input_ids = inputs[0]
            attention_mask = inputs[1] if len(inputs) > 1 else attention_mask
            langs = inputs[2] if len(inputs) > 2 else langs
            token_type_ids = inputs[3] if len(inputs) > 3 else token_type_ids
            position_ids = inputs[4] if len(inputs) > 4 else position_ids
            lengths = inputs[5] if len(inputs) > 5 else lengths
            cache = inputs[6] if len(inputs) > 6 else cache
            head_mask = inputs[7] if len(inputs) > 7 else head_mask
            inputs_embeds = inputs[8] if len(inputs) > 8 else inputs_embeds
            output_attentions = inputs[9] if len(
                inputs) > 9 else output_attentions
            output_hidden_states = inputs[10] if len(
                inputs) > 10 else output_hidden_states
            return_dict = inputs[11] if len(inputs) > 11 else return_dict
            assert len(inputs) <= 12, "Too many inputs."
        elif isinstance(inputs, (dict, BatchEncoding)):
            input_ids = inputs.get("input_ids")
            attention_mask = inputs.get("attention_mask", attention_mask)
            langs = inputs.get("langs", langs)
            token_type_ids = inputs.get("token_type_ids", token_type_ids)
            position_ids = inputs.get("position_ids", position_ids)
            lengths = inputs.get("lengths", lengths)
            cache = inputs.get("cache", cache)
            head_mask = inputs.get("head_mask", head_mask)
            inputs_embeds = inputs.get("inputs_embeds", inputs_embeds)
            output_attentions = inputs.get("output_attentions",
                                           output_attentions)
            output_hidden_states = inputs.get("output_hidden_states",
                                              output_hidden_states)
            return_dict = inputs.get("return_dict", return_dict)
            assert len(inputs) <= 12, "Too many inputs."
        else:
            input_ids = inputs

        output_attentions = output_attentions if output_attentions is not None else self.output_attentions
        output_hidden_states = output_hidden_states if output_hidden_states is not None else self.output_hidden_states
        return_dict = return_dict if return_dict is not None else self.return_dict

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time"
            )
        elif input_ids is not None:
            bs, slen = shape_list(input_ids)
        elif inputs_embeds is not None:
            bs, slen = shape_list(inputs_embeds)[:2]
        else:
            raise ValueError(
                "You have to specify either input_ids or inputs_embeds")

        if lengths is None:
            if input_ids is not None:
                lengths = tf.reduce_sum(tf.cast(tf.not_equal(
                    input_ids, self.pad_index),
                                                dtype=tf.int32),
                                        axis=1)
            else:
                lengths = tf.convert_to_tensor([slen] * bs, tf.int32)
        # mask = input_ids != self.pad_index

        # check inputs
        # assert shape_list(lengths)[0] == bs
        tf.debugging.assert_equal(
            shape_list(lengths)[0], bs
        ), f"Expected batch size {shape_list(lengths)[0]} and received batch size {bs} mismatched"
        # assert lengths.max().item() <= slen
        # input_ids = input_ids.transpose(0, 1)  # batch size as dimension 0
        # assert (src_enc is None) == (src_len is None)
        # if src_enc is not None:
        #     assert self.is_decoder
        #     assert src_enc.size(0) == bs

        # generate masks
        mask, attn_mask = get_masks(slen,
                                    lengths,
                                    self.causal,
                                    padding_mask=attention_mask)
        # if self.is_decoder and src_enc is not None:
        #     src_mask = torch.arange(src_len.max(), dtype=torch.long, device=lengths.device) < src_len[:, None]

        # position_ids
        if position_ids is None:
            position_ids = tf.expand_dims(tf.range(slen), axis=0)
        else:
            # assert shape_list(position_ids) == [bs, slen]  # (slen, bs)
            tf.debugging.assert_equal(
                shape_list(position_ids), [bs, slen]
            ), f"Position id shape {shape_list(position_ids)} and input shape {[bs, slen]} mismatched"
            # position_ids = position_ids.transpose(0, 1)

        # langs
        if langs is not None:
            # assert shape_list(langs) == [bs, slen]  # (slen, bs)
            tf.debugging.assert_equal(
                shape_list(langs), [bs, slen]
            ), f"Lang shape {shape_list(langs)} and input shape {[bs, slen]} mismatched"
            # langs = langs.transpose(0, 1)

        # Prepare head mask if needed
        # 1.0 in head_mask indicate we keep the head
        # attention_probs has shape bsz x n_heads x N x N
        # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
        # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x qlen x klen]
        if head_mask is not None:
            raise NotImplementedError
        else:
            head_mask = [None] * self.n_layers

        # do not recompute cached elements
        if cache is not None and input_ids is not None:
            _slen = slen - cache["slen"]
            input_ids = input_ids[:, -_slen:]
            position_ids = position_ids[:, -_slen:]
            if langs is not None:
                langs = langs[:, -_slen:]
            mask = mask[:, -_slen:]
            attn_mask = attn_mask[:, -_slen:]

        # embeddings
        if inputs_embeds is None:
            inputs_embeds = self.embeddings(input_ids)

        tensor = inputs_embeds + self.position_embeddings(position_ids)

        if langs is not None and self.use_lang_emb:
            tensor = tensor + self.lang_embeddings(langs)
        if token_type_ids is not None:
            tensor = tensor + self.embeddings(token_type_ids)

        tensor = self.layer_norm_emb(tensor)
        tensor = self.dropout(tensor, training=training)
        tensor = tensor * mask[..., tf.newaxis]

        # hidden_states and attentions cannot be None in graph mode.
        hidden_states = ()
        attentions = ()

        # transformer layers
        for i in range(self.n_layers):
            # LayerDrop
            dropout_probability = tf.random.uniform([1], 0, 1)

            if training and tf.less(dropout_probability, self.layerdrop):
                continue

            if output_hidden_states:
                hidden_states = hidden_states + (tensor, )

            # self attention
            if not self.pre_norm:
                attn_outputs = self.attentions[i](tensor,
                                                  attn_mask,
                                                  None,
                                                  cache,
                                                  head_mask[i],
                                                  output_attentions,
                                                  training=training)
                attn = attn_outputs[0]

                if output_attentions:
                    attentions = attentions + (attn_outputs[1], )

                attn = self.dropout(attn, training=training)
                tensor = tensor + attn
                tensor = self.layer_norm1[i](tensor)
            else:
                tensor_normalized = self.layer_norm1[i](tensor)
                attn_outputs = self.attentions[i](tensor_normalized,
                                                  attn_mask,
                                                  None,
                                                  cache,
                                                  head_mask[i],
                                                  output_attentions,
                                                  training=training)
                attn = attn_outputs[0]

                if output_attentions:
                    attentions = attentions + (attn_outputs[1], )

                attn = self.dropout(attn, training=training)
                tensor = tensor + attn

            # encoder attention (for decoder only)
            # if self.is_decoder and src_enc is not None:
            #     attn = self.encoder_attn[i](tensor, src_mask, kv=src_enc, cache=cache)
            #     attn = F.dropout(attn, p=self.dropout, training=self.training)
            #     tensor = tensor + attn
            #     tensor = self.layer_norm15[i](tensor)

            # FFN
            if not self.pre_norm:
                tensor = tensor + self.ffns[i](tensor)
                tensor = self.layer_norm2[i](tensor)
            else:
                tensor_normalized = self.layer_norm2[i](tensor)
                tensor = tensor + self.ffns[i](tensor_normalized)

            tensor = tensor * mask[..., tf.newaxis]

        # Add last hidden state
        if output_hidden_states:
            hidden_states = hidden_states + (tensor, )

        # update cache length
        if cache is not None:
            cache["slen"] += tensor.size(1)

        # move back sequence length to dimension 0
        # tensor = tensor.transpose(0, 1)

        # Set to None here if the output booleans are at False
        hidden_states = hidden_states if output_hidden_states else None
        attentions = attentions if output_attentions else None

        if not return_dict:
            return tuple(v for v in [tensor, hidden_states, attentions]
                         if v is not None)

        return TFBaseModelOutput(last_hidden_state=tensor,
                                 hidden_states=hidden_states,
                                 attentions=attentions)
示例#56
0
    def build_model(self):
        """Assemble the training graph and return the batch-averaged loss.

        The decoder is a GRU cell unrolled for ``self.T`` steps. At each step
        the attention layer is queried with the current state, the cell is fed
        the word embedding concatenated with the vector from ``self._get_vg``,
        and the masked sparse softmax cross-entropy against the caption
        shifted by one token is added to the running loss. When
        ``self.alpha_c > 0`` a penalty on the attention weights summed over
        time is added as well.

        Returns:
            The accumulated loss divided by the batch size, where the batch
            size is the first dimension of ``self.features``.
        """
        image_feats = self.features
        token_ids = self.captions
        num_examples = tf.shape(image_feats)[0]

        # Inputs are the first T tokens; targets are the caption shifted left
        # by one position.
        decoder_inputs = token_ids[:, :self.T]
        targets = token_ids[:, 1:]
        # 1.0 where the target differs from the null id, 0.0 elsewhere.
        target_weights = tf.to_float(tf.not_equal(targets, self._null))

        # Batch-normalize the feature vectors (always in 'train' mode here).
        image_feats = self._batch_norm(image_feats,
                                       mode='train',
                                       name='conv_features')
        state = self._get_initial_lstm(features=image_feats)
        embedded = self._word_embedding(inputs=decoder_inputs)
        projected_feats = self._project_features(features=image_feats)
        global_feat = self._get_vg(features=image_feats)

        total_loss = 0.0
        attention_steps = []
        cell = tf.nn.rnn_cell.GRUCell(num_units=self.H)

        for step in range(self.T):
            # Variables are created on the first step and reused afterwards.
            share_vars = step != 0

            context, attn = self._attention_layer(image_feats,
                                                  projected_feats,
                                                  state,
                                                  reuse=share_vars)
            attention_steps.append(attn[:, :])

            if self.selector:
                # The second return value of the selector is not used.
                context, _gate = self._selector(context,
                                                state,
                                                reuse=share_vars)

            # The scope is called 'lstm' although the cell is a GRU; the name
            # is kept because it is part of the variable names.
            with tf.variable_scope('lstm', reuse=share_vars):
                cell_input = tf.concat([embedded[:, step, :], global_feat], 1)
                _, state = cell(inputs=cell_input, state=state)

            step_embedding = embedded[:, step, :]
            logits = self._decode_lstm(step_embedding,
                                       state,
                                       context,
                                       dropout=self.dropout,
                                       reuse=share_vars)

            step_targets = targets[:, step]
            step_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=step_targets, logits=logits)
            step_weights = target_weights[:, step]
            total_loss += tf.reduce_sum(step_xent * step_weights)

        if self.alpha_c > 0:
            print("In Alpha")
            stacked = tf.transpose(tf.stack(attention_steps),
                                   (1, 0, 2))  # (N, T, L)
            attn_over_time = tf.reduce_sum(stacked, 1)  # (N, L)
            # NOTE(review): if T and L are ints and this runs under Python 2
            # without true division, T / L truncates -- confirm.
            deviation = self.T / self.L - attn_over_time
            total_loss += self.alpha_c * tf.reduce_sum(deviation**2)

        return total_loss / tf.to_float(num_examples)
示例#57
0
def extract_trained_prototypes(
        embedding,
        location_features,
        cluster_labels,
        num_clusters,
        kmeans_iterations,
        panoptic_labels,
        panoptic_label_divisor,
        ignore_label,
        evaluate_semantic_or_panoptic):
    """Extracts the trained prototypes in an image.

    Pixels whose semantic class (``panoptic_labels // panoptic_label_divisor``)
    equals ``ignore_label`` are dropped first. The remaining pixels are
    re-clustered with k-means on the normalized concatenation of embedding and
    location features, and prototypes are computed from the pixels picked by
    ``find_majority_label_index``.

    NOTE(review): this appears to process the pixels of a single image per
    call -- confirm against the caller.

    Args:
      embedding: A 2-D float tensor with shape `[pixels, embedding_dim]`.
      location_features: A 2-D float tensor for location features with shape
        `[pixels, 2]`.
      cluster_labels: A 1-D integer tensor for cluster labels for all pixels.
      num_clusters: An integer scalar for total number of clusters.
      kmeans_iterations: Number of iterations for the k-means clustering.
      panoptic_labels: A 1-D integer tensor for panoptic labels for all
        pixels.
      panoptic_label_divisor: An integer constant to separate semantic and
        instance labels from panoptic labels.
      ignore_label: The semantic label to ignore.
      evaluate_semantic_or_panoptic: Mode selector. It is compared against the
        string 'panoptic'; any other value takes the semantic path.

    Returns:
      prototype_features: Prototype features computed by
        `common_utils.calculate_prototypes_from_labels` from the selected
        pixels.
      prototype_semantic_labels: Semantic label per prototype. In panoptic
        mode this is the majority panoptic label integer-divided by
        `panoptic_label_divisor`; otherwise it is the majority label itself.
      prototype_instance_labels: In panoptic mode, the majority labels as
        returned by `find_majority_label_index` (computed over the index
        tensor from `tf.unique`); otherwise zeros of the same shape.
    """

    def _select_rows(values, indices):
        # Gathering with the index tensor leaves an extra axis at position 1,
        # which is squeezed away again.
        return tf.squeeze(tf.gather(values, indices), axis=1)

    # Collect pixels of valid semantic classes.
    keep = tf.where(
        tf.not_equal(panoptic_labels // panoptic_label_divisor, ignore_label))
    kept_panoptic = _select_rows(panoptic_labels, keep)
    kept_clusters = _select_rows(cluster_labels, keep)
    kept_embedding = _select_rows(embedding, keep)
    kept_locations = _select_rows(location_features, keep)

    # Generate cluster labels via kmeans clustering, then renumber them with
    # the index output of tf.unique.
    joint_embedding = common_utils.normalize_embedding(
        tf.concat([kept_embedding, kept_locations], 1))
    refined_clusters = common_utils.kmeans_with_initial_labels(
        joint_embedding, kept_clusters, num_clusters, kmeans_iterations)
    _, cluster_ids = tf.unique(refined_clusters)

    panoptic_mode = evaluate_semantic_or_panoptic == 'panoptic'

    # Per-pixel targets used for the majority vote: indices into the unique
    # panoptic labels in panoptic mode, semantic classes otherwise.
    if panoptic_mode:
        panoptic_values, pixel_targets = tf.unique(kept_panoptic)
    else:
        pixel_targets = kept_panoptic // panoptic_label_divisor
    chosen_pixels, majority = find_majority_label_index(
        pixel_targets, cluster_ids)

    prototype_clusters = _select_rows(cluster_ids, chosen_pixels)
    prototype_embedding = _select_rows(kept_embedding, chosen_pixels)

    # Calculate the majority semantic and instance label for each prototype.
    if panoptic_mode:
        # Map the majority indices back to panoptic label values.
        majority_panoptic = tf.gather(panoptic_values, majority)
        semantic_out = majority_panoptic // panoptic_label_divisor
        instance_out = majority
    else:
        semantic_out = majority
        instance_out = tf.zeros_like(majority)

    # Calculate the prototype features.
    features_out = common_utils.calculate_prototypes_from_labels(
        prototype_embedding, prototype_clusters)

    return features_out, semantic_out, instance_out
示例#58
0
    def build(self, features):
        src_ids = features['src_ids']
        trg_ids = None
        self.batch_size = tf.shape(src_ids)[0]
        if self.is_training:
            trg_ids = features['trg_ids']

        with tf.variable_scope('src_encoder'):
            self.shared_tensors['src_ids'] = src_ids
            src_mask = tf.cast(tf.equal(src_ids, self.data.vocab.pad_id),
                               tf.float32)
            src_bias = common_attention.attention_bias_ignore_padding(src_mask)
            self.shared_tensors['src_bias'] = src_bias
            self.shared_tensors['src_mask'] = src_mask

            src_embs = self._embedding_fn(src_ids)
            src_embs = common_attention.add_timing_signal_1d(src_embs)

            if 'syntax_gen' in self.flags.control_mode:
                template_comp_ids = features['template_comp_ids']

                # print_op = tf.print("template_comp_ids output:", template_comp_ids)
                # with tf.control_dependencies([print_op]):
                #     template_comp_ids = tf.identity(template_comp_ids)

                template_embs = self._embedding_fn(
                    template_comp_ids,
                    self.shared_tensors['syntax_embedding_table'])
                template_scale = tf.get_variable(
                    'template_scale',
                    shape=[1, self.flags.syntax_level, 1, 1],
                    trainable=True,
                    dtype=tf.float32)
                template_embs *= template_scale
                template_embs = tf.reduce_mean(template_embs, axis=1)
                src_embs += template_embs

            if 'gpt2' in self.flags.model_mode:
                src_outputs = model.gpt2_encoder(self.hparams,
                                                 src_embs,
                                                 encoder_bias=src_bias)
            elif 't2t' in self.flags.model_mode:
                src_outputs = transformer.transformer_encoder(
                    src_embs, src_bias, self.hparams)
            elif 'bert' in self.flags.model_mode:
                bert_model = BertModel(
                    config=BertConfig.from_json_file(
                        self.flags.bert_config_file),
                    is_training=self.is_training,
                    input_ids=src_ids,
                    input_mask=1.0 - src_mask,
                    embeddings=self.shared_tensors['word_embedding_table'])
                src_outputs = bert_model.get_sequence_output()
            else:
                raise ValueError('model_mode not known.')

            self.shared_tensors['src_outputs'] = src_outputs

            if self.flags.control_mode:
                control_ids = features['control_ids']
                control_mask = tf.cast(
                    tf.equal(control_ids, self.data.vocab.pad_id), tf.float32)
                control_bias = common_attention.attention_bias_ignore_padding(
                    control_mask)
                control_embs = self._embedding_fn(control_ids)

                if 'gpt2' in self.flags.model_mode:
                    control_outputs = model.gpt2_encoder(
                        self.hparams, control_embs, encoder_bias=control_bias)
                elif 't2t' in self.flags.model_mode or 'bert' in self.flags.model_mode:
                    control_outputs = transformer.transformer_encoder(
                        control_embs,
                        control_bias,
                        self.hparams,
                        name='control_encoder')
                else:
                    raise ValueError('model_mode not known.')
                self.shared_tensors['control_vec'] = features['control_vec']
                self.shared_tensors['control_outputs'] = control_outputs
                self.shared_tensors['control_bias'] = control_bias
                self.shared_tensors['extra_vec'] = features['extra_vec']

            # if 'syntax_gen' in self.flags.control_mode:
            #     template_comp_ids = features['template_comp_ids']
            #     template_comp_outputs, template_comp_bias = self.encode_syntax_template(template_comp_ids)
            #     self.shared_tensors['template_comp_outputs'] = template_comp_outputs
            #     self.shared_tensors['template_comp_bias'] = template_comp_bias

        batch_go = tf.tile(
            tf.expand_dims(self._embedding_fn(self.data.vocab.go_id), axis=0),
            [self.batch_size, 1])
        batch_go_id = tf.tile(
            tf.constant(self.data.vocab.go_id, tf.int32, shape=[
                1,
            ]), [self.batch_size])
        self.shared_tensors['batch_go'] = batch_go
        self.shared_tensors['batch_go_id'] = batch_go_id

        batch_syntax_go = tf.tile(
            tf.expand_dims(self._embedding_fn(self.data.syntax_vocab.go_id),
                           axis=0), [self.batch_size, 1])
        batch_syntax_go_id = tf.tile(
            tf.constant(self.data.syntax_vocab.go_id, tf.int32, shape=[
                1,
            ]), [self.batch_size])
        self.shared_tensors['batch_syntax_go'] = batch_syntax_go
        self.shared_tensors['batch_syntax_go_id'] = batch_syntax_go_id

        outputs = {}
        outputs['src_ids'] = src_ids

        if self.flags.control_mode:
            outputs["control_vec"] = self.shared_tensors['control_vec']
        # if 'predict' in self.flags.control_mode:
        #     control_vec, outputs = self.classify(
        #         outputs,
        #         self.shared_tensors['control_vec'],
        #         "fix_predict" in self.flags.control_mode)
        #     self.shared_tensors['control_vec'] = control_vec
        if self.flags.control_mode:
            if "flatten" not in self.flags.control_mode:
                # print_op = tf.print("Debug output:", self.shared_tensors['control_vec'])
                # with tf.control_dependencies([print_op]):
                #     self.shared_tensors['control_vec'] = tf.identity(self.shared_tensors['control_vec'])

                dupicate_copies = self.flags.dimension // self.data.control_vec_len
                batch_size = self.flags.train_batch_size if self.is_training else self.flags.eval_batch_size
                control_vec = tf.concat([
                    tf.reshape(
                        tf.transpose(
                            tf.tile(
                                tf.expand_dims(
                                    self.shared_tensors['control_vec'][o, :],
                                    axis=0), [dupicate_copies, 1])),
                        [1, self.flags.dimension]) for o in range(batch_size)
                ],
                                        axis=0)
                more_control_vec = tf.zeros([
                    batch_size,
                    self.flags.dimension % self.data.control_vec_len
                ])
                if not self.is_training and self.flags.beam_search_size > 1:
                    more_control_vec = tf.zeros([
                        batch_size * self.flags.beam_search_size,
                        self.flags.dimension % self.data.control_vec_len
                    ])
                self.shared_tensors['control_vec'] = tf.concat(
                    [control_vec, more_control_vec], axis=1)
            else:
                score = tf.expand_dims(self.shared_tensors['control_vec'],
                                       axis=-1)
                score = tf.tile(score, [1, 1, self.flags.dimension])
                self.shared_tensors['control_vec'] = score
        if "encoder" in self.flags.control_mode:
            src_outputs = self.update_embedding(src_outputs, False)
            self.shared_tensors['src_outputs'] = src_outputs

        with tf.variable_scope("trg_decoder"):
            if self.is_training:
                # Generate syntax
                if 'syntax_gen' in self.flags.control_mode:
                    syntax_losses = []
                    template_simp_ids = features['template_simp_ids']

                    # print_op = tf.print("template_simp_ids output:", template_simp_ids)
                    # with tf.control_dependencies([print_op]):
                    #     template_simp_ids = tf.identity(template_simp_ids)

                    template_simp_ids_layers = tf.unstack(template_simp_ids,
                                                          axis=1)
                    for l_id in range(self.flags.syntax_level):
                        template_simp_ids_layer = template_simp_ids_layers[
                            l_id]

                        # print_op = tf.print("template_simp_ids_layer %s output:" % l_id, template_simp_ids_layer)
                        # with tf.control_dependencies([print_op]):
                        #     template_simp_ids_layer = tf.identity(template_simp_ids_layer)

                        template_simp_ids_layer_list = tf.unstack(
                            template_simp_ids_layer, axis=1)
                        template_simp_ids_layer_inp_list = [
                            batch_syntax_go_id
                        ] + template_simp_ids_layer_list[:-1]
                        template_simp_emb_list = self._embedding_fn(
                            template_simp_ids_layer_inp_list,
                            self.shared_tensors['syntax_embedding_table'])
                        template_simp_emb = tf.stack(template_simp_emb_list,
                                                     axis=1)

                        template_mask = tf.cast(
                            tf.equal(template_simp_ids_layers[0],
                                     self.data.vocab.pad_id), tf.float32)
                        template_bias = common_attention.attention_bias_ignore_padding(
                            template_mask)

                        if l_id == 0:
                            self.shared_tensors[
                                'template_prev_simp_outputs'] = None
                            self.shared_tensors['template_simp_bias'] = None
                        else:
                            template_simp_prev_ids_layers = template_simp_ids_layers[:
                                                                                     l_id]
                            template_simp_prev_ids = tf.stack(
                                template_simp_prev_ids_layers, axis=1)
                            template_simp_prev_embs = self._embedding_fn(
                                template_simp_prev_ids,
                                self.shared_tensors['syntax_embedding_table'])
                            cur_template_scale = template_scale[:, :l_id, :, :]
                            template_simp_prev_embs *= cur_template_scale
                            template_simp_prev_embs = tf.reduce_mean(
                                template_simp_prev_embs, axis=1)
                            template_simp_outputs, template_simp_bias = self.encode_syntax_template(
                                template_simp_prev_embs, template_bias)
                            self.shared_tensors[
                                'template_prev_simp_outputs'] = template_simp_outputs
                            self.shared_tensors[
                                'template_simp_bias'] = template_simp_bias

                        syntax_outputs = self.decode_syntax_template(
                            template_simp_emb)

                        syntax_logits = tf.nn.conv1d(
                            syntax_outputs,
                            tf.expand_dims(
                                self.shared_tensors['proj_syntax_w'], axis=0),
                            1, 'SAME') + tf.expand_dims(tf.expand_dims(
                                self.shared_tensors['proj_syntax_b'], axis=0),
                                                        axis=0)
                        # syntax_gen = tf.argmax(syntax_logits, axis=-1)
                        syntax_weight = tf.cast(
                            tf.not_equal(template_simp_ids_layer,
                                         self.data.syntax_vocab.pad_id),
                            tf.float32)
                        syntax_loss = sequence_loss(
                            logits=syntax_logits,
                            targets=template_simp_ids_layer,
                            weights=syntax_weight)
                        syntax_losses.append(syntax_loss)

                    outputs['loss_syntax'] = tf.add_n(syntax_losses)
                    outputs['perplexity_syntax'] = tf.exp(
                        outputs['loss_syntax'])
                    tf.summary.scalar("loss_syntax", outputs['loss_syntax'])
                    tf.summary.scalar("perplexity_syntax",
                                      outputs['perplexity_syntax'])

                    template_simp_prev_ids_layers = template_simp_ids_layers
                    template_simp_prev_ids = tf.stack(
                        template_simp_prev_ids_layers, axis=1)
                    template_simp_prev_embs = self._embedding_fn(
                        template_simp_prev_ids,
                        self.shared_tensors['syntax_embedding_table'])
                    cur_template_scale = template_scale
                    template_simp_prev_embs *= cur_template_scale
                    template_simp_prev_embs = tf.reduce_mean(
                        template_simp_prev_embs, axis=1)
                    template_simp_outputs, template_simp_bias = self.encode_syntax_template(
                        template_simp_prev_embs, template_bias)
                    self.shared_tensors[
                        'template_simp_outputs'] = template_simp_outputs
                    self.shared_tensors[
                        'template_simp_bias'] = template_simp_bias

                # Generate sentence
                trg_ids_list = tf.unstack(trg_ids, axis=1)
                trg_input_ids_list = [batch_go_id] + trg_ids_list[:-1]
                trg_emb_list = self._embedding_fn(trg_input_ids_list)
                trg_input_ids = tf.stack(trg_input_ids_list, axis=1)
                trg_output_ids = tf.stack(trg_ids_list, axis=1)
                trg_emb = tf.stack(trg_emb_list, axis=1)

                decoder_outputs = self.decode_srcs_to_trgs(
                    trg_emb=trg_emb,
                    trg_input_ids=trg_input_ids,
                    outputs=outputs)
                word_logits = tf.nn.conv1d(
                    decoder_outputs,
                    tf.expand_dims(self.shared_tensors['proj_word_w'], axis=0),
                    1, 'SAME') + tf.expand_dims(tf.expand_dims(
                        self.shared_tensors['proj_word_b'], axis=0),
                                                axis=0)
                word_gen = tf.argmax(word_logits, axis=-1)
                outputs['gen'] = word_gen
                outputs['logits'] = word_logits

                weight = tf.cast(
                    tf.not_equal(trg_output_ids, self.data.vocab.pad_id),
                    tf.float32)
                loss = sequence_loss(logits=word_logits,
                                     targets=trg_output_ids,
                                     weights=weight)
                outputs['loss_decoder'] = loss
                outputs['perplexity_decoder'] = tf.exp(loss)
                tf.summary.scalar("loss_decoder", outputs['loss_decoder'])
                tf.summary.scalar("perplexity_decoder",
                                  outputs['perplexity_decoder'])
                # if 'predict' in self.flags.control_mode:
                #     # outputs['loss_length'] = outputs['loss_length']
                #     # outputs['loss_syntax'] = outputs['loss_syntax']
                #     # outputs['loss'] += outputs['loss_split']
                #     outputs["loss_pred"] = outputs['loss_length'] + outputs['loss_syntax'] + outputs['loss_split']
                #     tf.summary.scalar("loss_length", outputs['loss_length'])
                #     tf.summary.scalar("loss_syntax", outputs['loss_syntax'])
                #     tf.summary.scalar("loss_split", outputs['loss_split'])

            else:
                outputs['gen_src_syntax_ids'] = features['template_comp_ids']
                confident_scores = []
                self._tile_variables()

                if 'syntax_gen' in self.flags.control_mode:

                    def symbol_to_syntax_logits_fn(gen_ids):
                        cur_ids = tf.concat([
                            tf.expand_dims(batch_syntax_go_id, axis=-1),
                            gen_ids[:, 1:]
                        ],
                                            axis=1)
                        cur_embs = tf.nn.embedding_lookup(
                            self.shared_tensors['syntax_embedding_table'],
                            cur_ids)
                        cur_outputs = self.decode_syntax_template(cur_embs)
                        cur_logit = tf.matmul(
                            cur_outputs[:, -1, :],
                            self.shared_tensors['proj_syntax_w']
                        ) + self.shared_tensors['proj_syntax_b']
                        return cur_logit

                    template_simp_prev_ids_layers = []
                    for l_id in range(self.flags.syntax_level):
                        if l_id == 0:
                            self.shared_tensors[
                                'template_prev_simp_outputs'] = None
                            self.shared_tensors['template_simp_bias'] = None
                        else:
                            template_simp_prev_ids = tf.stack(
                                template_simp_prev_ids_layers, axis=1)
                            template_simp_prev_embs = self._embedding_fn(
                                template_simp_prev_ids,
                                self.shared_tensors['syntax_embedding_table'])
                            cur_template_scale = template_scale[:, :l_id, :, :]
                            template_simp_prev_embs *= cur_template_scale
                            template_simp_prev_embs = tf.reduce_mean(
                                template_simp_prev_embs, axis=1)

                            template_mask = tf.cast(
                                tf.equal(template_simp_prev_ids_layers[-1],
                                         self.data.vocab.pad_id), tf.float32)
                            template_bias = common_attention.attention_bias_ignore_padding(
                                template_mask)

                            template_simp_outputs, template_simp_bias = self.encode_syntax_template(
                                template_simp_prev_embs, template_bias)
                            self.shared_tensors[
                                'template_prev_simp_outputs'] = template_simp_outputs
                            self.shared_tensors[
                                'template_simp_bias'] = template_simp_bias

                        beam_ids, beam_score = beam_search.beam_search(
                            symbols_to_logits_fn=symbol_to_syntax_logits_fn,
                            initial_ids=tf.ones([self.flags.eval_batch_size],
                                                tf.int32) *
                            self.data.syntax_vocab.go_id,
                            beam_size=self.flags.beam_search_size,
                            decode_length=self.flags.max_syntax_trg_len,
                            vocab_size=self.data.syntax_vocab.size(),
                            alpha=0.6,
                            eos_id=self.data.syntax_vocab.eos_id)
                        top_beam_ids = beam_ids[:, 0, 1:]
                        top_beam_ids = tf.pad(
                            top_beam_ids,
                            [[0, 0],
                             [
                                 0, self.flags.max_syntax_trg_len -
                                 tf.shape(top_beam_ids)[1]
                             ]])
                        confident_score = -beam_score[:, 0] / tf.to_float(
                            tf.shape(top_beam_ids)[1])

                        confident_scores.append(confident_score)
                        # outputs['gen_src_syntax_ids'] = features['template_comp_ids']
                        # outputs['gen_trg_syntax_ids'] = top_beam_ids
                        # outputs['gen_trg_syntax_scores'] = confident_score
                        template_simp_prev_ids_layers.append(top_beam_ids)

                    template_simp_prev_ids = tf.stack(
                        template_simp_prev_ids_layers, axis=1)
                    outputs['gen_trg_syntax_ids'] = template_simp_prev_ids
                    outputs['gen_trg_syntax_scores'] = tf.add_n(
                        confident_scores)
                    template_simp_prev_embs = self._embedding_fn(
                        template_simp_prev_ids,
                        self.shared_tensors['syntax_embedding_table'])
                    template_simp_prev_embs *= template_scale
                    template_simp_prev_embs = tf.reduce_mean(
                        template_simp_prev_embs, axis=1)

                    template_mask = tf.cast(
                        tf.equal(template_simp_prev_ids_layers[-1],
                                 self.data.vocab.pad_id), tf.float32)
                    template_bias = common_attention.attention_bias_ignore_padding(
                        template_mask)
                    template_simp_outputs, template_simp_bias = self.encode_syntax_template(
                        template_simp_prev_embs, template_bias)
                    self.shared_tensors[
                        'template_simp_outputs'] = template_simp_outputs
                    self.shared_tensors[
                        'template_simp_bias'] = template_simp_bias

                def symbol_to_logits_fn(gen_ids):
                    cur_ids = tf.concat(
                        [tf.expand_dims(batch_go_id, axis=-1), gen_ids[:, 1:]],
                        axis=1)
                    cur_embs = tf.nn.embedding_lookup(
                        self.shared_tensors['word_embedding_table'], cur_ids)
                    cur_outputs = self.decode_srcs_to_trgs(
                        trg_emb=cur_embs, trg_input_ids=cur_ids)
                    cur_logit = tf.matmul(
                        cur_outputs[:,
                                    -1, :], self.shared_tensors['proj_word_w']
                    ) + self.shared_tensors['proj_word_b']
                    return cur_logit

                beam_ids, beam_score = beam_search.beam_search(
                    symbols_to_logits_fn=symbol_to_logits_fn,
                    initial_ids=tf.ones([self.flags.eval_batch_size],
                                        tf.int32) * self.data.vocab.go_id,
                    beam_size=self.flags.beam_search_size,
                    decode_length=self.flags.max_trg_len,
                    vocab_size=self.data.vocab.size() +
                    len(self.data.vocab.more_tokens),
                    alpha=0.6,
                    eos_id=self.data.vocab.eos_id)
                top_beam_ids = beam_ids[:, 0, 1:]
                top_beam_ids = tf.pad(
                    top_beam_ids,
                    [[0, 0],
                     [0, self.flags.max_trg_len - tf.shape(top_beam_ids)[1]]])
                confident_score = -beam_score[:, 0] / tf.to_float(
                    tf.shape(top_beam_ids)[1])
                outputs['gen_trg_ids'] = top_beam_ids
                outputs['gen_trg_scores'] = confident_score
                if self.flags.control_mode:
                    outputs['control_ids'] = features['control_ids']

        return outputs
示例#59
0
    def process_dataset(self, *row_parts):
        """Convert one raw dataset row into the dict of tensors for a single example.

        The row holds a target word followed by context strings. Each context
        is split on ',' into three fields (source name, path, target name),
        and every field is further split on '|' into parts that are mapped to
        integer ids through the lookup tables held on ``self``.

        Args:
            *row_parts: fields of a single row. When ``self.use_multilanguage``
                is set, ``row_parts[0]`` is used as the language id and
                ``row_parts[1]`` as the sequence ``[word, context_1, ...]``;
                otherwise ``row_parts`` itself is ``[word, context_1, ...]``.

        Returns:
            A dict keyed by the ``*_KEY`` constants (plus ``LANGUAGE_ID``)
            holding the target string, its sub-token ids and clipped length,
            the per-context source/path/target ids, their lengths, the raw
            per-context strings, and a float mask of valid contexts.
        """
        row_parts = list(row_parts)
        if self.use_multilanguage:
            # presumably the row is (language_id, (word, context_1, ...)) —
            # verify against the dataset construction in the caller
            language_id = row_parts[0]
            row_parts = row_parts[1]
        else:
            language_id = None

        # Scalar string: it is expanded to 1-D below before tf.string_split.
        word = row_parts[0]  # (, )

        if not self.is_evaluating and self.config.RANDOM_CONTEXTS:
            all_contexts = tf.stack(row_parts[1:])
            # Append one pad entry so that at least one match exists and the
            # reduce_min below is well defined even when no context is blank.
            all_contexts_padded = tf.concat([all_contexts, [self.context_pad]],
                                            axis=-1)
            index_of_blank_context = tf.where(
                tf.equal(all_contexts_padded, self.context_pad))
            # Index of the first pad entry == number of leading real contexts.
            num_contexts_per_example = tf.reduce_min(index_of_blank_context)

            # if there are less than self.max_contexts valid contexts, still sample self.max_contexts
            safe_limit = tf.cast(
                tf.maximum(num_contexts_per_example, self.config.MAX_CONTEXTS),
                tf.int32)
            # Random subset: shuffle candidate indices, keep the first MAX_CONTEXTS.
            rand_indices = tf.random_shuffle(
                tf.range(safe_limit))[:self.config.MAX_CONTEXTS]
            contexts = tf.gather(all_contexts, rand_indices)  # (max_contexts,)
        else:
            # Deterministic choice: the first MAX_CONTEXTS contexts of the row.
            contexts = row_parts[1:(self.config.MAX_CONTEXTS +
                                    1)]  # (max_contexts,)

        # contexts: (max_contexts, )
        # Split every context into its comma-separated fields.
        split_contexts = tf.string_split(contexts,
                                         delimiter=',',
                                         skip_empty=False)
        # Re-wrap with a fixed dense shape of 3 columns.
        # assumes no context has more than 3 comma-separated fields — TODO confirm
        sparse_split_contexts = tf.sparse.SparseTensor(
            indices=split_contexts.indices,
            values=split_contexts.values,
            dense_shape=[self.config.MAX_CONTEXTS, 3])
        dense_split_contexts = tf.reshape(
            tf.sparse.to_dense(sp_input=sparse_split_contexts,
                               default_value=Common.PAD),
            shape=[self.config.MAX_CONTEXTS, 3])  # (max_contexts, 3)

        # Split the target word into '|'-separated parts.
        split_target_labels = tf.string_split(tf.expand_dims(word, -1),
                                              delimiter='|')
        # Use at least one column more than the number of parts, so the dense
        # row always contains a PAD entry for the length computation below.
        target_dense_shape = [
            1,
            tf.maximum(tf.to_int64(self.config.MAX_TARGET_PARTS),
                       split_target_labels.dense_shape[1] + 1)
        ]
        sparse_target_labels = tf.sparse.SparseTensor(
            indices=split_target_labels.indices,
            values=split_target_labels.values,
            dense_shape=target_dense_shape)
        dense_target_label = tf.reshape(
            tf.sparse.to_dense(sp_input=sparse_target_labels,
                               default_value=Common.PAD), [-1])
        # Position of the first PAD entry, used as the target length.
        index_of_blank = tf.where(tf.equal(dense_target_label, Common.PAD))
        target_length = tf.reduce_min(index_of_blank)
        # Truncate the parts to MAX_TARGET_PARTS and clip the length to match.
        dense_target_label = dense_target_label[:self.config.MAX_TARGET_PARTS]
        clipped_target_lengths = tf.clip_by_value(
            target_length,
            clip_value_min=0,
            clip_value_max=self.config.MAX_TARGET_PARTS)
        # A single 0 is appended after the looked-up ids.
        # presumably 0 is the PAD id of target_table — TODO confirm
        target_word_labels = tf.concat(
            [self.target_table.lookup(dense_target_label), [0]],
            axis=-1)  # (max_target_parts + 1) of int

        # ---- Column 0 of each context: source name ----
        path_source_strings = tf.slice(
            dense_split_contexts, [0, 0],
            [self.config.MAX_CONTEXTS, 1])  # (max_contexts, 1)
        flat_source_strings = tf.reshape(path_source_strings,
                                         [-1])  # (max_contexts)
        split_source = tf.string_split(
            flat_source_strings, delimiter='|',
            skip_empty=False)  # (max_contexts, max_name_parts)

        # Widen to at least MAX_NAME_PARTS columns so the slice below is
        # always in range, whatever the longest name in this example is.
        sparse_split_source = tf.sparse.SparseTensor(
            indices=split_source.indices,
            values=split_source.values,
            dense_shape=[
                self.config.MAX_CONTEXTS,
                tf.maximum(tf.to_int64(self.config.MAX_NAME_PARTS),
                           split_source.dense_shape[1])
            ])
        dense_split_source = tf.sparse.to_dense(
            sp_input=sparse_split_source,
            default_value=Common.PAD)  # (max_contexts, max_name_parts)
        # Keep only the first MAX_NAME_PARTS parts of every name.
        dense_split_source = tf.slice(dense_split_source, [0, 0],
                                      [-1, self.config.MAX_NAME_PARTS])
        path_source_indices = self.subtoken_table.lookup(
            dense_split_source)  # (max_contexts, max_name_parts)
        # Length = number of non-PAD parts per context.
        path_source_lengths = tf.reduce_sum(
            tf.cast(tf.not_equal(dense_split_source, Common.PAD), tf.int32),
            -1)  # (max_contexts)

        # ---- Column 1 of each context: path ----
        path_strings = tf.slice(dense_split_contexts, [0, 1],
                                [self.config.MAX_CONTEXTS, 1])
        flat_path_strings = tf.reshape(path_strings, [-1])
        split_path = tf.string_split(flat_path_strings,
                                     delimiter='|',
                                     skip_empty=False)
        # The dense shape is fixed here, with no tf.maximum guard as used for
        # the name parts.
        # assumes no path has more than MAX_PATH_LENGTH parts — TODO confirm
        sparse_split_path = tf.sparse.SparseTensor(
            indices=split_path.indices,
            values=split_path.values,
            dense_shape=[
                self.config.MAX_CONTEXTS, self.config.MAX_PATH_LENGTH
            ])
        dense_split_path = tf.sparse.to_dense(
            sp_input=sparse_split_path,
            default_value=Common.PAD)  # (max_contexts, max_path_length)

        node_indices = self.node_table.lookup(
            dense_split_path)  # (max_contexts, max_path_length)
        path_lengths = tf.reduce_sum(
            tf.cast(tf.not_equal(dense_split_path, Common.PAD), tf.int32),
            -1)  # (max_contexts)

        # ---- Column 2 of each context: target name ----
        # Same processing as the source name above.
        path_target_strings = tf.slice(
            dense_split_contexts, [0, 2],
            [self.config.MAX_CONTEXTS, 1])  # (max_contexts, 1)
        flat_target_strings = tf.reshape(path_target_strings,
                                         [-1])  # (max_contexts)
        split_target = tf.string_split(
            flat_target_strings, delimiter='|',
            skip_empty=False)  # (max_contexts, max_name_parts)
        sparse_split_target = tf.sparse.SparseTensor(
            indices=split_target.indices,
            values=split_target.values,
            dense_shape=[
                self.config.MAX_CONTEXTS,
                tf.maximum(tf.to_int64(self.config.MAX_NAME_PARTS),
                           split_target.dense_shape[1])
            ])
        dense_split_target = tf.sparse.to_dense(
            sp_input=sparse_split_target,
            default_value=Common.PAD)  # (max_contexts, max_name_parts)
        dense_split_target = tf.slice(dense_split_target, [0, 0],
                                      [-1, self.config.MAX_NAME_PARTS])
        path_target_indices = self.subtoken_table.lookup(
            dense_split_target)  # (max_contexts, max_name_parts)
        path_target_lengths = tf.reduce_sum(
            tf.cast(tf.not_equal(dense_split_target, Common.PAD), tf.int32),
            -1)  # (max_contexts)

        # 1.0 where the summed per-context maximum ids are non-zero, else 0.0.
        # presumably PAD maps to id 0 in all three tables, so an all-PAD
        # context yields 0 — TODO confirm against the table construction
        valid_contexts_mask = tf.to_float(
            tf.not_equal(
                tf.reduce_max(path_source_indices, -1) +
                tf.reduce_max(node_indices, -1) +
                tf.reduce_max(path_target_indices, -1), 0))

        # NOTE(review): language_id is None when use_multilanguage is off —
        # confirm the consumer of this dict tolerates a None value.
        return {
            TARGET_STRING_KEY: word,
            TARGET_INDEX_KEY: target_word_labels,
            TARGET_LENGTH_KEY: clipped_target_lengths,
            PATH_SOURCE_INDICES_KEY: path_source_indices,
            NODE_INDICES_KEY: node_indices,
            PATH_TARGET_INDICES_KEY: path_target_indices,
            VALID_CONTEXT_MASK_KEY: valid_contexts_mask,
            PATH_SOURCE_LENGTHS_KEY: path_source_lengths,
            PATH_LENGTHS_KEY: path_lengths,
            PATH_TARGET_LENGTHS_KEY: path_target_lengths,
            PATH_SOURCE_STRINGS_KEY: path_source_strings,
            PATH_STRINGS_KEY: path_strings,
            PATH_TARGET_STRINGS_KEY: path_target_strings,
            LANGUAGE_ID: language_id
        }
示例#60
0
def dense2sparse(tensor):
    """Convert a dense tensor into a ``tf.SparseTensor`` of its non-zero entries.

    Args:
        tensor: dense numeric tensor; entries equal to 0 are dropped.

    Returns:
        A ``tf.SparseTensor`` holding the coordinates and values of the
        non-zero entries, with the same dense shape as ``tensor``.
    """
    # Coordinates of every non-zero entry, one row per entry.
    nonzero_idx = tf.where(tf.not_equal(tensor, 0))
    nonzero_values = tf.gather_nd(tensor, nonzero_idx)
    # SparseTensor converts dense_shape with dtype=int64, which rejects the
    # int32 tensor tf.shape returns by default, so request int64 explicitly.
    dense_shape = tf.shape(tensor, out_type=tf.int64)
    return tf.SparseTensor(nonzero_idx, nonzero_values, dense_shape)