Exemplo n.º 1
0
def coord_addition(votes, H, W):
    """Add scaled spatial coordinates to the first two vote components.

    The centre of each row, ``(i + 0.5) / H``, is added to component 0 of the
    last axis and the centre of each column, ``(j + 0.5) / W``, to component 1.
    The other 14 components of the last axis receive a zero offset.

    :param votes: (24, 4, 4, 32, 10, 16)
    :param H, W: spatial height and width 4

    :return votes: (24, 4, 4, 32, 10, 16)
    """
    # Row offsets: only slot 0 of the 16 stacked slices is non-zero.
    row_centres = tf.reshape(
        (tf.range(H, dtype=tf.float32) + 0.50) / H, [1, H, 1, 1, 1]
    )
    row_zeros = tf.constant(0.0, shape=[1, H, 1, 1, 1], dtype=tf.float32)
    row_offset = tf.stack(
        [row_centres] + [row_zeros] * 15, axis=-1
    )  # (1, H, 1, 1, 1, 16)

    # Column offsets: only slot 1 of the 16 stacked slices is non-zero.
    col_centres = tf.reshape(
        (tf.range(W, dtype=tf.float32) + 0.50) / W, [1, 1, W, 1, 1]
    )
    col_zeros = tf.constant(0.0, shape=[1, 1, W, 1, 1], dtype=tf.float32)
    col_offset = tf.stack(
        [col_zeros, col_centres] + [col_zeros] * 14, axis=-1
    )  # (1, 1, W, 1, 1, 16)

    # Both offsets broadcast against votes: (24, 4, 4, 32, 10, 16)
    return votes + row_offset + col_offset
  def test_docstring_example(self):
    """Checks the Halton-sequence integration example against its tolerances."""
    dim = 3
    num_results = 1000
    with self.test_session():
      # First 1000 members of the Halton sequence in 3 dimensions.
      halton_points = tfp.mcmc.sample_halton_sequence(
          dim, num_results=num_results, randomized=False)

      # Estimate the integral of x_1 * x_2^2 * x_3^3 over the three
      # dimensional hypercube as the mean of the integrand at the sample
      # points.
      powers = tf.range(1., limit=dim + 1)
      integrand = tf.reduce_prod(halton_points ** powers, axis=-1)
      integral = tf.reduce_mean(integrand)
      true_value = 1. / tf.reduce_prod(powers + 1.)

      # Produces a relative absolute error of 1.7%.
      self.assertAllClose(integral.eval(), true_value.eval(), rtol=0.02)

      # Skip the first 1000 samples and redo the estimate with the next
      # thousand, selected through the sequence_indices argument.
      sequence_indices = tf.range(
          start=1000, limit=1000 + num_results, dtype=tf.int32)
      leaped_points = tfp.mcmc.sample_halton_sequence(
          dim, sequence_indices=sequence_indices, randomized=False)

      leaped_integrand = tf.reduce_prod(leaped_points ** powers, axis=-1)
      integral_leaped = tf.reduce_mean(leaped_integrand)
      self.assertAllClose(integral_leaped.eval(), true_value.eval(), rtol=0.05)
Exemplo n.º 3
0
def accuracy_instance(predictions, targets, n=[1, 2, 3, 4, 5, 10], nb_classes=5, nb_samples_per_class=10, batch_size=1):
    """Set up per-instance accuracy accumulators and their update step.

    Args:
        predictions: predicted labels; `targets` is cast to its dtype.
        targets: ground-truth labels.
        n: not referenced in the visible part of this function.
        nb_classes: number of classes; the per-class counter has
            ``nb_classes + 1`` columns.
        nb_samples_per_class: number of columns of the accuracy accumulator.
        batch_size: number of rows of both accumulators.

    NOTE(review): only the accumulator setup and the `step_` helper are
    visible here; `step_` looks like a scan/fold step function whose driver
    lies outside this view - confirm against the full file.
    """
    targets = tf.cast(targets, predictions.dtype)

    accuracy = tf.constant(value=0, shape=(batch_size, nb_samples_per_class), dtype=tf.float32)
    indices = tf.constant(value=0, shape=(batch_size, nb_classes+1), dtype=tf.float32)

    def step_(state, inputs):
        """Fold one (prediction, target) pair into the accumulators.

        Args:
            state: pair ``(accuracy, indices)`` of running accumulators.
            inputs: pair ``(p, t)`` of predictions and targets for this step.

        Returns:
            ``[accuracy, indices]`` after the update.
        """
        # Tuple parameters in the signature are a syntax error on Python 3
        # (PEP 3113), so the pairs are unpacked here instead.
        accuracy, indices = state
        p, t = inputs

        p = tf.cast(p, tf.int32)
        t = tf.cast(t, tf.int32)
        ##Accuracy Update
        batch_range = tf.cast(tf.range(0, batch_size), dtype=tf.int32)
        # How many times each row's target class has been counted so far;
        # this selects the accuracy column to update.
        gather = tf.cast(tf.gather_nd(indices,tf.stack([tf.range(0,p.get_shape().as_list()[0]), t], axis=1)), tf.int32)
        index = tf.cast(tf.stack([batch_range, gather], axis=1), dtype=tf.int64)
        val = tf.cast(tf.equal(p, t), tf.float32)
        delta = tf.SparseTensor(indices=index, values=val, dense_shape=tf.cast(accuracy.get_shape().as_list(), tf.int64))
        accuracy = accuracy + tf.sparse_tensor_to_dense(delta)
        ##Index Update
        # Increment the counter of the target class for every row.
        index = tf.cast(tf.stack([batch_range, t], axis=1), dtype=tf.int64)
        val = tf.constant(1.0, shape=[batch_size])
        delta = tf.SparseTensor(indices=index, values=val, dense_shape=tf.cast(indices.get_shape().as_list(), dtype=tf.int64))
        indices = indices + tf.sparse_tensor_to_dense(delta)
        return [accuracy, indices]
Exemplo n.º 4
0
    def setup(self, batch_size, num_concurrent):
        """Build the queues and ops that load and process images in parallel.

        Args:
            batch_size: number of processed images per dequeued batch. A falsy
                value selects ``self.data_spec.batch_size``; the result is
                capped at the number of images.
            num_concurrent: number of enqueue ops given to the queue runner
                (capped at the number of images).

        Raises:
            ValueError: if the effective batch size is not positive (for
                example when there are no images), or if the number of images
                is not divisible by the batch size.
        """
        # Validate the batch size
        num_images = len(self.image_paths)
        batch_size = min(num_images, batch_size or self.data_spec.batch_size)
        if batch_size < 1:
            # Without this guard an empty image list fails below with an
            # unhelpful modulo-by-zero error.
            raise ValueError(
                'The batch size must be positive, got {} ({} images available).'.format(
                    batch_size, num_images))
        if num_images % batch_size != 0:
            raise ValueError(
                'The total number of images ({}) must be divisible by the batch size ({}).'.format(
                    num_images, batch_size))
        # Floor division keeps the batch count an integer on Python 3; the
        # divisibility check above guarantees it is exact.
        self.num_batches = num_images // batch_size

        # Create a queue that will contain image paths (and their indices and extension indicator)
        if self.face_bboxes is None:
            self.path_bbox_queue = tf.FIFOQueue(capacity=num_images,
                                                dtypes=[tf.int32, tf.bool, tf.string],
                                                name='path_queue')
            indices = tf.range(num_images)
            self.enqueue_paths_op = self.path_bbox_queue.enqueue_many(
                [indices, self.extension_mask, self.image_paths])
        else:
            # Same queue with one extra int32 component for the face boxes.
            self.path_bbox_queue = tf.FIFOQueue(capacity=num_images,
                                                dtypes=[tf.int32, tf.bool, tf.string, tf.int32],
                                                name='path_queue')
            indices = tf.range(num_images)
            self.enqueue_paths_op = self.path_bbox_queue.enqueue_many(
                [indices, self.extension_mask, self.image_paths, self.face_bboxes])
        # Close the path queue (no more additions)
        self.close_path_queue_op = self.path_bbox_queue.close()

        # Create an operation that dequeues a single path and returns a processed image
        # Each entry is a (crop index, flip flag) pair; entry 0 is always present.
        crop_flip = [[0,False]]
        if cfg.CROP:
            for i in range(1,5):
                crop_flip.append([i,False])

        if cfg.FLIP:
            # Add a flipped variant of every crop collected so far.
            for i in range(len(crop_flip)):
                crop_flip.append((crop_flip[i][0],True))

        (processed_idx_list,processed_img_list) = self.process(crop_flip)
        # Create a queue that will contain the processed images (and their indices)
        image_shape = (self.data_spec.crop_size, self.data_spec.crop_size, self.data_spec.channels)
        # NOTE(review): the capacity uses num_concurrent before it is capped
        # at num_images further down - confirm that this is intended.
        processed_queue = tf.FIFOQueue(capacity=int(np.ceil(len(crop_flip)*num_images / float(num_concurrent))),
                                       dtypes=[tf.int32, tf.float32],
                                       shapes=[(), image_shape],
                                       name='processed_queue')

        # Enqueue the processed image and path
        enqueue_processed_op = processed_queue.enqueue_many([processed_idx_list,processed_img_list])

        # Create a dequeue op that fetches a batch of processed images off the queue
        [self.ind_deq,self.img_deq] = processed_queue.dequeue_many(batch_size)
        self.dequeue_op = [self.ind_deq,self.img_deq]

        # Create a queue runner to perform the processing operations in parallel
        num_concurrent = min(num_concurrent, num_images)
        self.queue_runner = tf.train.QueueRunner(processed_queue,
                                                 [enqueue_processed_op] * num_concurrent)

        self.num_imgs = len(crop_flip)*num_images
        self.num_feats_per_image = len(crop_flip)
Exemplo n.º 5
0
def get_idx_map(shape):
    """Build a map of (row, column) pixel indices for an image batch.

    Args:
        shape: [B, T, H, W] or [B, H, W]
    Returns:
        idx: [B, T, H, W, 2], or [B, H, W, 2]; the last axis holds the row
            index followed by the column index.
    """
    # One-element vector holding the number of entries in `shape`.
    rank = tf.shape(shape)
    col_axis = rank - 1
    row_axis = rank - 2

    # Each coordinate plane has the input shape plus a trailing unit axis.
    plane_shape = tf.concat(0, [shape, tf.constant([1])])

    # Reshape targets that put the index ramp on the H (resp. W) axis and
    # ones everywhere else, so the ramp broadcasts over the whole plane.
    row_lead = tf.ones(row_axis - 1, dtype='int32')
    col_lead = tf.ones(col_axis - 1, dtype='int32')
    row_shape = tf.concat(0, [row_lead, tf.constant([-1]), tf.constant([1, 1])])
    col_shape = tf.concat(0, [col_lead, tf.constant([-1]), tf.constant([1])])

    rows_plane = tf.zeros(plane_shape, dtype='float')
    cols_plane = tf.zeros(plane_shape, dtype='float')

    height = tf.slice(shape, rank - 2, [1])
    width = tf.slice(shape, rank - 1, [1])
    row_ramp = tf.reshape(tf.to_float(tf.range(height[0])), row_shape)
    col_ramp = tf.reshape(tf.to_float(tf.range(width[0])), col_shape)
    rows_plane = rows_plane + row_ramp
    cols_plane = cols_plane + col_ramp

    # Join the two planes along the trailing axis.
    return tf.concat(rank[0], [rows_plane, cols_plane])
Exemplo n.º 6
0
 def fold_batches(acc, x):
   """Appends one (batch, start, end) row per position of a sequence to acc.

   `x` holds the batch id and the sequence length; `window` is taken from the
   enclosing scope. Each end is `start + window`, clipped to the length.
   """
   batch_id, length = x[0], x[1]
   batch_col = tf.tile([batch_id], [length])
   starts = tf.range(length)
   ends = tf.minimum(tf.range(window, length + window), length)
   rows = tf.transpose(tf.stack([batch_col, starts, ends]))
   return tf.concat([acc, rows], axis=0)
Exemplo n.º 7
0
 def while_step(t, rnn_state, tas, accs):
   """Implements one timestep of FIVO computation.

   Reads `inputs_ta`, `mask_ta`, `cell`, `num_samples`, `batch_size`,
   `random_seed` and `resampling_criterion` from the enclosing scope.

   Args:
     t: current timestep; indexes the input TensorArrays and the writes.
     rnn_state: state handed to `cell` for this step.
     tas: TensorArrays that receive, in order, the accumulated log weights,
       the log effective sample size and the resampling indicator.
     accs: tuple `(log_weights_acc, log_p_hat_acc, kl_acc)` of accumulators.

   Returns:
     `(t + 1, new_state, new_tas, new_accs)`, where `new_state` has been
     gathered with the ancestor indices and `new_accs` mirrors `accs`.
   """
   log_weights_acc, log_p_hat_acc, kl_acc = accs
   cur_inputs, cur_mask = nested.read_tas([inputs_ta, mask_ta], t)
   # Run the cell for one step.
   log_q_z, log_p_z, log_p_x_given_z, kl, new_state = cell(
       cur_inputs,
       rnn_state,
       cur_mask,
   )
   # Compute the incremental weight and use it to update the current
   # accumulated weight.
   kl_acc += kl * cur_mask
   log_alpha = (log_p_x_given_z + log_p_z - log_q_z) * cur_mask
   log_alpha = tf.reshape(log_alpha, [num_samples, batch_size])
   log_weights_acc += log_alpha
   # Calculate the effective sample size.
   # In log space: 2 * log(sum w) - log(sum w^2), reduced over the sample axis.
   ess_num = 2 * tf.reduce_logsumexp(log_weights_acc, axis=0)
   ess_denom = tf.reduce_logsumexp(2 * log_weights_acc, axis=0)
   log_ess = ess_num - ess_denom
   # Calculate the ancestor indices via resampling. Because we maintain the
   # log unnormalized weights, we pass the weights in as logits, allowing
   # the distribution object to apply a softmax and normalize them.
   resampling_dist = tf.contrib.distributions.Categorical(
       logits=tf.transpose(log_weights_acc, perm=[1, 0]))
   # The sampled indices are treated as constants for differentiation.
   ancestor_inds = tf.stop_gradient(
       resampling_dist.sample(sample_shape=num_samples, seed=random_seed))
   # Because the batch is flattened and laid out as discussed
   # above, we must modify ancestor_inds to index the proper samples.
   # The particles in the ith filter are distributed every batch_size rows
   # in the batch, and offset i rows from the top. So, to correct the indices
   # we multiply by the batch_size and add the proper offset. Crucially,
   # when ancestor_inds is flattened the layout of the batch is maintained.
   offset = tf.expand_dims(tf.range(batch_size), 0)
   ancestor_inds = tf.reshape(ancestor_inds * batch_size + offset, [-1])
   # Identity indices, used where no resampling takes place.
   noresample_inds = tf.range(num_samples * batch_size)
   # Decide whether or not we should resample; don't resample if we are past
   # the end of a sequence.
   should_resample = resampling_criterion(num_samples, log_ess, t)
   should_resample = tf.logical_and(should_resample,
                                    cur_mask[:batch_size] > 0.)
   float_should_resample = tf.to_float(should_resample)
   # Repeat the per-batch decision once per sample to match the flat layout.
   ancestor_inds = tf.where(
       tf.tile(should_resample, [num_samples]),
       ancestor_inds,
       noresample_inds)
   new_state = nested.gather_tensors(new_state, ancestor_inds)
   # Update the TensorArrays before we reset the weights so that we capture
   # the incremental weights and not zeros.
   ta_updates = [log_weights_acc, log_ess, float_should_resample]
   new_tas = [ta.write(t, x) for ta, x in zip(tas, ta_updates)]
   # For the particle filters that resampled, update log_p_hat and
   # reset weights to zero.
   log_p_hat_update = tf.reduce_logsumexp(
       log_weights_acc, axis=0) - tf.log(tf.to_float(num_samples))
   log_p_hat_acc += log_p_hat_update * float_should_resample
   log_weights_acc *= (1. - tf.tile(float_should_resample[tf.newaxis, :],
                                    [num_samples, 1]))
   new_accs = (log_weights_acc, log_p_hat_acc, kl_acc)
   return t + 1, new_state, new_tas, new_accs
Exemplo n.º 8
0
def _potential_scale_reduction_single_state(state, independent_chain_ndims):
  """Computes the potential scale reduction of a single state `Tensor`.

  The first dimension of `state` indexes samples and the following
  `independent_chain_ndims` dimensions index independent chains.
  """
  with tf.name_scope(
      'potential_scale_reduction_single_state',
      values=[state, independent_chain_ndims]):
    state = tf.convert_to_tensor(state, name='state')
    # Exactly one leading dimension is assumed to index e.g. correlated
    # samples from each Markov chain.
    sample_ndims = 1

    sample_axis = tf.range(0, sample_ndims)
    chain_axis = tf.range(sample_ndims,
                          sample_ndims + independent_chain_ndims)
    sample_and_chain_axis = tf.range(
        0, sample_ndims + independent_chain_ndims)

    n = _axis_size(state, sample_axis)
    m = _axis_size(state, chain_axis)

    # Following Brooks and Gelman (1998):
    #   B / n is the between chain variance, i.e. the variance of the chain
    #   means; W is the within sequence variance, i.e. the mean of the chain
    #   variances.
    chain_means = tf.reduce_mean(state, sample_axis, keepdims=True)
    b_div_n = _reduce_variance(
        chain_means, sample_and_chain_axis, biased=False)
    chain_variances = _reduce_variance(
        state, sample_axis, keepdims=True, biased=True)
    w = tf.reduce_mean(chain_variances, sample_and_chain_axis)

    # sigma^2_+ estimates the true variance and would be unbiased if each
    # chain was drawn from the target.  c.f. "law of total variance."
    sigma_2_plus = w + b_div_n

    scale = (m + 1.) / m
    correction = (n - 1.) / (m * n)
    return scale * sigma_2_plus / w - correction
Exemplo n.º 9
0
    def loop(q_, mask, mass_, found_):
        """Run one rescale-and-compare iteration against `u` (enclosing scope).

        Entries of q_ with mask == 1 are rescaled to sum to (1 - mass_). Where
        u is below the rescaled value, u is kept, its total is added to mass_
        and the entry is cleared from the mask.

        Returns (alpha, mask, mass_, found_); found_ is False only when no
        entry had u below the rescaled value.
        """
        row_ids = tf.range(tf.shape(q_)[0])
        # Partition 0 holds entries with mask == 0, partition 1 those with 1.
        q_parts = tf.dynamic_partition(q_, mask, 2)
        mask_ids = tf.dynamic_partition(row_ids, mask, 2)

        rescaled = q_parts[1] * (1.0 - mass_) / tf.reduce_sum(q_parts[1])
        p_new = tf.dynamic_stitch(mask_ids, [q_parts[0], rescaled])

        # condition verification and mask modification
        # less_mask is 1 where u is less than p_new and 0 elsewhere.
        less_mask = tf.cast(tf.less(u, p_new), tf.int32)
        less_ids = tf.dynamic_partition(
            tf.range(tf.shape(p_new)[0]), less_mask, 2)
        p_parts = tf.dynamic_partition(p_new, less_mask, 2)
        u_parts = tf.dynamic_partition(u, less_mask, 2)

        # Keep p_new where u is not smaller, and u where it is.
        alpha = tf.dynamic_stitch(less_ids, [p_parts[0], u_parts[1]])
        mass_ += tf.reduce_sum(u_parts[1])

        mask = mask * (tf.ones_like(less_mask) - less_mask)

        nothing_below = tf.equal(tf.reduce_sum(less_mask), 0)
        found_ = tf.cond(nothing_below,
                         lambda: False,
                         lambda: True)

        alpha = tf.reshape(alpha, q_.shape)
        return alpha, mask, mass_, found_
Exemplo n.º 10
0
def max_unpool(inputs, pooling_indices, output_shape=None, k_size=[1, 2, 2, 1]):
    """Scatter pooled values back to the positions recorded by a max-pool.

    NOTE! this function is based on the implementation by kwotsin in
    https://github.com/kwotsin/TensorFlow-ENet

    Args:
        inputs: tensor of shape [batch_size, height, width, channels].
        pooling_indices: pooling indices of the previously max_pooled layer.
        output_shape: shape the returned tensor should have. Required, even
            though it has a default: entries 2 and 3 are used to decode the
            flat pooling indices.
        k_size: not used by this implementation.

    Returns:
        Tensor of shape `output_shape` holding the values of `inputs` at the
        decoded positions.

    Raises:
        ValueError: if `output_shape` is None.
    """
    if output_shape is None:
        # The index decoding below subscripts output_shape, so the default
        # cannot work; fail early with a clear message.
        raise ValueError('max_unpool requires an explicit output_shape.')

    pooling_indices = tf.cast(pooling_indices, tf.int32)
    input_shape = tf.shape(inputs, out_type=tf.int32)

    # Batch index of every element, broadcast to the shape of the indices.
    one_like_pooling_indices = tf.ones_like(pooling_indices, dtype=tf.int32)
    batch_shape = tf.concat([[input_shape[0]], [1], [1], [1]], 0)
    batch_range = tf.reshape(tf.range(input_shape[0], dtype=tf.int32), shape=batch_shape)
    b = one_like_pooling_indices*batch_range
    # Decode the flat index into row and column of the output.
    y = pooling_indices//(output_shape[2]*output_shape[3])
    x = (pooling_indices//output_shape[3]) % output_shape[2]
    # Channel index of every element.
    feature_range = tf.range(output_shape[3], dtype=tf.int32)
    f = one_like_pooling_indices*feature_range

    # One (b, y, x, f) row per input value.
    inputs_size = tf.size(inputs)
    indices = tf.transpose(tf.reshape(tf.stack([b, y, x, f]), [4, inputs_size]))
    values = tf.reshape(inputs, [inputs_size])

    return tf.scatter_nd(indices, values, output_shape)
Exemplo n.º 11
0
def scheduled_sample_count(ground_truth_x,
                           generated_x,
                           batch_size,
                           scheduled_sample_var):
  """Mixes ground-truth and generated data points into one batch.

  A random permutation of the batch positions is split in two: the first
  `scheduled_sample_var` positions are filled from `ground_truth_x`, the
  remaining ones from `generated_x`.

  Args:
    ground_truth_x: tensor of ground-truth data points.
    generated_x: tensor of generated data points.
    batch_size: batch size
    scheduled_sample_var: number of ground-truth examples to include in batch.
  Returns:
    New batch with num_ground_truth sampled from ground_truth_x and the rest
    from generated_x.
  """
  num_ground_truth = scheduled_sample_var
  shuffled_positions = tf.random_shuffle(tf.range(batch_size))
  ground_truth_idx = tf.gather(shuffled_positions, tf.range(num_ground_truth))
  generated_idx = tf.gather(
      shuffled_positions, tf.range(num_ground_truth, batch_size))

  picked_ground_truth = tf.gather(ground_truth_x, ground_truth_idx)
  picked_generated = tf.gather(generated_x, generated_idx)

  # Put every picked example back at its original batch position.
  output = tf.dynamic_stitch([ground_truth_idx, generated_idx],
                             [picked_ground_truth, picked_generated])
  # Record the static batch dimension when it is a plain Python int.
  if isinstance(batch_size, int):
    trailing_dims = common_layers.shape_list(output)[1:]
    output.set_shape([batch_size] + trailing_dims)
  return output
Exemplo n.º 12
0
def filterbank_matrices(g_x, g_y, delta, sigma, N, A, B):
    ''' Compute filter bank matrices. All inputs are in batches.

    Args:
        g_x, g_y: grid centers, relative to the center of the image
        delta: strides
        sigma: isotropic variance
        N: grid dimension
        A, B: input image dimensions, width and height
    Returns:
        F_x, F_y: filter banks matrices [batch, N, A] and [batch, N, B]
    '''

    grid = tf.reshape(tf.cast(tf.range(N), tf.float32), [1, -1])

    # eq 19 and eq 20: filter centres along x and y
    mu_x = g_x + (grid - N / 2 - 0.5) * delta
    mu_y = g_y + (grid - N / 2 - 0.5) * delta

    # pixel coordinates along each image axis
    pixels_a = tf.reshape(tf.cast(tf.range(A), tf.float32), [1, 1, -1])
    pixels_b = tf.reshape(tf.cast(tf.range(B), tf.float32), [1, 1, -1])

    # reshape for broadcasting against the pixel coordinates
    mu_x = tf.reshape(mu_x, [-1, N, 1])
    mu_y = tf.reshape(mu_y, [-1, N, 1])
    sigma = tf.reshape(sigma, [-1, 1, 1])

    # unnormalised responses exp(-((pixel - mu) / sigma)^2)
    F_x = tf.exp(-tf.square((pixels_a - mu_x) / sigma))
    F_y = tf.exp(-tf.square((pixels_b - mu_y) / sigma))

    return F_x, F_y
  def _get_values_from_start_and_end(self, input_tensor, num_start_samples,
                                     num_end_samples, total_num_samples):
    """Picks the leading and trailing entries of input_tensor.

    Args:
      input_tensor: An int32 tensor of shape [N] to be sliced.
      num_start_samples: Number of examples to be sliced from the beginning
        of the input tensor.
      num_end_samples: Number of examples to be sliced from the end of the
        input tensor.
      total_num_samples: Sum of num_start_samples and num_end_samples. This
        should be a scalar.

    Returns:
      A tensor containing the first num_start_samples and last num_end_samples
      from input_tensor.

    """
    input_length = tf.shape(input_tensor)[0]
    positions = tf.range(input_length)
    in_head = tf.less(positions, num_start_samples)
    in_tail = tf.greater_equal(positions, input_length - num_end_samples)
    selected = tf.cast(tf.logical_or(in_head, in_tail), tf.int32)
    # Selected positions get consecutive 1-based slots; all others get 0.
    slots = tf.multiply(tf.cumsum(selected), selected)
    # After the shift, unselected positions carry index -1.
    # NOTE(review): this relies on tf.one_hot emitting an all-zero row for
    # out-of-range indices - confirm against the TensorFlow version in use.
    one_hot_selector = tf.one_hot(slots - 1,
                                  total_num_samples,
                                  dtype=tf.int32)
    return tf.tensordot(input_tensor, one_hot_selector, axes=[0, 0])
Exemplo n.º 14
0
  def testPadTensorToBatchSize(self):
    """Exercises pad_tensor_to_batch_size on 0-d, 1-d and 2-d tensors."""
    pad = dataset_ops.pad_tensor_to_batch_size
    with self.test_session():
      # A 0-dimensional Tensor cannot be padded.
      scalar = tf.constant(1)
      with self.assertRaises(ValueError):
        pad(scalar, 10)

      # 1-dimensional Tensor whose un-padded batch size is 5.
      vector = tf.range(5, dtype=tf.int32)
      self.assertEqual([5], vector.shape)
      self.assertAllEqual([0, 1, 2, 3, 4], vector.eval())

      # Padding to the current size leaves the tensor unchanged.
      vector_pad5 = pad(vector, 5)
      self.assertEqual([5], vector_pad5.shape)
      self.assertAllEqual([0, 1, 2, 3, 4], vector_pad5.eval())

      # Padding to 8 appends three zeros.
      vector_pad8 = pad(vector, 8)
      self.assertEqual([8], vector_pad8.shape)
      self.assertAllEqual([0, 1, 2, 3, 4, 0, 0, 0], vector_pad8.eval())

      # 2-dimensional Tensor whose un-padded batch size is 3.
      matrix = tf.reshape(tf.range(9, dtype=tf.int32), [3, 3])
      self.assertEqual([3, 3], matrix.shape)
      self.assertAllEqual([[0, 1, 2], [3, 4, 5], [6, 7, 8]], matrix.eval())

      matrix_pad3 = pad(matrix, 3)
      self.assertEqual([3, 3], matrix_pad3.shape)
      self.assertAllEqual([[0, 1, 2], [3, 4, 5], [6, 7, 8]],
                          matrix_pad3.eval())

      # Padding to 4 appends one row of zeros.
      matrix_pad4 = pad(matrix, 4)
      self.assertEqual([4, 3], matrix_pad4.shape)
      self.assertAllEqual([[0, 1, 2], [3, 4, 5], [6, 7, 8], [0, 0, 0]],
                          matrix_pad4.eval())
Exemplo n.º 15
0
def reorder_beam(beam_size, batch_size, beam_val, output, is_first,
                 tensors_to_reorder):
  """Reorder to minimize beam costs.

  Args:
    beam_size: number of beams kept per batch element.
    batch_size: number of batch elements.
    beam_val: [batch_size x beam_size] tensor of running beam values.
    output: tensor of size b x out_size, with b = batch_size * beam_size.
    is_first: if true, only the first beam is expanded.
    tensors_to_reorder: tensors that are gathered in the same way as output.

  Returns:
    A tuple (top_beam, reordered output, top_out_idx, reordered tensors):
    the top-k combined values, `output` gathered for the selected beams, the
    output indices chosen for each selected beam, and every tensor of
    `tensors_to_reorder` gathered in the same way.
  """
  # beam_val is [batch_size x beam_size]; let b = batch_size * beam_size
  # decided is len x b x a x b
  # output is b x out_size; step is b x len x a x b;
  outputs = tf.split(axis=0, num_or_size_splits=beam_size, value=tf.nn.log_softmax(output))
  all_beam_vals, all_beam_idx = [], []
  beam_range = 1 if is_first else beam_size
  for i in range(beam_range):
    top_out, top_out_idx = tf.nn.top_k(outputs[i], k=beam_size)
    cur_beam_val = beam_val[:, i]
    top_out = tf.Print(top_out, [top_out, top_out_idx, beam_val, i,
                                 cur_beam_val], "GREPO", summarize=8)
    # Candidate value = value of the expanded beam + log-probability.
    all_beam_vals.append(top_out + tf.expand_dims(cur_beam_val, 1))
    all_beam_idx.append(top_out_idx)
  all_beam_idx = tf.reshape(tf.transpose(tf.concat(axis=1, values=all_beam_idx), [1, 0]),
                            [-1])
  top_beam, top_beam_idx = tf.nn.top_k(tf.concat(axis=1, values=all_beam_vals), k=beam_size)
  top_beam_idx = tf.Print(top_beam_idx, [top_beam, top_beam_idx],
                          "GREP", summarize=8)
  reordered = [[] for _ in range(len(tensors_to_reorder) + 1)]
  top_out_idx = []
  for i in range(beam_size):
    which_idx = top_beam_idx[:, i] * batch_size + tf.range(batch_size)
    top_out_idx.append(tf.gather(all_beam_idx, which_idx))
    # Explicit floor division: the beam index must stay an integer tensor
    # regardless of the Python version's meaning of "/".
    which_beam = top_beam_idx[:, i] // beam_size  # [batch]
    which_beam = which_beam * batch_size + tf.range(batch_size)
    reordered[0].append(tf.gather(output, which_beam))
    # A separate index name avoids clobbering the outer loop variable.
    for j, t in enumerate(tensors_to_reorder):
      reordered[j + 1].append(tf.gather(t, which_beam))
  new_tensors = [tf.concat(axis=0, values=t) for t in reordered]
  top_out_idx = tf.concat(axis=0, values=top_out_idx)
  return (top_beam, new_tensors[0], top_out_idx, new_tensors[1:])
Exemplo n.º 16
0
def tile_anchors(grid_height,
                 grid_width,
                 scales,
                 aspect_ratios,
                 base_anchor_size,
                 anchor_stride,
                 anchor_offset):
  """Tiles a basis set of anchor boxes over a strided grid in image space.

  A "basis" collection of boxes is described by pairing scales[i] with
  aspect_ratios[i]; for example scales=[.1, .2, .2] with
  aspect_ratios=[2, 2, 1/2] describes three boxes.  Each basis box is
  multiplied by "base_anchor_size" and centered on every grid point.

  The grid points are determined by grid_height and grid_width together with
  the anchor_stride and anchor_offset parameters.

  Args:
    grid_height: size of the grid in the y direction (int or int scalar tensor)
    grid_width: size of the grid in the x direction (int or int scalar tensor)
    scales: a 1-d  (float) tensor representing the scale of each box in the
      basis set.
    aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
      box in the basis set.  The length of the scales and aspect_ratios tensors
      must be equal.
    base_anchor_size: base anchor size as [height, width]
      (float tensor of shape [2])
    anchor_stride: difference in centers between base anchors for adjacent grid
                   positions (float tensor of shape [2])
    anchor_offset: center of the anchor with scale and aspect ratio 1 for the
                   upper left element of the grid, this should be zero for
                   feature networks with only VALID padding and even receptive
                   field size, but may need some additional calculation if other
                   padding is used (float tensor of shape [2])
  Returns:
    a BoxList holding a collection of N anchor boxes
  """
  # Height and width of every basis box.
  ratio_sqrts = tf.sqrt(aspect_ratios)
  heights = scales / ratio_sqrts * base_anchor_size[0]
  widths = scales * ratio_sqrts * base_anchor_size[1]

  # Centers of the grid cells along each axis, then as a 2-d grid.
  row_centers = tf.to_float(tf.range(grid_height))
  row_centers = row_centers * anchor_stride[0] + anchor_offset[0]
  col_centers = tf.to_float(tf.range(grid_width))
  col_centers = col_centers * anchor_stride[1] + anchor_offset[1]
  col_centers, row_centers = ops.meshgrid(col_centers, row_centers)

  # Pair every basis box with every grid center.
  widths_grid, x_centers_grid = ops.meshgrid(widths, col_centers)
  heights_grid, y_centers_grid = ops.meshgrid(heights, row_centers)

  # Flatten to one (y, x) center and one (height, width) size per anchor.
  centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
  sizes = tf.stack([heights_grid, widths_grid], axis=3)
  centers = tf.reshape(centers, [-1, 2])
  sizes = tf.reshape(sizes, [-1, 2])

  corners = _center_size_bbox_to_corners_bbox(centers, sizes)
  return box_list.BoxList(corners)
Exemplo n.º 17
0
def translate(U, theta, out_height, out_width):
    """Place every image of U on a larger canvas at a per-image offset.

    Args:
        U: image batch; its last three static dimensions are read as
            (height, width, channels).
        theta: per-image offsets, split along axis 1 into (dx, dy). dx is
            clipped to [0, out_height - height] and dy to
            [0, out_width - width] before being cast to int32.
        out_height: height of the output canvas.
        out_width: width of the output canvas.

    Returns:
        Tensor with static shape [None, out_height, out_width, channels],
        built with tf.sparse_to_dense from the shifted pixel positions.

    NOTE(review): this relies on the legacy argument orders of tf.split and
    tf.concat, on tf.pack, and on a `repeat` helper defined elsewhere -
    presumably it repeats each element in place; confirm.
    """
    num_batch = tf.shape(U)[0]
    height, width, num_ch = U.get_shape()[1:]
    height = height.value
    width = width.value
    num_ch = num_ch.value
    # Number of values in a single image.
    hwc = height*width*num_ch

    # Base index ramps for batch, row (x), column (y) and channel.
    # Note that x runs along the height axis and y along the width axis.
    nind = tf.range(num_batch)
    x = repeat(tf.range(height), width)
    y = tf.tile(tf.range(width), tf.pack([height]))
    cind = tf.range(num_ch)

    # Expand every ramp to one column entry per value of U.
    nind = tf.expand_dims(repeat(nind, hwc), 1)
    x = tf.tile(tf.expand_dims(repeat(x, num_ch), 1), tf.pack([num_batch,1]))
    y = tf.tile(tf.expand_dims(repeat(y, num_ch), 1), tf.pack([num_batch,1]))
    cind = tf.tile(tf.expand_dims(cind, 1), tf.pack([num_batch*height*width,1]))

    # Per-image offsets, clipped so the image stays inside the canvas, then
    # repeated for every value of that image.
    dx, dy = tf.split(1, 2, theta)
    dx = tf.cast(tf.clip_by_value(dx, 0, out_height-height), 'int32')
    dx = tf.reshape(tf.tile(dx, tf.pack([1,hwc])), [-1,1])
    dy = tf.cast(tf.clip_by_value(dy, 0, out_width-width), 'int32')
    dy = tf.reshape(tf.tile(dy, tf.pack([1,hwc])), [-1,1])
    x = x + dx
    y = y + dy

    # One (batch, row, column, channel) index per value of U.
    tind = tf.concat(1, [nind, x, y, cind])
    val = tf.reshape(U, [-1])
    T = tf.sparse_to_dense(tind,
            tf.pack([num_batch, out_height, out_width, num_ch]),
            val)
    # Restore the static shape information.
    T.set_shape([None, out_height, out_width, num_ch])
    return T
Exemplo n.º 18
0
def decode(detection_feat, feat_sizes=(13, 13), num_classes=80,
           anchors=None):
    """Decode the raw detection feature into boxes, objectness and classes.

    Args:
        detection_feat: detection feature map; reshaped to
            [-1, H * W, num_anchors, num_classes + 5].
        feat_sizes: (H, W) size of the final feature map in grid cells.
        num_classes: number of object classes.
        anchors: sequence of (width, height) anchor priors. Required, even
            though it has a default.

    Returns:
        bboxes: [-1, H * W, num_anchors, 4] boxes as
            (x_min, y_min, x_max, y_max), divided by the grid size.
        obj_probs: [-1, H * W, num_anchors] objectness after a sigmoid.
        class_probs: [-1, H * W, num_anchors, num_classes] class scores
            after a softmax.
    """
    H, W = feat_sizes  # size of the final feature map, e.g. 13 x 13 cells
    num_anchors = len(anchors)  # number of boxes predicted per grid cell
    detection_results = tf.reshape(detection_feat, [-1, H * W, num_anchors,
                                                    num_classes + 5])

    # Box-centre offset relative to the top-left corner of its cell,
    # expressed as a fraction of the cell size.
    bbox_xy = tf.nn.sigmoid(detection_results[:, :, :, 0:2])
    # Box size as a multiplier of the anchor size.
    bbox_wh = tf.exp(detection_results[:, :, :, 2:4])
    # Objectness score.
    obj_probs = tf.nn.sigmoid(detection_results[:, :, :, 4])
    class_probs = tf.nn.softmax(detection_results[:, :, :, 5:])

    anchors = tf.constant(anchors, dtype=tf.float32)

    height_ind = tf.range(H, dtype=tf.float32)
    width_ind = tf.range(W, dtype=tf.float32)
    # tf.meshgrid(x, y) yields grids of shape [len(y), len(x)], so the width
    # ramp must come first to get [H, W] grids that match the row-major
    # H * W layout above. The previous argument order only gave correct
    # offsets for square feature maps.
    x_offset, y_offset = tf.meshgrid(width_ind, height_ind)
    x_offset = tf.reshape(x_offset, [1, -1, 1])
    y_offset = tf.reshape(y_offset, [1, -1, 1])

    # decode
    bbox_x = (bbox_xy[:, :, :, 0] + x_offset) / W
    bbox_y = (bbox_xy[:, :, :, 1] + y_offset) / H
    # Half extents, so that centre -/+ extent gives the box corners.
    bbox_w = bbox_wh[:, :, :, 0] * anchors[:, 0] / W * 0.5
    bbox_h = bbox_wh[:, :, :, 1] * anchors[:, 1] / H * 0.5

    bboxes = tf.stack([bbox_x - bbox_w, bbox_y - bbox_h,
                       bbox_x + bbox_w, bbox_y + bbox_h], axis=3)

    return bboxes, obj_probs, class_probs
Exemplo n.º 19
0
  def default_exchange_proposed_fn_(num_replica, seed=None):
    """Default function for `exchange_proposed_fn` of `kernel`.

    Proposes exchanges between consecutive replica indices. `probs` is read
    from the enclosing scope and compared with a uniform draw to decide
    whether the proposal is kept.

    Args:
      num_replica: number of replicas; converted with `tf.to_int32`.
      seed: optional seed from which the seeds of the random draws are
        derived.

    Returns:
      exchange_proposed: tensor reshaped to `(num_replica // 2, 2)` holding
        the proposed index pairs, or zeros when the proposal is not kept.
      exchange_proposed_n: number of proposed pairs, or zero when the
        proposal is not kept.
    """
    num_replica = tf.to_int32(num_replica)

    # First draw: decides whether any exchange is proposed at all.
    seed = distributions_util.gen_new_seed(seed, 'default_exchange_proposed_fn')
    random_uniform = tf.random_uniform([], seed=seed)
    accept_proposed_exchange = random_uniform < probs

    # Second draw: decides whether the pairing starts at index 0 or index 1.
    seed = distributions_util.gen_new_seed(seed, 'default_exchange_proposed_fn')
    zero_start = tf.random_uniform([], seed=seed) > 0.5
    # NOTE(review): num_replica is a tensor at this point, so this Python
    # `if` depends on how the comparison evaluates for the execution mode in
    # use - confirm that the intended branch is taken outside eager mode.
    if num_replica % 2 == 0:
      # Starting at 0 pairs every index; starting at 1 leaves out the first
      # and the last index, and the unused slots are filled by
      # sparse_to_dense.
      exchange_proposed = tf.where(
          zero_start, tf.range(num_replica),
          tf.sparse_to_dense(tf.range(num_replica - 2), (num_replica,),
                             tf.range(1, num_replica - 1)))
      exchange_proposed_n = tf.where(zero_start, num_replica // 2,
                                     num_replica // 2 - 1)
    else:
      # Either start leaves exactly one index unpaired.
      exchange_proposed = tf.where(
          zero_start, tf.range(num_replica - 1), tf.range(1, num_replica))
      exchange_proposed_n = num_replica // 2

    # Consecutive entries form the proposed pairs.
    exchange_proposed = tf.reshape(exchange_proposed, (num_replica // 2, 2))
    # Zero out both outputs when the proposal was not kept.
    exchange_proposed = tf.where(accept_proposed_exchange, exchange_proposed,
                                 tf.zeros_like(exchange_proposed))
    exchange_proposed_n = tf.where(accept_proposed_exchange,
                                   exchange_proposed_n,
                                   tf.zeros_like(exchange_proposed_n))
    return exchange_proposed, exchange_proposed_n
Exemplo n.º 20
0
    def _partition_and_stitch(self, args, func_name):
        """
        Apply self.likelihoods.<func_name> row-wise according to an index.

        args is a list of tensors; args[-1] is the 'Y' argument, whose last
        column contains the index of the likelihood to use for each row.

        The index column is stripped from Y (args is updated in place), every
        argument is split with dynamic_partition, the relevant function is
        called on each likelihood, and the results are re-combined in the
        original row order.
        """
        # separate the index column from the data columns of Y
        Y = args[-1]
        Y_t = tf.transpose(Y)
        last_col = tf.shape(Y)[1] - 1
        ind = tf.cast(tf.gather(Y_t, last_col), tf.int32)  # Y[:, -1]
        args[-1] = tf.transpose(tf.gather(Y_t, tf.range(0, last_col)))  # Y[:, :-1]

        # split every argument into one chunk per likelihood, then regroup
        # so that each entry holds all the arguments of one likelihood
        chunks = [tf.dynamic_partition(X, ind, self.num_likelihoods) for X in args]
        per_likelihood_args = zip(*chunks)

        # call the requested function of each likelihood on its chunk
        funcs = [getattr(lik, func_name) for lik in self.likelihood_list]
        results = [f(*args_i) for f, args_i in zip(funcs, per_likelihood_args)]

        # stitch the results back into the original row order
        row_ids = tf.range(0, tf.size(ind))
        partitions = tf.dynamic_partition(row_ids, ind, self.num_likelihoods)
        return tf.dynamic_stitch(partitions, results)
Exemplo n.º 21
0
def inference_pooling_L2norm_choose_filter(images, kheight=2, kwidth=5):
    """Pool EEG channels, take the square root and max-pool along axis 1.

    The input is split with ``split_eeg.split_eeg_signal_axes`` along
    ``split_dim=1`` and pooled by ``concat_eeg.pool_eeg_signal_channel`` down
    to half the number of splits. The pooled tensor then goes through
    ``tf.sqrt`` followed by a 2x1 max-pool with stride 2 and 'VALID' padding.

    NOTE(review): despite the "L2norm" in the name, no squaring is applied to
    the tensor that is returned. The earlier version built square/avg-pool ops
    into a separate variable that never reached the output; they are dropped
    here.

    Args:
        images: input tensor handed to ``split_eeg.split_eeg_signal_axes``.
        kheight: unused; kept for interface compatibility.
        kwidth: unused; kept for interface compatibility.

    Returns:
        The pooled tensor after the square root and max-pool.
    """
    # channel domain pooling mapper
    split_dim = 1   # 1 represents split on spatial domain
    input_image_list = split_eeg.split_eeg_signal_axes(images,
                                                       split_dim=split_dim)
    input_image_length = len(input_image_list)

    # The pooling mapper should choose half size of the image size. Use floor
    # division so the count stays an int on Python 3 as well as Python 2.
    pool_s, _ = concat_eeg.pool_eeg_signal_channel(input_image_list, input_image_length // 2, 1)
    _print_tensor_size(pool_s)

    pool_s = tf.sqrt(pool_s)

    pool_s = tf.nn.max_pool(pool_s, ksize=[1, 2, 1, 1],
                             strides=[1, 2, 1, 1], padding='VALID')

    _print_tensor_size(pool_s)

    return pool_s
Exemplo n.º 22
0
        def loop(step_, beams_, beam_value_, golden_value_, golden_inside_, step_valid_, g_id_, golden_record, beam_record):
            """Run one step of the tracked search: score the golden path and the beams.

            The arguments are the loop variables; the returned list has the
            same order, with ``step_`` advanced by one. ``x``, ``golden_path``,
            ``build``, ``length`` and ``index_of_tensor`` come from the
            enclosing scope.

            NOTE(review): uses the pre-1.0 TensorFlow argument order for
            ``tf.concat`` and the old names ``tf.unpack`` / ``tf.select``.
            """
            # Features of the current step and the golden path prefix before it.
            cur_feat_x_ = tf.gather(x, step_)
            cur_golden_path_ = tf.gather(golden_path, tf.range(step_))
            cur_golden_feat_ = self._add_tag_dynamic(cur_feat_x_, cur_golden_path_)
            # cur_golden_output_ = self._build_cnn(cur_golden_feat_)
            cur_golden_output_ = build(cur_golden_feat_)
            # Pick entry [0, golden_path[step_]] of the golden output and
            # accumulate it into the running golden value.
            cur_golden_node_ = tf.gather(golden_path, tf.reshape(step_, [1]))
            golden_value_ = tf.add(golden_value_,
                                  tf.slice(cur_golden_output_, tf.concat(0, [[0], cur_golden_node_]), [1, 1]))

            # Build features for each of the self.beam_size beams and stack
            # them along axis 0 before scoring them in one call.
            cur_beam_ = tf.unpack(beams_, num=self.beam_size)
            cur_beam_feat_ = tf.concat(0, [self._add_tag_dynamic(cur_feat_x_, tf.reshape(e, [-1])) for e in cur_beam_])
            # cur_beam_output_ = self._build_cnn(cur_beam_feat_)
            cur_beam_output_ = build(cur_beam_feat_)

            # Store this step's outputs at index step_ of the two records.
            golden_record = golden_record.write(step_, cur_golden_output_)
            beam_record = beam_record.write(step_, cur_beam_output_)

            beam_value_, beams_ = self._top_beams_new(cur_beam_output_, beam_value_, beams_)
            # Look the extended golden path (including this step) up in the new beams.
            new_golden_path_ = tf.gather(golden_path, tf.range(step_ + 1))
            # golden_beam_id_ = index_of_tensor(new_golden_path_, beams_)
            g_id_ = index_of_tensor(new_golden_path_, beams_)
            # False when index_of_tensor returned nothing (first dimension < 1).
            golden_inside_ = tf.select(tf.less(tf.shape(g_id_)[0], 1),
                                       tf.constant(False, tf.bool), tf.constant(True, tf.bool))

            # The next step is valid only while it is below both `length` and
            # self.max_step_tracked.
            step_valid_ = tf.logical_and(tf.less(step_+1, length), tf.less(step_+1, self.max_step_tracked))
            return [step_ + 1, beams_, beam_value_, golden_value_, golden_inside_, step_valid_, g_id_, golden_record, beam_record]
    def unpool_layer2x2_batch(self, bottom, argmax):
        """Scatter `bottom` into a tensor with doubled height and width.

        The output shape is [batch, 2 * height, 2 * width, channels] of
        `bottom`. Each value of `bottom` is written at the location given by
        `argmax` after it has been unravelled by ``self.unravel_argmax``.

        NOTE(review): `argmax` is presumably the flat index output of a 2x2
        max-pool with argmax over the un-pooled tensor; confirm against the
        caller. ``tf.concat(4, ...)`` uses the pre-1.0 TensorFlow argument
        order.
        """
        bottom_shape = tf.shape(bottom)
        top_shape = [bottom_shape[0], bottom_shape[1] * 2, bottom_shape[2] * 2, bottom_shape[3]]

        batch_size = top_shape[0]
        height = top_shape[1]
        width = top_shape[2]
        channels = top_shape[3]

        argmax_shape = tf.to_int64([batch_size, height, width, channels])
        argmax = self.unravel_argmax(argmax, argmax_shape)

        # t1: channel index for every element, arranged as
        # [batch, channels, height // 2, width // 2, 1].
        t1 = tf.to_int64(tf.range(channels))
        t1 = tf.tile(t1, [batch_size * (width // 2) * (height // 2)])
        t1 = tf.reshape(t1, [-1, channels])
        t1 = tf.transpose(t1, perm=[1, 0])
        t1 = tf.reshape(t1, [channels, batch_size, height // 2, width // 2, 1])
        t1 = tf.transpose(t1, perm=[1, 0, 2, 3, 4])

        # t2: batch index for every element, in the same layout as t1.
        t2 = tf.to_int64(tf.range(batch_size))
        t2 = tf.tile(t2, [channels * (width // 2) * (height // 2)])
        t2 = tf.reshape(t2, [-1, batch_size])
        t2 = tf.transpose(t2, perm=[1, 0])
        t2 = tf.reshape(t2, [batch_size, channels, height // 2, width // 2, 1])

        # Move the leading axis of the unravelled argmax to the end so it can
        # be concatenated with t2 and t1 along the last axis.
        # assumes unravel_argmax returns the coordinates on axis 0 -- TODO confirm
        t3 = tf.transpose(argmax, perm=[1, 4, 2, 3, 0])

        # One 4-component index per element: (batch, <argmax coords>, channel).
        t = tf.concat(4, [t2, t3, t1])
        indices = tf.reshape(t, [(height // 2) * (width // 2) * channels * batch_size, 4])

        # Flatten the values in [batch, channels, height, width] order, the
        # same layout used for the indices above.
        x1 = tf.transpose(bottom, perm=[0, 3, 1, 2])
        values = tf.reshape(x1, [-1])
        return tf.scatter_nd(indices, values, tf.to_int64(top_shape))
Exemplo n.º 24
0
def get_position_encoding(
    length, hidden_size, min_timescale=1.0, max_timescale=1.0e4):
  """Return positional encoding.

  Calculates the position encoding as a mix of sine and cosine functions with
  geometrically increasing wavelengths.
  Defined and formulized in Attention is All You Need, section 3.5.

  Args:
    length: Sequence length.
    hidden_size: Size of the encoding's last dimension. The first
      `hidden_size // 2` channels hold sines and the next `hidden_size // 2`
      hold cosines.
    min_timescale: Minimum scale that will be applied at each position
    max_timescale: Maximum scale that will be applied at each position

  Returns:
    Tensor with shape [length, 2 * (hidden_size // 2)], which is
    [length, hidden_size] when hidden_size is even.
  """
  position = tf.to_float(tf.range(length))
  num_timescales = hidden_size // 2
  # With a single timescale the denominator `num_timescales - 1` is zero,
  # which makes the increment infinite and the encoding NaN. Clamp it to 1;
  # this leaves the result unchanged whenever num_timescales >= 2.
  log_timescale_increment = (
      math.log(float(max_timescale) / float(min_timescale)) /
      tf.maximum(tf.to_float(num_timescales) - 1, 1.0))
  inv_timescales = min_timescale * tf.exp(
      tf.to_float(tf.range(num_timescales)) * -log_timescale_increment)
  # Outer product: one row per position, one column per timescale.
  scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
  signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
  return signal
Exemplo n.º 25
0
def generateShiftedAnchors(anchors, feature_h, feature_w, feature_stride):
    """Replicate the base anchors at every cell of the feature map.

    Each feature-map cell (row y, column x) gets the offset
    (x, y, x, y) * feature_stride, which maps the cell back to image
    coordinates. That offset is added to every base anchor, so the result
    broadcasts to [len(anchors), feature_h, feature_w, 4].

    Args:
        anchors: base anchors, one 4-vector per anchor.
        feature_h: feature map height.
        feature_w: feature map width.
        feature_stride: image pixels per feature-map cell.

    Returns:
        The shifted anchors, as a tensor named "shifted_anchors".
    """
    # Multiplying by the stride goes from feature-map cells back to image
    # coordinates (the reciprocal of the image -> feature scaling).
    stride = tf.constant(feature_stride, dtype=tf.float32)

    xs = tf.to_float(tf.range(0, feature_w))
    ys = tf.to_float(tf.range(0, feature_h))
    no_x = tf.zeros([feature_w])
    no_y = tf.zeros([feature_h])

    # Column shifts are (x, 0, x, 0); row shifts are (0, y, 0, y).
    col_shifts = tf.reshape(
        tf.stack([xs, no_x, xs, no_x], axis=1), (1, 1, feature_w, 4))
    row_shifts = tf.reshape(
        tf.stack([no_y, ys, no_y, ys], axis=1), (1, feature_h, 1, 4))

    # Broadcasting the two gives one (x, y, x, y) shift per cell, which is
    # then scaled to image coordinates.
    image_shifts = stride * tf.add(col_shifts, row_shifts)

    # Two singleton axes let the anchors broadcast over rows and columns.
    base_anchors = tf.expand_dims(
        tf.expand_dims(tf.constant(anchors), axis=1), axis=1)
    return tf.add(image_shifts, base_anchors, name="shifted_anchors")
Exemplo n.º 26
0
def roll_sequence(tensor, offsets):
  """Rolls every sequence of a batch along the time axis.

  Position ``t`` of the output for batch entry ``b`` is read from position
  ``(t - offsets[b]) mod time`` of the input.

  Args:
    tensor: A ``tf.Tensor`` of shape ``[batch_size, time, ...]``.
    offsets : The offset of each sequence.

  Returns:
    A ``tf.Tensor`` of the same shape as :obj:`tensor` with sequences shifted
    by :obj:`offsets`.
  """
  dims = tf.shape(tensor)
  batch_size = dims[0]
  time = dims[1]

  # Source time index for every (batch, time) cell, wrapped around the end.
  time_ids = tf.reshape(
      tf.tile(tf.range(time), [batch_size]), [batch_size, time])
  time_ids = tf.mod(time_ids - tf.expand_dims(offsets, 1), time)

  # Batch index for every (batch, time) cell.
  batch_ids = tf.reshape(
      tf.tile(tf.range(batch_size), [time]), [time, batch_size])
  batch_ids = tf.transpose(batch_ids, perm=[1, 0])

  # Pair them up as (batch, time) coordinates for gather_nd.
  coordinates = tf.concat(
      [tf.expand_dims(batch_ids, -1), tf.expand_dims(time_ids, -1)], -1)
  return tf.gather_nd(tensor, coordinates)
Exemplo n.º 27
0
def _do_maximum_mean(samples, envelope, high, name=None):
  """Shared implementation behind maximum_mean and minimum_mean."""
  with tf.name_scope(name, "do_maximum_mean", [samples, envelope, high]):
    dtype = dtype_util.common_dtype([samples, envelope, high], tf.float32)
    samples = tf.convert_to_tensor(samples, name="samples", dtype=dtype)
    envelope = tf.convert_to_tensor(envelope, name="envelope", dtype=dtype)
    high = tf.convert_to_tensor(high, name="high", dtype=dtype)

    # Rotate the leading (sample) axis to the last position, which is where
    # _batch_sort_vector expects it, then sort.
    rank = tf.rank(samples)
    rotate_left = tf.concat([tf.range(1, rank), [0]], axis=0)
    sorted_samples = _batch_sort_vector(tf.transpose(samples, rotate_left))

    # Each of the m samples starts with weight 1/m. An `envelope` amount of
    # probability mass is removed from the smallest samples and placed on
    # `high` instead:
    # - samples whose cumulative weight stays below `envelope` get weight 0,
    # - the sample where the cumulative weight crosses `envelope` keeps only
    #   the part above it,
    # - every later sample keeps its full 1/m,
    # - and `envelope * high` is added at the end.
    # Clipping (cumulative weight - envelope) to [0, 1/m] yields exactly
    # those per-sample weights, vectorized over the batch.
    num_samples = tf.cast(tf.shape(sorted_samples)[-1], dtype=dtype)
    per_sample = 1. / num_samples
    cumulative = per_sample * tf.range(1, num_samples + 1, dtype=dtype)
    weights = tf.clip_by_value(
        cumulative - envelope[..., tf.newaxis],
        clip_value_min=0.,
        clip_value_max=per_sample)
    weighted_sum = tf.reduce_sum(sorted_samples * weights, axis=-1)
    return weighted_sum + envelope * high
Exemplo n.º 28
0
def parse_sequence_to_pairs_batch(
    serialized_example, preprocess_fn, is_training, num_views, batch_size,
    window):
  """Builds a batch of (anchor, positive) image pairs from one sequence.

  Args:
    serialized_example: A serialized SequenceExample.
    preprocess_fn: A function with the signature (raw_images, is_training) ->
      preprocessed_images.
    is_training: Boolean, whether or not we're in training.
    num_views: Int, the number of simultaneous viewpoints at each timestep in
      the dataset.
    batch_size: Int, size of the batch to get; `batch_size // 2` pairs are
      produced.
    window: Int, only take pairs from a maximum window of this size.
  Returns:
    A 7-tuple `(anchor_prepro, positive_prepro, anchor_images, pos_images,
    anchor_labels, positive_labels, seq_len)`:
      anchor_prepro, positive_prepro: the two halves of the output of
        `preprocess_fn` applied to the concatenated [anchor, positive] batch.
      anchor_images, pos_images: the decoded, un-preprocessed images.
      anchor_labels, positive_labels: `tf.range(1, num_pairs + 1)` each, so a
        pair shares one label.
      seq_len: as returned by `parse_sequence_example`.
  """
  _, views, seq_len = parse_sequence_example(serialized_example, num_views)

  # Draw random timestep and viewpoint indices for each pair; anchor and
  # positive share the timestep and differ in the view index.
  num_pairs = batch_size // 2
  ap_time_indices, a_view_indices, p_view_indices = get_tcn_anchor_pos_indices(
      seq_len, num_views, num_pairs, window)

  def _gather_and_decode(view_indices):
    """Looks up the (view, time) image strings and decodes them."""
    view_time = tf.concat(
        [tf.expand_dims(view_indices, 1),
         tf.expand_dims(ap_time_indices, 1)], 1)
    encoded = tf.gather_nd(views, view_time)
    return tf.map_fn(preprocessing.decode_image, encoded, dtype=tf.float32)

  anchor_images = _gather_and_decode(a_view_indices)
  pos_images = _gather_and_decode(p_view_indices)

  # Preprocess anchors and positives together, then split them apart again.
  both = tf.concat([anchor_images, pos_images], 0)
  anchor_prepro, positive_prepro = tf.split(
      preprocess_fn(both, is_training), num_or_size_splits=2, axis=0)

  # Give every image tensor a static batch dimension of num_pairs.
  anchor_prepro, positive_prepro, anchor_images, pos_images = [
      set_image_tensor_batch_dim(images, num_pairs)
      for images in (anchor_prepro, positive_prepro, anchor_images,
                     pos_images)]

  # An anchor and its positive carry the same label.
  anchor_labels = tf.range(1, num_pairs+1)
  positive_labels = tf.range(1, num_pairs+1)

  return (anchor_prepro, positive_prepro, anchor_images, pos_images,
          anchor_labels, positive_labels, seq_len)
Exemplo n.º 29
0
    def compute_states(self,emb,idx_batch=0):
        """Compute hidden states for the leaves and internal nodes of one tree.

        Leaf states come from ``self.process_leafs``; a ``tf.while_loop`` then
        appends one hidden/cell row per internal node, each computed from the
        rows of its children listed in ``self.treestr``.

        Args:
            emb: embeddings; ``emb[idx_batch]`` is gathered and its first
                ``num_leaves`` rows are used.
            idx_batch: index of the example within the batch.

        Returns:
            node_h: leaf hidden states followed by one row per internal node.

        NOTE(review): ``tf.split(dim, num, value)`` and ``tf.concat(dim, values)``
        use the pre-1.0 TensorFlow argument order.
        """


        # Per-example sizes and the slices of the inputs that are actually used.
        num_leaves = tf.squeeze(tf.gather(self.num_leaves,idx_batch))
        #num_leaves=tf.Print(num_leaves,[num_leaves])
        n_inodes = tf.gather(self.n_inodes,idx_batch)
        #embx=tf.gather(emb,tf.range(num_leaves))
        embx=tf.gather(tf.gather(emb,idx_batch),tf.range(num_leaves))
        #treestr=self.treestr#tf.gather(self.treestr,tf.range(self.n_inodes))
        treestr=tf.gather(tf.gather(self.treestr,idx_batch),tf.range(n_inodes))
        # Leaf output is split in two halves along axis 1: hidden and cell.
        leaf_hc = self.process_leafs(embx)
        leaf_h,leaf_c=tf.split(1,2,leaf_hc)


        node_h=tf.identity(leaf_h)
        node_c=tf.identity(leaf_c)

        # Loop counter over internal nodes.
        idx_var=tf.constant(0) #tf.Variable(0,trainable=False)

        # reuse=True: the variables must already exist in scope "Composition".
        with tf.variable_scope("Composition",reuse=True):

            cW = tf.get_variable("cW",[self.degree*self.hidden_dim,(self.degree+3)*self.hidden_dim])
            cb = tf.get_variable("cb",[4*self.hidden_dim])
            bu,bo,bi,bf=tf.split(0,4,cb)

            def _recurrence(node_h,node_c,idx_var):
                """Compute and append the state of internal node `idx_var`."""
                # Row idx_var of treestr indexes the node's children in node_h/node_c.
                node_info=tf.gather(treestr,idx_var)

                child_h=tf.gather(node_h,node_info)
                child_c=tf.gather(node_c,node_info)

                # Flatten the children's hidden states into one row for the matmul.
                flat_ = tf.reshape(child_h,[-1])
                tmp=tf.matmul(tf.expand_dims(flat_,0),cW)
                # Five equal parts: update, output, input and two forget gates.
                # NOTE(review): a split into 5 matches cW's (degree+3) columns
                # only when degree == 2 -- confirm.
                u,o,i,fl,fr=tf.split(1,5,tmp)

                i=tf.nn.sigmoid(i+bi)
                o=tf.nn.sigmoid(o+bo)
                u=tf.nn.tanh(u+bu)
                # Both forget gates share the same bias.
                fl=tf.nn.sigmoid(fl+bf)
                fr=tf.nn.sigmoid(fr+bf)

                f=tf.concat(0,[fl,fr])
                c = i * u + tf.reduce_sum(f*child_c,[0])
                h = o * tf.nn.tanh(c)

                # Append the new node so later nodes can refer to it as a child.
                node_h = tf.concat(0,[node_h,h])

                node_c = tf.concat(0,[node_c,c])

                idx_var=tf.add(idx_var,1)

                return node_h,node_c,idx_var
            # Continue until every internal node has been processed.
            loop_cond = lambda a1,b1,idx_var: tf.less(idx_var,n_inodes)

            loop_vars=[node_h,node_c,idx_var]
            node_h,node_c,idx_var=tf.while_loop(loop_cond, _recurrence,
                                                loop_vars,parallel_iterations=10)

            return node_h
 def _2dMeshGrid(self, height, width):
     # Builds two flattened index vectors of shape [1, height * width],
     # enumerating the grid row by row. For height=4, width=3:
     #   height_grid: 0 0 0 1 1 1 2 2 2 3 3 3
     #   width_grid:  0 1 2 0 1 2 0 1 2 0 1 2
     # Returns (width_grid, height_grid), both converted to float.
     column_ids = tf.reshape(tf.range(0, width), [1, -1])
     width_grid = tf.tile(column_ids, [1, height])

     row_ids = tf.reshape(tf.range(0, height), [-1, 1])
     height_grid = tf.reshape(tf.tile(row_ids, [1, width]), [1, -1])

     return tf.to_float(width_grid), tf.to_float(height_grid)
def selective_crop_and_resize(features,
                              boxes,
                              box_levels,
                              boundaries,
                              output_size=7,
                              sample_offset=0.5,
                              use_einsum_gather=False):
    """Crop and resize boxes on a set of feature maps.

    Given multiple feature maps indexed by different levels, and a set of
    boxes where each box is mapped to a certain level, it selectively crops
    and resizes boxes from the corresponding feature maps to generate the box
    features.

    We follow the ROIAlign technique (see https://arxiv.org/pdf/1703.06870.pdf,
    figure 3 for reference). Specifically, for each feature map, we select an
    (output_size, output_size) set of pixels corresponding to the box location,
    and then use bilinear interpolation to select the feature value for each
    pixel.

    For performance, we perform the gather and interpolation on all layers as
    a single operation. In this op the multi-level features are first stacked
    and gathered into [2*output_size, 2*output_size] feature points. Then
    bilinear interpolation is performed on the gathered feature points to
    generate [output_size, output_size] RoIAlign feature map.

    Here is the step-by-step algorithm:
      1. The multi-level features are gathered into a
         [batch_size, num_boxes, output_size*2, output_size*2, num_filters]
         Tensor. The Tensor contains four neighboring feature points for each
         vertex in the output grid.
      2. Compute the interpolation kernel of shape
         [batch_size, num_boxes, output_size*2, output_size*2]. The last 2
         axes can be seen as stacking 2x2 interpolation kernels for all
         vertices in the output grid.
      3. Element-wise multiply the gathered features and interpolation kernel.
         Then apply 2x2 average pooling to reduce spatial dimension to
         output_size.

    Args:
      features: a 5-D tensor of shape [batch_size, num_levels, max_height,
        max_width, num_filters] where cropping and resizing are based. Its
        static shape must be fully known, since it is read with
        `get_shape().as_list()`.
      boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the
        information of each box w.r.t. the corresponding feature map.
        boxes[:, :, 0:2] are the grid position in (y, x) (float) of the
        top-left corner of each box. boxes[:, :, 2:4] are the box sizes in
        (h, w) (float) in terms of the number of pixels of the corresponding
        feature map size.
      box_levels: a 3-D tensor of shape [batch_size, num_boxes, 1]
        representing the 0-based corresponding feature level index of each
        box. Only used when use_einsum_gather is False.
      boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2]
        representing the boundary (in (y, x)) of the corresponding feature map
        for each box. Any resampled grid points that go beyond the boundary
        will be clipped.
      output_size: a scalar indicating the output crop size.
      sample_offset: a float number in [0, 1] indicates the subpixel sample
        offset from grid point.
      use_einsum_gather: use einsum instead of gather or not.
        NOTE(review): per the original note, einsum can improve performance
        when the feature size is not large and is friendly with model
        partition, while gather performs better when the feature size is very
        large and there are multiple box levels; this is not verifiable from
        this function alone.

    Returns:
      features_per_box: a 5-D tensor of shape
        [batch_size, num_boxes, output_size, output_size, num_filters]
        representing the cropped features.
    """
    (batch_size, num_levels, max_feature_height, max_feature_width,
     num_filters) = features.get_shape().as_list()
    _, num_boxes, _ = boxes.get_shape().as_list()

    # Interpolation kernels plus the two neighboring grid coordinates per
    # output sample along each axis.
    kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = compute_grid_positions(
        boxes, boundaries, output_size, sample_offset)
    # Flatten the coordinate pairs to output_size * 2 integer indices per box.
    x_indices = tf.cast(tf.reshape(box_gridx0x1,
                                   [batch_size, num_boxes, output_size * 2]),
                        dtype=tf.int32)
    y_indices = tf.cast(tf.reshape(box_gridy0y1,
                                   [batch_size, num_boxes, output_size * 2]),
                        dtype=tf.int32)

    if use_einsum_gather:
        # Bilinear interpolation is done during the last two gathers:
        #        f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T
        #                              [f10, f11]]
        #        [[f00, f01],
        #         [f10, f11]] = tf.einsum(tf.einsum(features, y_one_hot), x_one_hot)
        #       where [hy, ly] and [hx, lx] are the bilinear interpolation kernel.

        # shape is [batch_size, boxes, output_size, 2, 1]
        grid_y_one_hot, grid_x_one_hot = get_grid_one_hot(
            box_gridy0y1, box_gridx0x1, max_feature_height, max_feature_width)

        # shape is [batch_size, num_boxes, output_size, height]
        grid_y_weight = tf.reduce_sum(tf.multiply(grid_y_one_hot, kernel_y),
                                      axis=-2)
        # shape is [batch_size, num_boxes, output_size, width]
        grid_x_weight = tf.reduce_sum(tf.multiply(grid_x_one_hot, kernel_x),
                                      axis=-2)

        # Gather for y_axis.
        # shape is [batch_size, num_boxes, output_size, width, features]
        # NOTE(review): the einsum shares index `m` between axis 1 of
        # `features` and the box axis of the weights, and box_levels is not
        # used on this path -- confirm what layout callers pass here.
        features_per_box = tf.einsum('bmhwf,bmoh->bmowf', features,
                                     tf.cast(grid_y_weight, features.dtype))
        # Gather for x_axis.
        # shape is [batch_size, num_boxes, output_size, output_size, features]
        features_per_box = tf.einsum('bmhwf,bmow->bmhof', features_per_box,
                                     tf.cast(grid_x_weight, features.dtype))
    else:
        # Strides of the flattened [batch, level, height, width] index space:
        # moving one step along an axis advances the flat index by this much.
        height_dim_offset = max_feature_width
        level_dim_offset = max_feature_height * height_dim_offset
        batch_dim_offset = num_levels * level_dim_offset

        # Each offset below is tiled to the common shape
        # [batch_size, num_boxes, output_size * 2, output_size * 2].
        batch_size_offset = tf.tile(
            tf.reshape(
                tf.range(batch_size) * batch_dim_offset,
                [batch_size, 1, 1, 1]),
            [1, num_boxes, output_size * 2, output_size * 2])
        box_levels_offset = tf.tile(
            tf.reshape(box_levels * level_dim_offset,
                       [batch_size, num_boxes, 1, 1]),
            [1, 1, output_size * 2, output_size * 2])
        y_indices_offset = tf.tile(
            tf.reshape(y_indices * height_dim_offset,
                       [batch_size, num_boxes, output_size * 2, 1]),
            [1, 1, 1, output_size * 2])
        x_indices_offset = tf.tile(
            tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]),
            [1, 1, output_size * 2, 1])

        # Flat index = batch * batch_stride + level * level_stride
        #              + y * width + x, one per gathered feature point.
        indices = tf.reshape(
            batch_size_offset + box_levels_offset + y_indices_offset +
            x_indices_offset, [-1])

        # Flatten every axis but the filters so one gather fetches all points.
        features = tf.reshape(features, [-1, num_filters])
        # TODO(wangtao): replace tf.gather with tf.gather_nd and try to get similar
        # performance.
        features_per_box = tf.reshape(tf.gather(features, indices), [
            batch_size, num_boxes, output_size * 2, output_size * 2,
            num_filters
        ])
        features_per_box = feature_bilinear_interpolation(
            features_per_box, kernel_y, kernel_x)

    return features_per_box
Exemplo n.º 32
0
    def __init__(self, demb, **kwargs):
        """Store the inverse frequencies 1 / 10000**(k / demb) for k = 0, 2, 4, ... < demb.

        Args:
            demb: upper limit of the exponent range and the divisor that
                normalizes each exponent.
            **kwargs: forwarded unchanged to the parent constructor.
        """
        super(TFPositionalEmbedding, self).__init__(**kwargs)

        # Even steps 0, 2, 4, ... below demb, scaled by 1 / demb.
        exponents = tf.range(0, demb, 2.0) / demb
        self.inv_freq = 1 / (10000**exponents)
Exemplo n.º 33
0
print(tf.fill([2, 2], 0))  # similar to tf.zeros([2, 2])
print(tf.fill([3, 3], 9))

print("----------")

# tf.random.normal(): normal distribution
print(tf.random.normal([3, 3], mean=1, stddev=1))  # mean: the mean, stddev: the standard deviation
print(tf.random.normal([3, 3]))  # defaults: mean = 0, stddev = 1

# Use a truncated normal distribution to help avoid vanishing gradients
print(tf.random.truncated_normal([3, 3]))  # values more than two standard deviations from the mean are discarded

print("----------")

# tf.random.uniform(): uniform distribution
print(tf.random.uniform([2, 2], minval=0, maxval=1))  # uniform over [0, 1)
print(tf.random.uniform([2, 2], minval=0, maxval=100, dtype=tf.int32))  # uniform over [0, 100)
print(tf.random.uniform([2, 2]))  # defaults to uniform over [0, 1)

print("----------")

# Small application: random shuffling
# Shuffle the row indices 0..9, then use them to reorder the rows of `a`.
idx = tf.range(10)
idx = tf.random.shuffle(idx)
print(idx)

a = tf.random.uniform([10, 2], maxval=10, dtype=tf.int32)
print(a)
print(tf.gather(a, idx))
Exemplo n.º 34
0
    def _estimate_poses_batched(
            self, images, boxes, intrinsic_matrix, distortion_coeffs, extrinsic_matrix,
            world_up_vector, default_fov_degrees, internal_batch_size, antialias_factor, num_aug,
            average_aug, skeleton, suppress_implausible_poses):
        """Estimate 3D and 2D poses for the given boxes of a batch of images.

        Camera parameters are broadcast to one entry per box, test-time
        augmentation parameters are set up, poses are predicted via
        ``self._predict_in_batches``, projected to 2D, optionally filtered,
        transformed with the inverse extrinsic matrix, and optionally
        converted to another skeleton and averaged over augmentations.

        Args:
            images: batch of images; axis 0 is the image index and axes 1:3
                are passed as the image size when deriving intrinsics.
            boxes: ragged tensor of boxes per image (``row_lengths()`` is used).
            intrinsic_matrix: one matrix per image, or a single one (leading
                dimension 1) that is repeated for all images. A single matrix
                filled with -1 means "not given" and is replaced by one
                derived from ``default_fov_degrees``.
            distortion_coeffs: one per image, or a single one to be repeated.
            extrinsic_matrix: one per image, or a single one to be repeated.
            world_up_vector: up direction, rotated into camera space with the
                upper-left 3x3 part of the extrinsic matrix.
            default_fov_degrees: field of view used when no intrinsics are given.
            internal_batch_size: forwarded to ``self._predict_in_batches``.
            antialias_factor: forwarded to ``self._predict_in_batches``.
            num_aug: number of test-time augmentations.
            average_aug: if true, average the results over the augmentation axis.
            skeleton: if not the empty string, results are converted with
                ``self._get_skeleton``.
            suppress_implausible_poses: if true, keep only the poses selected
                by ``self._filter_poses``.

        Returns:
            dict with keys ``boxes``, ``poses3d`` and ``poses2d``.
        """
        # Special case when zero boxes are provided or found
        # (i.e., all images images without person detections)
        # This must be explicitly handled, else the shapes don't work out automatically
        # for the TensorArray in _predict_in_batches.
        if tf.size(boxes) == 0:
            return self._predict_empty(images, num_aug, average_aug)

        n_images = tf.shape(images)[0]
        # If one intrinsic matrix is given, repeat it for all images
        if tf.shape(intrinsic_matrix)[0] == 1:
            # If intrinsic_matrix is not given, fill it in based on field of view
            if tf.reduce_all(intrinsic_matrix == -1):
                intrinsic_matrix = intrinsic_matrix_from_field_of_view(
                    default_fov_degrees, tf.shape(images)[1:3])
            intrinsic_matrix = tf.repeat(intrinsic_matrix, n_images, axis=0)

        # If one distortion coeff/extrinsic matrix is given, repeat it for all images
        if tf.shape(distortion_coeffs)[0] == 1:
            distortion_coeffs = tf.repeat(distortion_coeffs, n_images, axis=0)
        if tf.shape(extrinsic_matrix)[0] == 1:
            extrinsic_matrix = tf.repeat(extrinsic_matrix, n_images, axis=0)

        # Now repeat these camera params for each box
        n_box_per_image = boxes.row_lengths()
        intrinsic_matrix = tf.repeat(intrinsic_matrix, n_box_per_image, axis=0)
        distortion_coeffs = tf.repeat(distortion_coeffs, n_box_per_image, axis=0)

        # Up-vector in camera-space
        camspace_up = tf.einsum('c,bCc->bC', world_up_vector, extrinsic_matrix[..., :3, :3])
        camspace_up = tf.repeat(camspace_up, n_box_per_image, axis=0)

        # Set up the test-time augmentation parameters
        aug_gammas = tf.cast(tf.linspace(0.6, 1.0, num_aug), tf.float32)
        aug_angle_range = np.float32(np.deg2rad(FLAGS.rot_aug))
        if FLAGS.rot_aug_linspace_noend:
            aug_angles = linspace_noend(-aug_angle_range, aug_angle_range, num_aug)
        else:
            aug_angles = tf.linspace(-aug_angle_range, aug_angle_range, num_aug)
        # First half of the scales spans 0.8 up to (excluding) 1.0, the rest 1.0 to 1.1.
        aug_scales = tf.concat([
            linspace_noend(0.8, 1.0, num_aug // 2),
            tf.linspace(1.0, 1.1, num_aug - num_aug // 2)], axis=0)
        # Flip every other augmentation; index num_aug // 2 is not flipped.
        aug_should_flip = (tf.range(num_aug) - num_aug // 2) % 2 != 0
        # Negates the x axis.
        aug_flipmat = tf.constant([[-1, 0, 0], [0, 1, 0], [0, 0, 1]], np.float32)
        aug_maybe_flipmat = tf.where(
            aug_should_flip[:, np.newaxis, np.newaxis], aug_flipmat, tf.eye(3))
        aug_rotmat = rotation_mat_zaxis(-aug_angles)
        aug_rotflipmat = aug_maybe_flipmat @ aug_rotmat

        # crops_flat, poses3dcam_flat = self._predict_in_batches(
        poses3d_flat = self._predict_in_batches(
            images, intrinsic_matrix, distortion_coeffs, camspace_up, boxes, internal_batch_size,
            aug_should_flip, aug_rotflipmat, aug_gammas, aug_scales, antialias_factor)

        # Project the 3D poses to get the 2D poses
        poses2d_flat_normalized = to_homogeneous(
            distort_points(project(poses3d_flat), distortion_coeffs))
        # Apply the first two rows of the intrinsic matrix to get image coordinates.
        poses2d_flat = tf.einsum('bank,bjk->banj', poses2d_flat_normalized,
                                 intrinsic_matrix[..., :2, :])
        poses2d_flat = tf.ensure_shape(poses2d_flat, [None, None, self.joint_info.n_joints, 2])

        # Arrange the results back into ragged tensors
        poses3d = tf.RaggedTensor.from_row_lengths(poses3d_flat, n_box_per_image)
        poses2d = tf.RaggedTensor.from_row_lengths(poses2d_flat, n_box_per_image)
        # crops = tf.RaggedTensor.from_row_lengths(crops_flat, n_box_per_image)

        if suppress_implausible_poses:
            # Filter the resulting poses for individual plausibility to reduce false positives
            selected_indices = self._filter_poses(boxes, poses3d, poses2d)
            boxes, poses3d, poses2d = [
                tf.gather(x, selected_indices, batch_dims=1)
                for x in [boxes, poses3d, poses2d]]
            # crops = tf.gather(crops, selected_indices, batch_dims=1)

        # Convert to world coordinates
        # extrinsic_matrix is still per image here; invert it and repeat it
        # once per remaining pose of each image.
        extrinsic_matrix = tf.repeat(tf.linalg.inv(extrinsic_matrix), poses3d.row_lengths(), axis=0)
        poses3d = tf.RaggedTensor.from_row_lengths(
            tf.einsum(
                'bank,bjk->banj', to_homogeneous(poses3d.flat_values),
                extrinsic_matrix[..., :3, :]),
            poses3d.row_lengths())

        if skeleton != '':
            poses3d = self._get_skeleton(poses3d, skeleton)
            poses2d = self._get_skeleton(poses2d, skeleton)

        if average_aug:
            # Axis -3 is the augmentation axis ('a' in the einsums above).
            poses3d = tf.reduce_mean(poses3d, axis=-3)
            poses2d = tf.reduce_mean(poses2d, axis=-3)

        result = dict(boxes=boxes, poses3d=poses3d, poses2d=poses2d)
        # result['crops'] = crops
        return result
Exemplo n.º 35
0
    def call(self,
             inputs,
             mems=None,
             head_mask=None,
             inputs_embeds=None,
             training=False):
        """Run the layers over the inputs and return the outputs plus updated memories.

        Args:
            inputs: either the input ids directly, a tuple/list of up to four
                items ``(input_ids, mems, head_mask, inputs_embeds)``, or a
                dict with the keys ``input_ids``, ``mems``, ``head_mask`` and
                ``inputs_embeds``.
            mems: memories from previous segments; initialized with
                ``self.init_mems`` when None.
            head_mask: must be None; any other value raises
                NotImplementedError.
            inputs_embeds: pre-computed embeddings, as an alternative to
                ``input_ids``.
            training: forwarded to dropout and to the layers.

        Returns:
            A list: last hidden state of shape [bsz, len, hidden_dim], the new
            memories, then all hidden states if ``self.output_hidden_states``,
            then all attentions if ``self.output_attentions``.

        Raises:
            ValueError: if both or neither of ``input_ids`` and
                ``inputs_embeds`` are given.
            NotImplementedError: if ``head_mask`` is given or
                ``self.attn_type`` is not 0.
        """
        # Unpack the inputs; tuple/list entries and dict entries take
        # precedence over the keyword arguments.
        if isinstance(inputs, (tuple, list)):
            input_ids = inputs[0]
            mems = inputs[1] if len(inputs) > 1 else mems
            head_mask = inputs[2] if len(inputs) > 2 else head_mask
            inputs_embeds = inputs[3] if len(inputs) > 3 else inputs_embeds
            assert len(inputs) <= 4, "Too many inputs."
        elif isinstance(inputs, dict):
            input_ids = inputs.get('input_ids')
            mems = inputs.get('mems', mems)
            head_mask = inputs.get('head_mask', head_mask)
            inputs_embeds = inputs.get('inputs_embeds', inputs_embeds)
            assert len(inputs) <= 4, "Too many inputs."
        else:
            input_ids = inputs

        # the original code for Transformer-XL used shapes [len, bsz] but we want a unified interface in the library
        # so we transpose here from shape [bsz, len] to shape [len, bsz]
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time"
            )
        elif input_ids is not None:
            input_ids = tf.transpose(input_ids, perm=(1, 0))
            qlen, bsz = shape_list(input_ids)
        elif inputs_embeds is not None:
            inputs_embeds = tf.transpose(inputs_embeds, perm=(1, 0, 2))
            qlen, bsz = shape_list(inputs_embeds)[:2]
        else:
            raise ValueError(
                "You have to specify either input_ids or inputs_embeds")

        if mems is None:
            mems = self.init_mems(bsz)

        # Prepare head mask if needed
        # 1.0 in head_mask indicate we keep the head
        # attention_probs has shape bsz x n_heads x N x N
        # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads] (a head_mask for each layer)
        # and head_mask is converted to shape [num_hidden_layers x qlen x klen x bsz x n_head]
        if not head_mask is None:
            raise NotImplementedError
        else:
            head_mask = [None] * self.n_layer

        if inputs_embeds is not None:
            word_emb = inputs_embeds
        else:
            word_emb = self.word_emb(input_ids)

        # Memory length and total key length (memory + current segment).
        mlen = shape_list(mems[0])[0] if mems is not None else 0
        klen = mlen + qlen

        # Build the [qlen, klen] decoder mask: zeros over the memory columns,
        # followed by the strictly upper triangle (upper part minus diagonal)
        # over the current segment.
        attn_mask = tf.ones([qlen, qlen])
        mask_u = tf.linalg.band_part(attn_mask, 0, -1)
        mask_dia = tf.linalg.band_part(attn_mask, 0, 0)
        attn_mask_pad = tf.zeros([qlen, mlen])
        dec_attn_mask = tf.concat([attn_mask_pad, mask_u - mask_dia], 1)
        if self.same_length:
            # Additionally add the strictly lower triangle to the first qlen
            # columns of the mask.
            mask_l = tf.linalg.band_part(attn_mask, -1, 0)
            dec_attn_mask = tf.concat([
                dec_attn_mask[:, :qlen] + mask_l - mask_dia,
                dec_attn_mask[:, qlen:]
            ], 1)
        # ::: PyTorch masking code for reference :::
        # if self.same_length:
        #     all_ones = word_emb.new_ones((qlen, klen), dtype=torch.uint8)
        #     mask_len = klen - self.mem_len
        #     if mask_len > 0:
        #         mask_shift_len = qlen - mask_len
        #     else:
        #         mask_shift_len = qlen
        #     dec_attn_mask = (torch.triu(all_ones, 1+mlen)
        #             + torch.tril(all_ones, -mask_shift_len))[:, :, None] # -1
        # else:
        #     dec_attn_mask = torch.triu(
        #         word_emb.new_ones((qlen, klen), dtype=torch.uint8), diagonal=1+mlen)[:,:,None]

        hids = []
        attentions = []
        if self.attn_type == 0:  # default
            # Positions klen - 1 down to 0, as floats.
            pos_seq = tf.range(klen - 1, -1, -1.0)
            if self.clamp_len > 0:
                pos_seq = tf.minimum(pos_seq, self.clamp_len)
            pos_emb = self.pos_emb(pos_seq)

            core_out = self.drop(word_emb, training=training)
            pos_emb = self.drop(pos_emb, training=training)

            for i, layer in enumerate(self.layers):
                # Record the input of every layer; these feed the memory update below.
                hids.append(core_out)
                mems_i = None if mems is None else mems[i]
                layer_outputs = layer(
                    [core_out, pos_emb, dec_attn_mask, mems_i, head_mask[i]],
                    training=training)
                core_out = layer_outputs[0]
                if self.output_attentions:
                    attentions.append(layer_outputs[1])
        else:  # learnable embeddings and absolute embeddings
            raise NotImplementedError  # Removed these to avoid maintaining dead code - They are not used in our pretrained checkpoint

        core_out = self.drop(core_out, training=training)

        new_mems = self._update_mems(hids, mems, mlen, qlen)

        # We transpose back here to shape [bsz, len, hidden_dim]
        outputs = [tf.transpose(core_out, perm=(1, 0, 2)), new_mems]
        if self.output_hidden_states:
            # Add last layer and transpose to library standard shape [bsz, len, hidden_dim]
            hids.append(core_out)
            hids = list(tf.transpose(t, perm=(1, 0, 2)) for t in hids)
            outputs.append(hids)
        if self.output_attentions:
            # Transpose to library standard shape [bsz, n_heads, query_seq_len, key_seq_len]
            attentions = list(
                tf.transpose(t, perm=(2, 3, 0, 1)) for t in attentions)
            outputs.append(attentions)
        return outputs  # last hidden state, new_mems, (all hidden states), (all attentions)
Exemplo n.º 36
0
def project_2Dlm_to_3D(landmark1,landmark2,depth1,depth2,visibility1,visibility2,matK1,matK2,FLAGS,min_thresh=0.1,with_gtvis=True,with_pose=True):
    """Back-project the mutually usable landmarks of two views into 3D.

    Landmark pixel locations are read from the two heatmap stacks (soft
    arg-max when `with_pose` is set, `argmax_2d` otherwise), both depth maps
    are sampled at those locations, points that are not usable in both views
    are dropped, and the surviving points are handed to `pixel2cam`.

    Args:
        landmark1, landmark2: heatmap tensors whose static shape unpacks as
            [B, H, W, D]; D is the number of landmarks.
        depth1, depth2: depth maps sampled with (batch, coord, coord) index
            triples. The sampled values are later indexed with [:, :, 0], so
            a trailing channel axis is expected.
        visibility1, visibility2: per-landmark visibility; their static shape
            is forced to [B, D].
        matK1, matK2: forwarded unchanged to `pixel2cam` (presumably camera
            intrinsics -- confirm against `pixel2cam`).
        FLAGS: must expose `img_height` and `img_width`; only used to set the
            soft arg-max temperature.
        min_thresh: lower bound of the heatmap-response threshold applied
            when `with_gtvis` is False.
        with_gtvis: if True the visibility inputs provide the point weights,
            otherwise the weights are derived from the heatmap responses.
        with_pose: if True locate landmarks with `spatial_softmax`, otherwise
            with `argmax_2d`.

    Returns:
        Tuple `(gt_cam_coord, pred_cam_coord, usable_points)`: the
        `pixel2cam` outputs for view 1 and view 2, and a scalar int32 tensor
        counting the points that were kept.
    """

    # The static shape is required: B and D are used as Python ints below.
    B,H,W,D = landmark1.get_shape().as_list()#tf.shape(landmark1)

    visibility1.set_shape([B,D])
    visibility2.set_shape([B,D])
    #Soft arg-max operation
    #import pdb;pdb.set_trace()
    if with_pose:
        # Per-landmark scale factors (H, W) applied to the two soft arg-max
        # components.
        # NOTE(review): tf.contrib documents spatial_softmax output as
        # [x1, y1, ...]; here the first component is scaled by H and the
        # second by W, which only matches when H == W -- confirm.
        norm_to_regular = tf.concat([tf.ones([B,D,1])*H, tf.ones([B,D,1])*W],axis=2)
        # (out + 1) / 2 rescales the soft arg-max output before it is scaled
        # to pixels; the result is transposed to [B, 2, D] and the two
        # coordinate rows are swapped by the reverse on axis 1.
        lm1_coord = tf.reverse(tf.transpose(tf.reshape((tf.contrib.layers.spatial_softmax(landmark1,temperature=1.0/(FLAGS.img_height*FLAGS.img_width),trainable=False)+1)/2.0,[B,D,2])*norm_to_regular,[0,2,1]),[1])
        lm2_coord = tf.reverse(tf.transpose(tf.reshape((tf.contrib.layers.spatial_softmax(landmark2,temperature=1.0/(FLAGS.img_height*FLAGS.img_width),trainable=False)+1)/2.0,[B,D,2])*norm_to_regular,[0,2,1]),[1])

        gt_lm_coord = lm1_coord
        pred_lm_coord = lm2_coord
    else:
        # Hard arg-max path; argmax_2d is defined elsewhere in the project.
        pred_lm_coord = tf.reverse(argmax_2d(landmark2),[1])
        gt_lm_coord = tf.reverse(argmax_2d(landmark1),[1])

    #Extract depth value at landmark locations
    # batch_index[b, d] == b, so every landmark carries its batch id.
    batch_index = tf.tile(tf.expand_dims(tf.range(B), 1), [1, D])
    # Undo the row swap, truncate to int32, move to [B, D, 2] and prepend the
    # batch id: the result is a [B, D, 3] index for gather_nd.
    index_gt = tf.concat([tf.expand_dims(batch_index,axis=2), tf.transpose(tf.reverse(tf.to_int32(gt_lm_coord),[1]),[0,2,1])], axis=2)
    index_pred = tf.concat([tf.expand_dims(batch_index,axis=2), tf.transpose(tf.reverse(tf.to_int32(pred_lm_coord),[1]),[0,2,1])], axis=2)
    gt_depth_val = tf.gather_nd(depth1,index_gt)
    pred_depth_val = tf.gather_nd(depth2,index_pred)

    #Get mutually visible points
    if with_gtvis:
        # Product of the two clipped visibilities.
        lm3d_weights = tf.clip_by_value(visibility1,0.0,1.0)
        lm3d_weights = lm3d_weights*tf.clip_by_value(visibility2,0.0,1.0)
    else:
        # A 3-component index into the 4-D heatmaps returns every channel at
        # each landmark location: [B, D, D].
        lm1_val = tf.gather_nd(landmark1,index_gt)
        lm2_val = tf.gather_nd(landmark2,index_pred)

        # Keep only the diagonal, i.e. landmark ii's own channel sampled at
        # landmark ii's own location, built up one column at a time.
        lm1_val_sup = tf.expand_dims(lm1_val[:,0,0],axis=1)
        lm2_val_sup = tf.expand_dims(lm2_val[:,0,0],axis=1)
        for ii in range(1,D):
            lm1_val_sup = tf.concat([lm1_val_sup,tf.expand_dims(lm1_val[:,ii,ii],axis=1)],axis=1)
            lm2_val_sup = tf.concat([lm2_val_sup,tf.expand_dims(lm2_val[:,ii,ii],axis=1)],axis=1)

        # A landmark counts as visible when its response exceeds
        # max(global heatmap maximum / 5, min_thresh).
        pred_vis1 = tf.to_float(lm1_val_sup>(tf.maximum(tf.reduce_max(landmark1)/5.0,min_thresh)))
        pred_vis2 = tf.to_float(lm2_val_sup>(tf.maximum(tf.reduce_max(landmark2)/5.0,min_thresh)))
        lm3d_weights = pred_vis1
        lm3d_weights = lm3d_weights*pred_vis2

    #import pdb;pdb.set_trace()
    #mutual invis and depth zero
    mutualdepth = gt_depth_val*pred_depth_val

    # A point is usable when the product of both depths exceeds 10.0 and its
    # weight is exactly 1.
    usable_points = tf.logical_and(
                            tf.greater(mutualdepth[:,:,0],tf.ones([],tf.float32)*10.0),
                            tf.equal(lm3d_weights[:],tf.ones([],tf.float32)))

    # Despite its name, zero_depth holds the (batch, landmark) indices of the
    # usable points; usable_points is then rebound to their count.
    zero_depth =  tf.where(usable_points)
    usable_points = tf.reduce_sum(tf.to_int32(usable_points))


    #zero_index = tf.tile(tf.expand_dims(tf.range(B), 1), [1, tf.shape(zero_depth)[1]])
    #zero_depth = tf.concat([tf.expand_dims(zero_index, axis=2), tf.cast(zero_depth,tf.int32)], axis=2)
    # Gather the usable points only; all of them are packed into a single
    # leading dimension of size 1 (expand_dims on axis 0).
    gt_depth_val = tf.expand_dims(tf.gather_nd(gt_depth_val,zero_depth),axis=0)
    gt_lm_coord = tf.transpose(tf.expand_dims(tf.gather_nd(tf.transpose(gt_lm_coord,[0,2,1]),zero_depth),axis=0),[0,2,1])
    pred_depth_val = tf.expand_dims(tf.gather_nd(pred_depth_val,zero_depth),axis=0)
    pred_lm_coord = tf.transpose(tf.expand_dims(tf.gather_nd(tf.transpose(pred_lm_coord,[0,2,1]),zero_depth),axis=0),[0,2,1])


    #Project 2D to 3D
    # Append a row of ones so each point has three components before it is
    # passed to pixel2cam.
    ones = tf.ones([1, 1, tf.shape(zero_depth)[0]])
    pred_lm_coord = tf.concat([tf.cast(pred_lm_coord,tf.float32),ones],axis=1)
    gt_lm_coord = tf.concat([tf.cast(gt_lm_coord,tf.float32),ones],axis=1)
    gt_cam_coord = pixel2cam(gt_depth_val,gt_lm_coord,matK1)
    pred_cam_coord = pixel2cam(pred_depth_val,pred_lm_coord,matK2)

    return gt_cam_coord,pred_cam_coord,usable_points
Exemplo n.º 37
0
 def arange(limit):
     """Return the float32 sequence 0, 1, ..., up to but excluding `limit`."""
     sequence = tf.range(start=0, limit=limit, dtype=tf.float32)
     return sequence
Exemplo n.º 38
0
def multilevel_crop_and_resize(features,
                               boxes,
                               output_size=7,
                               sample_offset=0.5):
    """Crop and resize on multilevel feature pyramid.

    Generate the (output_size, output_size) set of pixels for each input box
    by first locating the box into the correct feature level, and then
    cropping and resizing it using the corresponding feature map of that
    level.

    Args:
        features: A dictionary with key as pyramid level and value as
            features. The keys are looked up as `str(level)`. The features
            are in shape of [batch_size, height_l, width_l, num_filters];
            height_l and width_l must be statically known.
        boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row
            represents a box with [y1, x1, y2, x2] in un-normalized
            coordinates.
        output_size: A scalar to indicate the output crop size.
        sample_offset: a float number in [0, 1] indicates the subpixel sample
            offset from grid point.

    Returns:
        A 5-D tensor representing feature crop of shape
        [batch_size, num_boxes, output_size, output_size, num_filters].
    """

    with tf.name_scope('multilevel_crop_and_resize'):
        # Compare the level keys numerically: taking min()/max() of the raw
        # string keys is lexicographic and would rank '10' below '9'.
        level_ids = [int(level) for level in features.keys()]
        min_level = min(level_ids)
        max_level = max(level_ids)
        features_shape = tf.shape(features[str(min_level)])
        batch_size, max_feature_height, max_feature_width, num_filters = (
            features_shape[0], features_shape[1], features_shape[2],
            features_shape[3])

        num_boxes = tf.shape(boxes)[1]

        # Stack feature pyramid into a features_all of shape
        # [batch_size, levels, height, width, num_filters].
        features_all = []
        feature_heights = []
        feature_widths = []
        for level in range(min_level, max_level + 1):
            shape = features[str(level)].get_shape().as_list()
            feature_heights.append(shape[1])
            feature_widths.append(shape[2])
            # Concat tensor of [batch_size, height_l * width_l, num_filters]
            # for each levels.
            features_all.append(
                tf.reshape(features[str(level)],
                           [batch_size, -1, num_filters]))
        # One row per (batch, level, y, x) position so that a single flat
        # tf.gather can fetch any pixel of any level.
        features_r2 = tf.reshape(tf.concat(features_all, 1), [-1, num_filters])

        # Calculate height_l * width_l for each level.
        level_dim_sizes = [
            width * height
            for width, height in zip(feature_widths, feature_heights)
        ]
        # level_dim_offsets is accumulated sum of level_dim_size.
        level_dim_offsets = [0]
        for level_size in level_dim_sizes[:-1]:
            level_dim_offsets.append(level_dim_offsets[-1] + level_size)
        # Number of flattened positions that belong to one batch element.
        batch_dim_size = level_dim_offsets[-1] + level_dim_sizes[-1]
        level_dim_offsets = tf.constant(level_dim_offsets, tf.int32)
        # Row stride of each level in the flattened layout: one step in y
        # skips `width` positions, hence the widths are stored here.
        height_dim_sizes = tf.constant(feature_widths, tf.int32)

        # Assigns boxes to the right level.
        box_width = boxes[:, :, 3] - boxes[:, :, 1]
        box_height = boxes[:, :, 2] - boxes[:, :, 0]
        areas_sqrt = tf.sqrt(
            tf.cast(box_height, tf.float32) * tf.cast(box_width, tf.float32))

        # level = floor(log2(sqrt(area) / 224)) + 4
        levels = tf.cast(tf.math.floordiv(
            tf.math.log(tf.math.divide_no_nan(areas_sqrt, 224.0)),
            tf.math.log(2.0)) + 4.0,
                         dtype=tf.int32)
        # Maps levels between [min_level, max_level].
        levels = tf.minimum(max_level, tf.maximum(levels, min_level))

        # Projects box location and sizes to corresponding feature levels.
        scale_to_level = tf.cast(tf.pow(tf.constant(2.0),
                                        tf.cast(levels, tf.float32)),
                                 dtype=boxes.dtype)
        boxes /= tf.expand_dims(scale_to_level, axis=2)
        box_width /= scale_to_level
        box_height /= scale_to_level
        # From here on each box is [y1, x1, height, width] in level pixels.
        boxes = tf.concat([
            boxes[:, :, 0:2],
            tf.expand_dims(box_height, -1),
            tf.expand_dims(box_width, -1)
        ],
                          axis=-1)

        # Maps levels to [0, max_level-min_level].
        levels -= min_level
        level_strides = tf.pow([[2.0]], tf.cast(levels, tf.float32))
        # Largest valid (y, x) index of the level each box was assigned to,
        # derived from the size of the finest level.
        boundary = tf.cast(
            tf.concat([
                tf.expand_dims([[tf.cast(max_feature_height, tf.float32)]] /
                               level_strides - 1,
                               axis=-1),
                tf.expand_dims([[tf.cast(max_feature_width, tf.float32)]] /
                               level_strides - 1,
                               axis=-1),
            ],
                      axis=-1), boxes.dtype)

        # Compute grid positions.
        kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = _compute_grid_positions(
            boxes, boundary, output_size, sample_offset)

        x_indices = tf.cast(tf.reshape(
            box_gridx0x1, [batch_size, num_boxes, output_size * 2]),
                            dtype=tf.int32)
        y_indices = tf.cast(tf.reshape(
            box_gridy0y1, [batch_size, num_boxes, output_size * 2]),
                            dtype=tf.int32)

        # The flat index into features_r2 is
        #   batch * batch_dim_size + level_offset + y * width_l + x,
        # assembled below from four broadcast-by-tiling terms.
        batch_size_offset = tf.tile(
            tf.reshape(
                tf.range(batch_size) * batch_dim_size, [batch_size, 1, 1, 1]),
            [1, num_boxes, output_size * 2, output_size * 2])
        # Get level offset for each box. Each box belongs to one level.
        levels_offset = tf.tile(
            tf.reshape(tf.gather(level_dim_offsets, levels),
                       [batch_size, num_boxes, 1, 1]),
            [1, 1, output_size * 2, output_size * 2])
        y_indices_offset = tf.tile(
            tf.reshape(
                y_indices *
                tf.expand_dims(tf.gather(height_dim_sizes, levels), -1),
                [batch_size, num_boxes, output_size * 2, 1]),
            [1, 1, 1, output_size * 2])
        x_indices_offset = tf.tile(
            tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]),
            [1, 1, output_size * 2, 1])
        indices = tf.reshape(
            batch_size_offset + levels_offset + y_indices_offset +
            x_indices_offset, [-1])

        # TODO(wangtao): replace tf.gather with tf.gather_nd and try to get similar
        # performance.
        features_per_box = tf.reshape(tf.gather(features_r2, indices), [
            batch_size, num_boxes, output_size * 2, output_size * 2,
            num_filters
        ])

        # Bilinear interpolation.
        features_per_box = _feature_bilinear_interpolation(
            features_per_box, kernel_y, kernel_x)
        return features_per_box
Exemplo n.º 39
0
    def __init__(self, model_path="./64_weights/", batch_size=256):
        """Build the inference/training graph from weights stored on disk.

        Creates the input placeholders, restores the embedding matrix, the
        stacked mLSTM cell and the output projection from `.npy` files under
        `model_path`, wires up logits, loss and sampling ops, and finally
        evaluates the RNN zero states in a temporary session.

        Args:
            model_path: directory containing the `.npy` weight files
                ("embed_matrix:0.npy", "fully_connected_weights:0.npy",
                "fully_connected_biases:0.npy", plus whatever
                `mLSTMCellStackNPY` reads).
            batch_size: batch size baked into the zero state and into the
                reshape of the logits.
        """
        # Fixed hyper-parameters of the model.
        self._rnn_size = 64
        self._vocab_size = 26
        self._embed_dim = 10
        self._num_layers = 4
        self._wn = True
        self._shuffle_buffer = 10000
        self._model_path = model_path
        self._batch_size = batch_size
        # Not referenced again inside this constructor.
        self._batch_size_placeholder = tf.placeholder(tf.int32,
                                                      shape=[],
                                                      name="batch_size")
        self._minibatch_x_placeholder = tf.placeholder(tf.int32,
                                                       shape=[None, None],
                                                       name="minibatch_x")
        # Two tuples with one [batch, rnn_size] placeholder per layer
        # (presumably cell states and hidden states -- confirm against
        # mLSTMCellStackNPY).
        self._initial_state_placeholder = (
            tuple(
                tf.placeholder(tf.float32, shape=[None, self._rnn_size])
                for _ in range(self._num_layers)),
            tuple(
                tf.placeholder(tf.float32, shape=[None, self._rnn_size])
                for _ in range(self._num_layers)))
        self._minibatch_y_placeholder = tf.placeholder(tf.int32,
                                                       shape=[None, None],
                                                       name="minibatch_y")
        # Batch size dimensional placeholder which gives the
        # Lengths of the input sequence batch. Used to index into
        # The final_hidden output and select the stop codon -1
        # final hidden for the graph operation.
        self._seq_length_placeholder = tf.placeholder(tf.int32,
                                                      shape=[None],
                                                      name="seq_len")
        self._temp_placeholder = tf.placeholder(tf.float32,
                                                shape=[],
                                                name="temp")
        rnn = mLSTMCellStackNPY(num_units=self._rnn_size,
                                num_layers=self._num_layers,
                                model_path=model_path,
                                wn=self._wn)
        # Zero states for a full batch and for a single sequence; they are
        # evaluated to concrete values at the end of the constructor.
        zero_state = rnn.zero_state(self._batch_size, tf.float32)
        single_zero = rnn.zero_state(1, tf.float32)
        mask = tf.sign(
            self._minibatch_y_placeholder)  # 1 for nonpad, zero for pad
        inverse_mask = 1 - mask  # 0 for nonpad, 1 for pad

        # NOTE(review): total_padded is not used anywhere below.
        total_padded = tf.reduce_sum(inverse_mask)

        # Shift targets down by one; padded positions (0) would become -1,
        # so the inverse mask is added to bring them back to 0.
        pad_adjusted_targets = (self._minibatch_y_placeholder -
                                1) + inverse_mask

        embed_matrix = tf.get_variable("embed_matrix",
                                       dtype=tf.float32,
                                       initializer=np.load(
                                           os.path.join(
                                               self._model_path,
                                               "embed_matrix:0.npy")))
        embed_cell = tf.nn.embedding_lookup(embed_matrix,
                                            self._minibatch_x_placeholder)
        self._output, self._final_state = tf.nn.dynamic_rnn(
            rnn,
            embed_cell,
            initial_state=self._initial_state_placeholder,
            swap_memory=True,
            parallel_iterations=1)

        # If we are training a model on top of the rep model, we need to access
        # the final_hidden rep from output. Recall we are padding these sequences
        # to max length, so the -1 position will not necessarily be the right rep.
        # to get the right rep, I will use the provided sequence length to index.
        # Subtract one for the last place
        indices = self._seq_length_placeholder - 1
        # Pair every batch row with its last valid time step and gather it.
        self._top_final_hidden = tf.gather_nd(
            self._output,
            tf.stack([
                tf.range(tf_get_shape(self._output)[0], dtype=tf.int32),
                indices
            ],
                     axis=1))
        # LEFTOFF self._output is a batch size, seq_len, num_hidden.
        # I want to average along num_hidden, but I'll have to figure out how to mask out
        # the dimensions along sequence_length which are longer than the given sequence.
        # Flatten batch and time so the output projection is one dense layer.
        flat = tf.reshape(self._output, [-1, self._rnn_size])
        logits_flat = tf.contrib.layers.fully_connected(
            flat,
            self._vocab_size - 1,
            activation_fn=None,
            weights_initializer=tf.constant_initializer(
                np.load(
                    os.path.join(self._model_path,
                                 "fully_connected_weights:0.npy"))),
            biases_initializer=tf.constant_initializer(
                np.load(
                    os.path.join(self._model_path,
                                 "fully_connected_biases:0.npy"))))
        # NOTE(review): the reshape uses the constructor's batch_size, not the
        # runtime batch dimension, so the logits/loss path presumably requires
        # batches of exactly batch_size -- confirm against callers.
        self._logits = tf.reshape(logits_flat, [
            batch_size,
            tf_get_shape(self._minibatch_x_placeholder)[1],
            self._vocab_size - 1
        ])
        # Per-sequence loss with padded positions weighted by zero.
        batch_losses = tf.contrib.seq2seq.sequence_loss(
            self._logits,
            tf.cast(pad_adjusted_targets, tf.int32),
            tf.cast(mask, tf.float32),
            average_across_batch=False)
        self._loss = tf.reduce_mean(batch_losses)
        self._sample = sample_with_temp(self._logits, self._temp_placeholder)
        # Evaluate the zero states once in a throw-away session and keep the
        # values returned by sess.run.
        with tf.Session() as sess:
            self._zero_state = sess.run(zero_state)
            self._single_zero = sess.run(single_zero)
Exemplo n.º 40
0
def merge_boxes_with_multiple_labels(boxes,
                                     classes,
                                     confidences,
                                     num_classes,
                                     quantization_bins=10000):
  """Collapses duplicate boxes and K-hot encodes the classes they carry.

  Boxes are considered identical when all four coordinates fall into the same
  quantization bin. Each group of identical boxes is reduced to its first
  occurrence, and the classes/confidences of the whole group are scattered
  into dense vectors of length `num_classes`.

  Args:
    boxes: A tf.float32 tensor with shape [N, 4] holding N boxes. Only
      normalized coordinates are allowed.
    classes: A tf.int32 tensor with shape [N] holding class indices.
      The class index starts at 0.
    confidences: A tf.float32 tensor with shape [N] holding class confidences.
    num_classes: total number of classes to use for K-hot encoding.
    quantization_bins: the number of bins used to quantize the box coordinate.

  Returns:
    merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes,
      where N' <= N.
    class_encodings: A tf.int32 tensor with shape [N', num_classes] holding
      K-hot encodings for the merged boxes.
    confidence_encodings: A tf.float32 tensor with shape [N', num_classes]
      holding encodings of confidences for the merged boxes.
    merged_box_indices: A tf.int32 tensor with shape [N'] holding original
      indices of the boxes.
  """
  shape_of_boxes = tf.shape(boxes)
  shape_of_classes = tf.shape(classes)
  shape_of_confidences = tf.shape(confidences)
  # Input validation ops; everything below runs only after they pass.
  input_checks = [
      shape_utils.assert_shape_equal_along_first_dimension(
          shape_of_boxes, shape_of_classes),
      shape_utils.assert_shape_equal_along_first_dimension(
          shape_of_boxes, shape_of_confidences),
      tf.assert_equal(shape_of_boxes[1], 4),
      shape_utils.assert_box_normalized(boxes),
  ]

  with tf.control_dependencies(input_checks):
    # Pack the four quantized coordinates into one int64 key per box.
    binned = tf.to_int64(boxes * (quantization_bins - 1))
    ymin, xmin, ymax, xmax = tf.unstack(binned, axis=1)
    xmin_term = xmin * quantization_bins
    ymax_term = ymax * quantization_bins * quantization_bins
    xmax_term = (
        xmax * quantization_bins * quantization_bins * quantization_bins)
    hashcodes = ymin + xmin_term + ymax_term + xmax_term

    unique_hashcodes, unique_indices = tf.unique(hashcodes)
    num_boxes = tf.shape(boxes)[0]
    num_unique_boxes = tf.shape(unique_hashcodes)[0]
    # For every group keep the smallest original index, i.e. the first box.
    merged_box_indices = tf.unsorted_segment_min(
        tf.range(num_boxes), unique_indices, num_unique_boxes)
    merged_boxes = tf.gather(boxes, merged_box_indices)

    def encode_group(group_id):
      """Builds the class and confidence vectors of one group of boxes."""
      in_group = tf.reshape(
          tf.equal(unique_indices,
                   group_id * tf.ones(num_boxes, dtype=tf.int32)),
          [-1])
      group_classes = tf.boolean_mask(classes, in_group)
      group_confidences = tf.boolean_mask(confidences, in_group)
      k_hot = tf.sparse_to_dense(
          group_classes, [num_classes], 1, validate_indices=False)
      dense_confidences = tf.sparse_to_dense(
          group_classes, [num_classes], group_confidences,
          validate_indices=False)
      return k_hot, dense_confidences

    class_encodings, confidence_encodings = tf.map_fn(
        encode_group,
        tf.range(num_unique_boxes),
        back_prop=False,
        dtype=(tf.int32, tf.float32))

    # Pin the static ranks of the outputs.
    merged_boxes = tf.reshape(merged_boxes, [-1, 4])
    class_encodings = tf.reshape(class_encodings, [-1, num_classes])
    confidence_encodings = tf.reshape(confidence_encodings, [-1, num_classes])
    merged_box_indices = tf.reshape(merged_box_indices, [-1])
    return (merged_boxes, class_encodings, confidence_encodings,
            merged_box_indices)
Exemplo n.º 41
0
    def decode(self, x, conv_inputs1, features):
        """Run the step-by-step decoder loop and return its stacked outputs.

        For every dilated layer a queue (TensorArray) is seeded with the last
        `dilation` encoder activations, then a `tf.while_loop` produces one
        output per step by reusing the variables of the already-built
        `*-decode*` scopes (all scopes are opened with `reuse=True`, so the
        variables must exist before this method is called).

        Args:
            x: encoder input; the entry at time step `self.encode_len - 1` of
                every batch row is used as the decoder input.
            conv_inputs1: one tensor per dilated layer, indexed as
                [batch, time, channels], from which the queues are seeded.
            features: decoder-side features, indexed as [batch, time, dim];
                one time slice is read per decoding step.

        Returns:
            The emitted outputs stacked over time and transposed to
            [batch, time, output_dim].
        """
        batch_size = tf.shape(x)[0]

        # initialize state tensor arrays
        state_queues = []
        for conv_input, dilation in zip(conv_inputs1, self.dilations):
            # Flat (batch, time) index pairs selecting, for every batch row,
            # the `dilation` encoder steps that start at queue_begin_time.
            batch_idx = tf.range(batch_size)
            batch_idx = tf.tile(tf.expand_dims(batch_idx, 1), (1, dilation))
            batch_idx = tf.reshape(batch_idx, [-1])

            queue_begin_time = self.encode_len - dilation - 1
            temporal_idx = (tf.expand_dims(queue_begin_time, 1) +
                            tf.expand_dims(tf.range(dilation), 0))
            temporal_idx = tf.reshape(temporal_idx, [-1])

            idx = tf.stack([batch_idx, temporal_idx], axis=1)
            slices = tf.reshape(
                tf.gather_nd(conv_input, idx),
                (batch_size, dilation, shape(conv_input, 2)))

            # The queue is time-major; slots beyond `dilation` are written
            # while decoding.
            layer_ta = tf.TensorArray(
                dtype=tf.float32, size=dilation + self.decode_series_len)
            layer_ta = layer_ta.unstack(tf.transpose(slices, (1, 0, 2)))
            state_queues.append(layer_ta)

        # initialize feature tensor array
        features_ta = tf.TensorArray(dtype=tf.float32, size=self.decode_series_len)
        features_ta = features_ta.unstack(tf.transpose(features, (1, 0, 2)))

        # initialize output tensor array
        emit_ta = tf.TensorArray(size=self.decode_series_len, dtype=tf.float32)

        # initialize other loop vars
        elements_finished = 0 >= self.decode_len
        time = tf.constant(0, dtype=tf.int32)

        # get initial x input
        current_idx = tf.stack(
            [tf.range(tf.shape(self.encode_len)[0]), self.encode_len - 1],
            axis=1)
        initial_input = tf.gather_nd(x, current_idx)

        def loop_fn(time1, current_input, queues):
            """Compute one decoder step; returns (finished, output, queues)."""
            current_features = features_ta.read(time1)
            current_input = tf.concat([current_input, current_features], axis=1)

            with tf.variable_scope('x-proj-decode', reuse=True):
                w_x_proj = tf.get_variable('weights')
                b_x_proj = tf.get_variable('biases')
                x_proj = tf.nn.tanh(tf.matmul(current_input, w_x_proj) + b_x_proj)

            skip_outputs, updated_queues = [], []
            for i, (queue, dilation) in enumerate(zip(queues, self.dilations)):
                # Activation of this layer from `dilation` steps ago.
                state = queue.read(time1)
                with tf.variable_scope('dilated-conv-decode-{}'.format(i), reuse=True):
                    w_conv = tf.get_variable('weights')
                    b_conv = tf.get_variable('biases')
                    # Width-2 convolution written as two matmuls: tap 0 acts
                    # on the delayed state, tap 1 on the current input.
                    dilated_conv = (tf.matmul(state, w_conv[0, :, :]) +
                                    tf.matmul(x_proj, w_conv[1, :, :]) + b_conv)
                conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=1)
                dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

                with tf.variable_scope('dilated-conv-proj-decode-{}'.format(i), reuse=True):
                    w_proj = tf.get_variable('weights')
                    b_proj = tf.get_variable('biases')
                    concat_outputs = tf.matmul(dilated_conv, w_proj) + b_proj
                skips, residuals = tf.split(
                    concat_outputs,
                    [self.skip_channels, self.residual_channels],
                    axis=1)

                x_proj += residuals
                skip_outputs.append(skips)
                # Store the layer output where it will be read `dilation`
                # steps from now.
                updated_queues.append(queue.write(time1 + dilation, x_proj))

            skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=1))
            with tf.variable_scope('dense-decode-1', reuse=True):
                w_h = tf.get_variable('weights')
                b_h = tf.get_variable('biases')
                h = tf.nn.relu(tf.matmul(skip_outputs, w_h) + b_h)

            with tf.variable_scope('dense-decode-2', reuse=True):
                w_y = tf.get_variable('weights')
                b_y = tf.get_variable('biases')
                y_hat2 = tf.matmul(h, w_y) + b_y

            elements_finished2 = (time1 >= self.decode_len)
            finished = tf.reduce_all(elements_finished2)

            # Emit zeros once every sequence in the batch is finished.
            next_input = tf.cond(
                finished,
                lambda: tf.zeros([batch_size, 1], dtype=tf.float32),
                lambda: y_hat2
            )
            next_elements_finished = (time1 >= self.decode_len - 1)

            return next_elements_finished, next_input, updated_queues

        def condition(unused_time, elements_finished1, *_):
            """Keep looping while at least one sequence is unfinished."""
            return tf.logical_not(tf.reduce_all(elements_finished1))

        def body(time1, elements_finished1, emit_ta1, *state_queues1):
            """Advance the loop by one step and record the emitted output."""
            # NOTE(review): every step is fed `initial_input`; the previous
            # step's output is never passed back in. Confirm whether this is
            # intended before changing it, since trained weights may depend
            # on the current behavior.
            (next_finished, emit_output, state_queues2) = loop_fn(
                time1, initial_input, state_queues1)

            # Sequences that were already finished emit zeros.
            emit = tf.where(elements_finished1, tf.zeros_like(emit_output), emit_output)
            emit_ta2 = emit_ta1.write(time1, emit)

            #elements_finished2 = tf.logical_or(elements_finished1, next_finished)

            return [time1 + 1, next_finished, emit_ta2] + list(state_queues2)

        returned = tf.while_loop(
            cond=condition,
            body=body,
            loop_vars=[time, elements_finished, emit_ta] + state_queues
        )

        # returned = [time, finished, emit_ta, *queues]; stack the emitted
        # steps and go back to batch-major layout.
        outputs_ta = returned[2]
        y_hat = tf.transpose(outputs_ta.stack(), (1, 0, 2))

        return y_hat
Exemplo n.º 42
0
def _non_max_suppression_padded(scores, boxes, max_output_size, iou_threshold,
                                level):
    """A wrapper that handles non-maximum suppression.

    Assumption:
      * The boxes are sorted by scores unless the box is a dot (all
        coordinates are zero).
      * Boxes with higher scores can be used to suppress boxes with lower
        scores.

    The overall design of the algorithm is to handle boxes tile-by-tile:

    boxes = boxes.pad_to_multiply_of(tile_size)
    num_tiles = len(boxes) // tile_size
    output_boxes = []
    for i in range(num_tiles):
      box_tile = boxes[i*tile_size : (i+1)*tile_size]
      for j in range(i - 1):
        suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
        iou = _bbox_overlap(box_tile, suppressing_tile)
        # if the box is suppressed in iou, clear it to a dot
        box_tile *= _update_boxes(iou)
      # Iteratively handle the diagonal tile.
      iou = _box_overlap(box_tile, box_tile)
      iou_changed = True
      while iou_changed:
        # boxes that are not suppressed by anything else
        suppressing_boxes = _get_suppressing_boxes(iou)
        # boxes that are suppressed by suppressing_boxes
        suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
        # clear iou to 0 for boxes that are suppressed, as they cannot be used
        # to suppress other boxes any more
        new_iou = _clear_iou(iou, suppressed_boxes)
        iou_changed = (new_iou != iou)
        iou = new_iou
      # remaining boxes that can still suppress others, are selected boxes.
      output_boxes.append(_get_suppressing_boxes(iou))
      if len(output_boxes) >= max_output_size:
        break

    Args:
      scores: a tensor with a shape of [batch_size, anchors].
      boxes: a tensor with a shape of [batch_size, anchors, 4].
      max_output_size: a scalar integer representing the maximum number of
        boxes to be selected by non max suppression.
      iou_threshold: a float representing the threshold for deciding whether
        boxes overlap too much with respect to IOU.
      level: an integer for the level that the function operates on.
    Returns:
      nms_scores: a float32 tensor with a shape of
        [batch_size, max_output_size]; entries past the number of selected
        boxes are zero.
      nms_proposals: a float32 tensor with a shape of
        [batch_size, max_output_size, 4]; entries past the number of selected
        boxes are zero.
    """
    with tf.name_scope('nms_l%d' % level):
        batch_size = tf.shape(boxes)[0]
        num_boxes = tf.shape(boxes)[1]
        # Zero-pad the anchor axis up to the next multiple of the tile size.
        pad = tf.cast(tf.ceil(tf.cast(num_boxes, tf.float32) / _NMS_TILE_SIZE),
                      tf.int32) * _NMS_TILE_SIZE - num_boxes
        boxes = tf.pad(tf.cast(boxes, tf.float32), [[0, 0], [0, pad], [0, 0]])
        scores = tf.pad(tf.cast(scores, tf.float32), [[0, 0], [0, pad]])
        num_boxes += pad

        def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
            # Continue while some batch element still has fewer than
            # max_output_size selections and there are tiles left.
            return tf.logical_and(
                tf.reduce_min(output_size) < max_output_size,
                idx < num_boxes // _NMS_TILE_SIZE)

        # _suppression_loop_body is defined elsewhere in this module; the
        # loop variables are (boxes, iou_threshold, output_size, tile index).
        selected_boxes, _, output_size, _ = tf.while_loop(
            _loop_cond, _suppression_loop_body, [
                boxes, iou_threshold,
                tf.zeros([batch_size], tf.int32),
                tf.constant(0)
            ])
        # Boxes with any positive coordinate count as selected. Weighting the
        # selection mask with a descending ramp (num_boxes .. 1) makes top_k
        # return the earliest selected positions first; num_boxes - value
        # converts the ramp value back to a position.
        idx = num_boxes - tf.cast(
            tf.nn.top_k(
                tf.cast(tf.reduce_any(selected_boxes > 0, [2]), tf.int32) *
                tf.expand_dims(tf.range(num_boxes, 0, -1), 0),
                max_output_size)[0], tf.int32)
        # Unselected entries have value 0, i.e. position num_boxes; clamp
        # them to a valid index (they are zeroed out below).
        idx = tf.minimum(idx, num_boxes - 1)
        # Turn per-batch positions into indices of the flattened batch.
        idx = tf.reshape(
            idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]), [-1])
        boxes = tf.reshape(tf.gather(tf.reshape(boxes, [-1, 4]), idx),
                           [batch_size, max_output_size, 4])
        # Zero every slot at or beyond the per-batch number of selections.
        boxes = boxes * tf.cast(
            tf.reshape(tf.range(max_output_size), [1, -1, 1]) < tf.reshape(
                output_size, [-1, 1, 1]), boxes.dtype)
        scores = tf.reshape(tf.gather(tf.reshape(scores, [-1, 1]), idx),
                            [batch_size, max_output_size])
        scores = scores * tf.cast(
            tf.reshape(tf.range(max_output_size), [1, -1]) < tf.reshape(
                output_size, [-1, 1]), scores.dtype)
        return scores, boxes
def crop_mask_in_target_box(masks,
                            boxes,
                            target_boxes,
                            output_size,
                            sample_offset=0,
                            use_einsum=True):
    """Crop masks in target boxes.

    Every mask is zero-padded by 2 pixels on each side, the target box is
    re-expressed in the pixel grid of that padded mask, and the actual
    cropping/resizing is delegated to `selective_crop_and_resize`.

    Args:
      masks: A tensor with a shape of [batch_size, num_masks, height, width].
        All four dimensions are read from the static shape and used in
        Python arithmetic, so they must be known at graph-construction time.
      boxes: a float tensor representing box coordinates that tightly enclose
        masks with a shape of [batch_size, num_masks, 4] in un-normalized
        coordinates. A box is represented by [ymin, xmin, ymax, xmax].
      target_boxes: a float tensor representing target box coordinates for
        masks with a shape of [batch_size, num_masks, 4] in un-normalized
        coordinates. A box is represented by [ymin, xmin, ymax, xmax].
      output_size: A scalar to indicate the output crop size. It currently
        only supports to output a square shape outputs.
      sample_offset: a float number in [0, 1] indicates the subpixel sample
        offset from grid point.
      use_einsum: Use einsum to replace gather in selective_crop_and_resize.

    Returns:
      A 4-D tensor representing feature crop of shape
      [batch_size, num_boxes, output_size, output_size].
    """
    with tf.name_scope('crop_mask_in_target_box'):
        batch_size, num_masks, height, width = masks.get_shape().as_list()
        masks = tf.reshape(masks, [batch_size * num_masks, height, width, 1])
        # Pad zeros on the boundary of masks (2 pixels on every side).
        masks = tf.image.pad_to_bounding_box(masks, 2, 2, height + 4,
                                             width + 4)
        masks = tf.reshape(masks,
                           [batch_size, num_masks, height + 4, width + 4, 1])

        # Projects target box locations and sizes to corresponding cropped
        # mask coordinates.
        gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(value=boxes,
                                                          num_or_size_splits=4,
                                                          axis=2)
        bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(value=target_boxes,
                                                          num_or_size_splits=4,
                                                          axis=2)
        # _EPSILON keeps the division finite for degenerate (zero-size) boxes.
        gt_height = gt_y_max - gt_y_min + _EPSILON
        gt_width = gt_x_max - gt_x_min + _EPSILON

        # Vertical quantities scale with the mask height and horizontal ones
        # with the mask width. The "+ 2" accounts for the zero padding above.
        # (Previously x used `height` and h used `width`, which is only
        # correct for square masks.)
        y_transform = (bb_y_min - gt_y_min) * height / gt_height + 2
        x_transform = (bb_x_min - gt_x_min) * width / gt_width + 2
        h_transform = (bb_y_max - bb_y_min) * height / gt_height
        w_transform = (bb_x_max - bb_x_min) * width / gt_width

        # Largest valid (y, x) index inside the padded mask.
        boundaries = tf.concat([
            tf.cast(tf.ones_like(y_transform) * ((height + 4) - 1),
                    dtype=tf.float32),
            tf.cast(tf.ones_like(x_transform) * ((width + 4) - 1),
                    dtype=tf.float32)
        ],
                               axis=-1)

        # Reshape tensors to have the right shape for selective_crop_and_resize.
        transformed_boxes = tf.concat(
            [y_transform, x_transform, h_transform, w_transform], -1)
        # Each mask is passed as its own "level": mask i is read from index i
        # along axis 1 of `masks`.
        levels = tf.tile(tf.reshape(tf.range(num_masks), [1, num_masks]),
                         [batch_size, 1])

        cropped_masks = selective_crop_and_resize(masks,
                                                  transformed_boxes,
                                                  levels,
                                                  boundaries,
                                                  output_size,
                                                  sample_offset=sample_offset,
                                                  use_einsum_gather=use_einsum)
        # Drop the trailing singleton channel added by the first reshape.
        cropped_masks = tf.squeeze(cropped_masks, axis=-1)

    return cropped_masks
Exemplo n.º 44
0
    def _build(
            self,  # pylint: disable=arguments-differ
            memory=None,
            memory_sequence_length=None,
            memory_attention_bias=None,
            inputs=None,
            sequence_length=None,
            decoding_strategy='train_greedy',
            beam_width=None,
            length_penalty=0.,
            start_tokens=None,
            end_token=None,
            context=None,
            context_sequence_length=None,
            softmax_temperature=None,
            max_decoding_length=None,
            impute_finished=False,
            helper=None,
            mode=None):
        """Performs decoding.

        The interface is very similar to that of RNN decoders
        (:meth:`texar.modules.RNNDecoderBase._build`). In particular,
        the function provides **3 ways** to specify the decoding method, with
        varying flexibility:

        1. The :attr:`decoding_strategy` argument.

            - **"train_greedy"**: decoding in teacher-forcing fashion (i.e.,
              feeding ground truth to decode the next step), and for each step
              sample is obtained by taking the `argmax` of logits.
              Argument :attr:`inputs` is required for this strategy.
              :attr:`sequence_length` is optional.
            - **"infer_greedy"**: decoding in inference fashion (i.e., feeding
              `generated` sample to decode the next step), and for each step
              sample is obtained by taking the `argmax` of logits.
              Arguments :attr:`(start_tokens, end_token)` are
              required for this strategy, and argument
              :attr:`max_decoding_length` is optional.
            - **"infer_sample"**: decoding in inference fashion, and for each
              step sample is obtained by `random sampling` from the logits.
              Arguments :attr:`(start_tokens, end_token)` are required for this
              strategy, and argument :attr:`max_decoding_length` is optional.

          This argument is used only when arguments :attr:`helper` and
          :attr:`beam_width` are both `None`.

        2. The :attr:`helper` argument: An instance of subclass of
           :tf_main:`tf.contrib.seq2seq.Helper <contrib/seq2seq/Helper>`.
           This provides a superset of decoding strategies than above.
           The interface is the same as in RNN decoders.
           Please refer to :meth:`texar.modules.RNNDecoderBase._build` for
           detailed usage and examples.

           Note that, here, though using a :tf_main:`TrainingHelper
           <contrib/seq2seq/TrainingHelper>` corresponding to the
           "train_greedy" strategy above, the implementation is *slower* than
           directly setting `decoding_strategy="train_greedy"` (though the
           output results are the same).

           Argument :attr:`max_decoding_length` is optional.

        3. **Beam search**: set :attr:`beam_width` to use beam search decoding.
           Arguments :attr:`(start_tokens, end_token)` are required,
           and argument :attr:`max_decoding_length` is optional.

        Args:
            memory (optional): The memory to attend, e.g., the output of an
                RNN encoder.
                A Tensor of shape `[batch_size, memory_max_time, dim]`.
            memory_sequence_length (optional): A Tensor of shape `[batch_size]`
                containing the sequence lengths for the batch entries in
                memory. Used to create attention bias if
                :attr:`memory_attention_bias` is not given. Ignored if
                `memory_attention_bias` is provided.
            memory_attention_bias (optional): A Tensor of shape
                `[batch_size, num_heads, memory_max_time, dim]`.
                An attention bias typically sets the value of a padding
                position to a large negative value for masking. If not given,
                :attr:`memory_sequence_length` is used to automatically
                create an attention bias.
            inputs (optional): Input tensor for teacher forcing decoding, of
                shape `[batch_size, target_max_time, emb_dim]` containing the
                target sequence word embeddings.
                Used when :attr:`decoding_strategy` is set to "train_greedy".
            sequence_length (optional): A Tensor of shape `[batch_size]`,
                containing the sequence length of :attr:`inputs`.
                Tokens beyond the respective sequence length are masked out.
                Used when :attr:`decoding_strategy` is set to
                "train_greedy".
            decoding_strategy (str): A string specifying the decoding
                strategy, including "train_greedy", "infer_greedy",
                "infer_sample".
                Different arguments are required based on the
                strategy. See above for details. Ignored if
                :attr:`beam_width` or :attr:`helper` is set.
            beam_width (int): Set to use beam search. If given,
                :attr:`decoding_strategy` is ignored.
            length_penalty (float): Length penalty coefficient used in beam
                search decoding. Refer to https://arxiv.org/abs/1609.08144
                for more details.
                It should be larger if longer sentences are wanted.
            start_tokens (optional): An int Tensor of shape `[batch_size]`,
                containing the start tokens.
                Used when :attr:`decoding_strategy` = "infer_greedy" or
                "infer_sample", or :attr:`beam_width` is set.
                Ignored when context is set.
            end_token (optional): An int 0D Tensor, the token that marks end
                of decoding.
                Used when :attr:`decoding_strategy` = "infer_greedy" or
                "infer_sample", or :attr:`beam_width` is set.
            context (optional): An int Tensor of shape `[batch_size, length]`,
                containing the starting tokens for decoding.
                If context is set, the start_tokens will be ignored.
            context_sequence_length (optional): specify the length of context.
                Must be given whenever :attr:`context` is given, because it
                is decremented by one to account for the first context token
                being used as the start token.
            softmax_temperature (optional): A float 0D Tensor, value to divide
                the logits by before computing the softmax. Larger values
                (above 1.0) result in more random samples. Must > 0. If `None`,
                1.0 is used.
                Used when :attr:`decoding_strategy` = "infer_sample".
            max_decoding_length (optional): An int scalar Tensor indicating
                the maximum allowed number of decoding steps.
                If `None` (default), use "max_decoding_length" defined in
                :attr:`hparams`. Ignored in "train_greedy" decoding.
            impute_finished (bool): If `True`, then states for batch
                entries which are marked as finished get copied through and
                the corresponding outputs get zeroed out.  This causes some
                slowdown at each time step, but ensures that the final state
                and outputs have the correct values and that backprop ignores
                time steps that were marked as finished. Ignored in
                "train_greedy" decoding.
            helper (optional): An instance of
                :tf_main:`Helper <contrib/seq2seq/Helper>` that defines the
                decoding strategy. If given, :attr:`decoding_strategy` is
                ignored.
            mode (optional): A tensor taking value in
                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including
                `TRAIN`, `EVAL`, and `PREDICT`. Controls dropout mode.
                If `None` (default), :func:`texar.global_mode`
                is used.

        Returns:

            - For **"train_greedy"** decoding, returns an instance of \
            :class:`~texar.modules.TransformerDecoderOutput` which contains\
            `sample_id` and `logits`.

            - For **"infer_greedy"** and **"infer_sample"** decoding or\
            decoding with :attr:`helper`, returns\
            a tuple `(outputs, sequence_lengths)`, where `outputs` is an \
            instance of :class:`~texar.modules.TransformerDecoderOutput` as\
            in "train_greedy", and `sequence_lengths` is a Tensor of shape\
            `[batch_size]` containing the length of each sample.

            - For **beam search** decoding, returns a `dict` containing keys\
            "sample_id" and "log_prob".

                - **"sample_id"** is an int Tensor of shape \
                `[batch_size, max_time, beam_width]` containing generated\
                token indexes. `sample_id[:,:,0]` is the highest-probable \
                sample.
                - **"log_prob"** is a float Tensor of shape \
                `[batch_size, beam_width]` containing the log probability \
                of each sequence sample.

        Raises:
            ValueError: If :attr:`memory` is given without either
                :attr:`memory_attention_bias` or
                :attr:`memory_sequence_length`; if :attr:`decoding_strategy`
                is unknown; or if both :attr:`beam_width` and :attr:`helper`
                are set.
        """

        if memory is not None:
            if memory_attention_bias is None:
                if memory_sequence_length is None:
                    raise ValueError("`memory_sequence_length` is required if "
                                     "`memory_attention_bias` is not given.")

                # 1 marks padding positions, 0 marks real memory positions.
                enc_padding = 1 - tf.sequence_mask(memory_sequence_length,
                                                   tf.shape(memory)[1],
                                                   dtype=tf.float32)
                memory_attention_bias = attn.attention_bias_ignore_padding(
                    enc_padding)

        # record the context, which will be used in step function
        # for dynamic_decode
        if context is not None:
            # The first context token plays the role of the start token; the
            # remainder is stored for use during decoding.
            start_tokens = context[:, 0]
            self.context = context[:, 1:]
            self.context_sequence_length = context_sequence_length - 1
        else:
            self.context = None

        if helper is None and beam_width is None and \
                decoding_strategy == 'train_greedy': # Teacher-forcing
            if sequence_length is not None:
                inputs = mask_sequences(inputs, sequence_length, tensor_rank=3)

            decoder_self_attention_bias = (attn.attention_bias_lower_triangle(
                shape_list(inputs)[1]))
            if self._hparams.scale_embeds:
                target_inputs = inputs * self._hparams.dim**0.5
            else:
                # Without scaling the embeddings are used unchanged. This
                # branch was missing, leaving `target_inputs` unbound.
                target_inputs = inputs

            _, lengths, _ = shape_list(target_inputs)
            positions = tf.expand_dims(tf.range(lengths, dtype=tf.int32), 0)
            pos_embeds = self.position_embedder(positions)

            inputs = target_inputs + pos_embeds

            decoder_output = self._self_attention_stack(
                inputs,
                memory,
                decoder_self_attention_bias=decoder_self_attention_bias,
                memory_attention_bias=memory_attention_bias,
                cache=None,
                mode=mode)
            logits = self.output_layer(decoder_output)
            preds = tf.to_int32(tf.argmax(logits, axis=-1))
            rets = TransformerDecoderOutput(logits=logits, sample_id=preds)

        else:
            if max_decoding_length is None:
                max_decoding_length = self._hparams.max_decoding_length

            self._inputs_to_outputs = self._inputs_to_outputs_fn(
                max_decoding_length + 1)

            if beam_width is None:  #Inference-like decoding
                # Prepare helper
                if helper is not None:
                    # ignore `decoding_strategy`
                    pass
                else:
                    if decoding_strategy == "infer_greedy":
                        helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                            self._embedding, start_tokens, end_token)
                    elif decoding_strategy == "infer_sample":
                        helper = tf.contrib.seq2seq.SampleEmbeddingHelper(
                            self._embedding, start_tokens, end_token,
                            softmax_temperature)
                    else:
                        raise ValueError(
                            "Unknown decoding strategy: {}".format(
                                decoding_strategy))
                self._helper = helper

                self._cache = self._init_cache(memory,
                                               memory_attention_bias,
                                               beam_search_decoding=False)
                if context is not None:
                    # Right-pad the stored context along the time axis up to
                    # `max_decoding_length`.
                    self.context = tf.pad(
                        self.context,
                        [[0, 0],
                         [0, max_decoding_length - tf.shape(self.context)[1]]])

                outputs, cache, sequence_lengths = dynamic_decode(
                    decoder=self,
                    impute_finished=impute_finished,
                    maximum_iterations=max_decoding_length,
                    output_time_major=False,
                    scope=self.variable_scope)

                if context is not None:
                    # Here the length of sample_id will be larger than that
                    # of logit by 1, because there will be an additional
                    # start_token in the returned sample_id.
                    # the start_id should be the first token of the
                    # given context
                    outputs = TransformerDecoderOutput(
                        logits=outputs.logits,
                        sample_id=tf.concat([
                            tf.expand_dims(start_tokens, 1), outputs.sample_id
                        ],
                                            axis=1))
                    sequence_lengths = sequence_lengths + 1
                rets = outputs, sequence_lengths

            else:  #Beam-search decoding
                # ignore `decoding_strategy`
                # assume `helper` is not set
                if helper is not None:
                    raise ValueError("Must not set 'beam_width' and 'helper' "
                                     "simultaneously.")
                _batch_size = tf.shape(start_tokens)[0]
                self._cache = self._init_cache(memory,
                                               memory_attention_bias,
                                               beam_search_decoding=True,
                                               batch_size=_batch_size)

                # The output format is different when running beam search
                sample_id, log_prob = self._beam_decode(
                    start_tokens,
                    end_token,
                    beam_width=beam_width,
                    length_penalty=length_penalty,
                    decode_length=max_decoding_length,
                )
                rets = {'sample_id': sample_id, 'log_prob': log_prob}

        # Trainable variables are collected only on the first call.
        if not self._built:
            self._add_internal_trainable_variables()
            self._built = True

        return rets
Exemplo n.º 45
0
def positions_for(tokens, past_length):
    """Build position indices for `tokens`, shifted by `past_length`.

    The positions `past_length + [0, 1, ..., nsteps - 1]` are computed, where
    `nsteps` is the size of axis 1 of `tokens`, and handed to `expand_tile`
    together with the size of axis 0 of `tokens`.

    Args:
      tokens: a tensor with at least two dimensions; axis 0 is used as the
        batch size and axis 1 as the number of steps.
      past_length: offset added to every position.

    Returns:
      The result of `expand_tile(positions, batch_size)`; presumably the
      positions repeated once per batch entry (confirm against `expand_tile`).
    """
    token_shape = tf.shape(input=tokens)
    batch_size, nsteps = token_shape[0], token_shape[1]
    step_positions = past_length + tf.range(nsteps)
    return expand_tile(step_positions, batch_size)
Exemplo n.º 46
0
    def call(self, inputs):
        """Pool every ROI from the pyramid level that matches its size.

        Args:
          inputs: a sequence whose first element is the boxes tensor,
            [batch, num_boxes, (y1, x1, y2, x2)] in normalized coordinates,
            followed by the feature maps of pyramid levels P2..P5, each
            [batch, height, width, channels].

        Returns:
          The pooled crops, re-ordered to follow the original box order, with
          a leading dimension of size 1 added in front.
        """
        boxes, feature_maps = inputs[0], inputs[1:]

        # Box height/width in normalized coordinates.
        y1, x1, y2, x2 = tf.split(boxes, 4, axis=2)
        box_h = y2 - y1
        box_w = x2 - x1

        # Equation 1 of the Feature Pyramid Networks paper, adapted to
        # normalized coordinates: a 224x224 pixel ROI lands on P4.
        image_area = tf.cast(self.image_shape[0] * self.image_shape[1],
                             tf.float32)
        roi_size = tf.sqrt(box_h * box_w)
        reference_size = 224.0 / tf.sqrt(image_area)
        raw_level = log2_graph(roi_size / reference_size)
        shifted_level = 4 + tf.cast(tf.round(raw_level), tf.int32)
        # Clamp to the available levels P2..P5 and drop the trailing axis.
        clamped_level = tf.minimum(5, tf.maximum(2, shifted_level))
        roi_level = tf.squeeze(clamped_level, 2)

        crops_per_level = []
        assignments_per_level = []
        for level in range(2, 6):
            level_features = feature_maps[level - 2]

            # (batch, box) index pairs of the ROIs assigned to this level.
            assigned = tf.where(tf.equal(roi_level, level))
            assigned_boxes = tf.gather_nd(boxes, assigned)

            # crop_and_resize needs the batch index of every box.
            batch_ids = tf.cast(assigned[:, 0], tf.int32)

            # Remember which box went to which level.
            assignments_per_level.append(assigned)

            # ROI proposals must not receive gradients.
            assigned_boxes = tf.stop_gradient(assigned_boxes)
            batch_ids = tf.stop_gradient(batch_ids)

            # One bilinear sample per bin, as tf.image.crop_and_resize does.
            # The Mask R-CNN paper notes this is nearly as effective as
            # sampling four locations per bin and pooling them.
            # Result: [batch * num_boxes, pool_height, pool_width, channels]
            level_crops = tf.image.crop_and_resize(level_features,
                                                   assigned_boxes,
                                                   batch_ids,
                                                   self.pool_shape,
                                                   method="bilinear")
            crops_per_level.append(level_crops)

        # Stack crops of all levels into a single tensor.
        pooled = tf.concat(crops_per_level, axis=0)

        # Stack the (batch, box) pairs and append a third column holding the
        # row each crop occupies in `pooled`.
        box_to_level = tf.concat(assignments_per_level, axis=0)
        pooled_rows = tf.expand_dims(tf.range(tf.shape(box_to_level)[0]), 1)
        box_to_level = tf.concat(
            [tf.cast(box_to_level, tf.int32), pooled_rows], axis=1)

        # Restore the original box order: sort by batch, then by box index.
        # Both columns are merged into one key because top_k sorts a single
        # tensor; the key is only unambiguous for box indices below 100000.
        sort_key = box_to_level[:, 0] * 100000 + box_to_level[:, 1]
        num_rows = tf.shape(box_to_level)[0]
        # top_k is descending, so reverse it to get ascending order.
        ascending = tf.nn.top_k(sort_key, k=num_rows).indices[::-1]
        row_order = tf.gather(box_to_level[:, 2], ascending)
        pooled = tf.gather(pooled, row_order)

        # Re-add the batch dimension
        return tf.expand_dims(pooled, 0)
Exemplo n.º 47
0
    def _build_net(self):
        """Create placeholders, both Q networks, the loss and the train op.

        Reads `self.n_features`, `self.n_actions`, `self.gamma` and `self.lr`.
        Sets `self.s`, `self.s_`, `self.r`, `self.a`, `self.q_eval`,
        `self.q_next`, `self.q_target`, `self.q_eval_wrt_a`, `self.loss` and
        `self._train_op`.
        """
        # ------------------ all inputs ------------------------
        state_shape = [None, self.n_features]
        self.s = tf.placeholder(tf.float32, state_shape,
                                name='s')  # input State
        self.s_ = tf.placeholder(tf.float32, state_shape,
                                 name='s_')  # input Next State
        self.r = tf.placeholder(tf.float32, [None], name='r')  # input Reward
        self.a = tf.placeholder(tf.int32, [None], name='a')  # input Action

        # Every dense layer below shares the same pair of initializers.
        w_initializer = tf.random_normal_initializer(0., 0.3)
        b_initializer = tf.constant_initializer(0.1)
        layer_init = dict(kernel_initializer=w_initializer,
                          bias_initializer=b_initializer)

        # ------------------ build evaluate_net ------------------
        # Maps the current state to one value per action.
        with tf.variable_scope('eval_net'):
            eval_hidden = tf.layers.dense(self.s,
                                          units=20,
                                          activation=tf.nn.relu,
                                          name='e1',
                                          **layer_init)
            self.q_eval = tf.layers.dense(eval_hidden,
                                          units=self.n_actions,
                                          name='q',
                                          **layer_init)

        # ------------------ build target_net ------------------
        # Same architecture, fed with the next state.
        with tf.variable_scope('target_net'):
            target_hidden = tf.layers.dense(self.s_,
                                            units=20,
                                            activation=tf.nn.relu,
                                            name='t1',
                                            **layer_init)
            self.q_next = tf.layers.dense(target_hidden,
                                          units=self.n_actions,
                                          name='t2',
                                          **layer_init)

        with tf.variable_scope('q_target'):
            # Largest next-state value per sample, shape=(None, )
            max_next_q = tf.reduce_max(self.q_next, axis=1, name='Qmax_s_')
            # The target is treated as a constant: no gradient flows into it.
            self.q_target = tf.stop_gradient(self.r + self.gamma * max_next_q)

        with tf.variable_scope('q_eval'):
            # Pair every row number with the action taken in that row:
            # [[0, a0], [1, a1], [2, a2], ...]
            row_ids = tf.range(tf.shape(self.a)[0], dtype=tf.int32)
            a_indices = tf.stack([row_ids, self.a], axis=1)
            # Value predicted for the action actually taken, shape=(None, )
            self.q_eval_wrt_a = tf.gather_nd(params=self.q_eval,
                                             indices=a_indices)

        with tf.variable_scope('loss'):
            squared_error = tf.squared_difference(self.q_target,
                                                  self.q_eval_wrt_a,
                                                  name='TD_error')
            self.loss = tf.reduce_mean(squared_error)

        with tf.variable_scope('train'):
            optimizer = tf.train.RMSPropOptimizer(self.lr)
            self._train_op = optimizer.minimize(self.loss)
def single_level_feature_crop(features, level_boxes, detection_prior_levels,
                              min_mask_level, mask_crop_size):
    """Crop the FPN features at the appropriate levels for each detection.

    The crop is a gather on `features` flattened to
    [batch * level * y * x, channels]. For every instance a flat index is
    built for each pixel of a `mask_crop_size` x `mask_crop_size` window that
    starts at the instance's box corner on the instance's assigned level.

    Args:
      features: a float tensor of shape [batch_size, num_levels,
        max_feature_size, max_feature_size, num_downsample_channels].
      level_boxes: a float Tensor of the level boxes to crop from.
        [batch_size, num_instances, 4]. Cast to int32 before use; only the
        first two coordinates (y start, x start) are read.
      detection_prior_levels: an int Tensor of instance assigned level of
        shape [batch_size, num_instances].
      min_mask_level: minimum FPN level to crop mask feature from.
      mask_crop_size: an int of mask crop size.

    Returns:
      crop_features: a float Tensor of shape [batch_size * num_instances,
          mask_crop_size, mask_crop_size, num_downsample_channels]. This is
          the instance feature crop.
    """
    (batch_size, num_levels, max_feature_size, _,
     num_downsample_channels) = features.get_shape().as_list()
    _, num_of_instances, _ = level_boxes.get_shape().as_list()
    level_boxes = tf.cast(level_boxes, tf.int32)
    assert num_of_instances == detection_prior_levels.get_shape().as_list()[1]

    x_start_indices = level_boxes[:, :, 1]
    y_start_indices = level_boxes[:, :, 0]

    # Expand each start index into the full run of indices of the window.
    crop_offsets = range(mask_crop_size)
    x_indices = tf.stack([x_start_indices + i for i in crop_offsets], axis=2)
    y_indices = tf.stack([y_start_indices + i for i in crop_offsets], axis=2)

    # Level index relative to the first level stored in `features`.
    levels = detection_prior_levels - min_mask_level

    # Strides of the flattened [batch, level, y, x] layout.
    row_stride = max_feature_size
    level_stride = max_feature_size * row_stride
    batch_stride = num_levels * level_stride

    # TODO(weicheng) change this to gather_nd for better readability.
    # Each term below is tiled to the common shape
    # [batch_size, num_of_instances, mask_crop_size, mask_crop_size].
    batch_term = tf.tile(
        tf.reshape(tf.range(batch_size) * batch_stride,
                   [batch_size, 1, 1, 1]),
        [1, num_of_instances, mask_crop_size, mask_crop_size])
    level_term = tf.tile(
        tf.reshape(levels * level_stride,
                   [batch_size, num_of_instances, 1, 1]),
        [1, 1, mask_crop_size, mask_crop_size])
    row_term = tf.tile(
        tf.reshape(y_indices * row_stride,
                   [batch_size, num_of_instances, mask_crop_size, 1]),
        [1, 1, 1, mask_crop_size])
    col_term = tf.tile(
        tf.reshape(x_indices,
                   [batch_size, num_of_instances, 1, mask_crop_size]),
        [1, 1, mask_crop_size, 1])
    indices = tf.reshape(batch_term + level_term + row_term + col_term, [-1])

    # One row per (batch, level, y, x) location.
    flat_features = tf.reshape(features, [-1, num_downsample_channels])
    gathered = tf.gather(flat_features, indices)
    crop_features = tf.reshape(gathered, [
        batch_size * num_of_instances, mask_crop_size, mask_crop_size,
        num_downsample_channels
    ])

    return crop_features
Exemplo n.º 49
0
def transformer_decode(src,
                       dst,
                       hidden_units,
                       vocab_size,
                       max_len,
                       num_blocks=6,
                       num_heads=8,
                       dropout_rate=0,
                       is_training=True,
                       trainable=True,
                       scope="transformer_decode"):
    """Build a Transformer decoder stack and return its last hidden states.

    The target ids are shifted right by one step (a column of ones is
    prepended as <GO>, the last column is dropped), embedded, summed with a
    positional embedding looked up from a table of ``max_len`` rows, passed
    through dropout and finally through ``num_blocks`` blocks, each made of
    causal self-attention, attention over ``src`` and a feed-forward layer.

    Args:
        src: tensor used as ``keys`` of the encoder-decoder attention.
        dst: target token ids; sliced as ``dst[:, :1]`` and ``dst[:, :-1]``,
            so a rank-2 tensor is assumed -- TODO confirm against callers.
        hidden_units: embedding size and attention width.
        vocab_size: number of rows of the token embedding table.
        max_len: number of rows of the positional embedding table.
        num_blocks: number of decoder blocks.
        num_heads: forwarded to ``multihead_attention``.
        dropout_rate: rate used for the input dropout and forwarded to the
            attention and feed-forward helpers.
        is_training: enables dropout when true.
        trainable: forwarded to every helper that creates variables.
        scope: name of the enclosing variable scope.

    Returns:
        The output of the last decoder block.
    """
    with tf.variable_scope(scope):
        # Shift right: <GO> (id 1) in front, last target token dropped.
        go_column = tf.ones_like(dst[:, :1])
        decoder_inputs = tf.concat((go_column, dst[:, :-1]), -1)

        # Token embedding.
        token_emb = embedding(decoder_inputs,
                              vocab_size=vocab_size,
                              num_units=hidden_units,
                              trainable=trainable,
                              scope="dec_embed")

        # Position ids 0..T-1 repeated for every row of the batch.
        seq_len = tf.shape(decoder_inputs)[1]
        position_row = tf.expand_dims(tf.range(seq_len), 0)
        batch = tf.shape(decoder_inputs)[0]
        position_ids = tf.tile(position_row, [batch, 1])
        position_emb = embedding(position_ids,
                                 vocab_size=max_len,
                                 num_units=hidden_units,
                                 zero_pad=False,
                                 trainable=trainable,
                                 scope="dec_pe")

        dec = tf.layers.dropout(token_emb + position_emb,
                                rate=dropout_rate,
                                training=tf.convert_to_tensor(is_training))

        # Keyword arguments shared by both attention layers of a block.
        attention_kwargs = dict(num_units=hidden_units,
                                num_heads=num_heads,
                                dropout_rate=dropout_rate,
                                is_training=is_training,
                                trainable=trainable)
        for block_idx in range(num_blocks):
            with tf.variable_scope("num_blocks_{}".format(block_idx)):
                # Causal self-attention over the decoder states.
                dec = multihead_attention(queries=dec,
                                          keys=dec,
                                          causality=True,
                                          scope="self_attention",
                                          **attention_kwargs)
                # Attention over the encoder output.
                dec = multihead_attention(queries=dec,
                                          keys=src,
                                          causality=False,
                                          scope="vanilla_attention",
                                          **attention_kwargs)
                dec = feedforward(dec,
                                  num_units=[4 * hidden_units, hidden_units],
                                  dropout_rate=dropout_rate,
                                  trainable=trainable)
    return dec
Exemplo n.º 50
0
    def compute_loss(self, y_true, y_pred):
        """Compute a multibox detection loss with hard negative mining.

        The last axis of both tensors is sliced as ``[:4]`` (box regression
        targets), ``[4:-8]`` (class scores) and, for ``y_true`` only,
        ``[-8]`` (an indicator that is summed to count positive boxes).

        Args:
            y_true: ground-truth tensor, indexed as (batch, boxes, channels).
            y_pred: prediction tensor with the same layout.

        Returns:
            Scalar tensor: (positive + mined negative classification loss)
            plus ``self.alpha`` times the positive localisation loss, each
            divided by the total number of positives.
        """
        batch_size = tf.shape(y_true)[0]
        num_boxes = tf.to_float(tf.shape(y_true)[1])

        # Per-box classification loss: (batch, boxes, classes) -> (batch, boxes).
        conf_loss = self._softmax_loss(y_true[:, :, 4:-8],
                                       y_pred[:, :, 4:-8])
        # Per-box localisation loss: (batch, boxes, 4) -> (batch, boxes).
        loc_loss = self._l1_smooth_loss(y_true[:, :, :4],
                                        y_pred[:, :, :4])

        # Positive-box indicator, then per-image positive count and losses.
        positive_mask = y_true[:, :, -8]
        num_pos = tf.reduce_sum(positive_mask, axis=-1)
        pos_loc_loss = tf.reduce_sum(loc_loss * positive_mask, axis=1)
        pos_conf_loss = tf.reduce_sum(conf_loss * positive_mask, axis=1)

        # Negatives per image: a fixed ratio of the positives, capped by the
        # number of non-positive boxes.
        num_neg = tf.minimum(self.neg_pos_ratio * num_pos,
                             num_boxes - num_pos)

        # has_min is 1.0 when at least one image wants negatives. Otherwise
        # the appended fallback entry (negatives_for_hard) keeps the mean
        # below from being taken over an empty set.
        has_min = tf.to_float(tf.reduce_any(tf.greater(num_neg, 0)))
        fallback = [(1 - has_min) * self.negatives_for_hard]
        num_neg = tf.concat(axis=0, values=[num_neg, fallback])

        # Average negative count over the entries that are > 0.
        wants_negatives = tf.greater(num_neg, 0)
        num_neg_batch = tf.to_int32(
            tf.reduce_mean(tf.boolean_mask(num_neg, wants_negatives)))

        # Channel range of the non-background class confidences.
        confs_start = 4 + self.background_label_id + 1
        confs_end = confs_start + self.num_classes - 1

        # Highest non-background confidence per box; positives are zeroed so
        # only boxes that should predict nothing compete in the top-k.
        max_confs = tf.reduce_max(y_pred[:, :, confs_start:confs_end],
                                  axis=2)
        _, indices = tf.nn.top_k(max_confs * (1 - positive_mask),
                                 k=num_neg_batch)

        # Turn (image, box) pairs into indices of the flattened loss tensor.
        batch_idx = tf.tile(tf.expand_dims(tf.range(0, batch_size), 1),
                            (1, num_neg_batch))
        flat_batch_offset = tf.reshape(batch_idx, [-1]) * tf.to_int32(num_boxes)
        full_indices = flat_batch_offset + tf.reshape(indices, [-1])

        neg_conf_loss = tf.gather(tf.reshape(conf_loss, [-1]), full_indices)
        neg_conf_loss = tf.reshape(neg_conf_loss,
                                   [batch_size, num_neg_batch])
        neg_conf_loss = tf.reduce_sum(neg_conf_loss, axis=1)

        # Guard against division by zero for images without positives.
        num_pos = tf.where(tf.not_equal(num_pos, 0), num_pos,
                           tf.ones_like(num_pos))
        normalizer = tf.reduce_sum(num_pos)

        # Total loss = classification (positives + mined negatives) + alpha * localisation.
        total_loss = tf.reduce_sum(pos_conf_loss) + tf.reduce_sum(neg_conf_loss)
        total_loss /= normalizer
        total_loss += tf.reduce_sum(self.alpha * pos_loc_loss) / normalizer

        return total_loss
Exemplo n.º 51
0
def directional_attention_with_dense(rep_tensor,
                                     rep_mask,
                                     direction=None,
                                     scope=None,
                                     keep_prob=1.,
                                     is_train=None,
                                     weight_decay=0.,
                                     activation='elu',
                                     tensor_dict=None,
                                     name=None):
    """Apply feature-wise, optionally directional, self-attention with a gate.

    Every position attends over the other positions of the same sequence.
    The pairs that may attend are restricted by ``direction`` and by
    ``rep_mask``; the attended summary is then mixed with the dense-mapped
    input through a sigmoid fusion gate.

    Args:
        rep_tensor: input tensor, indexed as (batch_size, sent_len, dim); the
            static size of axis 2 is used as the hidden width.
        rep_mask: boolean tensor combined with the direction mask through
            ``tf.logical_and``; presumably (batch_size, sent_len) with True
            on real tokens -- TODO confirm against callers.
        direction: ``None`` masks only the diagonal, ``'forward'`` keeps
            pairs with row index > column index, any other value keeps pairs
            with column index > row index.
        scope: variable scope name; defaults to
            ``'directional_attention_<direction>'`` with ``'diag'`` standing
            in for ``direction=None``.
        keep_prob: forwarded to the dense, dropout and linear helpers.
        is_train: forwarded to the dense, dropout and linear helpers.
        weight_decay: forwarded to the dense and linear helpers.
        activation: activation name forwarded to ``bn_dense_layer``.
        tensor_dict: optional dict that receives intermediate tensors.
        name: key prefix used in ``tensor_dict``; nothing is stored unless
            both ``tensor_dict`` and ``name`` are given.

    Returns:
        The gated output after ``mask_for_high_rank(output, rep_mask)``.
    """
    def scaled_tanh(x, scale=5.):
        return scale * tf.nn.tanh(1. / scale * x)

    batch_size, sent_len = tf.shape(rep_tensor)[0], tf.shape(rep_tensor)[1]
    ivec = rep_tensor.get_shape()[2]  # a different size is alternative
    # The previous default, 'directional_attention_%' % 'direction', was a
    # malformed format string applied to a literal: it raised ValueError
    # whenever `scope` was not supplied and never reached the 'diag' fallback.
    default_scope = 'directional_attention_%s' % (direction or 'diag')
    with tf.variable_scope(scope or default_scope):
        # mask generation
        sent_len_indices = tf.range(sent_len, dtype=tf.int32)
        sent_len_col, sent_len_row = tf.meshgrid(sent_len_indices,
                                                 sent_len_indices)
        if direction is None:  # True-->0, False-->-inf
            # Everything except the diagonal is allowed.
            direct_mask = tf.cast(
                tf.diag(-tf.ones([sent_len], tf.int32)) + 1,
                tf.bool)  # sent_len,sent_len
        else:
            if direction == 'forward':
                direct_mask = tf.greater(sent_len_row, sent_len_col)
            else:
                direct_mask = tf.greater(sent_len_col, sent_len_row)
        direct_mask_tile = tf.tile(
            tf.expand_dims(direct_mask, 0),
            [batch_size, 1, 1])  # batch_size, sent_len, sent_len
        rep_mask_tile = tf.tile(tf.expand_dims(
            rep_mask, 1), [1, sent_len, 1])  # batch_size, sent_len, sent_len
        attn_mask = tf.logical_and(
            direct_mask_tile, rep_mask_tile)  # batch_size, sent_len, sent_len

        # non-linear  ,x-->h
        rep_map = bn_dense_layer(rep_tensor, ivec, True, 0., 'bn_dense_map',
                                 activation, False, weight_decay, keep_prob,
                                 is_train)  # batch_size, sent_len, ivec
        rep_map_tile = tf.tile(
            tf.expand_dims(rep_map, 1),
            [1, sent_len, 1, 1])  # batch_size,sent_len,sent_len,ivec
        rep_map_dp = dropout(rep_map, keep_prob, is_train)

        # attention
        with tf.variable_scope('attention'):
            f_bias = tf.get_variable('f_bias', [ivec], tf.float32,
                                     tf.constant_initializer(0.))
            dependent = linear(
                rep_map_dp, ivec, False,
                scope='linear_dependent')  # batch_size, sent_len, ivec
            dependent_etd = tf.expand_dims(
                dependent, axis=1)  # batch_size, 1, sent_len, ivec
            # NOTE(review): 'linear_head' is passed positionally in the slot
            # that the calls below fill with 0.; it probably was meant to be
            # scope='linear_head'. Left unchanged because switching it would
            # rename the variables -- confirm against linear()'s signature.
            head = linear(rep_map_dp, ivec, False, 'linear_head')
            head_etd = tf.expand_dims(head,
                                      axis=2)  # batch_size, sent_len, 1, ivec

            logits = scaled_tanh(dependent_etd + head_etd + f_bias, 5.0)

            logits_mask = exp_mask_for_high_rank(logits, attn_mask)
            # Softmax over axis 2, then zero the masked-out pairs again.
            attn_score = tf.nn.softmax(logits_mask, 2)
            attn_score = mask_for_high_rank(attn_score, attn_mask)

            attn_result = tf.reduce_sum(attn_score * rep_map_tile, 2)

        # output, fusion
        with tf.variable_scope('output'):
            o_bias = tf.get_variable('o_bias', [ivec], tf.float32,
                                     tf.constant_initializer(0.))
            # input gate
            fusion_gate = tf.nn.sigmoid(
                linear(rep_map, ivec, True, 0., 'linear_fusion_i', False,
                       weight_decay, keep_prob, is_train) +
                linear(attn_result, ivec, True, 0., 'linear_fusion_a', False,
                       weight_decay, keep_prob, is_train) + o_bias)
            output = fusion_gate * rep_map + (1 - fusion_gate) * attn_result
            output = mask_for_high_rank(output, rep_mask)

        # save attention
        if tensor_dict is not None and name is not None:
            tensor_dict[name + '_dependent'] = dependent
            tensor_dict[name + '_head'] = head
            tensor_dict[name] = attn_score
            tensor_dict[name + '_gate'] = fusion_gate

        return output
Exemplo n.º 52
0
    def _decode(self, input_dict):
        """Build the attention-based RNN decoder and decode the encoder output.

        Args:
          input_dict (dict): Python dictionary with inputs to decoder. Keys
            read here:

            * ``'encoder_output'`` --- dict with ``'outputs'`` (encoder
              states; the static batch size is read from axis 0 and the time
              extent from axis 1) and ``'src_length'`` (input lengths).
            * ``'target_tensors'`` --- optional ``[tgt_inputs, tgt_lengths]``.
              When present, ``tgt_inputs`` is shifted right by one step and
              ``GO_SYMBOL`` is prepended.

        Config parameters read from ``self.params``:

        * **rnn_type**, **num_layers**, **hidden_dim** --- decoder RNN stack.
        * **attention_params** --- dict with ``attention_dim``,
          ``attention_type`` (``"bahadanu"``, ``"chorowski"`` or
          ``"zhaopeng"``), ``num_heads``, ``plot_attention`` and, for the two
          location-sensitive types, ``use_coverage``.
        * **dropout_keep_prob** --- only applied when the mode is "train".
        * **beam_width** --- beam search is used when it differs from 1 and
          the mode is "infer".
        * **pos_embedding** --- adds position embeddings to the encoder
          output and to the decoder inputs.
        * **dtype** --- dtype the target embeddings are cast to.

        Returns:
          dict: Python dictionary with:

          * ``'outputs'`` --- ``[predictions, alignments, enc_src_lengths]``
            without beam search and ``[predictions, enc_src_lengths]`` with
            it. ``alignments`` is ``None`` unless attention plotting is
            enabled.
          * ``'logits'`` --- the decoder's ``rnn_output`` (with beam search:
            the predicted ids of the best beam). In "eval" mode it is padded
            along axis 1 with ones up to the longest target length.
          * ``'tgt_length'`` --- sequence lengths returned by
            ``dynamic_decode``.

        Raises:
          ValueError: if ``attention_type`` is not one of the supported names.
        """
        encoder_outputs = input_dict['encoder_output']['outputs']
        enc_src_lengths = input_dict['encoder_output']['src_length']

        self._batch_size = int(encoder_outputs.get_shape()[0])
        self._beam_width = self.params.get("beam_width", 1)

        tgt_inputs = None
        tgt_lengths = None
        if 'target_tensors' in input_dict:
            tgt_inputs = input_dict['target_tensors'][0]
            tgt_lengths = input_dict['target_tensors'][1]
            # Shift right: GO symbol in front, last target token dropped.
            tgt_inputs = tf.concat([
                tf.fill([self._batch_size, 1], self.GO_SYMBOL),
                tgt_inputs[:, :-1]
            ], -1)

        layer_type = self.params['rnn_type']
        num_layers = self.params['num_layers']
        attention_params = self.params['attention_params']
        hidden_dim = self.params['hidden_dim']
        dropout_keep_prob = self.params.get(
            'dropout_keep_prob', 1.0) if self._mode == "train" else 1.0

        # To-Do Seperate encoder and decoder position embeddings
        use_positional_embedding = self.params.get("pos_embedding", False)
        use_beam_search_decoder = (self._beam_width != 1) and (self._mode
                                                               == "infer")

        self._target_emb_layer = tf.get_variable(
            name='TargetEmbeddingMatrix',
            shape=[self._tgt_vocab_size, self._tgt_emb_size],
            dtype=tf.float32,
        )

        if use_positional_embedding:
            # Both position tables have a fixed capacity of 1024 positions.
            self.enc_pos_emb_size = int(encoder_outputs.get_shape()[-1])
            self.enc_pos_emb_layer = tf.get_variable(
                name='EncoderPositionEmbeddingMatrix',
                shape=[1024, self.enc_pos_emb_size],
                dtype=tf.float32,
            )
            encoder_output_positions = tf.range(0,
                                                tf.shape(encoder_outputs)[1],
                                                delta=1,
                                                dtype=tf.int32,
                                                name='positional_inputs')
            encoder_position_embeddings = tf.cast(tf.nn.embedding_lookup(
                self.enc_pos_emb_layer, encoder_output_positions),
                                                  dtype=encoder_outputs.dtype)
            encoder_outputs += encoder_position_embeddings

            self.dec_pos_emb_size = self._tgt_emb_size
            self.dec_pos_emb_layer = tf.get_variable(
                name='DecoderPositionEmbeddingMatrix',
                shape=[1024, self.dec_pos_emb_size],
                dtype=tf.float32,
            )

        output_projection_layer = FullyConnected(
            [self._tgt_vocab_size],
            dropout_keep_prob=dropout_keep_prob,
            mode=self._mode,
        )

        rnn_cell = cells_dict[layer_type]

        multirnn_cell = tf.nn.rnn_cell.MultiRNNCell([
            tf.nn.rnn_cell.DropoutWrapper(
                rnn_cell(hidden_dim), output_keep_prob=dropout_keep_prob)
            for _ in range(num_layers)
        ])

        if use_beam_search_decoder:
            # Repeat every batch entry once per beam.
            encoder_outputs = tf.contrib.seq2seq.tile_batch(
                encoder_outputs,
                multiplier=self._beam_width,
            )
            enc_src_lengths = tf.contrib.seq2seq.tile_batch(
                enc_src_lengths,
                multiplier=self._beam_width,
            )

        attention_dim = attention_params["attention_dim"]
        attention_type = attention_params["attention_type"]
        num_heads = attention_params["num_heads"]
        plot_attention = attention_params["plot_attention"]
        if plot_attention:
            if use_beam_search_decoder:
                plot_attention = False
                print(
                    "Plotting Attention is disabled for Beam Search Decoding")
            if num_heads != 1:
                plot_attention = False
                print(
                    "Plotting Attention is disabled for Multi Head Attention")
            if self.params['dtype'] != tf.float32:
                plot_attention = False
                print(
                    "Plotting Attention is disabled for Mixed Precision Mode")

        attention_params_dict = {}
        # "bahadanu" is the (misspelled) key existing configs use; keep it.
        if attention_type == "bahadanu":
            AttentionMechanism = BahdanauAttention
            # Must be the plain boolean: a trailing comma here used to store
            # the tuple (False,), which is truthy and switched normalization
            # on instead of off.
            attention_params_dict["normalize"] = False
        elif attention_type == "chorowski":
            AttentionMechanism = LocationSensitiveAttention
            attention_params_dict["use_coverage"] = attention_params[
                "use_coverage"]
            attention_params_dict["location_attn_type"] = attention_type
            attention_params_dict["location_attention_params"] = {
                'filters': 10,
                'kernel_size': 101
            }
        elif attention_type == "zhaopeng":
            AttentionMechanism = LocationSensitiveAttention
            attention_params_dict["use_coverage"] = attention_params[
                "use_coverage"]
            attention_params_dict["query_dim"] = hidden_dim
            attention_params_dict["location_attn_type"] = attention_type
        else:
            # Fail here with a clear message instead of a NameError on the
            # unbound AttentionMechanism further down.
            raise ValueError(
                "Unsupported attention_type: {!r}".format(attention_type))

        # One attention mechanism per head, all over the same memory.
        attention_mechanism = [
            AttentionMechanism(num_units=attention_dim,
                               memory=encoder_outputs,
                               memory_sequence_length=enc_src_lengths,
                               probability_fn=tf.nn.softmax,
                               dtype=tf.get_variable_scope().dtype,
                               **attention_params_dict)
            for _ in range(num_heads)
        ]

        multirnn_cell_with_attention = AttentionWrapper(
            cell=multirnn_cell,
            attention_mechanism=attention_mechanism,
            attention_layer_size=[hidden_dim] * num_heads,
            output_attention=True,
            alignment_history=plot_attention,
        )

        if self._mode == "train":
            decoder_output_positions = tf.range(0,
                                                tf.shape(tgt_inputs)[1],
                                                delta=1,
                                                dtype=tf.int32,
                                                name='positional_inputs')
            tgt_input_vectors = tf.nn.embedding_lookup(self._target_emb_layer,
                                                       tgt_inputs)
            if use_positional_embedding:
                tgt_input_vectors += tf.nn.embedding_lookup(
                    self.dec_pos_emb_layer, decoder_output_positions)
            tgt_input_vectors = tf.cast(
                tgt_input_vectors,
                dtype=self.params['dtype'],
            )
            # helper = tf.contrib.seq2seq.TrainingHelper(
            helper = TrainingHelper(
                inputs=tgt_input_vectors,
                sequence_length=tgt_lengths,
            )
        elif self._mode == "infer" or self._mode == "eval":
            embedding_fn = lambda ids: tf.cast(
                tf.nn.embedding_lookup(self._target_emb_layer, ids),
                dtype=self.params['dtype'],
            )
            pos_embedding_fn = None
            if use_positional_embedding:
                pos_embedding_fn = lambda ids: tf.cast(
                    tf.nn.embedding_lookup(self.dec_pos_emb_layer, ids),
                    dtype=self.params['dtype'],
                )

            # helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            helper = GreedyEmbeddingHelper(
                embedding=embedding_fn,
                start_tokens=tf.fill([self._batch_size], self.GO_SYMBOL),
                end_token=self.END_SYMBOL,
                positional_embedding=pos_embedding_fn)

        # Without targets (pure inference) decoding is bounded by the longest
        # input sequence instead of the longest target.
        if self._mode != "infer":
            maximum_iterations = tf.reduce_max(tgt_lengths)
        else:
            maximum_iterations = tf.reduce_max(enc_src_lengths)

        if not use_beam_search_decoder:
            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=multirnn_cell_with_attention,
                helper=helper,
                initial_state=multirnn_cell_with_attention.zero_state(
                    batch_size=self._batch_size,
                    dtype=encoder_outputs.dtype,
                ),
                output_layer=output_projection_layer,
            )
        else:
            # Beam search only runs in "infer" mode, where embedding_fn is
            # defined by the branch above.
            batch_size_tensor = tf.constant(self._batch_size)
            decoder = BeamSearchDecoder(
                cell=multirnn_cell_with_attention,
                embedding=embedding_fn,
                start_tokens=tf.tile([self.GO_SYMBOL], [self._batch_size]),
                end_token=self.END_SYMBOL,
                initial_state=multirnn_cell_with_attention.zero_state(
                    dtype=encoder_outputs.dtype,
                    batch_size=batch_size_tensor * self._beam_width,
                ),
                beam_width=self._beam_width,
                output_layer=output_projection_layer,
                length_penalty_weight=0.0,
            )

        final_outputs, final_state, final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder,
            impute_finished=self.mode != "infer",
            maximum_iterations=maximum_iterations,
        )

        if plot_attention:
            # Stack the per-step alignments of the single head and swap the
            # first two axes.
            alignments = tf.transpose(final_state.alignment_history[0].stack(),
                                      [1, 0, 2])
        else:
            alignments = None

        if not use_beam_search_decoder:
            outputs = tf.argmax(final_outputs.rnn_output, axis=-1)
            logits = final_outputs.rnn_output
            return_outputs = [outputs, alignments, enc_src_lengths]
        else:
            # Keep only the best beam (index 0 of the last axis).
            outputs = final_outputs.predicted_ids[:, :, 0]
            logits = final_outputs.predicted_ids[:, :, 0]
            return_outputs = [outputs, enc_src_lengths]

        if self.mode == "eval":
            # Pad the logits with ones along the time axis until they are as
            # long as the longest target sequence.
            max_len = tf.reduce_max(tgt_lengths)
            logits = tf.while_loop(
                lambda logits: max_len > tf.shape(logits)[1],
                lambda logits: tf.concat([
                    logits,
                    tf.fill([tf.shape(logits)[0], 1,
                             tf.shape(logits)[2]],
                            tf.cast(1.0, self.params['dtype']))
                ], 1),
                loop_vars=[logits],
                back_prop=False,
            )

        return {
            'outputs': return_outputs,
            'logits': logits,
            'tgt_length': final_sequence_lengths,
        }
Exemplo n.º 53
0
    def __init__(self,
                 nb_clusters,
                 centroids_init=None,
                 nb_tries=10,
                 nb_iterations=10,
                 input_tensor=None,
                 latent_space_tensor=None,
                 beta=None,
                 threshold=2.5,
                 assign_at_end=False):
        """Set up the TensorFlow graph inputs of a batched k-means.

        Args:
            nb_clusters: number of centroids per example.
            centroids_init: optional initial centroids; tiled ``nb_tries``
                times along axis 0. When omitted, ``nb_clusters`` distinct
                points of every (example, try) pair are drawn at random.
            nb_tries: number of copies made of each example.
            nb_iterations: stored on the instance for later use.
            input_tensor: tensor to cluster, treated as [batch, L, E]. When
                omitted, a new graph with a float placeholder is created.
            latent_space_tensor: optional tensor reshaped to [batch, L] and
                used to build the ``notsilent`` mask.
            beta: stored on the instance for later use.
            threshold: a point is flagged as not silent when log10(row max /
                value) of the latent tensor is below this value.
            assign_at_end: stored on the instance for later use.
        """
        self.nb_clusters = nb_clusters
        self.nb_iterations = nb_iterations
        self.nb_tries = nb_tries
        self.latent_space_tensor = latent_space_tensor
        self.beta = beta
        self.assign_at_end = assign_at_end

        # A private graph is only needed when there is no tensor to attach to.
        self.graph = (tf.Graph() if input_tensor is None
                      else tf.get_default_graph())

        with self.graph.as_default():
            with tf.name_scope('kmeans'):
                if input_tensor is not None:
                    self.X_in = input_tensor
                else:
                    # Spectrogram, embeddings
                    # shape = [batch, L , E ]
                    self.X_in = tf.placeholder("float", [None, None, None],
                                               name='Kmeans_input')

                # Points are L2-normalised along the last axis.
                normalized = tf.nn.l2_normalize(self.X_in, axis=-1)
                self.b = tf.shape(normalized)[0]
                self.X_original = tf.identity(normalized)

                # Duplicate every example nb_tries times: [b, tries, L, E] ...
                per_try = tf.tile(tf.expand_dims(normalized, 1),
                                  [1, self.nb_tries, 1, 1])
                self.L = tf.shape(per_try)[-2]
                self.E = tf.shape(per_try)[-1]
                # ... and fold the tries into the batch axis: [b * tries, L, E].
                self.X = tf.reshape(per_try, [-1, self.L, self.E])

                self.B = tf.shape(self.X)[0]

                self.ones = tf.ones_like(self.X, tf.float32)

                # Row r holds r * nb_clusters repeated L times.
                row_offsets = tf.expand_dims(
                    tf.range(self.B) * self.nb_clusters, 1)
                self.shifting = tf.tile(row_offsets, [1, self.L])

                if centroids_init is not None:
                    tiled_init = tf.identity(centroids_init)
                    self.centroids = tf.tile(tiled_init,
                                             [self.nb_tries, 1, 1])
                else:

                    def draw_distinct_points(rows, points):
                        # For every row, nb_clusters distinct indices in
                        # [0, points), sampled with NumPy.
                        picks = [
                            np.random.choice(range(points),
                                             size=self.nb_clusters,
                                             replace=False)
                            for _ in range(rows)
                        ]
                        return np.array(picks).astype(np.int32)

                    drawn = tf.py_func(draw_distinct_points,
                                       [self.B, self.L], tf.int32)
                    point_ids = tf.reshape(drawn,
                                           [self.B, self.nb_clusters, 1])

                    # Take randomly 'nb_clusters' vectors from X
                    row_ids = tf.tile(
                        tf.reshape(tf.range(self.B, dtype=tf.int32),
                                   shape=[self.B, 1, 1]),
                        [1, self.nb_clusters, 1])
                    gather_ids = tf.concat([row_ids, point_ids], axis=2)
                    # NOTE(review): stored as `centroid_init`, while the
                    # other branch sets `centroids` -- confirm this is meant.
                    self.centroid_init = tf.gather_nd(self.X, gather_ids)

                if self.latent_space_tensor is not None:
                    latent_space_tensor = tf.reshape(latent_space_tensor,
                                                     [self.b, self.L])
                    row_max = tf.reduce_max(latent_space_tensor, [-1],
                                            keep_dims=True)
                    log_lst = log10(tf.divide(row_max, latent_space_tensor))
                    below_threshold = tf.cast(log_lst < threshold, tf.float32)
                    self.notsilent_notry = tf.reshape(below_threshold,
                                                      [self.b, self.L, 1])
                    self.notsilent = tf.tile(self.notsilent_notry,
                                             [self.nb_tries, 1, 1])
                else:
                    # No latent tensor: every point counts as not silent.
                    self.notsilent = tf.ones([self.B, self.L, 1])

                # Bare attribute access; presumably a lazily built property
                # that creates the rest of the graph -- TODO confirm.
                self.network
Exemplo n.º 54
0
# 1-D constant. Per the original author's note, printing its value directly
# (outside a session) still raises an error.
tensor = tf.constant([1, 2, 3, 4, 5, 6, 7])
# print tensor.eval()

# 2-D constant filled with one value: [[-1, -1, -1], [-1, -1, -1]].
# The result is not bound to a name.
tf.constant(-1.0, shape=[2, 3])

# Evenly spaced values; 3 is the number of points: [10.0, 11.0, 12.0].
# This rebinds `tensor`, replacing the 1-D constant above.
tensor = tf.linspace(10.0, 12.0, 3, name='linspace')
# tensor = tf.linspace(10.0, 12.0, 3, name='linspace')
# print tensor

# start: 3, limit: 18, delta: 3 -- start is included, limit is excluded,
# so the result is [3, 6, 9, 12, 15].
start, limit, delta = 3, 18, 3
ran = tf.range(start, limit, delta)

# with tf.Session() as sess:
#     sess.run(ran)
#     print(ran.eval())
"""
tensorflow 随机值
"""
# (The bare string above is a section title: "TensorFlow random values".)
# Samples from a Gaussian (normal) distribution with mean -1 and standard
# deviation 4 (i.e. variance 16).
norm = tf.random_normal([2, 3], mean=-1, stddev=4)

# Shuffle; only the first dimension is permuted.
c = tf.constant([[1, 2], [3, 4], [5, 6]])
shuff = tf.random_shuffle(c)

with tf.Session() as sess:
Exemplo n.º 55
0
    def roi_heads(self, image, features, proposals, targets):
        """Build the second-stage (RoI) heads on top of the FPN features.

        Args:
            image: input image tensor; ``tf.shape(image)[2:]`` is used as the
                (h, w) of the image.
            features: the five pyramid levels; only the first four are fed
                to RoI align.
            proposals: region proposals. In training they are replaced by
                the output of ``sample_fast_rcnn_targets``.
            targets: sequence starting with ``gt_boxes, gt_labels``; when
                ``cfg.MODE_MASK`` is on, ``targets[2]`` holds the gt masks.

        Returns:
            In training, the list of head losses (plus the mask loss when
            ``cfg.MODE_MASK`` is on). In inference an empty list; the
            predictions are only exposed as named tensors in the graph.
        """
        image_shape2d = tf.shape(image)[2:]  # h,w
        assert len(features) == 5, "Features have to be P23456!"
        gt_boxes, gt_labels, *_ = targets
        roi_levels = features[:4]

        if self.training:
            proposals = sample_fast_rcnn_targets(proposals.boxes, gt_boxes,
                                                 gt_labels)

        fastrcnn_head_func = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC)
        if cfg.FPN.CASCADE:

            def roi_func(boxes):
                return multilevel_roi_align(roi_levels, boxes, 7)

            fastrcnn_head = CascadeRCNNHead(proposals, roi_func,
                                            fastrcnn_head_func,
                                            (gt_boxes, gt_labels),
                                            image_shape2d, cfg.DATA.NUM_CLASS)
        else:
            roi_feature_fastrcnn = multilevel_roi_align(
                roi_levels, proposals.boxes, 7)

            head_feature = fastrcnn_head_func('fastrcnn', roi_feature_fastrcnn)
            fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
                'fastrcnn/outputs', head_feature, cfg.DATA.NUM_CLASS)
            bbox_reg_weights = tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS,
                                           dtype=tf.float32)
            fastrcnn_head = FastRCNNHead(proposals, fastrcnn_box_logits,
                                         fastrcnn_label_logits, gt_boxes,
                                         bbox_reg_weights)

        if self.training:
            all_losses = fastrcnn_head.losses()
            if not cfg.MODE_MASK:
                return all_losses

            gt_masks = targets[2]
            # maskrcnn loss
            roi_feature_maskrcnn = multilevel_roi_align(
                roi_levels,
                proposals.fg_boxes(),
                14,
                name_scope='multilevel_roi_align_mask')
            maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = maskrcnn_head_func(
                'maskrcnn', roi_feature_maskrcnn,
                cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28

            # Crop each foreground box's gt mask and resize it to 28x28.
            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(gt_masks, 1),
                proposals.fg_boxes(),
                proposals.fg_inds_wrt_gt,
                28,
                pad_border=False)  # fg x 1x28x28
            target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1,
                                             'sampled_fg_mask_targets')
            mask_loss = maskrcnn_loss(mask_logits, proposals.fg_labels(),
                                      target_masks_for_fg)
            all_losses.append(mask_loss)
            return all_losses

        # Inference: decode, clip and filter the boxes.
        decoded_boxes = fastrcnn_head.decoded_output_boxes()
        decoded_boxes = clip_boxes(decoded_boxes,
                                   image_shape2d,
                                   name='fastrcnn_all_boxes')
        label_scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores')
        final_boxes, final_scores, final_labels = fastrcnn_predictions(
            decoded_boxes, label_scores, name_scope='output')
        if cfg.MODE_MASK:
            # Cascade inference needs roi transform with refined boxes.
            roi_feature_maskrcnn = multilevel_roi_align(
                roi_levels, final_boxes, 14)
            maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = maskrcnn_head_func(
                'maskrcnn', roi_feature_maskrcnn,
                cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28
            # For every detection pick the mask of its predicted label
            # (labels are shifted by one to index the category axis).
            result_ids = tf.range(tf.size(final_labels))
            category_ids = tf.to_int32(final_labels) - 1
            indices = tf.stack([result_ids, category_ids], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits,
                                             indices)  # #resultx28x28
            # Created only for its name; the return value is not used.
            tf.sigmoid(final_mask_logits, name='output/masks')
        return []
# SimpleRNNCell layer
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

# Demo script: sanity-check the Embedding layer, then run a single
# SimpleRNNCell step on random data and print the resulting shapes.

# Embedding layer check
input1 = tf.random.shuffle(tf.range(10))  # shuffle randomly permutes the values 0..9
# Create a layer for 10 words, each represented by a length-4 vector.
# Embedding produces the word vectors, and they are learnable.
net_Embedding = layers.Embedding(input_dim=10, output_dim=4)
out = net_Embedding(input1)
print('Embedding层:', out[1])
print(net_Embedding.embeddings[0], net_Embedding.embeddings.trainable)
print('trainable:', net_Embedding.trainable)
# Freeze the embedding layer after printing its trainable flag.
net_Embedding.trainable = False

# SimpleRNNCell test
net_cell = layers.SimpleRNNCell(units=3)
# Build explicitly so the cell's variables exist before they are printed.
net_cell.build(input_shape=(None, 4))
print('net_cell.trainable_variables:', net_cell.trainable_variables[0][0])

# Initialise the state vector (a one-element list holding a [4, 64] tensor)
h0 = [tf.zeros([4, 64])]
x_0 = tf.random.normal([4, 80, 100])
# Take index 0 along the second axis, giving a [4, 100] input for the cell.
# NOTE(review): the original author's remark here claims that an input such as
# [4, 1, 1, 1, 1, 8] is handled like the 2-D [4, 8] because size-1 axes do not
# count, and that other layers behave the same — unverified, confirm before
# relying on it.
x_1 = x_0[:,
          0, :]
net_cell2 = layers.SimpleRNNCell(64)
# One cell step: returns the output and the new state.
out1, h1 = net_cell2(x_1, h0)
print('shape:', tf.shape(out1), tf.shape(h1), id(h1))

# Build a multi-layer SimpleRNNCell stack
Exemplo n.º 57
0
  def _remove_dilations(self):
    """Reorder the input so plain strided pooling emulates dilated pooling.

    For each sliding window (given by kernel shape, dilations and strides)
    the input values the dilated kernel would cover are gathered and placed
    next to each other. A pooling op applied to the result with
    strides = kernel_shape then produces the dilated pooling output.

    Side effect: ``self.output_shape`` is overwritten with the shape of the
    returned tensor.

    Example:
      Input:     [[  0,  1,  2,  3],
                  [  4,  5,  6,  7],
                  [  8,  9, 10, 11],
                  [ 12, 13, 14, 15]]

      Kernel:    [2, 2]
      Dilations: [2, 2]
      Strides:   [1, 1]

      Result:    [[  0,  2,  1,  3],
                  [  8, 10,  9, 11],
                  [  4,  6,  5,  7],
                  [ 12, 14, 13, 15]]

      max_pool2d with kernel_shape = strides = [2, 2] on that result gives:
                 [[ 10, 11],
                  [ 14, 15]]

    Returns:
      The gathered input values reshaped to ``self.output_shape``.
    """
    in_shape = tf_shape(self.input)
    spatial_dims = in_shape[1:self.spatial_size + 1]
    num_channels = in_shape[self.spatial_size + 1]

    # Seed the index table with the channel range (e.g. [0 1]) as a column
    # vector; everything below works with column vectors.
    indices = tf.expand_dims(tf.range(num_channels, dtype=tf.int64), 1)

    # Placeholder shape, filled in while the spatial axes are processed.
    self.output_shape = [0] * (self.spatial_size + 2)
    self.output_shape[0] = in_shape[0]

    # Walk the spatial axes from the innermost to the outermost. Each step
    # computes the output positions along that axis, maps them to input
    # positions with `_calc_input_ind`, and combines them with the indices
    # accumulated so far through `tf_product` (cartesian product).
    #
    # Trace for the single-channel 4x4 example from the docstring
    # (kernel [2, 2], dilations [2, 2], strides [1, 1]):
    #
    #   start:            indices = [[0]]
    #
    #   step 0 (axis 1):  effective kernel = 3, axis output size = 4
    #                     axis_indices = [[0], [2], [1], [3]]
    #                     indices = [[0 0], [2 0], [1 0], [3 0]]
    #
    #   step 1 (axis 0):  effective kernel = 3, axis output size = 4
    #                     axis_indices = [[0], [2], [1], [3]]
    #                     indices = [[0 0 0], [0 2 0], [0 1 0], [0 3 0],
    #                                [2 0 0], [2 2 0], [2 1 0], [2 3 0],
    #                                [1 0 0], [1 2 0], [1 1 0], [1 3 0],
    #                                [3 0 0], [3 2 0], [3 1 0], [3 3 0]]
    #
    # Those rows are what gather_nd uses to pull values from the input.
    for dim in reversed(range(self.spatial_size)):
      kernel = self.kernel_shape[dim]
      dilation = self.dilations[dim]
      stride = self.strides[dim]

      # Extent of the kernel along this axis once dilation is applied.
      effective_kernel = (kernel - 1) * dilation + 1
      # Number of window positions, each contributing `kernel` values.
      window_count = (spatial_dims[dim] - effective_kernel) // stride + 1
      axis_out = window_count * kernel
      self.output_shape[dim + 1] = axis_out

      # Output positions along the axis (e.g. [0, 1, 2, 3]) translated to
      # input positions (e.g. [0, 2, 1, 3]), then turned into a column.
      axis_indices = self._calc_input_ind(tf.range(axis_out), kernel,
                                          dilation, stride)
      axis_indices = tf.expand_dims(axis_indices, 1)

      indices = tf_product(axis_indices, indices)

    # For 2D data the table now reads
    # [[y1, x1, c], [y2, x2, c], ..., [yn, xm, c]] with n the height,
    # m the width and c the channel number.

    self.output_shape[self.spatial_size + 1] = num_channels

    # Add one leading axis per spatial dimension, then repeat the table for
    # every batch element so it can be used with batch_dims=1.
    for _ in range(self.spatial_size):
      indices = tf.expand_dims(indices, 0)
    batch_multiples = [in_shape[0]] + [1] * (self.spatial_size + 1)
    indices = tf.tile(indices, batch_multiples)

    gathered = tf.gather_nd(self.input, indices, batch_dims=1)
    return tf.reshape(gathered, self.output_shape)
Exemplo n.º 58
0
def proposal_label_op(boxes,
                      gt_boxes,
                      gt_labels,
                      image_info,
                      batch_size_per_im=512,
                      fg_fraction=0.25,
                      fg_thresh=0.5,
                      bg_thresh_hi=0.5,
                      bg_thresh_lo=0.):
    """Label proposals against the ground truth and subsample RoIs.

    Steps performed:
      1. Rescale `gt_boxes` by 1 / scale (column 2 of `image_info`) and append
         them to the proposals.
      2. Compute the IoU between every candidate box and every ground-truth
         box, and let `_add_class_assignments` pick a box/class target and
         the maximum overlap per candidate.
      3. Mark candidates as foreground / background from the overlap
         thresholds, then sample `batch_size_per_im` of them per image with
         a balanced positive/negative sampler.

    The original authors cite Detectron's `datasets/json_dataset.py` as the
    reference for steps 1-2 and `roi_data/fast_rcnn.py` for step 3.

    Args:
      boxes: tensor of shape [batch_size, N, 4] with the proposals; the batch
        size is read from the static shape. Per the original documentation
        the last axis is [ymin, xmin, ymax, xmax] in scaled-image pixels.
      gt_boxes: tensor of shape [batch_size, MAX_NUM_INSTANCES, 4]. Per the
        original documentation it is in original-image pixels and may be
        padded with -1.
      gt_labels: tensor of shape [batch_size, MAX_NUM_INSTANCES]; may be
        padded with -1 per the original documentation.
      image_info: tensor of shape [batch_size, 5]; documented by the original
        authors as [height, width, scale, original_height, original_width].
        Only column 2 (scale) is read here.
      batch_size_per_im: number of RoIs to sample per image.
      fg_fraction: positive fraction handed to the sampler.
      fg_thresh: a candidate is foreground when its maximum overlap is
        strictly greater than this value.
      bg_thresh_hi: upper bound (exclusive) of the background overlap range.
      bg_thresh_lo: lower bound (inclusive) of the background overlap range.

    Returns:
      A tuple `(box_targets, class_targets, rois, proposal_to_label_map)`:
      box_targets: [batch_size, K, 4] box targets of the sampled RoIs.
      class_targets: [batch_size, K] class targets; background RoIs get 0.
      rois: [batch_size, K, 4] coordinates of the sampled RoIs.
      proposal_to_label_map: [batch_size, K] ground-truth index assigned to
        each sampled RoI; background RoIs get 0.
      K is the number of sampled RoIs (`batch_size_per_im`).
    """
    with tf.name_scope('proposal_label'):
        batch_size = boxes.shape[0]

        # Bring the ground-truth boxes into the scaled image coordinates.
        inverse_scale = 1 / image_info[:, 2]
        scaled_gt_boxes = gt_boxes * tf.reshape(inverse_scale,
                                                [batch_size, 1, 1])

        # Ground-truth boxes are deliberately appended to the proposals, as
        # in the reference implementation:
        # https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/json_dataset.py#L359.  # pylint: disable=line-too-long
        candidates = tf.concat([boxes, scaled_gt_boxes], axis=1)
        iou = _bbox_overlap(candidates, scaled_gt_boxes)

        assignments = _add_class_assignments(iou, scaled_gt_boxes, gt_labels)
        (all_box_targets, all_class_targets, max_overlap,
         label_map) = assignments

        # Foreground / background split from the overlap thresholds.
        # reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py#L132  # pylint: disable=line-too-long
        fg_cutoff = fg_thresh * tf.ones_like(max_overlap)
        positives = tf.greater(max_overlap, fg_cutoff)
        bg_floor = bg_thresh_lo * tf.ones_like(max_overlap)
        at_or_above_floor = tf.greater_equal(max_overlap, bg_floor)
        bg_ceiling = bg_thresh_hi * tf.ones_like(max_overlap)
        below_ceiling = tf.less(max_overlap, bg_ceiling)
        negatives = tf.logical_and(at_or_above_floor, below_ceiling)

        # Background candidates get class 0 and label index 0.
        all_class_targets = tf.where(negatives,
                                     tf.zeros_like(all_class_targets),
                                     all_class_targets)
        label_map = tf.where(negatives, tf.zeros_like(label_map), label_map)

        # Candidates whose smallest IoU is negative are ignored; this is how
        # ground-truth padding is handled.
        smallest_iou = tf.reduce_min(iou, axis=2)
        ignore_mask = tf.less(smallest_iou, tf.zeros_like(max_overlap))

        # Sampler inputs:
        #   indicator -> candidates eligible for sampling (positive or
        #                negative, and not ignored)
        #   labels    -> which of them are positives
        labels = positives
        eligible = tf.logical_or(positives, negatives)
        indicator = tf.logical_and(eligible, tf.logical_not(ignore_mask))

        sampler = (
            balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
                positive_fraction=fg_fraction, is_static=True))
        # The sampler works per image, so the batch is unrolled in Python.
        per_image_samples = [
            sampler.subsample(indicator[i], batch_size_per_im, labels[i])
            for i in range(batch_size)
        ]
        sampled_mask = tf.stack([per_image_samples], axis=0)[0]

        # top_k over the 0/1 mask is a workaround to turn the boolean tensor
        # into indices.
        _, picked = tf.nn.top_k(tf.to_int32(sampled_mask),
                                k=batch_size_per_im,
                                sorted=True)

        # Offset the per-image indices so they address the tensors flattened
        # over the batch axis.
        batch_offsets = tf.expand_dims(
            tf.range(batch_size) * tf.shape(candidates)[1], 1)
        flat_indices = tf.reshape(picked + batch_offsets, [-1])

        def _take_sampled(values, width, out_shape):
            # Flatten over the batch axis, gather the sampled rows, restore
            # the batch axis.
            flat_values = tf.reshape(values, [-1, width])
            return tf.reshape(tf.gather(flat_values, flat_indices), out_shape)

        rois = _take_sampled(candidates, 4, [batch_size, -1, 4])
        class_targets = _take_sampled(all_class_targets, 1, [batch_size, -1])
        sample_box_targets = _take_sampled(all_box_targets, 4,
                                           [batch_size, -1, 4])
        sample_proposal_to_label_map = _take_sampled(label_map, 1,
                                                     [batch_size, -1])
    return sample_box_targets, class_targets, rois, sample_proposal_to_label_map
Exemplo n.º 59
0
    def roi_heads(self, image, features, proposals, targets):
        """Build the Fast R-CNN head and, when enabled, the mask head.

        Args:
            image: input image tensor; only ``tf.shape(image)[2:]`` (h, w) is
                used, for clipping boxes at inference.
            features: sequence of feature maps; only ``features[0]`` is used.
            proposals: object exposing ``.boxes``. In training it is replaced
                by the result of ``sample_fast_rcnn_targets``.
            targets: sequence starting with ``gt_boxes, gt_labels``; when
                ``cfg.MODE_MASK`` is set during training, ``targets[2]`` is
                read as the ground-truth masks.

        Returns:
            In training, the list of losses from the Fast R-CNN head, plus the
            mask loss when ``cfg.MODE_MASK`` is set. In inference, an empty
            list; predictions are exposed through the named tensors created
            here (name scope ``output``, and ``output/masks`` for masks).
        """
        image_shape2d = tf.shape(image)[2:]  # h,w
        backbone_map = features[0]
        gt_boxes, gt_labels, *_ = targets

        if self.training:
            # Training works on a sampled subset of the proposals; inference
            # keeps every proposal box.
            proposals = sample_fast_rcnn_targets(proposals.boxes, gt_boxes,
                                                 gt_labels)

        # Boxes used to crop RoIs, expressed in feature-map coordinates.
        rois_on_map = proposals.boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
        cropped_rois = roi_align(backbone_map, rois_on_map, 14)

        # This C5 feature is kept so the mask branch can share it.
        feature_fastrcnn = resnet_conv5(
            cropped_rois, cfg.BACKBONE.RESNET_NUM_BLOCKS[-1])  # nxcx7x7
        pooled_feature = GlobalAvgPooling('gap',
                                          feature_fastrcnn,
                                          data_format='channels_first')
        label_logits, box_logits = fastrcnn_outputs('fastrcnn', pooled_feature,
                                                    cfg.DATA.NUM_CLASS)

        bbox_reg_weights = tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS,
                                       dtype=tf.float32)
        fastrcnn_head = FastRCNNHead(proposals, box_logits, label_logits,
                                     gt_boxes, bbox_reg_weights)

        if self.training:
            all_losses = fastrcnn_head.losses()

            if cfg.MODE_MASK:
                gt_masks = targets[2]
                # Mask loss: the mask branch reuses the C5 feature of the
                # foreground RoIs.
                fg_feature = tf.gather(feature_fastrcnn, proposals.fg_inds())
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY,
                    num_convs=0)  # #fg x #cat x 14x14

                fg_mask_targets = crop_and_resize(
                    tf.expand_dims(gt_masks, 1),
                    proposals.fg_boxes(),
                    proposals.fg_inds_wrt_gt,
                    14,
                    pad_border=False)  # nfg x 1x14x14
                fg_mask_targets = tf.squeeze(fg_mask_targets, 1,
                                             'sampled_fg_mask_targets')
                mask_loss = maskrcnn_loss(mask_logits, proposals.fg_labels(),
                                          fg_mask_targets)
                all_losses.append(mask_loss)
            return all_losses

        # Inference: decode, clip and post-process the box predictions.
        decoded_boxes = clip_boxes(fastrcnn_head.decoded_output_boxes(),
                                   image_shape2d,
                                   name='fastrcnn_all_boxes')
        label_scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores')
        final_boxes, _final_scores, final_labels = fastrcnn_predictions(
            decoded_boxes, label_scores, name_scope='output')

        if cfg.MODE_MASK:
            # Re-crop with the final boxes and run the mask branch on them.
            final_rois_on_map = final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
            cropped_final = roi_align(backbone_map, final_rois_on_map, 14)
            feature_maskrcnn = resnet_conv5(
                cropped_final, cfg.BACKBONE.RESNET_NUM_BLOCKS[-1])
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY,
                0)  # #result x #cat x 14x14

            # For every result keep the mask of its predicted label
            # (label - 1 is the index along the category axis).
            result_ids = tf.range(tf.size(final_labels))
            category_ids = tf.to_int32(final_labels) - 1
            indices = tf.stack([result_ids, category_ids], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits,
                                             indices)  # #resultx14x14
            tf.sigmoid(final_mask_logits, name='output/masks')
        return []
Exemplo n.º 60
0
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 num_layers,
                 embed,
                 entity_embed=None,
                 num_entities=0,
                 num_trans_units=100,
                 learning_rate=0.0001,
                 learning_rate_decay_factor=0.95,
                 max_gradient_norm=5.0,
                 num_samples=500,
                 max_length=60,
                 mem_use=True,
                 output_alignments=True,
                 use_lstm=False):
        """Build the TF1 graph: placeholders, embeddings, encoder, decoder, training ops.

        Args:
            num_symbols: number of rows of the word embedding table when it is
                randomly initialised; also passed to the output projection and
                to the inference decoder.
            num_embed_units: width of the word embeddings.
            num_units: size of each GRU cell.
            num_layers: number of stacked GRU cells in encoder and decoder.
            embed: initial value for the word embedding table, or None to
                create a `[num_symbols, num_embed_units]` variable.
            entity_embed: initial value for the entity embedding table, or
                None to create a `[num_entities, num_trans_units]` variable.
            num_entities: number of rows of the entity table when
                `entity_embed` is None.
            num_trans_units: width of the entity embeddings and of the dense
                transformations applied to them.
            learning_rate: initial value of `self.learning_rate` and the rate
                given to the Adam optimizer.
            learning_rate_decay_factor: multiplier applied by
                `self.learning_rate_decay_op`.
            max_gradient_norm: global norm used to clip the gradients.
            num_samples: passed to `output_projection_layer`.
            max_length: passed to the inference decoder function.
            mem_use: combined with `output_alignments` (logical and) for the
                attention construction.
            output_alignments: when true, alignments are collected and
                `total_loss` is used; otherwise `sequence_loss` is used.
            use_lstm: not referenced in the visible part of this method.
        """

        # Input placeholders
        self.posts = tf.placeholder(tf.string, (None, None),
                                    'enc_inps')  # batch*len
        # NOTE(review): `(None)` is just `None` (fully unknown shape), not the
        # 1-tuple `(None,)` — confirm which was intended.
        self.posts_length = tf.placeholder(tf.int32, (None),
                                           'enc_lens')  # batch
        self.responses = tf.placeholder(tf.string, (None, None),
                                        'dec_inps')  # batch*len
        self.responses_length = tf.placeholder(tf.int32, (None),
                                               'dec_lens')  # batch
        self.entities = tf.placeholder(tf.string, (None, None, None),
                                       'entities')  # batch
        self.entity_masks = tf.placeholder(tf.string, (None, None),
                                           'entity_masks')  # batch
        self.triples = tf.placeholder(tf.string, (None, None, None, 3),
                                      'triples')  # batch
        self.posts_triple = tf.placeholder(tf.int32, (None, None, 1),
                                           'enc_triples')  # batch
        self.responses_triple = tf.placeholder(tf.string, (None, None, 3),
                                               'dec_triples')  # batch
        self.match_triples = tf.placeholder(tf.int32, (None, None, None),
                                            'match_triples')  # batch

        # Dynamic sizes read from the placeholders at run time.
        encoder_batch_size, encoder_len = tf.unstack(tf.shape(self.posts))
        triple_num = tf.shape(self.triples)[1]
        triple_len = tf.shape(self.triples)[2]
        one_hot_triples = tf.one_hot(self.match_triples, triple_len)
        use_triples = tf.reduce_sum(one_hot_triples, axis=[2, 3])

        # Build the vocabulary lookup tables (index to string, string to index)
        self.symbol2index = MutableHashTable(key_dtype=tf.string,
                                             value_dtype=tf.int64,
                                             default_value=UNK_ID,
                                             shared_name="in_table",
                                             name="in_table",
                                             checkpoint=True)
        self.index2symbol = MutableHashTable(key_dtype=tf.int64,
                                             value_dtype=tf.string,
                                             default_value='_UNK',
                                             shared_name="out_table",
                                             name="out_table",
                                             checkpoint=True)
        self.entity2index = MutableHashTable(key_dtype=tf.string,
                                             value_dtype=tf.int64,
                                             default_value=NONE_ID,
                                             shared_name="entity_in_table",
                                             name="entity_in_table",
                                             checkpoint=True)
        self.index2entity = MutableHashTable(key_dtype=tf.int64,
                                             value_dtype=tf.string,
                                             default_value='_NONE',
                                             shared_name="entity_out_table",
                                             name="entity_out_table",
                                             checkpoint=True)

        self.posts_word_id = self.symbol2index.lookup(self.posts)  # batch*len
        self.posts_entity_id = self.entity2index.lookup(
            self.posts)  # batch*len
        #self.posts_word_id = tf.Print(self.posts_word_id, ['use_triples', use_triples, 'one_hot_triples', one_hot_triples], summarize=1e6)
        self.responses_target = self.symbol2index.lookup(
            self.responses)  # batch*len

        batch_size, decoder_len = tf.shape(self.responses)[0], tf.shape(
            self.responses)[1]
        # Decoder input ids: a GO_ID column followed by the targets without
        # their last column.
        self.responses_word_id = tf.concat([
            tf.ones([batch_size, 1], dtype=tf.int64) * GO_ID,
            tf.split(self.responses_target, [decoder_len - 1, 1], 1)[0]
        ], 1)  # batch*len
        # Mask with ones up to and including position responses_length - 1
        # (reverse cumulative sum of a one-hot at that position).
        self.decoder_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.responses_length - 1, decoder_len),
                      reverse=True,
                      axis=1), [-1, decoder_len])

        # Build the word embedding table (index to vector)
        if embed is None:
            # Randomly initialise the word embeddings
            self.embed = tf.get_variable('word_embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            # Initialise from pre-trained word embeddings (pre-trained word vectors, GloVe or Word2Vec)
            self.embed = tf.get_variable('word_embed',
                                         dtype=tf.float32,
                                         initializer=embed)
        if entity_embed is None:
            # Randomly initialise the entity embeddings (kept non-trainable)
            self.entity_trans = tf.get_variable(
                'entity_embed', [num_entities, num_trans_units],
                tf.float32,
                trainable=False)
        else:
            # Initialise from the supplied entity embeddings (kept non-trainable)
            self.entity_trans = tf.get_variable('entity_embed',
                                                dtype=tf.float32,
                                                initializer=entity_embed,
                                                trainable=False)

        # Trainable tanh projection applied on top of the frozen entity table.
        self.entity_trans_transformed = tf.layers.dense(
            self.entity_trans,
            num_trans_units,
            activation=tf.tanh,
            name='trans_transformation')
        # Seven zero-initialised rows placed in front of the transformed
        # entity embeddings.
        # NOTE(review): presumably these cover special/padding entity ids —
        # confirm against the vocabulary construction.
        padding_entity = tf.get_variable('entity_padding_embed',
                                         [7, num_trans_units],
                                         dtype=tf.float32,
                                         initializer=tf.zeros_initializer())

        self.entity_embed = tf.concat(
            [padding_entity, self.entity_trans_transformed], axis=0)

        triples_embedding = tf.reshape(
            tf.nn.embedding_lookup(self.entity_embed,
                                   self.entity2index.lookup(self.triples)),
            [encoder_batch_size, triple_num, -1, 3 * num_trans_units])
        entities_word_embedding = tf.reshape(
            tf.nn.embedding_lookup(self.embed,
                                   self.symbol2index.lookup(self.entities)),
            [encoder_batch_size, -1, num_embed_units])

        # Split each triple embedding into its three num_trans_units-wide parts.
        head, relation, tail = tf.split(triples_embedding,
                                        [num_trans_units] * 3,
                                        axis=3)

        # Static attention of the knowledge fusion layer
        with tf.variable_scope('graph_attention'):
            # Concatenate head and tail
            head_tail = tf.concat([head, tail], axis=3)
            # Project head+tail into a single vector
            head_tail_transformed = tf.layers.dense(head_tail,
                                                    num_trans_units,
                                                    activation=tf.tanh,
                                                    name='head_tail_transform')
            # Relation vector
            relation_transformed = tf.layers.dense(relation,
                                                   num_trans_units,
                                                   name='relation_transform')
            # Attention scores from relation and head_tail
            e_weight = tf.reduce_sum(relation_transformed *
                                     head_tail_transformed,
                                     axis=3)
            # Normalise the attention scores
            alpha_weight = tf.nn.softmax(e_weight)
            # Weighted sum of head_tail using the attention weights
            graph_embed = tf.reduce_sum(tf.expand_dims(alpha_weight, 3) *
                                        head_tail,
                                        axis=2)

        # For every post position, pick the graph embedding addressed by
        # (batch index, posts_triple).
        graph_embed_input = tf.gather_nd(
            graph_embed,
            tf.concat([
                tf.tile(
                    tf.reshape(tf.range(encoder_batch_size, dtype=tf.int32),
                               [-1, 1, 1]), [1, encoder_len, 1]),
                self.posts_triple
            ],
                      axis=2))

        triple_embed_input = tf.reshape(
            tf.nn.embedding_lookup(
                self.entity_embed,
                self.entity2index.lookup(self.responses_triple)),
            [batch_size, decoder_len, 3 * num_trans_units])

        post_word_input = tf.nn.embedding_lookup(
            self.embed, self.posts_word_id)  # batch*len*unit
        response_word_input = tf.nn.embedding_lookup(
            self.embed, self.responses_word_id)  # batch*len*unit

        # Append the attention-derived graph information to the input sentence
        self.encoder_input = tf.concat([post_word_input, graph_embed_input],
                                       axis=2)

        # Append the triple information to the output sentence
        self.decoder_input = tf.concat(
            [response_word_input, triple_embed_input], axis=2)

        # Encoder uses GRUCell; num_layers is the number of layers
        encoder_cell = MultiRNNCell(
            [GRUCell(num_units) for _ in range(num_layers)])

        # Decoder uses GRUCell; num_layers is the number of layers
        decoder_cell = MultiRNNCell(
            [GRUCell(num_units) for _ in range(num_layers)])

        # Wrap the encoder cells in a dynamic RNN
        encoder_output, encoder_state = dynamic_rnn(encoder_cell,
                                                    self.encoder_input,
                                                    self.posts_length,
                                                    dtype=tf.float32,
                                                    scope="encoder")

        # get output projection function
        output_fn, selector_fn, sequence_loss, sampled_sequence_loss, total_loss = output_projection_layer(
            num_units, num_symbols, num_samples)
        # Decoder (training graph)
        with tf.variable_scope('decoder'):
            # Obtain the attention functions
            attention_keys_init, attention_values_init, attention_score_fn_init, attention_construct_fn_init \
                = prepare_attention(encoder_output, 'bahdanau', num_units, imem=(graph_embed, triples_embedding), output_alignments=output_alignments and mem_use)  # 'luong', num_units)

            decoder_fn_train = attention_decoder_fn_train(
                encoder_state,
                attention_keys_init,
                attention_values_init,
                attention_score_fn_init,
                attention_construct_fn_init,
                output_alignments=output_alignments and mem_use,
                max_length=tf.reduce_max(self.responses_length))
            self.decoder_output, _, alignments_ta = dynamic_rnn_decoder(
                decoder_cell,
                decoder_fn_train,
                self.decoder_input,
                self.responses_length,
                scope="decoder_rnn")
            # NOTE(review): this branch tests `output_alignments` alone, while
            # the attention above was built with `output_alignments and
            # mem_use` — confirm the mem_use=False case is handled.
            if output_alignments:
                self.alignments = tf.transpose(alignments_ta.stack(),
                                               perm=[1, 0, 2, 3])
                self.decoder_loss, self.ppx_loss, self.sentence_ppx = total_loss(
                    self.decoder_output, self.responses_target,
                    self.decoder_mask, self.alignments, triples_embedding,
                    use_triples, one_hot_triples)
                self.sentence_ppx = tf.identity(self.sentence_ppx,
                                                name='ppx_loss')
            else:
                self.decoder_loss = sequence_loss(self.decoder_output,
                                                  self.responses_target,
                                                  self.decoder_mask)

        # Inference graph: same 'decoder' scope, reusing its variables.
        with tf.variable_scope('decoder', reuse=True):
            # Obtain the attention functions
            attention_keys, attention_values, attention_score_fn, attention_construct_fn \
                = prepare_attention(encoder_output, 'bahdanau', num_units, reuse=True, imem=(graph_embed, triples_embedding), output_alignments=output_alignments and mem_use)  # 'luong', num_units)
            decoder_fn_inference = attention_decoder_fn_inference(
                output_fn,
                encoder_state,
                attention_keys,
                attention_values,
                attention_score_fn,
                attention_construct_fn,
                self.embed,
                GO_ID,
                EOS_ID,
                max_length,
                num_symbols,
                imem=(entities_word_embedding,
                      tf.reshape(
                          triples_embedding,
                          [encoder_batch_size, -1, 3 * num_trans_units])),
                selector_fn=selector_fn)

            self.decoder_distribution, _, output_ids_ta = dynamic_rnn_decoder(
                decoder_cell, decoder_fn_inference, scope="decoder_rnn")

            output_len = tf.shape(self.decoder_distribution)[1]
            output_ids = tf.transpose(
                output_ids_ta.gather(tf.range(output_len)))
            # Positive ids are looked up as words; negated ids (clipped to
            # >= 0) index into the flattened entities of the same batch row.
            word_ids = tf.cast(tf.clip_by_value(output_ids, 0, num_symbols),
                               tf.int64)
            entity_ids = tf.reshape(
                tf.clip_by_value(-output_ids, 0, num_symbols) + tf.reshape(
                    tf.range(encoder_batch_size) *
                    tf.shape(entities_word_embedding)[1], [-1, 1]), [-1])
            entities = tf.reshape(
                tf.gather(tf.reshape(self.entities, [-1]), entity_ids),
                [-1, output_len])
            words = self.index2symbol.lookup(word_ids)

            # Build the generated reply for output
            self.generation = tf.where(output_ids > 0, words, entities)
            self.generation = tf.identity(self.generation, name='generation')

        # Initialise training parameters
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # Every global variable created so far (not only the trainable ones).
        self.params = tf.global_variables()

        # Use the Adam optimizer
        # NOTE(review): the optimizer receives the Python `learning_rate`
        # argument, not the `self.learning_rate` variable, so running
        # `learning_rate_decay_op` does not change the rate Adam uses —
        # confirm whether that is intended.
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
        self.lr = opt._lr

        # Gradients w.r.t. all collected params, clipped by global norm.
        gradients = tf.gradients(self.decoder_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        # Summaries: the loss as a scalar and a histogram per trainable variable.
        tf.summary.scalar('decoder_loss', self.decoder_loss)
        for each in tf.trainable_variables():
            tf.summary.histogram(each.name, each)

        self.merged_summary_op = tf.summary.merge_all()

        # Two savers: a rolling one (3 most recent, plus one kept per hour)
        # and one that keeps up to 1000 checkpoints.
        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=3,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
        self.saver_epoch = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                          max_to_keep=1000,
                                          pad_step_number=True)