Example #1
def test_forward_ceil():
    ishape = (1, 3, 10, 10)
    inp_array = np.random.uniform(size=ishape).astype(np.float32)
    with tf.Graph().as_default():
        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
        tf.ceil(in1)
        compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Ceil:0')
Example #2
def pad_to_multiple(tensor, multiple):
  """Returns the tensor zero padded to the specified multiple.

  Appends 0s to the end of the first and second dimension (height and width) of
  the tensor until both dimensions are a multiple of the input argument
  'multiple'. E.g. given an input tensor of shape [1, 3, 5, 1] and an input
  multiple of 4, PadToMultiple will append 0s so that the resulting tensor will
  be of shape [1, 4, 8, 1].

  Args:
    tensor: rank 4 float32 tensor, where
            tensor -> [batch_size, height, width, channels].
    multiple: the multiple to pad to.

  Returns:
    padded_tensor: the tensor zero padded to the specified multiple.
  """
  tensor_shape = tensor.get_shape()
  batch_size = static_shape.get_batch_size(tensor_shape)
  tensor_height = static_shape.get_height(tensor_shape)
  tensor_width = static_shape.get_width(tensor_shape)
  tensor_depth = static_shape.get_depth(tensor_shape)

  if batch_size is None:
    batch_size = tf.shape(tensor)[0]

  if tensor_height is None:
    tensor_height = tf.shape(tensor)[1]
    padded_tensor_height = tf.to_int32(
        tf.ceil(tf.to_float(tensor_height) / tf.to_float(multiple))) * multiple
  else:
    padded_tensor_height = int(
        math.ceil(float(tensor_height) / multiple) * multiple)

  if tensor_width is None:
    tensor_width = tf.shape(tensor)[2]
    padded_tensor_width = tf.to_int32(
        tf.ceil(tf.to_float(tensor_width) / tf.to_float(multiple))) * multiple
  else:
    padded_tensor_width = int(
        math.ceil(float(tensor_width) / multiple) * multiple)

  if tensor_depth is None:
    tensor_depth = tf.shape(tensor)[3]

  # Use tf.concat instead of tf.pad to preserve static shape
  if padded_tensor_height != tensor_height:
    height_pad = tf.zeros([
        batch_size, padded_tensor_height - tensor_height, tensor_width,
        tensor_depth
    ])
    tensor = tf.concat([tensor, height_pad], 1)
  if padded_tensor_width != tensor_width:
    width_pad = tf.zeros([
        batch_size, padded_tensor_height, padded_tensor_width - tensor_width,
        tensor_depth
    ])
    tensor = tf.concat([tensor, width_pad], 2)

  return tensor
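A minimal usage sketch for pad_to_multiple (hypothetical shapes, assuming TF 1.x graph mode and the static_shape helpers used above), reproducing the docstring's [1, 3, 5, 1] -> [1, 4, 8, 1] case:

import tensorflow as tf  # TF 1.x assumed

x = tf.placeholder(tf.float32, shape=[1, 3, 5, 1])
padded = pad_to_multiple(x, multiple=4)
# With a fully static input shape the math.ceil branch is taken, so the
# padded static shape is already known at graph-construction time.
print(padded.get_shape().as_list())  # expected: [1, 4, 8, 1]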
Example #3
  def _update_lipschitz(self,v,i):
    config = self.config
    if len(v.shape) > 1:
      k = self.config.weight_constraint_k or 100.0
      wi_hat = v
      if len(v.shape) == 4:
        #fij = tf.reduce_sum(tf.abs(wi_hat),  axis=[0,1])
        fij = wi_hat
        fij = tf.reduce_sum(tf.abs(fij),  axis=[1])
        fij = tf.reduce_max(fij,  axis=[0])
      else:
        fij = wi_hat

      if self.config.ortho_pnorm == "inf":
        wp = tf.reduce_max(tf.reduce_sum(tf.abs(fij), axis=0), axis=0)
      else:
        # conv
        wp = tf.reduce_max(tf.reduce_sum(tf.abs(fij), axis=1), axis=0)
      ratio = (1.0/tf.maximum(1.0, wp/k))
      
      if self.config.weight_bounce:
        bounce = tf.minimum(1.0, tf.ceil(wp/k-0.999))
        ratio -= tf.maximum(0.0, bounce) * 0.2

      if self.config.weight_scaleup:
        up = tf.minimum(1.0, tf.ceil(0.02-wp/k))
        ratio += tf.maximum(0.0, up) * k/wp * 0.2

      wi = ratio*(wi_hat)
      #self.gan.metrics['wi'+str(i)]=wp
      #self.gan.metrics['wk'+str(i)]=ratio
      #self.gan.metrics['bouce'+str(i)]=bounce
      return tf.assign(v, wi)
    return None
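An illustrative NumPy sketch (hypothetical numbers) of the scaling rule above: weights are multiplied by 1 / max(1, wp / k), which leaves them unchanged while the norm wp stays below the bound k and rescales them back to norm k otherwise.

import numpy as np

k, wp = 100.0, 250.0            # hypothetical bound and current weight norm
ratio = 1.0 / max(1.0, wp / k)  # 0.4
print(ratio * wp)               # 100.0: the norm is clipped back to k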
Example #4
  def _anchor_component_tf(self):
    print('Use TF anchors')
    with tf.variable_scope('ANCHOR_' + self._tag) as scope:
      # just to get the shape right
      height = tf.to_int32(tf.ceil(self._im_info[0, 0] / np.float32(self._feat_stride[0])))
      width = tf.to_int32(tf.ceil(self._im_info[0, 1] / np.float32(self._feat_stride[0])))

      self._anchors, self._anchor_length = generate_anchors_pre_tf(
        height, width, self._feat_stride[0], self._anchor_scales,
        self._anchor_ratios)
Example #5
 def _anchor_component(self):
   with tf.variable_scope('ANCHOR_' + self._tag) as scope:
     # just to get the shape right
     height = tf.to_int32(tf.ceil(self._im_info[0, 0] / np.float32(self._feat_stride[0])))
     width = tf.to_int32(tf.ceil(self._im_info[0, 1] / np.float32(self._feat_stride[0])))
     anchors, anchor_length = tf.py_func(generate_anchors_pre,
                                         [height, width,
                                          self._feat_stride, self._anchor_scales, self._anchor_ratios],
                                         [tf.float32, tf.int32], name="generate_anchors")
     anchors.set_shape([None, 4])
     anchor_length.set_shape([])
     self._anchors = anchors
     self._anchor_length = anchor_length
Example #6
def sample_img(img, n_samples):
    sx = tf.random_uniform((n_samples,), 0, 1) * 27
    sy = tf.random_uniform((n_samples,), 0, 1) * 27
    sx_lower = tf.cast(tf.floor(sx), tf.int32)
    sx_upper = tf.cast(tf.ceil(sx), tf.int32)

    sy_lower = tf.cast(tf.floor(sy), tf.int32)
    sy_upper = tf.cast(tf.ceil(sy), tf.int32)

    sx_nearest = tf.cast(tf.round(sx), tf.int32)
    sy_nearest = tf.cast(tf.round(sy), tf.int32)
    inds = tf.pack([sx_nearest, sy_nearest])
    samples = tf.gather(tf.reshape(img, (-1,)), sx_nearest + sy_nearest*28)
    return sx/27, sy/27, samples
Example #7
  def _survival_function(self, y):
    low = self._low
    high = self._high

    # Recall the promise:
    # survival_function(y) := P[Y > y]
    #                       = 0, if y >= high,
    #                       = 1, if y < low,
    #                       = P[X > y], otherwise.

    # P[Y > j] = P[ceiling(Y) > j] since mass is only at integers, not in
    # between.
    j = tf.ceil(y)

    # P[X > j], used when low < X < high.
    result_so_far = self.distribution.survival_function(j)

    # Broadcast, because it's possible that this is a single distribution being
    # evaluated on a number of samples, or something like that.
    j += tf.zeros_like(result_so_far)

    # Re-define values at the cutoffs.
    if low is not None:
      result_so_far = tf.where(j < low, tf.ones_like(result_so_far),
                               result_so_far)
    if high is not None:
      result_so_far = tf.where(j >= high, tf.zeros_like(result_so_far),
                               result_so_far)

    return result_so_far
Example #8
def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
    shape2d = tf.shape(image)[2:]
    mult = float(cfg.FPN.RESOLUTION_REQUIREMENT)
    new_shape2d = tf.to_int32(tf.ceil(tf.to_float(shape2d) / mult) * mult)
    pad_shape2d = new_shape2d - shape2d
    assert len(num_blocks) == 4, num_blocks
    with resnet_argscope():
        chan = image.shape[1]
        pad_base = maybe_reverse_pad(2, 3)
        l = tf.pad(image, tf.stack(
            [[0, 0], [0, 0],
             [pad_base[0], pad_base[1] + pad_shape2d[0]],
             [pad_base[0], pad_base[1] + pad_shape2d[1]]]))
        l.set_shape([None, chan, None, None])
        l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
        l = tf.pad(l, [[0, 0], [0, 0], maybe_reverse_pad(0, 1), maybe_reverse_pad(0, 1)])
        l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
        c2 = resnet_group('group0', l, resnet_bottleneck, 64, num_blocks[0], 1)
        if freeze_c2:
            c2 = tf.stop_gradient(c2)
        c3 = resnet_group('group1', c2, resnet_bottleneck, 128, num_blocks[1], 2)
        c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2)
        c5 = resnet_group('group3', c4, resnet_bottleneck, 512, num_blocks[3], 2)
    # 32x downsampling up to now
    # size of c5: ceil(input/32)
    return c2, c3, c4, c5
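A hypothetical shape walk-through of the rounding above, assuming cfg.FPN.RESOLUTION_REQUIREMENT is 32:

import math

mult = 32.0
shape2d = [700, 1000]                                             # hypothetical H, W
new_shape2d = [int(math.ceil(s / mult) * mult) for s in shape2d]  # [704, 1024]
pad_shape2d = [n - s for n, s in zip(new_shape2d, shape2d)]       # [4, 24]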
Example #9
def crop_or_pad(waves, length, channels):
  """Crop or pad wave to have shape [N, length, channels].

  Args:
    waves: A 3D `Tensor` of NLC format.
    length: A Python scalar. The output wave size.
    channels: Number of output waves channels.

  Returns:
    A 3D `Tensor` of NLC format with shape [N, length, channels].
  """
  waves = tf.convert_to_tensor(waves)
  batch_size = waves.shape[0].value
  waves_shape = tf.shape(waves)

  # Force audio length.
  pad = tf.maximum(0, length - waves_shape[1])
  right_pad = tf.to_int32(tf.to_float(pad) / 2.0)
  left_pad = pad - right_pad
  waves = tf.pad(waves, [[0, 0], [left_pad, right_pad], [0, 0]])
  waves = waves[:, :length, :]

  # Force number of channels.
  num_repeats = tf.to_int32(
      tf.ceil(tf.to_float(channels) / tf.to_float(waves_shape[2])))
  waves = tf.tile(waves, [1, 1, num_repeats])[:, :, :channels]

  waves.set_shape([batch_size, length, channels])
  return waves
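A hypothetical sketch of the channel logic above, a mono wave tiled up to a stereo output, where num_repeats = ceil(channels / input_channels):

import math

input_channels, channels = 1, 2                                  # hypothetical mono in, stereo out
num_repeats = int(math.ceil(float(channels) / input_channels))   # 2
# tf.tile then repeats the channel axis num_repeats times and the slice
# keeps the first `channels` of them, giving shape [N, length, 2].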
Example #10
def non_zero_tokens(tokens):
    """Receives a vector of tokens (float) which are zero-padded. Returns a vector of the same size, which has the value
    1.0 in positions with actual tokens and 0.0 in positions with zero-padding.

    :param tokens:
    :return:
    """
    return tf.ceil(tokens / tf.reduce_max(tokens, [1], keep_dims=True))
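A minimal check of the masking trick above (hypothetical token values, assuming tokens are non-negative and TF 1.x for the keep_dims argument):

import tensorflow as tf  # TF 1.x assumed

tokens = tf.constant([[3., 7., 0., 0.]])
mask = non_zero_tokens(tokens)  # 3/7 and 7/7 ceil to 1, padding stays 0
with tf.Session() as sess:
    print(sess.run(mask))       # expected: [[1. 1. 0. 0.]]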
Example #11
 def reshape_seqs(x, avg_window_size=3, **kwargs):
     B = tf.shape(x)[0]
     L = tf.cast(tf.shape(x)[1], tf.float32)
     D = x.get_shape().as_list()[-1]
     b = tf.transpose(x, [0, 2, 1])
     extra_pads = tf.cast(tf.ceil(L / avg_window_size) * avg_window_size - L, tf.int32)
     c = tf.pad(b, tf.concat([tf.zeros([2, 2], dtype=tf.int32), [[0, extra_pads]]], axis=0))
     return tf.reshape(c, [B, D, avg_window_size, -1])
Example #12
def imageWarpIm(imageBatch,pMtrxBatch,opt,name=None):
	with tf.name_scope("ImWarp"):
		imageBatch = tf.expand_dims(imageBatch,-1)
		batchSize = tf.shape(imageBatch)[0]
		imageH,imageW = opt.H,opt.H
		H,W = opt.H,opt.W
		warpGTmtrxBatch = tf.tile(tf.expand_dims(opt.warpGTmtrx,0),[batchSize,1,1])
		transMtrxBatch = tf.matmul(warpGTmtrxBatch,pMtrxBatch)
		# warp the canonical coordinates
		X,Y = np.meshgrid(np.linspace(-1,1,W),np.linspace(-1,1,H))
		XYhom = tf.transpose(tf.stack([X.reshape([-1]),Y.reshape([-1]),np.ones([X.size])],axis=1))
		XYhomBatch = tf.tile(tf.expand_dims(XYhom,0),[batchSize,1,1])
		XYwarpHomBatch = tf.matmul(transMtrxBatch,tf.to_float(XYhomBatch))
		XwarpHom,YwarpHom,ZwarpHom = tf.split(XYwarpHomBatch,3,1)
		Xwarp = tf.reshape(XwarpHom/ZwarpHom,[batchSize,H,W])
		Ywarp = tf.reshape(YwarpHom/ZwarpHom,[batchSize,H,W])
		# get the integer sampling coordinates
		Xfloor,Xceil = tf.floor(Xwarp),tf.ceil(Xwarp)
		Yfloor,Yceil = tf.floor(Ywarp),tf.ceil(Ywarp)
		XfloorInt,XceilInt = tf.to_int32(Xfloor),tf.to_int32(Xceil)
		YfloorInt,YceilInt = tf.to_int32(Yfloor),tf.to_int32(Yceil)
		imageIdx = tf.tile(tf.reshape(tf.range(batchSize),[batchSize,1,1]),[1,H,W])
		imageVec = tf.reshape(imageBatch,[-1,tf.shape(imageBatch)[3]])
		imageVecOutside = tf.concat([imageVec,tf.zeros([1,tf.shape(imageBatch)[3]])],0)
		idxUL = (imageIdx*imageH+YfloorInt)*imageW+XfloorInt
		idxUR = (imageIdx*imageH+YfloorInt)*imageW+XceilInt
		idxBL = (imageIdx*imageH+YceilInt)*imageW+XfloorInt
		idxBR = (imageIdx*imageH+YceilInt)*imageW+XceilInt
		idxOutside = tf.fill([batchSize,H,W],batchSize*imageH*imageW)
		def insideIm(Xint,Yint):
			return (Xint>=0)&(Xint<imageW)&(Yint>=0)&(Yint<imageH)
		idxUL = tf.where(insideIm(XfloorInt,YfloorInt),idxUL,idxOutside)
		idxUR = tf.where(insideIm(XceilInt,YfloorInt),idxUR,idxOutside)
		idxBL = tf.where(insideIm(XfloorInt,YceilInt),idxBL,idxOutside)
		idxBR = tf.where(insideIm(XceilInt,YceilInt),idxBR,idxOutside)
		# bilinear interpolation
		Xratio = tf.reshape(Xwarp-Xfloor,[batchSize,H,W,1])
		Yratio = tf.reshape(Ywarp-Yfloor,[batchSize,H,W,1])
		ImUL = tf.to_float(tf.gather(imageVecOutside,idxUL))*(1-Xratio)*(1-Yratio)
		ImUR = tf.to_float(tf.gather(imageVecOutside,idxUR))*(Xratio)*(1-Yratio)
		ImBL = tf.to_float(tf.gather(imageVecOutside,idxBL))*(1-Xratio)*(Yratio)
		ImBR = tf.to_float(tf.gather(imageVecOutside,idxBR))*(Xratio)*(Yratio)
		ImWarpBatch = ImUL+ImUR+ImBL+ImBR
		ImWarpBatch = tf.identity(ImWarpBatch,name=name)
	return ImWarpBatch
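A one-pixel NumPy sketch (hypothetical coordinates) of the bilinear weighting applied above; the four neighbour weights are products of the fractional offsets and always sum to one.

import numpy as np

Xwarp, Ywarp = 2.25, 5.75                   # hypothetical warped coordinates
a = Xwarp - np.floor(Xwarp)                 # Xratio = 0.25
b = Ywarp - np.floor(Ywarp)                 # Yratio = 0.75
wUL, wUR, wBL, wBR = (1 - a) * (1 - b), a * (1 - b), (1 - a) * b, a * b
print(wUL + wUR + wBL + wBR)                # 1.0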
Example #13
def cnn(model, config, scope, connect = None):
	with tf.variable_scope(scope), tf.name_scope(scope):
		with tf.variable_scope('inputs'), tf.name_scope('inputs'):
			sizes = {size: config.getint(scope, '%s_size' %size) for size in ['clength', 'cstep', 'plength', 'pstep']}
			if connect is None:
				model['%s_in0length' %scope] = config.getint('global', 'batch_size')
				model['%s_in1length' %scope] = config.getint('global', 'input_size')
				model['%s_in2length' %scope] = tf.placeholder(tf.int32, [model['%s_in0length' %scope]], '%s_in2length' %scope)
				model['%s_maxin2length' %scope] = config.getint('global', 'time_size')
				model['%s_inputs' %scope] = tf.placeholder(tf.float32, [model['%s_maxin2length' %scope], model['%s_in0length' %scope], model['%s_in1length' %scope]], '%s_inputs' %scope)
			else:
				model['%s_in0length' %scope] = model['%s_out0length' %connect]
				model['%s_in1length' %scope] = model['%s_out1length' %connect]
				model['%s_in2length' %scope] = model['%s_out2length' %connect]
				model['%s_maxin2length' %scope] = model['%s_maxout2length' %connect]
				model['%s_inputs' %scope] = model['%s_outputs' %connect]
			model['%s_transform' %scope] = tf.transpose(tf.reshape(model['%s_inputs' %scope], [model['%s_maxin2length' %scope], model['%s_in0length' %scope], model['%s_in1length' %scope], 1]), [1, 0, 2, 3], '%s_transform' %scope)
			model['%s_out0length' %scope] = model['%s_in0length' %scope]
			model['%s_out1length' %scope] = model['%s_in1length' %scope]
			model['%s_out2length' %scope] = model['%s_in2length' %scope]
			model['%s_maxout2length' %scope] = model['%s_maxin2length' %scope]

		for _ in xrange(config.getint(scope, 'layer_size')):
			if _ == 0: model['%s_transform%i' %(scope, _)] = model['%s_transform' %scope]
			else: model['%s_transform%i' %(scope, _)] = model['%s_pooling%i' %(scope, _ - 1)]

			with tf.variable_scope('filter%i' %_), tf.name_scope('filter%s' %_):
				model['%s_filter%i' %(scope, _)] = tf.Variable(tf.truncated_normal([sizes['clength'], sizes['clength'], 1, 1]))
				model['%s_stride%i' %(scope, _)] = [1, sizes['cstep'], sizes['cstep'], 1]

			with tf.variable_scope('convolution%i' %_), tf.name_scope('convolution%i' %_):
				model['%s_convolution%i' %(scope, _)] = tf.nn.conv2d(model['%s_transform%i' %(scope, _)], model['%s_filter%i' %(scope, _)], model['%s_stride%i' %(scope, _)], 'VALID')
				model['%s_out1length' %scope] = int(math.ceil(float(model['%s_out1length' %scope] - sizes['clength'] + 1) / float(sizes['cstep'])))
				model['%s_out2length' %scope] = tf.to_int32(tf.ceil(tf.div(tf.to_float(tf.subtract(model['%s_out2length' %scope], sizes['clength'] - 1)), tf.to_float(sizes['cstep']))))
				model['%s_maxout2length' %scope] = int(math.ceil(float(model['%s_maxout2length' %scope] - sizes['clength'] + 1) / float(sizes['cstep'])))
				model['%s_pooling%i' %(scope, _)] = getattr(tf.nn, '%s_pool' %config.get(scope, 'pool'))(model['%s_convolution%i' %(scope, _)], [1, sizes['plength'], sizes['plength'], 1], [1, sizes['pstep'], sizes['pstep'], 1], 'VALID')
				model['%s_out1length' %scope] = int(math.ceil(float(model['%s_out1length' %scope] - sizes['plength'] + 1) / float(sizes['pstep'])))
				model['%s_out2length' %scope] = tf.to_int32(tf.ceil(tf.div(tf.to_float(tf.subtract(model['%s_out2length' %scope], sizes['plength'] - 1)), tf.to_float(sizes['pstep']))))
				model['%s_maxout2length' %scope] = int(math.ceil(float(model['%s_maxout2length' %scope] - sizes['plength'] + 1) / float(sizes['pstep'])))

		with tf.variable_scope('outputs'), tf.name_scope('outputs'):
			model['%s_outputs' %scope] = tf.transpose(tf.squeeze(model['%s_pooling%i' %(scope, _)], [3], '%s_outputs' %scope), [1, 0, 2])

	return model
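A hypothetical numeric check of the VALID-size recurrence used above for out2length / maxout2length: with an input length of 100, clength = 5 and cstep = 2, the convolution output length is ceil((100 - 5 + 1) / 2) = 48.

import math

length, clength, cstep = 100, 5, 2                              # hypothetical sizes
conv_len = int(math.ceil(float(length - clength + 1) / cstep))  # 48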
Example #14
 def _compare(self, x, use_gpu):
   np_floor, np_ceil = np.floor(x), np.ceil(x)
   with self.test_session(use_gpu=use_gpu) as sess:
     inx = tf.convert_to_tensor(x)
     ofloor, oceil = tf.floor(inx), tf.ceil(inx)
     tf_floor, tf_ceil = sess.run([ofloor, oceil])
   self.assertAllEqual(np_floor, tf_floor)
   self.assertAllEqual(np_ceil, tf_ceil)
   self.assertShapeEqual(np_floor, ofloor)
   self.assertShapeEqual(np_ceil, oceil)
Example #15
 def integral(lower, upper):
     val = tf.cond(
         tf.logical_or(
             tf.is_inf(tf.ceil(tf.cast(lower, config.dtype))),
             tf.is_inf(tf.floor(tf.cast(upper, config.dtype)))
         ),
         lambda: tf.constant(1, dtype=config.dtype),
         lambda: tf.cast(upper, config.dtype) - tf.cast(lower, config.dtype),
     )
     return val
Example #16
    def clampSlice(self, shouldCeil, transformedCoordinates, index):

        coordinateSlice = tf.slice(transformedCoordinates, [0, index], [tf.shape(transformedCoordinates)[0], 1])

        if not shouldCeil:
            result = tf.floor(coordinateSlice)
        else:
            result = tf.ceil(coordinateSlice)

        return result
Example #17
def interp(w, i, channel_dim):
    '''
    Input:
        w: A 4D block tensor of shape (n, h, w, c)
        i: A list of 3-tuples [(x_1, y_1, z_1), (x_2, y_2, z_2), ...],
            each having type (int, float, float)
 
        The 4D block represents a batch of 3D image feature volumes with c channels.
        The input i is a list of points  to index into w via interpolation. Direct
        indexing is not possible due to y_1 and z_1 being float values.
    Output:
        A list of the values: [
            w[x_1, y_1, z_1, :]
            w[x_2, y_2, z_2, :]
            ...
            w[x_k, y_k, z_k, :]
        ]
        of the same length == len(i)
    '''
    w_as_vector = tf.reshape(w, [-1, channel_dim]) # gather expects w to be 1-d
    upper_l = tf.to_int32(tf.concat(1, [i[:, 0:1], tf.floor(i[:, 1:2]), tf.floor(i[:, 2:3])]))
    upper_r = tf.to_int32(tf.concat(1, [i[:, 0:1], tf.floor(i[:, 1:2]), tf.ceil(i[:, 2:3])]))
    lower_l = tf.to_int32(tf.concat(1, [i[:, 0:1], tf.ceil(i[:, 1:2]), tf.floor(i[:, 2:3])]))
    lower_r = tf.to_int32(tf.concat(1, [i[:, 0:1], tf.ceil(i[:, 1:2]), tf.ceil(i[:, 2:3])]))

    upper_l_idx = to_idx(upper_l, tf.shape(w))
    upper_r_idx = to_idx(upper_r, tf.shape(w))
    lower_l_idx = to_idx(lower_l, tf.shape(w))
    lower_r_idx = to_idx(lower_r, tf.shape(w))
 
    upper_l_value = tf.gather(w_as_vector, upper_l_idx)
    upper_r_value = tf.gather(w_as_vector, upper_r_idx)
    lower_l_value = tf.gather(w_as_vector, lower_l_idx)
    lower_r_value = tf.gather(w_as_vector, lower_r_idx)
 
    alpha_lr = tf.expand_dims(i[:, 2] - tf.floor(i[:, 2]), 1)
    alpha_ud = tf.expand_dims(i[:, 1] - tf.floor(i[:, 1]), 1)
 
    upper_value = (1 - alpha_lr) * upper_l_value + (alpha_lr) * upper_r_value
    lower_value = (1 - alpha_lr) * lower_l_value + (alpha_lr) * lower_r_value
    value = (1 - alpha_ud) * upper_value + (alpha_ud) * lower_value
    return value
Example #18
    def slice_feature_and_anchors(self, image_shape2d, p23456, anchors):
        for i, stride in enumerate(cfg.FPN.ANCHOR_STRIDES):
            with tf.name_scope('FPN_slice_lvl{}'.format(i)):
                if i < 3:
                    # Images are padded for p5, which are too large for p2-p4.
                    # This seems to have no effect on mAP.
                    pi = p23456[i]
                    target_shape = tf.to_int32(tf.ceil(tf.to_float(image_shape2d) * (1.0 / stride)))
                    p23456[i] = tf.slice(pi, [0, 0, 0, 0],
                                         tf.concat([[-1, -1], target_shape], axis=0))
                    p23456[i].set_shape([1, pi.shape[1], None, None])

                anchors[i] = anchors[i].narrow_to(p23456[i])
Example #19
def bernoulli_sample(x):
    """
    Uses a tensor whose values are in [0,1] to sample a tensor with values
    in {0, 1}, using the straight through estimator for the gradient.

    E.g., if x is 0.6, bernoulli_sample(x) will be 1 with probability 0.6,
    and 0 otherwise, and the gradient will be pass-through (identity).
    """
    g = tf.get_default_graph()

    with ops.name_scope("BernoulliSample") as name:
        with g.gradient_override_map({"Ceil": "Identity",
                                      "Sub": "BernoulliSample_ST"}):
            return tf.ceil(x - tf.random_uniform(tf.shape(x)), name=name)
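The override above assumes a gradient named "BernoulliSample_ST" has been registered elsewhere; a minimal sketch of such a registration (straight-through: route the gradient to x and block it for the uniform noise), assuming TF 1.x:

import tensorflow as tf
from tensorflow.python.framework import ops

@ops.RegisterGradient("BernoulliSample_ST")
def bernoulli_sample_st(op, grad):
    # op is the Sub node computing x - uniform_noise: pass the incoming
    # gradient through to x and return zeros for the noise input.
    return [grad, tf.zeros(tf.shape(op.inputs[1]))]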
Example #20
    def testProbAndGradGivesFiniteResultsForCommonEvents(self):
        with self.test_session():
            mu = tf.Variable(0.0, name="mu")
            sigma = tf.Variable(1.0, name="sigma")
            qdist = distributions.QuantizedDistribution(distribution=distributions.Normal(mu=mu, sigma=sigma))
            x = tf.ceil(4 * rng.rand(100).astype(np.float32) - 2)

            tf.global_variables_initializer().run()

            proba = qdist.prob(x)
            self._assert_all_finite(proba.eval())

            grads = tf.gradients(proba, [mu, sigma])
            self._assert_all_finite(grads[0].eval())
            self._assert_all_finite(grads[1].eval())
Example #21
def _nearest_neighbor_features_per_object_in_chunks(
    reference_embeddings_flat, query_embeddings_flat, reference_labels_flat,
    ref_obj_ids, k_nearest_neighbors, n_chunks):
  """Calculates the nearest neighbor features per object in chunks to save mem.

  Uses chunking to bound the memory use.

  Args:
    reference_embeddings_flat: Tensor of shape [n, embedding_dim],
      the embedding vectors for the reference frame.
    query_embeddings_flat: Tensor of shape [m, embedding_dim], the embedding
      vectors for the query frames.
    reference_labels_flat: Tensor of shape [n], the class labels of the
      reference frame.
    ref_obj_ids: int tensor of unique object ids in the reference labels.
    k_nearest_neighbors: Integer, the number of nearest neighbors to use.
    n_chunks: Integer, the number of chunks to use to save memory
      (set to 1 for no chunking).

  Returns:
    nn_features: A float32 tensor of nearest neighbor features of shape
      [m, n_objects, feature_dim].
  """
  chunk_size = tf.cast(tf.ceil(tf.cast(tf.shape(query_embeddings_flat)[0],
                                       tf.float32) / n_chunks), tf.int32)
  wrong_label_mask = tf.not_equal(reference_labels_flat,
                                  ref_obj_ids[:, tf.newaxis])
  all_features = []
  for n in range(n_chunks):
    if n_chunks == 1:
      query_embeddings_flat_chunk = query_embeddings_flat
    else:
      chunk_start = n * chunk_size
      chunk_end = (n + 1) * chunk_size
      query_embeddings_flat_chunk = query_embeddings_flat[chunk_start:chunk_end]
    # Use control dependencies to make sure that the chunks are not processed
    # in parallel which would prevent any peak memory savings.
    with tf.control_dependencies(all_features):
      features = _nn_features_per_object_for_chunk(
          reference_embeddings_flat, query_embeddings_flat_chunk,
          wrong_label_mask, k_nearest_neighbors
      )
    all_features.append(features)
  if n_chunks == 1:
    nn_features = all_features[0]
  else:
    nn_features = tf.concat(all_features, axis=0)
  return nn_features
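A hypothetical example of the chunking arithmetic above: with m = 10 query points and n_chunks = 3, chunk_size = ceil(10 / 3) = 4, so the slices cover [0:4], [4:8] and [8:12] (the last slice is truncated to the 2 remaining points).

import math

m, n_chunks = 10, 3                                 # hypothetical sizes
chunk_size = int(math.ceil(float(m) / n_chunks))    # 4
bounds = [(n * chunk_size, (n + 1) * chunk_size) for n in range(n_chunks)]
# [(0, 4), (4, 8), (8, 12)]; slicing past m is safe and yields the leftovers.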
Example #22
  def __init__(self, pool_size=1, **kwargs):
    """
    :param pool_size int: size of the pool to take median of (is also used as stride size)
    """
    super(BatchMedianPoolingLayer, self).__init__(**kwargs)

    input_placeholder = self.input_data.get_placeholder_as_batch_major()

    # get median over pooled batches
    # - reshape input for usage with tf.nn.top_k
    reshaped_input = tf.reshape(tf.transpose(input_placeholder, [1, 2, 0]), shape=(tf.shape(input_placeholder)[1], tf.shape(input_placeholder)[2], tf.shape(input_placeholder)[0] / pool_size, pool_size))
    # - get median of each pool
    median = tf.nn.top_k(reshaped_input, k=tf.cast(tf.ceil(tf.constant(pool_size, dtype=tf.float32) / 2), dtype=tf.int32)).values[:, :, :, -1]
    median_batch_major = tf.transpose(median, [2, 0, 1])
    self.output.placeholder = median_batch_major
    self.output.size_placeholder = {self.output.time_dim_axis_excluding_batch: tf.strided_slice(self.input_data.size_placeholder[self.input_data.time_dim_axis_excluding_batch], [0], tf.shape(self.input_data.size_placeholder[self.input_data.time_dim_axis_excluding_batch]), [pool_size])}
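A small NumPy sketch (hypothetical pool) of the median-via-top_k trick above: with pool_size = 5, k = ceil(5 / 2) = 3, and the last of the top-3 values is the median of the pool.

import math
import numpy as np

pool = np.array([4., 1., 7., 3., 9.])        # hypothetical pool of 5 values
k = int(math.ceil(pool.size / 2.0))          # 3
top_k = np.sort(pool)[::-1][:k]              # [9., 7., 4.]
print(top_k[-1])                             # 4.0, the median of the pool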
Example #23
  def test_prob_and_grad_gives_finite_results_for_common_events(self):
    with self.test_session():
      mu = tf.Variable(0.0, name="mu")
      sigma = tf.Variable(1.0, name="sigma")
      qdist = distributions.QuantizedDistribution(
          base_dist_cls=distributions.Normal,
          mu=mu,
          sigma=sigma)
      x = tf.ceil(4 * self._rng.rand(100).astype(np.float32) - 2)

      tf.initialize_all_variables().run()

      proba = qdist.prob(x)
      self._assert_all_finite(proba.eval())

      grads = tf.gradients(proba, [mu, sigma])
      self._assert_all_finite(grads[0].eval())
      self._assert_all_finite(grads[1].eval())
Example #24
  def _sample_n(self, n, seed=None):
    low = self._low
    high = self._high
    with tf.name_scope("transform"):
      n = tf.convert_to_tensor(n, name="n")
      x_samps = self.distribution.sample(n, seed=seed)
      ones = tf.ones_like(x_samps)

      # Snap values to the intervals (j - 1, j].
      result_so_far = tf.ceil(x_samps)

      if low is not None:
        result_so_far = tf.where(result_so_far < low, low * ones, result_so_far)

      if high is not None:
        result_so_far = tf.where(result_so_far > high, high * ones,
                                 result_so_far)

      return result_so_far
Example #25
  def encode(self, inputs, sequence_length=None, mode=tf.estimator.ModeKeys.TRAIN):
    encoder_state = []

    for layer_index, layer in enumerate(self.layers):
      input_depth = inputs.get_shape().as_list()[-1]

      if layer_index == 0:
        # For the first input, make the number of timesteps a multiple of the
        # total reduction factor.
        total_reduction_factor = pow(self.reduction_factor, len(self.layers) - 1)

        current_length = tf.shape(inputs)[1]
        factor = tf.divide(tf.cast(current_length, tf.float32), total_reduction_factor)
        new_length = tf.cast(tf.ceil(factor), tf.int32) * total_reduction_factor
        padding = new_length - current_length

        inputs = tf.pad(
            inputs,
            [[0, 0], [0, padding], [0, 0]])
        inputs.set_shape((None, None, input_depth))
      else:
        # In other cases, reduce the time dimension.
        inputs = tf.reshape(
            inputs,
            [tf.shape(inputs)[0], -1, input_depth * self.reduction_factor])
        if sequence_length is not None:
          sequence_length = tf.div(sequence_length, self.reduction_factor)

      with tf.variable_scope("layer_{}".format(layer_index)):
        outputs, state, sequence_length = layer.encode(
            inputs,
            sequence_length=sequence_length,
            mode=mode)

      encoder_state.append(state)
      inputs = outputs

    return (
        outputs,
        self.state_reducer.reduce(encoder_state),
        sequence_length)
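A hypothetical walk-through of the first-layer padding above: with reduction_factor = 2 and 3 layers, total_reduction_factor = 2**(3 - 1) = 4, so a batch with 10 timesteps is padded to ceil(10 / 4) * 4 = 12 frames (padding = 2).

import math

reduction_factor, num_layers, current_length = 2, 3, 10   # hypothetical values
total_reduction_factor = reduction_factor ** (num_layers - 1)           # 4
new_length = int(math.ceil(float(current_length) / total_reduction_factor)
                 * total_reduction_factor)                              # 12
padding = new_length - current_length                                   # 2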
Example #26
def sample_patch(image, patch_height, patch_width, colors):
  """Crops image to the desired aspect ratio shape and resizes it.

  If the image has shape H x W, crops a square in the center of
  shape min(H,W) x min(H,W).

  Args:
    image: A 3D `Tensor` of HWC format.
    patch_height: A Python integer. The output images height.
    patch_width: A Python integer. The output images width.
    colors: Number of output image channels. Defaults to 3.

  Returns:
    A 3D `Tensor` of HWC format with shape [patch_height, patch_width, colors].
  """
  image_shape = tf.shape(image)
  h, w = image_shape[0], image_shape[1]

  h_major_target_h = h
  h_major_target_w = tf.maximum(1, tf.to_int32(
      (h * patch_width) / patch_height))
  w_major_target_h = tf.maximum(1, tf.to_int32(
      (w * patch_height) / patch_width))
  w_major_target_w = w
  target_hw = tf.cond(
      h_major_target_w <= w,
      lambda: tf.convert_to_tensor([h_major_target_h, h_major_target_w]),
      lambda: tf.convert_to_tensor([w_major_target_h, w_major_target_w]))
  # Cut a patch of shape (target_h, target_w).
  image = tf.image.resize_image_with_crop_or_pad(image, target_hw[0],
                                                 target_hw[1])
  # Resize the cropped image to (patch_h, patch_w).
  image = tf.image.resize_images([image], [patch_height, patch_width])[0]
  # Force number of channels: repeat the channel dimension enough
  # number of times and then slice the first `colors` channels.
  num_repeats = tf.to_int32(tf.ceil(colors / image_shape[2]))
  image = tf.tile(image, [1, 1, num_repeats])
  image = tf.slice(image, [0, 0, 0], [-1, -1, colors])
  image.set_shape([patch_height, patch_width, colors])
  return image
Example #27
def binary_stochastic_REINFORCE(x, loss_op_name="loss_by_example"):
    """
    Sigmoid followed by a random sample from a bernoulli distribution
    according to the result (binary stochastic neuron). Uses the REINFORCE
    estimator. See https://arxiv.org/abs/1308.3432.

    NOTE: Requires a loss operation with name matching the argument for
    loss_op_name in the graph. This loss operation should be broken out by
    example (i.e., not a single number for the entire batch).
    """
    g = tf.get_default_graph()

    with ops.name_scope("BinaryStochasticREINFORCE"):
        with g.gradient_override_map({"Sigmoid": "BinaryStochastic_REINFORCE",
                                      "Ceil": "Identity"}):
            p = tf.sigmoid(x)

            reinforce_collection = g.get_collection("REINFORCE")
            if not reinforce_collection:
                g.add_to_collection("REINFORCE", {})
                reinforce_collection = g.get_collection("REINFORCE")
            reinforce_collection[0][p.op.name] = loss_op_name

            return tf.ceil(p - tf.random_uniform(tf.shape(x)))
Example #28
    def _resample_inv_dst_weighting(self, inputs, sample_coords):
        in_size = inputs.shape.as_list()
        in_spatial_size = in_size[1:-1]
        in_spatial_rank = infer_spatial_rank(inputs)
        out_rank = len(sample_coords.shape.as_list())

        self.N = 2 ** in_spatial_rank
        binary_neighbour_ids = [
            [int(c) for c in format(i, '0%ib' % in_spatial_rank)]
            for i in range(self.N)]
        weight_id = [[[c, i] for i, c in enumerate(bc)]
                     for bc in binary_neighbour_ids]

        sample_coords = tf.transpose(
            sample_coords, [out_rank - 1, 0] + list(range(1, out_rank - 1)))
        # broadcasting input spatial size for boundary functions
        b_size = tf.reshape(in_spatial_size,
                            [len(in_spatial_size)] + [1] * (out_rank - 1))
        # find floor and ceil coordinates
        all_coords_f = tf.stack([
            self.boundary_func(tf.floor(sample_coords), b_size),
            self.boundary_func(tf.ceil(sample_coords), b_size)])
        # find N weights associated to each output point
        diff = tf.stack(
            [tf.squared_difference(sample_coords - EPS, all_coords_f[0]),
             tf.squared_difference(sample_coords + EPS, all_coords_f[1])])

        # gather_nd for both matrices, the same as:
        # point_weights = tf.gather_nd(diff, weight_id)
        # knots_id = tf.gather_nd(all_coords_f, weight_id)
        n_val = tf.gather_nd(tf.stack([diff, all_coords_f], axis=-1), weight_id)
        n_val = tf.unstack(n_val, axis=-1)
        point_weights, knots_id = n_val[0], n_val[1]

        # inverse distance weighting
        # weighted average: sum_i (w_i * p_i / sum_j w_j), with w_i = 1/((p - p_i)^2)
        # point_weights shape:
        # `[N, input_rank, b, sp_dim_0, ..., sp_dim_K]`
        # where:
        #  `N` is 2**source data spatial rank
        #  `b` is batch size,
        #  `sp_dim_0` is the output spatial output 0,
        #
        # `point_weights` represents (p - p_i)^2
        #      with i= 0...2**source_rank neighbours
        # (to do: these operations could be refactored as a resampling kernel)
        point_weights = tf.reduce_sum(point_weights, axis=1)
        # skip this as power = 2.0:
        # self.power = 1.0
        # point_weights = tf.pow(point_weights, self.power / 2.0)
        point_weights = tf.reciprocal(point_weights)
        point_weights = point_weights / tf.reduce_sum(point_weights, axis=0)

        # find N neighbours associated to each output point
        knots_id = tf.transpose(tf.cast(knots_id, COORDINATES_TYPE),
                                [0] + list(range(2, out_rank + 1)) + [1])
        # get values of N neighbours
        samples = [
            tf.gather_nd(img, knots) for (img, knots) in
            zip(tf.unstack(inputs, axis=0), tf.unstack(knots_id, axis=1))]
        samples = tf.stack(samples, axis=1)

        # weighted average over N neighbours
        return tf.reduce_sum(
            samples * tf.expand_dims(point_weights, axis=-1), axis=0)
Example #29
def resize_to_range(image,
                    label=None,
                    min_size=None,
                    max_size=None,
                    factor=None,
                    align_corners=True,
                    label_layout_is_chw=False,
                    scope=None,
                    method=tf.image.ResizeMethod.BILINEAR):
    """Resizes image or label so their sides are within the provided range.

  The output size can be described by two cases:
  1. If the image can be rescaled so its minimum size is equal to min_size
     without the other side exceeding max_size, then do so.
  2. Otherwise, resize so the largest side is equal to max_size.

  An integer in `range(factor)` is added to the computed sides so that the
  final dimensions are multiples of `factor` plus one.

  Args:
    image: A 3D tensor of shape [height, width, channels].
    label: (optional) A 3D tensor of shape [height, width, channels] (default)
      or [channels, height, width] when label_layout_is_chw = True.
    min_size: (scalar) desired size of the smaller image side.
    max_size: (scalar) maximum allowed size of the larger image side. Note
      that the output dimension is no larger than max_size and may be slightly
      smaller than min_size when factor is not None.
    factor: Make output size multiple of factor plus one.
    align_corners: If True, exactly align all 4 corners of input and output.
    label_layout_is_chw: If true, the label has shape [channel, height, width].
      We support this case because for some instance segmentation dataset, the
      instance segmentation is saved as [num_instances, height, width].
    scope: Optional name scope.
    method: Image resize method. Defaults to tf.image.ResizeMethod.BILINEAR.

  Returns:
    A 3-D tensor of shape [new_height, new_width, channels], where the image
    has been resized (with the specified method) so that
    min(new_height, new_width) == ceil(min_size) or
    max(new_height, new_width) == ceil(max_size).

  Raises:
    ValueError: If the image is not a 3D tensor.
  """
    with tf.name_scope(scope, 'resize_to_range', [image]):
        new_tensor_list = []
        min_size = tf.cast(min_size, tf.float32)
        if max_size is not None:
            max_size = tf.cast(max_size, tf.float32)
            # Modify the max_size to be a multiple of factor plus 1 and make sure the
            # max dimension after resizing is no larger than max_size.
            if factor is not None:
                max_size = (max_size + (factor - (max_size - 1) % factor) % factor
                            - factor)

        [orig_height, orig_width, _] = resolve_shape(image, rank=3)
        orig_height = tf.cast(orig_height, tf.float32)
        orig_width = tf.cast(orig_width, tf.float32)
        orig_min_size = tf.minimum(orig_height, orig_width)

        # Calculate the larger of the possible sizes
        large_scale_factor = min_size / orig_min_size
        large_height = tf.to_int32(tf.ceil(orig_height * large_scale_factor))
        large_width = tf.to_int32(tf.ceil(orig_width * large_scale_factor))
        large_size = tf.stack([large_height, large_width])

        new_size = large_size
        if max_size is not None:
            # Calculate the smaller of the possible sizes, use that if the larger
            # is too big.
            orig_max_size = tf.maximum(orig_height, orig_width)
            small_scale_factor = max_size / orig_max_size
            small_height = tf.to_int32(tf.ceil(orig_height * small_scale_factor))
            small_width = tf.to_int32(tf.ceil(orig_width * small_scale_factor))
            small_size = tf.stack([small_height, small_width])
            new_size = tf.cond(
                tf.cast(tf.reduce_max(large_size), tf.float32) > max_size,
                lambda: small_size,
                lambda: large_size)
        # Ensure that both output sides are multiples of factor plus one.
        if factor is not None:
            new_size += (factor - (new_size - 1) % factor) % factor
        new_tensor_list.append(tf.image.resize_images(
            image, new_size, method=method, align_corners=align_corners))
        if label is not None:
            if label_layout_is_chw:
                # Input label has shape [channel, height, width].
                resized_label = tf.expand_dims(label, 3)
                resized_label = tf.image.resize_nearest_neighbor(
                    resized_label, new_size, align_corners=align_corners)
                resized_label = tf.squeeze(resized_label, 3)
            else:
                # Input label has shape [height, width, channel].
                resized_label = tf.image.resize_images(
                    label, new_size, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                    align_corners=align_corners)
            new_tensor_list.append(resized_label)
        else:
            new_tensor_list.append(None)
        return new_tensor_list
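A hypothetical check of the "multiple of factor plus one" adjustment above: with factor = 16 and a computed side of 500, the increment is (16 - (500 - 1) % 16) % 16 = 13, giving 513 = 32 * 16 + 1.

factor, side = 16, 500                           # hypothetical values
side += (factor - (side - 1) % factor) % factor
print(side, (side - 1) % factor)                 # 513 0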
Example #30
def depth_compress(l_args):
    """Compresses an image."""
    # Load input image and add batch dimension.
    x_rgbd = load_image_rgbd(l_args.input)
    x_rgbd = tf.expand_dims(x_rgbd, 0)
    x_rgbd.set_shape([1, None, None, 4])
    # ======================== Input image dim should be multiple of 16
    x_shape = tf.shape(x_rgbd)
    x_shape = tf.ceil(x_shape / 16) * 16
    x_rgbd = tf.image.resize_images(x_rgbd, (x_shape[1], x_shape[2]))
    # ========================
    # Transform and compress the image, then remove batch dimension.
    x, depth = tf.split(x_rgbd, [3, 1], 3)

    y = depth_analysis_transform_3(x, depth, l_args.num_filters)

    entropy_bottleneck = tfc.EntropyBottleneck()
    string = entropy_bottleneck.compress(y)
    string = tf.squeeze(string, axis=0)

    # Transform the quantized image back (if requested).
    y_hat, likelihoods = entropy_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat, l_args.num_filters)

    num_pixels = tf.to_float(tf.reduce_prod(tf.shape(x)[:-1]))

    # Total number of bits divided by number of pixels.
    eval_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Bring both images back to 0..255 range.
    x *= 255
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)

    mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

    with tf.Session() as sess:
        # Load the latest model checkpoint, get the compressed string and the tensor
        # shapes.
        latest = tf.train.latest_checkpoint(
            checkpoint_dir=l_args.checkpoint_dir)
        tf.train.Saver().restore(sess, save_path=latest)
        string, x_shape, y_shape = sess.run([string, tf.shape(x), tf.shape(y)])

        # Write a binary file with the shape information and the compressed string.
        with open(l_args.output, "wb") as f:
            f.write(np.array(x_shape[1:-1], dtype=np.uint16).tobytes())
            f.write(np.array(y_shape[1:-1], dtype=np.uint16).tobytes())
            f.write(string)

        # If requested, transform the quantized image back and measure performance.

        eval_bpp, mse, psnr, msssim, num_pixels = sess.run(
            [eval_bpp, mse, psnr, msssim, num_pixels])

        # The actual bits per pixel including overhead.
        bpp = (8 + len(string)) * 8 / num_pixels

        print("Mean squared error: {:0.4f}".format(mse))
        print("PSNR (dB): {:0.2f}".format(psnr))
        print("Multiscale SSIM: {:0.4f}".format(msssim))
        print("Multiscale SSIM (dB): {:0.2f}".format(-10 *
                                                     np.log10(1 - msssim)))
        print("Information content in bpp: {:0.4f}".format(eval_bpp))
        print("Actual bits per pixel: {:0.4f}".format(bpp))
        msssim_db = (-10 * np.log10(1 - msssim))

    return mse, psnr, msssim, msssim_db, eval_bpp, bpp
Example #31
    def body1(self, num, objectNum, loss, predict, labels, nilboy):
        '''
        Calculate loss.

        Args:
            num: specify which image is to be processed
            objectNum: #objects in an image
            loss: [class loss, object loss, no object loss, coord loss]
            predict: 3-D tensor [cell_size, cell_size, 5 * boxes_per_cell]
            labels: [max_objects, 5]  (x_center, y_center, w, h, class)
                --- > class and coord
                --- > x_center is the x coordinate in the resized image; the same for y_center
            nilboy: has/no objects
        '''
        #Get the label from labels using the variable num
        label = labels[num]
        label = tf.reshape(label, [-1])

        minX = (label[0] - label[2] / 2) / (self.imageSize / self.cellSize)
        maxX = (label[0] + label[2] / 2) / (self.imageSize / self.cellSize)
        minY = (label[1] - label[3] / 2) / (self.imageSize / self.cellSize)
        maxY = (label[1] + label[3] / 2) / (self.imageSize / self.cellSize)

        #Determine which cells the object spans.
        minX = tf.floor(minX)
        minY = tf.floor(minY)
        maxX = tf.ceil(maxX)
        maxY = tf.ceil(maxY)

        #objects: 1 for each cell covered by the object (zeros are padded in below)
        temp = tf.cast(tf.stack([maxY - minY, maxX - minX]), dtype=tf.int32)
        objects = tf.ones(temp, tf.float32)

        #Pad objects with zeros for the cells the object does not cover,
        #so that it spans the full S*S grid.
        temp = tf.cast(tf.stack(
            [minY, self.cellSize - maxY, minX, self.cellSize - maxX]),
                       dtype=tf.int32)
        temp = tf.reshape(temp, (2, 2))
        objects = tf.pad(objects, temp, 'CONSTANT')

        #Calculate which cell contains the center point of the object.
        centerX = label[0] / (self.imageSize / self.cellSize)
        centerX = tf.floor(centerX)
        centerY = label[1] / (self.imageSize / self.cellSize)
        centerY = tf.floor(centerY)
        response = tf.ones([1, 1], tf.float32)

        # pad to S*S scale.
        temp = tf.cast(tf.stack([
            centerY, self.cellSize - centerY - 1, centerX,
            self.cellSize - centerX - 1
        ]),
                       dtype=tf.int32)
        temp = tf.reshape(temp, (2, 2))
        response = tf.pad(response, temp, 'CONSTANT')

        #predictBoxes: predicted boxes
        predictBoxes = predict[:, :, self.numClasses + self.boxesPerCell:]

        # 7 * 7 * 2 * 4
        predictBoxes = tf.reshape(
            predictBoxes, [self.cellSize, self.cellSize, self.boxesPerCell, 4])

        # get real size from the 0-1 predicted size
        predictBoxes = predictBoxes * [
            self.imageSize / self.cellSize, self.imageSize / self.cellSize,
            self.imageSize, self.imageSize
        ]

        #grid cell coord
        baseBoxes = np.zeros([self.cellSize, self.cellSize, 4])

        for y in range(self.cellSize):
            for x in range(self.cellSize):
                baseBoxes[y, x, :] = [
                    self.imageSize / self.cellSize * x,
                    self.imageSize / self.cellSize * y, 0, 0
                ]

        #Make the shape of baseBoxes the same as predictBoxes.
        baseBoxes = np.tile(
            np.resize(baseBoxes, [self.cellSize, self.cellSize, 1, 4]),
            [1, 1, self.boxesPerCell, 1])

        # predictBoxes holds per-cell offsets, baseBoxes holds each grid cell's origin. Add them to get predictions in whole-image coordinates.
        predictBoxes = baseBoxes + predictBoxes

        #iou for each cell 7 * 7 * 1
        iouPredictTruth = self.iou(predictBoxes, label[0:4])

        # filter out the cells that don't have objects
        C = iouPredictTruth * tf.reshape(response,
                                         [self.cellSize, self.cellSize, 1])

        #
        I = iouPredictTruth * tf.reshape(response,
                                         [self.cellSize, self.cellSize, 1])

        #get the maximum iou for each cell's boxes
        maxI = tf.reduce_max(I, 2, keepdims=True)

        # the max iou for the cell contains the center point
        I = tf.cast((I >= maxI), tf.float32) * tf.reshape(
            response, (self.cellSize, self.cellSize, 1))

        #noI: [cell size, cell size, boxes per cell]
        noI = tf.ones_like(I, dtype=tf.float32) - I

        # B confidences
        pC = predict[:, :, self.numClasses:self.numClasses + self.boxesPerCell]

        #real x center, y center
        x = label[0]
        y = label[1]

        sqrtW = tf.sqrt(tf.abs(label[2]))
        sqrtH = tf.sqrt(tf.abs(label[3]))

        # real predicted x center and y center
        pX = predictBoxes[:, :, :, 0]
        pY = predictBoxes[:, :, :, 1]

        #square root of predicted boxes' width and height
        pSqrtW = tf.sqrt(
            tf.minimum(self.imageSize * 1.0,
                       tf.maximum(0.0, predictBoxes[:, :, :, 2])))
        pSqrtH = tf.sqrt(
            tf.minimum(self.imageSize * 1.0,
                       tf.maximum(0.0, predictBoxes[:, :, :, 3])))

        # one hot encoding
        P = tf.one_hot(tf.cast(label[4], tf.int32),
                       self.numClasses,
                       dtype=tf.float32)

        #predict classes
        pP = predict[:, :, 0:self.numClasses]

        #classLoss: only cells containing objects
        classLoss = tf.nn.l2_loss(
            tf.reshape(objects, (self.cellSize, self.cellSize, 1)) *
            (pP - P)) * self.classScale

        #objectLoss: object center location loss
        objectLoss = tf.nn.l2_loss(I * (pC - C)) * self.objectScale

        noObjectLoss = tf.nn.l2_loss(noI * (pC)) * self.noobjectScale

        coordLoss = (
            tf.nn.l2_loss(I * (pX - x) / (self.imageSize / self.cellSize)) +
            tf.nn.l2_loss(I * (pY - y) / (self.imageSize / self.cellSize)) +
            tf.nn.l2_loss(I * (pSqrtW - sqrtW)) / self.imageSize +
            tf.nn.l2_loss(I *
                          (pSqrtH - sqrtH)) / self.imageSize) * self.coordScale
        nilboy = I

        return num + 1, objectNum, [
            loss[0] + classLoss, loss[1] + objectLoss, loss[2] + noObjectLoss,
            loss[3] + coordLoss
        ], predict, labels, nilboy
Example #32
def calc_loss__slda__tensorflow_graph(
        param_vec=None,
        dim_P=None,
        dataset=None,
        convex_alpha_minus_1=None,
        tau=1.1,
        delta=0.1,
        lambda_w=0.001,
        weight_x=1.0,
        weight_y=1.0,
        weight_pi=1.0,
        return_dict=False,
        rescale_total_loss_by_n_tokens=True,
        frac_train_laps_completed=1.0,
        pi_max_iters_first_train_lap=DefaultDocTopicOptKwargs['pi_max_iters'],
        pi_max_iters=DefaultDocTopicOptKwargs['pi_max_iters'],
        active_proba_thr=0.005,
        **unused_kwargs):
    ''' Compute log probability of bow dataset under topic model.

    Returns
    -------
    log_proba : avg. log probability of dataset under provided LDA model.
        Scaled by number of docs in the dataset.
    '''
    # Unpack dataset
    doc_indptr_Dp1 = dataset['doc_indptr_Dp1']
    word_id_U = dataset['word_id_U']
    word_ct_U = dataset['word_ct_U']
    n_docs = dataset['n_docs']
    y_DC = dataset['y_DC']
    y_rowmask = dataset['y_rowmask']
    
    ## Unpack params
    assert param_vec is not None
    param_dict = _unflatten_to_common_param_dict__tf_graph(param_vec, **dim_P)
    topics_KV = param_dict['topics_KV']
    w_CK = param_dict['w_CK']
    K, _ = topics_KV.get_shape().as_list()
    C, _ = w_CK.get_shape().as_list()

    ## Establish kwargs for pi optimization step
    # Use 'ramp up' strategy to gradually increase per-doc iteration costs.
    # At first, perform only pi_max_iters_first_train_lap.
    # Linearly increase until reaching pi_max_iters,
    # which is designed to happen 50% of way through training.
    #
    # frac_progress : float within (0.0, 1.0)
    #     0.0 when frac_lap == 0
    #     0.5 when frac_lap == 0.25
    #     1.0 when frac_lap >= 0.5
    # cur_pi_max_iters : int
    #     Number of pi iters to run now
    assert pi_max_iters_first_train_lap <= pi_max_iters
    frac_progress = tf.minimum(
        tf.cast(1.0, tf.float64),
        2.0 * frac_train_laps_completed)
    cur_pi_max_iters = tf.cast(
        pi_max_iters_first_train_lap
        + tf.ceil(frac_progress * (pi_max_iters - pi_max_iters_first_train_lap)),
        tf.int32)
    # Pack up into the kwargs handed to pi optimization
    pi_opt_kwargs = dict(**DefaultDocTopicOptKwargs)
    pi_opt_kwargs['pi_max_iters'] = cur_pi_max_iters

    def has_docs_left(
            d, avg_log_proba_x, avg_log_proba_y,
            avg_log_proba_pi, pi_arr, y_arr):
        return d < n_docs
    def update_doc(
            d, avg_log_proba_x, avg_log_proba_y,
            avg_log_proba_pi, pi_arr, y_arr):
        start_d = doc_indptr_Dp1[d]
        stop_d = doc_indptr_Dp1[d+1]
        word_id_d_Ud = word_id_U[start_d:stop_d]
        word_ct_d_Ud = word_ct_U[start_d:stop_d]
        pi_d_K, topics_KUd, _, _ = \
            _calc_nef_map_pi_d_K__tensorflow_graph(
                _word_id_d_Ud=word_id_d_Ud,
                _word_ct_d_Ud=word_ct_d_Ud,
                _topics_KV=topics_KV,
                convex_alpha_minus_1=convex_alpha_minus_1,
                **pi_opt_kwargs)
        pi_arr = pi_arr.write(d, pi_d_K)
        avg_log_proba_pi_d = weight_pi * tf.reduce_sum(
            convex_alpha_minus_1 * tf.log(1e-9 + pi_d_K))
        avg_log_proba_x_d = tf.reduce_sum(
            word_ct_d_Ud * 
            tf.log(tf.matmul(tf.reshape(pi_d_K, (1,K)), topics_KUd)))
        avg_log_proba_x_d += (
            tf.lgamma(1.0 + tf.reduce_sum(word_ct_d_Ud))
            - tf.reduce_sum(tf.lgamma(1.0 + word_ct_d_Ud)))

        log_proba_y_d_C = tf.reduce_sum(
            w_CK * tf.reshape(pi_d_K, (1,K)), axis=1)
        avg_log_proba_y_d = tf.cond(
            y_rowmask[d] > 0,
            lambda: -1.0 * tf.reduce_sum(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=log_proba_y_d_C, labels=y_DC[d])),
            lambda: tf.constant(0.0, dtype=tf.float64))
        y_arr = y_arr.write(d, tf.sigmoid(log_proba_y_d_C))
        return (
            d+1,
            avg_log_proba_x + weight_x * avg_log_proba_x_d,
            avg_log_proba_y + weight_y * avg_log_proba_y_d,
            avg_log_proba_pi + avg_log_proba_pi_d,
            pi_arr,
            y_arr)

    _avg_log_proba_x = tf.constant(0.0, dtype=tf.float64)
    _avg_log_proba_y = tf.constant(0.0, dtype=tf.float64)
    _avg_log_proba_pi = tf.constant(0.0, dtype=tf.float64)
    _K = tf.cast(K, tf.float64)
    _convex_alpha_minus_1 = tf.cast(convex_alpha_minus_1, tf.float64)
    _d = 0
    _pi_arr = tf.TensorArray(dtype=tf.float64, size=n_docs) 
    _y_arr = tf.TensorArray(dtype=tf.float64, size=n_docs) 
    (_d, _avg_log_proba_x, _avg_log_proba_y, _avg_log_proba_pi,
        _pi_arr, _y_arr) = tf.while_loop(
            has_docs_left,
            update_doc,
            loop_vars=[
                _d, _avg_log_proba_x, _avg_log_proba_y, 
                _avg_log_proba_pi, _pi_arr, _y_arr])
    _pi_DK = tf.reshape(_pi_arr.concat(), (n_docs, K))
    _y_proba_DC = tf.reshape(_y_arr.concat(), (n_docs, C))

    _avg_log_proba_topics = (tau - 1.0) * tf.reduce_sum(tf.log(topics_KV))
    _avg_log_proba_w = -1.0 * (
        weight_y * lambda_w * tf.reduce_sum(tf.square(w_CK)))

    scale_ttl = tf.reduce_sum(word_ct_U)
    _avg_log_proba_x /= scale_ttl
    _avg_log_proba_pi /= scale_ttl
    _avg_log_proba_y /= scale_ttl
    _avg_log_proba_topics /= scale_ttl
    _avg_log_proba_w /= scale_ttl

    return (
        -1.0 * _avg_log_proba_x,
        -1.0 * _avg_log_proba_y,
        -1.0 * _avg_log_proba_pi,
        -1.0 * _avg_log_proba_topics,
        -1.0 * _avg_log_proba_w,
        _pi_DK,
        _y_proba_DC)
Example #33
 def compute_num_leapfrog_steps(self, step_size):
     return tf.cast(tf.ceil(self.trajectory_length / step_size), tf.int64)
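A hypothetical numeric example of the step count above: with trajectory_length = 1.0 and step_size = 0.3, ceil(1.0 / 0.3) = 4 leapfrog steps.

import math

trajectory_length, step_size = 1.0, 0.3                       # hypothetical values
num_steps = int(math.ceil(trajectory_length / step_size))     # 4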
Example #34
def percentile(x,
               q,
               axis=None,
               interpolation=None,
               keep_dims=False,
               validate_args=False,
               name=None):
  """Compute the `q`-th percentile(s) of `x`.

  Given a vector `x`, the `q`-th percentile of `x` is the value `q / 100` of the
  way from the minimum to the maximum in a sorted copy of `x`.

  The values and distances of the two nearest neighbors as well as the
  `interpolation` parameter will determine the percentile if the normalized
  ranking does not match the location of `q` exactly.

  This function is the same as the median if `q = 50`, the same as the minimum
  if `q = 0` and the same as the maximum if `q = 100`.

  Multiple percentiles can be computed at once by using `1-D` vector `q`.
  Dimension zero of the returned `Tensor` will index the different percentiles.


  ```python
  # Get 30th percentile with default ('nearest') interpolation.
  x = [1., 2., 3., 4.]
  tfp.stats.percentile(x, q=30.)
  ==> 2.0

  # Get 30th and 70th percentiles with 'lower' interpolation
  x = [1., 2., 3., 4.]
  tfp.stats.percentile(x, q=[30., 70.], interpolation='lower')
  ==> [1., 3.]

  # Get 100th percentile (maximum).  By default, this is computed over every dim
  x = [[1., 2.]
       [3., 4.]]
  tfp.stats.percentile(x, q=100.)
  ==> 4.

  # Treat the leading dim as indexing samples, and find the 100th quantile (max)
  # over all such samples.
  x = [[1., 2.]
       [3., 4.]]
  tfp.stats.percentile(x, q=100., axis=[0])
  ==> [3., 4.]
  ```

  Compare to `numpy.percentile`.

  Args:
    x:  Floating point `N-D` `Tensor` with `N > 0`.  If `axis` is not `None`,
      `x` must have statically known number of dimensions.
    q:  Scalar or vector `Tensor` with values in `[0, 100]`. The percentile(s).
    axis:  Optional `0-D` or `1-D` integer `Tensor` with constant values. The
      axis that hold independent samples over which to return the desired
      percentile.  If `None` (the default), treat every dimension as a sample
      dimension, returning a scalar.
    interpolation : {'lower', 'higher', 'nearest'}.  Default: 'nearest' This
      optional parameter specifies the interpolation method to
      use when the desired quantile lies between two data points `i < j`:
        * lower: `i`.
        * higher: `j`.
        * nearest: `i` or `j`, whichever is nearest.
    keep_dims:  Python `bool`. If `True`, the last dimension is kept with size 1
      If `False`, the last dimension is removed from the output shape.
    validate_args:  Whether to add runtime checks of argument validity. If
      False, and arguments are incorrect, correct behavior is not guaranteed.
    name:  A Python string name to give this `Op`.  Default is 'percentile'

  Returns:
    A `(rank(q) + N - len(axis))` dimensional `Tensor` of same dtype as `x`, or,
      if `axis` is `None`, a `rank(q)` `Tensor`.  The first `rank(q)` dimensions
      index quantiles for different values of `q`.

  Raises:
    ValueError:  If argument 'interpolation' is not an allowed type.
  """
  name = name or 'percentile'
  allowed_interpolations = {'lower', 'higher', 'nearest'}

  if interpolation is None:
    interpolation = 'nearest'
  else:
    if interpolation not in allowed_interpolations:
      raise ValueError('Argument `interpolation` must be in %s.  Found %s' %
                       (allowed_interpolations, interpolation))

  with tf.name_scope(name, values=[x, q]):
    x = tf.convert_to_tensor(x, name='x')
    # Double is needed here and below, else we get the wrong index if the array
    # is huge along axis.
    q = tf.to_double(q, name='q')
    _get_static_ndims(q, expect_ndims_no_more_than=1)

    if validate_args:
      q = control_flow_ops.with_dependencies([
          tf.assert_rank_in(q, [0, 1]),
          tf.assert_greater_equal(q, tf.to_double(0.)),
          tf.assert_less_equal(q, tf.to_double(100.))
      ], q)

    if axis is None:
      y = tf.reshape(x, [-1])
    else:
      axis = tf.convert_to_tensor(axis, name='axis', dtype=tf.int32)
      tf.assert_integer(axis)
      axis_ndims = _get_static_ndims(
          axis, expect_static=True, expect_ndims_no_more_than=1)
      axis_const = tensor_util.constant_value(axis)
      if axis_const is None:
        raise ValueError(
            'Expected argument `axis` to be statically available.  Found: %s' %
            axis)
      axis = axis_const
      if axis_ndims == 0:
        axis = [axis]
      axis = [int(a) for a in axis]
      x_ndims = _get_static_ndims(
          x, expect_static=True, expect_ndims_at_least=1)
      axis = _make_static_axis_non_negative(axis, x_ndims)
      # Move dims in axis to the end, since _sort_tensor, which calls top_k,
      # only sorts the last dim.
      y = _move_dims_to_flat_end(x, axis, x_ndims)

    frac_at_q_or_above = 1. - q / 100.
    d = tf.to_double(tf.shape(y)[-1])

    if interpolation == 'lower':
      indices = tf.ceil((d - 1) * frac_at_q_or_above)
    elif interpolation == 'higher':
      indices = tf.floor((d - 1) * frac_at_q_or_above)
    elif interpolation == 'nearest':
      indices = tf.round((d - 1) * frac_at_q_or_above)

    # If d is gigantic, then we would have d == d - 1, even in double... So
    # let's use max/min to avoid out of bounds errors.
    d = tf.shape(y)[-1]
    # d - 1 will be distinct from d in int32.
    indices = tf.clip_by_value(tf.to_int32(indices), 0, d - 1)

    # Sort everything, not just the top 'k' entries, which allows multiple calls
    # to sort only once (under the hood) and use CSE.
    sorted_y = _sort_tensor(y)

    # Gather the indices along the sorted (last) dimension.
    # If q is a vector, the last dim of gathered_y indexes different q[i].
    gathered_y = tf.gather(sorted_y, indices, axis=-1)

    if keep_dims:
      if axis is None:
        ones_vec = tf.ones(
            shape=[_get_best_effort_ndims(x) + _get_best_effort_ndims(q)],
            dtype=tf.int32)
        gathered_y *= tf.ones(ones_vec, dtype=x.dtype)
      else:
        gathered_y = _insert_back_keep_dims(gathered_y, axis)

    # If q is a scalar, then result has the right shape.
    # If q is a vector, then result has trailing dim of shape q.shape, which
    # needs to be rotated to dim 0.
    return util.rotate_transpose(gathered_y, tf.rank(q))
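For intuition, here is a minimal NumPy mirror of the index arithmetic above (a hypothetical helper, not part of the library), assuming `_sort_tensor` sorts in descending order via `top_k`, which is why the 'lower' branch uses `tf.ceil`:

```python
import numpy as np

def percentile_nearest(x, q):
    # Mirrors the graph ops above: sort descending, then pick the entry whose
    # position corresponds to the fraction of values at or above the q-th
    # percentile.
    y = np.sort(np.asarray(x, dtype=np.float64))[::-1]   # descending
    frac_at_q_or_above = 1.0 - q / 100.0
    d = y.shape[-1]
    idx = int(np.clip(np.round((d - 1) * frac_at_q_or_above), 0, d - 1))
    return y[idx]

print(percentile_nearest([1., 2., 3., 4.], 30.))   # 2.0, matching the docstring
print(percentile_nearest([1., 2., 3., 4.], 100.))  # 4.0 (the maximum)
```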
Example #35
0
def auto_correlation(x,
                     axis=-1,
                     max_lags=None,
                     center=True,
                     normalize=True,
                     name='auto_correlation'):
  """Auto correlation along one axis.

  Given a `1-D` wide sense stationary (WSS) sequence `X`, the auto correlation
  `RXX` may be defined as  (with `E` expectation and `Conj` complex conjugate)

  ```
  RXX[m] := E{ W[m] Conj(W[0]) } = E{ W[0] Conj(W[-m]) },
  W[n]   := (X[n] - MU) / S,
  MU     := E{ X[0] },
  S**2   := E{ (X[0] - MU) Conj(X[0] - MU) }.
  ```

  This function takes the viewpoint that `x` is (along one axis) a finite
  sub-sequence of a realization of (WSS) `X`, and then uses `x` to produce an
  estimate of `RXX[m]` as follows:

  After extending `x` from length `L` to `inf` by zero padding, the auto
  correlation estimate `rxx[m]` is computed for `m = 0, 1, ..., max_lags` as

  ```
  rxx[m] := (L - m)**-1 sum_n w[n + m] Conj(w[n]),
  w[n]   := (x[n] - mu) / s,
  mu     := L**-1 sum_n x[n],
  s**2   := L**-1 sum_n (x[n] - mu) Conj(x[n] - mu)
  ```

  The error in this estimate is proportional to `1 / sqrt(len(x) - m)`, so users
  often set `max_lags` small enough so that the entire output is meaningful.

  Note that since `mu` is an imperfect estimate of `E{ X[0] }`, and we divide by
  `len(x) - m` rather than `len(x) - m - 1`, our estimate of auto correlation
  contains a slight bias, which goes to zero as `len(x) - m --> infinity`.

  Args:
    x:  `float32` or `complex64` `Tensor`.
    axis:  Python `int`. The axis number along which to compute correlation.
      Other dimensions index different batch members.
    max_lags:  Positive `int` tensor.  The maximum value of `m` to consider (in
      equation above).  If `max_lags >= x.shape[axis]`, we effectively re-set
      `max_lags` to `x.shape[axis] - 1`.
    center:  Python `bool`.  If `False`, do not subtract the mean estimate `mu`
      from `x[n]` when forming `w[n]`.
    normalize:  Python `bool`.  If `False`, do not divide by the variance
      estimate `s**2` when forming `w[n]`.
    name:  `String` name to prepend to created ops.

  Returns:
    `rxx`: `Tensor` of same `dtype` as `x`.  `rxx.shape[i] = x.shape[i]` for
      `i != axis`, and `rxx.shape[axis] = max_lags + 1`.

  Raises:
    TypeError:  If `x` is not a supported type.
  """
  # Implementation details:
  # Extend length N / 2 1-D array x to length N by zero padding onto the end.
  # Then, set
  #   F[x]_k := sum_n x_n exp{-i 2 pi k n / N }.
  # It is not hard to see that
  #   F[x]_k Conj(F[x]_k) = F[R]_k, where
  #   R_m := sum_n x_n Conj(x_{(n - m) mod N}).
  # One can also check that R_m / (N / 2 - m) is an unbiased estimate of RXX[m].

  # Since F[x] is the DFT of x, this leads us to a zero-padding and FFT/IFFT
  # based version of estimating RXX.
  # Note that this is a special case of the Wiener-Khinchin Theorem.
  with tf.name_scope(name, values=[x]):
    x = tf.convert_to_tensor(x, name='x')

    # Rotate dimensions of x in order to put axis at the rightmost dim.
    # FFT op requires this.
    rank = util.prefer_static_rank(x)
    if axis < 0:
      axis = rank + axis
    shift = rank - 1 - axis
    # Suppose x.shape[axis] = T, so there are T 'time' steps.
    #   ==> x_rotated.shape = B + [T],
    # where B is x_rotated's batch shape.
    x_rotated = util.rotate_transpose(x, shift)

    if center:
      x_rotated -= tf.reduce_mean(x_rotated, axis=-1, keepdims=True)

    # x_len = N / 2 from above explanation.  The length of x along axis.
    # Get a value for x_len that works in all cases.
    x_len = util.prefer_static_shape(x_rotated)[-1]

    # TODO(langmore) Investigate whether this zero padding helps or hurts.  At
    # the moment is necessary so that all FFT implementations work.
    # Zero pad to the next power of 2 greater than 2 * x_len, which equals
    # 2**(ceil(Log_2(2 * x_len))).  Note: Log_2(X) = Log_e(X) / Log_e(2).
    x_len_float64 = tf.cast(x_len, np.float64)
    target_length = tf.pow(
        np.float64(2.), tf.ceil(tf.log(x_len_float64 * 2) / np.log(2.)))
    pad_length = tf.cast(target_length - x_len_float64, np.int32)

    # We should have:
    # x_rotated_pad.shape = x_rotated.shape[:-1] + [T + pad_length]
    #                     = B + [T + pad_length]
    x_rotated_pad = util.pad(x_rotated, axis=-1, back=True, count=pad_length)

    dtype = x.dtype
    if not dtype.is_complex:
      if not dtype.is_floating:
        raise TypeError('Argument x must have either float or complex dtype'
                        ' found: {}'.format(dtype))
      x_rotated_pad = tf.complex(x_rotated_pad,
                                 dtype.real_dtype.as_numpy_dtype(0.))

    # Autocorrelation is IFFT of power-spectral density (up to some scaling).
    fft_x_rotated_pad = tf.fft(x_rotated_pad)
    spectral_density = fft_x_rotated_pad * tf.conj(fft_x_rotated_pad)
    # shifted_product is R[m] from above detailed explanation.
    # It is the inner product sum_n X[n] * Conj(X[n - m]).
    shifted_product = tf.ifft(spectral_density)

    # Cast back to real-valued if x was real to begin with.
    shifted_product = tf.cast(shifted_product, dtype)

    # Figure out if we can deduce the final static shape, and set max_lags.
    # Use x_rotated as a reference, because it has the time dimension in the far
    # right, and was created before we performed all sorts of crazy shape
    # manipulations.
    know_static_shape = True
    if not x_rotated.shape.is_fully_defined():
      know_static_shape = False
    if max_lags is None:
      max_lags = x_len - 1
    else:
      max_lags = tf.convert_to_tensor(max_lags, name='max_lags')
      max_lags_ = tensor_util.constant_value(max_lags)
      if max_lags_ is None or not know_static_shape:
        know_static_shape = False
        max_lags = tf.minimum(x_len - 1, max_lags)
      else:
        max_lags = min(x_len - 1, max_lags_)

    # Chop off the padding.
    # We allow users to provide a huge max_lags, but cut it off here.
    # shifted_product_chopped.shape = x_rotated.shape[:-1] + [max_lags]
    shifted_product_chopped = shifted_product[..., :max_lags + 1]

    # If possible, set shape.
    if know_static_shape:
      chopped_shape = x_rotated.shape.as_list()
      chopped_shape[-1] = min(x_len, max_lags + 1)
      shifted_product_chopped.set_shape(chopped_shape)

    # Recall R[m] is a sum of N / 2 - m nonzero terms x[n] Conj(x[n - m]).  The
    # other terms were zeros arising only due to zero padding.
    # `denominator = (N / 2 - m)` (defined below) is the proper term to
    # divide by to make this an unbiased estimate of the expectation
    # E[X[n] Conj(X[n - m])].
    x_len = tf.cast(x_len, dtype.real_dtype)
    max_lags = tf.cast(max_lags, dtype.real_dtype)
    denominator = x_len - tf.range(0., max_lags + 1.)
    denominator = tf.cast(denominator, dtype)
    shifted_product_rotated = shifted_product_chopped / denominator

    if normalize:
      shifted_product_rotated /= shifted_product_rotated[..., :1]

    # Transpose dimensions back to those of x.
    return util.rotate_transpose(shifted_product_rotated, -shift)
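As a cross-check of the FFT-based estimator above, a short NumPy sketch (illustrative name, not part of the library) that evaluates `rxx[m]` directly from the docstring's definition with `center=True` and `normalize=True`:

```python
import numpy as np

def auto_correlation_direct(x, max_lags):
    # Direct O(L * max_lags) evaluation of rxx[m], without the FFT shortcut.
    x = np.asarray(x, dtype=np.float64)
    L = len(x)
    w = x - x.mean()
    rxx = np.array([np.sum(w[m:] * np.conj(w[:L - m])) / (L - m)
                    for m in range(max_lags + 1)])
    return rxx / rxx[0]   # normalize so that rxx[0] == 1, as the graph code does

print(auto_correlation_direct(np.random.randn(1000), max_lags=5))
```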
Example #36
0
def selection_margin(masks, margin):
    selection = tf.nn.conv2d(masks,
                             tf.ones([margin * 2 + 1, margin * 2 + 1, 1, 1]),
                             [1, 1, 1, 1], 'SAME')
    selection = tf.clip_by_value(tf.abs(tf.ceil(selection)), 0, 1)
    return selection
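For a `{0, 1}` mask, the box filter plus `ceil`/clip above amounts to a morphological dilation by `margin` pixels. A hedged usage sketch (shapes chosen here purely for illustration):

```python
import numpy as np
import tensorflow as tf

mask_np = np.zeros([1, 7, 7, 1], np.float32)
mask_np[0, 3, 3, 0] = 1.0                      # single active pixel

masks = tf.placeholder(tf.float32, [1, 7, 7, 1])
dilated = selection_margin(masks, margin=2)    # 5x5 box filter

with tf.Session() as sess:
    out = sess.run(dilated, feed_dict={masks: mask_np})
    print(out[0, :, :, 0].astype(int))  # the single pixel grows to a 5x5 block of ones
```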
Example #37
0
            def preprocess_image_tf(filename, bbox_tensor, keypoints_tensor, mask, D=D):
                """
                Returns:
                resized_image (N,D,D,3) - cropped, padded (if needed), scaled to square image of size D
                resized_mask (N,D,D,1) - cropped, padded (if needed), scaled to square mask of size D
                pts (N,2,17) - keypoint coordinates (i,j) scaled to match up with resized_image
                labels (N,1,17) - values corresponding to pts: {0: invalid, 1:occluded, 2:valid}
                """
                image_string = tf.read_file(filename)
                image_decoded = tf.image.decode_jpeg(image_string, channels=3)
                image = tf.cast(image_decoded, tf.float32)
                
                # subtract mean
                image = tf.subtract(image, tf.reduce_mean(image))

                mask = tf.transpose([mask],[1,2,0])
                bbox_tensor = tf.to_float(bbox_tensor)
                keypoints_tensor = tf.to_float(keypoints_tensor)

                sideLength = tf.reduce_max(bbox_tensor[2:],axis=0)
                centerX = tf.floor(bbox_tensor[0] + tf.divide(bbox_tensor[2],tf.constant(2.0)))
                centerY = tf.floor(bbox_tensor[1] + tf.divide(bbox_tensor[3],tf.constant(2.0)))
                center = tf.stack([centerX,centerY])

                corner1 = tf.to_int32(tf.minimum(tf.maximum(tf.subtract(center, tf.divide(sideLength,tf.constant(2.0))),0),
                                    tf.reverse(tf.to_float(tf.shape(image)[:2]),tf.constant([0]))))
                corner2 = tf.to_int32(tf.minimum(tf.maximum(tf.add(center, tf.divide(sideLength,tf.constant(2.0))),0),
                                    tf.reverse(tf.to_float(tf.shape(image)[:2]),tf.constant([0]))))
                i_shape = tf.subtract(corner2,corner1)
                d_shape = tf.subtract(tf.to_int32(sideLength),i_shape)

                scale = tf.divide(tf.constant(D,tf.float32), sideLength)
                cropped_image = tf.image.crop_to_bounding_box(image,corner1[1],corner1[0],
                                                            tf.subtract(corner2,corner1)[1],tf.subtract(corner2,corner1)[0])
                cropped_mask = tf.image.crop_to_bounding_box(mask,corner1[1],corner1[0],
                                                            tf.subtract(corner2,corner1)[1],tf.subtract(corner2,corner1)[0])

                dX = tf.floor(tf.divide(d_shape,tf.constant(2)))
                dY = tf.ceil(tf.divide(d_shape,tf.constant(2)))

                pts, labels = tf.split(keypoints_tensor,[2,1],axis=1)
                pts = tf.subtract(pts,tf.to_float(corner1)) # shift keypoints
                pts = tf.add(pts,tf.to_float(dX)) # shift keypoints
                pts = tf.multiply(pts,scale) # scale keypoints

                # set invalid pts to 0
                valid = tf.less(pts,tf.constant(D,tf.float32))
                valid = tf.multiply(tf.to_int32(valid), tf.to_int32(tf.greater(pts,0)))
                pts = tf.multiply(pts,tf.to_float(valid))
                pts = tf.transpose(pts,[1,0])
                labels = tf.transpose(labels,[1,0])
                labels = tf.to_float(tf.greater_equal(labels, 2))

                padded_image = tf.image.pad_to_bounding_box(cropped_image,tf.to_int32(dX[1]),tf.to_int32(dX[0]),
                                                            tf.to_int32(sideLength),tf.to_int32(sideLength))
                padded_mask = tf.image.pad_to_bounding_box(cropped_mask,tf.to_int32(dX[1]),tf.to_int32(dX[0]),
                                                            tf.to_int32(sideLength),tf.to_int32(sideLength))

                # if image size is not square, set labels to zero (so loss will be zero and padding won't affect training)
                is_padded = tf.reduce_min(tf.to_float(tf.less(dX, 1.0)))
                labels = is_padded * labels

                resized_image = tf.image.resize_images(padded_image,tf.constant([D,D]),tf.image.ResizeMethod.NEAREST_NEIGHBOR)
                # resized_image = resized_image - VGG_MEAN
                resized_mask = tf.image.resize_images(padded_mask,tf.constant([D,D]),tf.image.ResizeMethod.NEAREST_NEIGHBOR)
                return resized_image, resized_mask, pts, labels
Example #38
0
def build_model(embedding, options):
    """ Builds the entire computational graph used for training
    """
    # description string: #words x #samples
    with tf.device('/gpu:0'):
        with tf.variable_scope('input'):
            x = tf.placeholder(tf.int64, shape=[None, None, None],
                               name='x')  # 3D vector batch,N and instances(before embedding)40*32*13
            x_mask = tf.placeholder(tf.float32, shape=[None, None], name='x_mask')  # mask batch,N
            y = tf.placeholder(tf.int64, shape=[None], name='y') #group actual
            ##TODO important    
            keep_prob = tf.placeholder(tf.float32, [], name='keep_prob')
            is_training = tf.placeholder(tf.bool, name='is_training')
            alpha_balance = tf.placeholder(tf.float32,[],name = 'alpha_balance')
            ##TODO important
            sequence_mask = tf.cast(tf.abs(tf.sign(x)), tf.float32)  # 3D
            n_timesteps = tf.shape(x)[0]  # time steps
            ##TODO word embedding
            emb = tf.nn.embedding_lookup(embedding, x)
            
    with tf.device('/gpu:0'):
        # fed into the input of BILSTM from the official document
        with tf.name_scope('sentence_enc'):
            batch = tf.shape(emb)[0] #32
            N = tf.shape(emb)[1] #40 N instances in a group
            word = tf.shape(emb)[2]  #13
            ##TODO make instances prediction through attention encoding and MLP
            with tf.variable_scope(name_or_scope='sentence_enc', reuse=tf.AUTO_REUSE):
                word_level_inputs = tf.reshape(emb, [batch * N, word, options['dim_word']])
                word_level_mask = tf.reshape(sequence_mask, [batch * N, word])
                ##TODO word level LSTM
                word_encoder_out = bilstm_filter(word_level_inputs, word_level_mask, keep_prob,prefix='sequence_encode', dim=options['dim'],is_training=is_training)  # output shape: batch*news,sequence,2*lstm_units(32*40)*12*600
                word_encoder_out = tf.concat(word_encoder_out, 2) * tf.expand_dims(word_level_mask, -1)  # h = [h->,h<-]
                ################################### TODO word-attention
                word_level_output = attention_v2(word_encoder_out, word_level_mask, name='word_attention', keep=keep_prob,r=10,is_training=is_training)
            
                if options['use_dropout']:
                    word_level_output = layers.dropout(word_level_output, keep_prob=keep_prob, is_training=is_training,seed=None)
                #32*N,D
            
                att = tf.reshape(word_level_output, [batch, N, 2*options['dim']])
                ##TODO att shape 32*40*600
        with tf.name_scope('instance_prediction'):
            logit = tf.layers.dense(word_level_output, 150,activation=tf.nn.tanh,use_bias=True,kernel_initializer=layers.xavier_initializer(uniform=True,seed=None,dtype=tf.float32),name='inst_temp', reuse=tf.AUTO_REUSE)
            if options['use_dropout']:
                logit = layers.dropout(logit, keep_prob=keep_prob, is_training=is_training,seed=None)
            
            pred_sig_ = tf.layers.dense(logit, 1, activation=None, use_bias=True,kernel_initializer=layers.xavier_initializer(uniform=True, seed=None, dtype=tf.float32),name='inst_pred', reuse=tf.AUTO_REUSE)
            inst_pred = tf.nn.sigmoid(pred_sig_)#32*N,1, float32
            L = tf.reshape(inst_pred,[batch,N])
        
            """with tf.name_scope('instance_prediction'):
            mini_batch = tf.shape(att)[0]  #32
            N = tf.shape(att)[1] #N
            emb_size = tf.shape(att)[2]  #600/100
            D = att.get_shape().as_list()[-1]
            att_input = tf.reshape(att,[mini_batch*N, emb_size])  #32*N,600
            theta = tf.get_variable('theta', [D, 1],initializer=tf.random_normal_initializer(stddev=0.1))
            ##TODO make instances prediction through softmax(sigmoid) function
            inst_pred = tf.sigmoid(tf.matmul(att_input,theta))  #32*N,1
            L = tf.reshape(inst_pred,[mini_batch,N])  #32,N
            #print(inst_pred)"""
            ##TODO make group prediction through average instance predictions
            group_ = tf.reduce_sum(L,1)/tf.cast(N,tf.float32)  # Do the instance_pred average  32,
            #group_ = tf.reduce_mean(L,1)  
            group_pred = tf.cast(tf.ceil(group_-0.5),tf.int64)    #32,
            #################################################################
            ################################################################
            ###################################################      why group_pred all 0/1???
            
            ##TODO new cost
            logger.info('Building f_cost...')
            x_simil = Euclidean_distance(att)  #32,N,N   contains placeholder
            l_diff = instance_diff(L) #32,N,N   contains placeholder
            simil_cost = tf.reduce_sum(tf.multiply(x_simil,l_diff),[1,2])/tf.cast(N*N,tf.float32)  #32,
            #group_cost = tf.cast(tf.square(y-group_pred),tf.float32)  #32
            group_cost = tf.cast(-y*tf.log(group_pred) - (1-y)*tf.log(1-group_pred),tf.float32)     ## log_loss
            
            # cost changes from int64 to float32
            total_cost = simil_cost + alpha_balance * group_cost  #[32,1]
            cost = tf.reshape(total_cost,(1,-1))  #1,32
            
            
            """pred = tf.layers.dense(logit, 2, activation=None, use_bias=True,
                                kernel_initializer=layers.xavier_initializer(uniform=True, seed=None, dtype=tf.float32),
                                name='fout', reuse=tf.AUTO_REUSE)#32,2
            labels = tf.one_hot(y, depth=2, axis=1)#32,2
            preds = tf.nn.softmax(pred, 1,name='softmax')  #32,2
            cost = tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=labels)  #1,32"""

        logger.info('Done')


        with tf.variable_scope('logging'):
            tf.summary.scalar('current_cost', tf.reduce_mean(cost))
            tf.summary.histogram('predicted_value', group_pred)
            summary = tf.summary.merge_all()

    return is_training, cost, x, x_mask, y, n_timesteps, group_pred, summary  
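Note on the group rule above: `tf.ceil(group_ - 0.5)` is simply a hard threshold at 0.5 on the averaged instance predictions (which come from a sigmoid, so they lie in `[0, 1]`). A tiny numeric check:

```python
import numpy as np

for mean_pred in [0.2, 0.5, 0.51, 0.9]:
    print(mean_pred, int(np.ceil(mean_pred - 0.5)))
# 0.2 -> 0, 0.5 -> 0, 0.51 -> 1, 0.9 -> 1
```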
Example #39
0
    def _set_learning_rate(self):
        self.global_step = tf.get_variable(
            'global_step',
            shape=[],
            dtype=tf.int32,
            initializer=tf.constant_initializer(0),
            trainable=False)

        if self.args.learning_rate_strategy == 'FIXED':
            self.lr = tf.minimum(
                self.args.learning_rate,
                self.args.learning_rate / tf.log(999.) *
                tf.log(tf.cast(self.global_step, tf.float32) + 1))
        elif self.args.learning_rate_strategy == 'HALF_COSINE_MAX':
            # from snapshot paper
            t_m = tf.constant(
                ceil(self.args.learning_rate_reset_epoch *
                     self.args.num_total_samples / self.args.batch_size),
                dtype=tf.int32)

            self.lr = (self.args.learning_rate / 2.0) * (tf.cos(
                tf.constant(3.1415, tf.float32) *
                tf.cast(tf.mod(self.global_step, t_m), tf.float32) /
                tf.cast(t_m, tf.float32)) + 1.0)
        elif self.args.learning_rate_strategy == 'HALF_COSINE_ZERO':
            # from snapshot paper
            t_m = tf.constant(
                ceil(self.args.learning_rate_reset_epoch *
                     self.args.num_total_samples / self.args.batch_size),
                dtype=tf.int32)

            self.lr = (self.args.learning_rate / 2.0) * (1.0 - tf.cos(
                tf.constant(3.1415, tf.float32) *
                tf.cast(tf.mod(self.global_step, t_m), tf.float32) /
                tf.cast(t_m, tf.float32)))
        elif self.args.learning_rate_strategy == 'COSINE_ZERO':
            t_m = tf.constant(
                ceil(self.args.learning_rate_reset_epoch *
                     self.args.num_total_samples / self.args.batch_size),
                dtype=tf.int32)

            self.lr = (self.args.learning_rate / 2.0) * (1.0 - tf.cos(
                tf.constant(2 * 3.1415, tf.float32) *
                tf.cast(tf.mod(self.global_step, t_m), tf.float32) /
                tf.cast(t_m, tf.float32)))
        elif self.args.learning_rate_strategy == 'COSINE_MAX':
            t_m = tf.constant(
                ceil(self.args.learning_rate_reset_epoch *
                     self.args.num_total_samples / self.args.batch_size),
                dtype=tf.int32)

            self.lr = (self.args.learning_rate / 2.0) * (1.0 + tf.cos(
                tf.constant(2 * 3.1415, tf.float32) *
                tf.cast(tf.mod(self.global_step, t_m), tf.float32) /
                tf.cast(t_m, tf.float32)))
        elif self.args.learning_rate_strategy == 'COSINE_ZERO_DECAY':
            t_m = tf.constant(
                ceil(self.args.learning_rate_reset_epoch *
                     self.args.num_total_samples / self.args.batch_size),
                dtype=tf.int32)

            self.lr = (self.args.learning_rate /
                       tf.ceil(tf.cast(self.global_step, tf.float32) / tf.cast(t_m, tf.float32)) + 1) \
                      * (1.0 - tf.cos(tf.constant(2 * 3.1415, tf.float32) *
                                      tf.cast(tf.mod(self.global_step, t_m), tf.float32)
                                      / tf.cast(t_m, tf.float32)))
        elif self.args.learning_rate_strategy in ['CYCLE_LINEAR', 'CYCLE_SIN']:
            self.lr = tf.get_variable('lr',
                                      shape=[],
                                      dtype=tf.float32,
                                      initializer=tf.constant_initializer(
                                          self.args.learning_rate),
                                      trainable=False)
        else:
            raise NotImplementedError
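For reference, a plain NumPy sketch (hypothetical helper name) of the 'HALF_COSINE_MAX' snapshot schedule above: the rate restarts at its full value every `t_m` steps and decays toward zero along a half cosine:

```python
import numpy as np

def half_cosine_max_lr(step, base_lr, t_m):
    # lr = (base_lr / 2) * (cos(pi * (step mod t_m) / t_m) + 1)
    return (base_lr / 2.0) * (np.cos(np.pi * (step % t_m) / t_m) + 1.0)

t_m = 1000
for step in [0, 500, 999, 1000]:
    print(step, round(half_cosine_max_lr(step, base_lr=0.1, t_m=t_m), 4))
# 0 -> 0.1, 500 -> 0.05, 999 -> ~0.0, 1000 -> 0.1 (restart)
```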
Example #40
0
def augment_pipeline(
    images,                         # Input images: NCHW, float32, dynamic range [-1,+1].
    labels,                         # Input labels.
    strength         = 1,           # Overall multiplier for augmentation probability; can be a Tensor.
    debug_percentile = None,        # Percentile value for visualizing parameter ranges; None = normal operation.

    # Pixel blitting.
    xflip            = 0,           # Probability multiplier for x-flip.
    rotate90         = 0,           # Probability multiplier for 90 degree rotations.
    xint             = 0,           # Probability multiplier for integer translation.
    xint_max         = 0.125,       # Range of integer translation, relative to image dimensions.

    # General geometric transformations.
    scale            = 0,           # Probability multiplier for isotropic scaling.
    rotate           = 0,           # Probability multiplier for arbitrary rotation.
    aniso            = 0,           # Probability multiplier for anisotropic scaling.
    xfrac            = 0,           # Probability multiplier for fractional translation.
    scale_std        = 0.2,         # Log2 standard deviation of isotropic scaling.
    rotate_max       = 1,           # Range of arbitrary rotation, 1 = full circle.
    aniso_std        = 0.2,         # Log2 standard deviation of anisotropic scaling.
    xfrac_std        = 0.125,       # Standard deviation of fractional translation, relative to image dimensions.

    # Color transformations.
    brightness       = 0,           # Probability multiplier for brightness.
    contrast         = 0,           # Probability multiplier for contrast.
    lumaflip         = 0,           # Probability multiplier for luma flip.
    hue              = 0,           # Probability multiplier for hue rotation.
    saturation       = 0,           # Probability multiplier for saturation.
    brightness_std   = 0.2,         # Standard deviation of brightness.
    contrast_std     = 0.5,         # Log2 standard deviation of contrast.
    hue_max          = 1,           # Range of hue rotation, 1 = full circle.
    saturation_std   = 1,           # Log2 standard deviation of saturation.

    # Image-space filtering.
    imgfilter        = 0,           # Probability multiplier for image-space filtering.
    imgfilter_bands  = [1,1,1,1],   # Probability multipliers for individual frequency bands.
    imgfilter_std    = 1,           # Log2 standard deviation of image-space filter amplification.

    # Image-space corruptions.
    noise            = 0,           # Probability multiplier for additive RGB noise.
    cutout           = 0,           # Probability multiplier for cutout.
    noise_std        = 0.1,         # Standard deviation of additive RGB noise.
    cutout_size      = 0.5,         # Size of the cutout rectangle, relative to image dimensions.
):
    # Determine input shape.
    batch, channels, height, width = images.shape.as_list()
    if batch is None:
        batch = tf.shape(images)[0]

    # -------------------------------------
    # Select parameters for pixel blitting.
    # -------------------------------------

    # Initialize inverse homogeneous 2D transform: G_inv @ pixel_out ==> pixel_in
    I_3 = tf.eye(3, batch_shape=[batch])
    G_inv = I_3

    # Apply x-flip with probability (xflip * strength).
    if xflip > 0:
        i = tf.floor(tf.random_uniform([batch], 0, 2))
        i = gate_augment_params(xflip * strength, i, 0)
        if debug_percentile is not None:
            i = tf.floor(tf.broadcast_to(debug_percentile, [batch]) * 2)
        G_inv @= scale_2d_inv(1 - 2 * i, 1)

    # Apply 90 degree rotations with probability (rotate90 * strength).
    if rotate90 > 0:
        i = tf.floor(tf.random_uniform([batch], 0, 4))
        i = gate_augment_params(rotate90 * strength, i, 0)
        if debug_percentile is not None:
            i = tf.floor(tf.broadcast_to(debug_percentile, [batch]) * 4)
        G_inv @= rotate_2d_inv(-np.pi / 2 * i)

    # Apply integer translation with probability (xint * strength).
    if xint > 0:
        t = tf.random_uniform([batch, 2], -xint_max, xint_max)
        t = gate_augment_params(xint * strength, t, 0)
        if debug_percentile is not None:
            t = (tf.broadcast_to(debug_percentile, [batch, 2]) * 2 - 1) * xint_max
        G_inv @= translate_2d_inv(tf.rint(t[:,0] * width), tf.rint(t[:,1] * height))

    # --------------------------------------------------------
    # Select parameters for general geometric transformations.
    # --------------------------------------------------------

    # Apply isotropic scaling with probability (scale * strength).
    if scale > 0:
        s = 2 ** tf.random_normal([batch], 0, scale_std)
        s = gate_augment_params(scale * strength, s, 1)
        if debug_percentile is not None:
            s = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * scale_std)
        G_inv @= scale_2d_inv(s, s)

    # Apply pre-rotation with probability p_rot.
    p_rot = 1 - tf.sqrt(tf.cast(tf.maximum(1 - rotate * strength, 0), tf.float32)) # P(pre OR post) = p
    if rotate > 0:
        theta = tf.random_uniform([batch], -np.pi * rotate_max, np.pi * rotate_max)
        theta = gate_augment_params(p_rot, theta, 0)
        if debug_percentile is not None:
            theta = (tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * np.pi * rotate_max
        G_inv @= rotate_2d_inv(-theta) # Before anisotropic scaling.

    # Apply anisotropic scaling with probability (aniso * strength).
    if aniso > 0:
        s = 2 ** tf.random_normal([batch], 0, aniso_std)
        s = gate_augment_params(aniso * strength, s, 1)
        if debug_percentile is not None:
            s = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * aniso_std)
        G_inv @= scale_2d_inv(s, 1 / s)

    # Apply post-rotation with probability p_rot.
    if rotate > 0:
        theta = tf.random_uniform([batch], -np.pi * rotate_max, np.pi * rotate_max)
        theta = gate_augment_params(p_rot, theta, 0)
        if debug_percentile is not None:
            theta = tf.zeros([batch])
        G_inv @= rotate_2d_inv(-theta) # After anisotropic scaling.

    # Apply fractional translation with probability (xfrac * strength).
    if xfrac > 0:
        t = tf.random_normal([batch, 2], 0, xfrac_std)
        t = gate_augment_params(xfrac * strength, t, 0)
        if debug_percentile is not None:
            t = tflib.erfinv(tf.broadcast_to(debug_percentile, [batch, 2]) * 2 - 1) * xfrac_std
        G_inv @= translate_2d_inv(t[:,0] * width, t[:,1] * height)

    # ----------------------------------
    # Execute geometric transformations.
    # ----------------------------------

    # Execute if the transform is not identity.
    if G_inv is not I_3:

        # Setup orthogonal lowpass filter.
        Hz = wavelets['sym6']
        Hz = np.asarray(Hz, dtype=np.float32)
        Hz = np.reshape(Hz, [-1, 1, 1]).repeat(channels, axis=1) # [tap, channel, 1]
        Hz_pad = Hz.shape[0] // 4

        # Calculate padding.
        cx = (width - 1) / 2
        cy = (height - 1) / 2
        cp = np.transpose([[-cx, -cy, 1], [cx, -cy, 1], [cx, cy, 1], [-cx, cy, 1]]) # [xyz, idx]
        cp = G_inv @ cp[np.newaxis] # [batch, xyz, idx]
        cp = cp[:, :2, :] # [batch, xy, idx]
        m_lo = tf.ceil(tf.reduce_max(-cp, axis=[0,2]) - [cx, cy] + Hz_pad * 2)
        m_hi = tf.ceil(tf.reduce_max( cp, axis=[0,2]) - [cx, cy] + Hz_pad * 2)
        m_lo = tf.clip_by_value(m_lo, [0, 0], [width-1, height-1])
        m_hi = tf.clip_by_value(m_hi, [0, 0], [width-1, height-1])

        # Pad image and adjust origin.
        images = tf.transpose(images, [0, 2, 3, 1]) # NCHW => NHWC
        pad = [[0, 0], [m_lo[1], m_hi[1]], [m_lo[0], m_hi[0]], [0, 0]]
        images = tf.pad(tensor=images, paddings=pad, mode='REFLECT')
        T_in = translate_2d(cx + m_lo[0], cy + m_lo[1])
        T_out = translate_2d_inv(cx + Hz_pad, cy + Hz_pad)
        G_inv = T_in @ G_inv @ T_out

        # Upsample.
        shape = [batch, tf.shape(images)[1] * 2, tf.shape(images)[2] * 2, channels]
        images = tf.nn.depthwise_conv2d_backprop_input(input_sizes=shape, filter=Hz[np.newaxis, :], out_backprop=images, strides=[1,2,2,1], padding='SAME', data_format='NHWC')
        images = tf.nn.depthwise_conv2d_backprop_input(input_sizes=shape, filter=Hz[:, np.newaxis], out_backprop=images, strides=[1,1,1,1], padding='SAME', data_format='NHWC')
        G_inv = scale_2d(2, 2) @ G_inv @ scale_2d_inv(2, 2) # Account for the increased resolution.

        # Execute transformation.
        transforms = tf.reshape(G_inv, [-1, 9])[:, :8]
        shape = [(height + Hz_pad * 2) * 2, (width + Hz_pad * 2) * 2]
        images = tf.contrib.image.transform(images=images, transforms=transforms, output_shape=shape, interpolation='BILINEAR')

        # Downsample and crop.
        images = tf.nn.depthwise_conv2d(input=images, filter=Hz[np.newaxis,:], strides=[1,1,1,1], padding='SAME', data_format='NHWC')
        images = tf.nn.depthwise_conv2d(input=images, filter=Hz[:,np.newaxis], strides=[1,2,2,1], padding='SAME', data_format='NHWC')
        images = images[:, Hz_pad : height + Hz_pad, Hz_pad : width + Hz_pad, :]
        images = tf.transpose(images, [0, 3, 1, 2]) # NHWC => NCHW

    # --------------------------------------------
    # Select parameters for color transformations.
    # --------------------------------------------

    # Initialize homogeneous 3D transformation matrix: C @ color_in ==> color_out
    I_4 = tf.eye(4, batch_shape=[batch])
    C = I_4

    # Apply brightness with probability (brightness * strength).
    if brightness > 0:
        b = tf.random_normal([batch], 0, brightness_std)
        b = gate_augment_params(brightness * strength, b, 0)
        if debug_percentile is not None:
            b = tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * brightness_std
        C = translate_3d(b, b, b) @ C

    # Apply contrast with probability (contrast * strength).
    if contrast > 0:
        c = 2 ** tf.random_normal([batch], 0, contrast_std)
        c = gate_augment_params(contrast * strength, c, 1)
        if debug_percentile is not None:
            c = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * contrast_std)
        C = scale_3d(c, c, c) @ C

    # Apply luma flip with probability (lumaflip * strength).
    v = np.array([1, 1, 1, 0]) / np.sqrt(3) # Luma axis.
    if lumaflip > 0:
        i = tf.floor(tf.random_uniform([batch], 0, 2))
        i = gate_augment_params(lumaflip * strength, i, 0)
        if debug_percentile is not None:
            i = tf.floor(tf.broadcast_to(debug_percentile, [batch]) * 2)
        i = tf.reshape(i, [batch, 1, 1])
        C = (I_4 - 2 * np.outer(v, v) * i) @ C # Householder reflection.

    # Apply hue rotation with probability (hue * strength).
    if hue > 0 and channels > 1:
        theta = tf.random_uniform([batch], -np.pi * hue_max, np.pi * hue_max)
        theta = gate_augment_params(hue * strength, theta, 0)
        if debug_percentile is not None:
            theta = (tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * np.pi * hue_max
        C = rotate_3d(v, theta) @ C # Rotate around v.

    # Apply saturation with probability (saturation * strength).
    if saturation > 0 and channels > 1:
        s = 2 ** tf.random_normal([batch], 0, saturation_std)
        s = gate_augment_params(saturation * strength, s, 1)
        if debug_percentile is not None:
            s = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * saturation_std)
        s = tf.reshape(s, [batch, 1, 1])
        C = (np.outer(v, v) + (I_4 - np.outer(v, v)) * s) @ C

    # ------------------------------
    # Execute color transformations.
    # ------------------------------

    # Execute if the transform is not identity.
    if C is not I_4:
        images = tf.reshape(images, [batch, channels, height * width])
        if channels == 3:
            images = C[:, :3, :3] @ images + C[:, :3, 3:]
        elif channels == 1:
            C = tf.reduce_mean(C[:, :3, :], axis=1, keepdims=True)
            images = images * tf.reduce_sum(C[:, :, :3], axis=2, keepdims=True) + C[:, :, 3:]
        else:
            raise ValueError('Image must be RGB (3 channels) or L (1 channel)')
        images = tf.reshape(images, [batch, channels, height, width])

    # ----------------------
    # Image-space filtering.
    # ----------------------

    if imgfilter > 0:
        num_bands = 4
        assert len(imgfilter_bands) == num_bands
        expected_power = np.array([10, 1, 1, 1]) / 13 # Expected power spectrum (1/f).

        # Apply amplification for each band with probability (imgfilter * strength * band_strength).
        g = tf.ones([batch, num_bands]) # Global gain vector (identity).
        for i, band_strength in enumerate(imgfilter_bands):
            t_i = 2 ** tf.random_normal([batch], 0, imgfilter_std)
            t_i = gate_augment_params(imgfilter * strength * band_strength, t_i, 1)
            if debug_percentile is not None:
                t_i = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * imgfilter_std) if band_strength > 0 else tf.ones([batch])
            t = tf.ones([batch, num_bands]) # Temporary gain vector.
            t = tf.concat([t[:, :i], t_i[:, np.newaxis], t[:, i+1:]], axis=-1) # Replace i'th element.
            t /= tf.sqrt(tf.reduce_sum(expected_power * tf.square(t), axis=-1, keepdims=True)) # Normalize power.
            g *= t # Accumulate into global gain.

        # Construct filter bank.
        Hz_lo = wavelets['sym2']
        Hz_lo = np.asarray(Hz_lo, dtype=np.float32)     # H(z)
        Hz_hi = Hz_lo * ((-1) ** np.arange(Hz_lo.size)) # H(-z)
        Hz_lo2 = np.convolve(Hz_lo, Hz_lo[::-1]) / 2    # H(z) * H(z^-1) / 2
        Hz_hi2 = np.convolve(Hz_hi, Hz_hi[::-1]) / 2    # H(-z) * H(-z^-1) / 2
        Hz_bands = np.eye(num_bands, 1)                 # Bandpass(H(z), b_i)
        for i in range(1, num_bands):
            Hz_bands = np.dstack([Hz_bands, np.zeros_like(Hz_bands)]).reshape(num_bands, -1)[:, :-1]
            Hz_bands = scipy.signal.convolve(Hz_bands, [Hz_lo2])
            Hz_bands[i, (Hz_bands.shape[1] - Hz_hi2.size) // 2 : (Hz_bands.shape[1] + Hz_hi2.size) // 2] += Hz_hi2

        # Construct combined amplification filter.
        Hz_prime = g @ Hz_bands # [batch, tap]
        Hz_prime = tf.transpose(Hz_prime) # [tap, batch]
        Hz_prime = tf.tile(Hz_prime[:, :, np.newaxis], [1, 1, channels]) # [tap, batch, channels]
        Hz_prime = tf.reshape(Hz_prime, [-1, batch * channels, 1]) # [tap, batch * channels, 1]

        # Apply filter.
        images = tf.reshape(images, [1, -1, height, width])
        pad = Hz_bands.shape[1] // 2
        pad = [[0,0], [0,0], [pad, pad], [pad, pad]]
        images = tf.pad(tensor=images, paddings=pad, mode='REFLECT')
        images = tf.nn.depthwise_conv2d(input=images, filter=Hz_prime[np.newaxis,:], strides=[1,1,1,1], padding='VALID', data_format='NCHW')
        images = tf.nn.depthwise_conv2d(input=images, filter=Hz_prime[:,np.newaxis], strides=[1,1,1,1], padding='VALID', data_format='NCHW')
        images = tf.reshape(images, [-1, channels, height, width])

    # ------------------------
    # Image-space corruptions.
    # ------------------------

    # Apply additive RGB noise with probability (noise * strength).
    if noise > 0:
        sigma = tf.abs(tf.random_normal([batch], 0, noise_std))
        sigma = gate_augment_params(noise * strength, sigma, 0)
        if debug_percentile is not None:
            sigma = tflib.erfinv(tf.broadcast_to(debug_percentile, [batch])) * noise_std
        sigma = tf.reshape(sigma, [-1, 1, 1, 1])
        images += tf.random_normal([batch, channels, height, width]) * sigma

    # Apply cutout with probability (cutout * strength).
    if cutout > 0:
        size = tf.fill([batch, 2], cutout_size)
        size = gate_augment_params(cutout * strength, size, 0)
        center = tf.random_uniform([batch, 2], 0, 1)
        if debug_percentile is not None:
            size = tf.fill([batch, 2], cutout_size)
            center = tf.broadcast_to(debug_percentile, [batch, 2])
        size = tf.reshape(size, [batch, 2, 1, 1, 1])
        center = tf.reshape(center, [batch, 2, 1, 1, 1])
        coord_x = tf.reshape(tf.range(width, dtype=tf.float32), [1, 1, 1, width])
        coord_y = tf.reshape(tf.range(height, dtype=tf.float32), [1, 1, height, 1])
        mask_x = (tf.abs((coord_x + 0.5) / width - center[:, 0]) >= size[:, 0] / 2)
        mask_y = (tf.abs((coord_y + 0.5) / height - center[:, 1]) >= size[:, 1] / 2)
        mask = tf.cast(tf.logical_or(mask_x, mask_y), tf.float32)
        images *= mask

    return images, labels
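A minimal usage sketch for the pipeline above. The shapes and probability multipliers are chosen here purely for illustration, and the call still assumes the surrounding module (`tflib`, `wavelets`, `gate_augment_params`, and the 2-D/3-D transform helpers) is importable:

```python
import tensorflow as tf

# NCHW float32 images in [-1, +1], as stated in the signature above.
images = tf.placeholder(tf.float32, [8, 3, 256, 256])
labels = tf.placeholder(tf.float32, [8, 10])

aug_images, aug_labels = augment_pipeline(
    images, labels,
    strength=0.5,               # overall probability multiplier
    xflip=1, rotate90=1,        # pixel blitting
    scale=1, rotate=1,          # geometric transforms
    brightness=1, contrast=1,   # colour transforms
    cutout=1)                   # image-space corruption
```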
Example #41
0
def pad_to_multiple(tensor, multiple):
    """Returns the tensor zero padded to the specified multiple.

  Appends 0s to the end of the first and second dimension (height and width) of
  the tensor until both dimensions are a multiple of the input argument
  'multiple'. E.g. given an input tensor of shape [1, 3, 5, 1] and an input
  multiple of 4, PadToMultiple will append 0s so that the resulting tensor will
  be of shape [1, 4, 8, 1].

  Args:
    tensor: rank 4 float32 tensor, where
            tensor -> [batch_size, height, width, channels].
    multiple: the multiple to pad to.

  Returns:
    padded_tensor: the tensor zero padded to the specified multiple.
  """
    if multiple == 1:
        return tensor

    tensor_shape = tensor.get_shape()
    batch_size = static_shape.get_batch_size(tensor_shape)
    tensor_height = static_shape.get_height(tensor_shape)
    tensor_width = static_shape.get_width(tensor_shape)
    tensor_depth = static_shape.get_depth(tensor_shape)

    if batch_size is None:
        batch_size = tf.shape(tensor)[0]

    if tensor_height is None:
        tensor_height = tf.shape(tensor)[1]
        padded_tensor_height = tf.cast(tf.ceil(
            tf.cast(tensor_height, dtype=tf.float32) /
            tf.cast(multiple, dtype=tf.float32)),
                                       dtype=tf.int32) * multiple
    else:
        padded_tensor_height = int(
            math.ceil(float(tensor_height) / multiple) * multiple)

    if tensor_width is None:
        tensor_width = tf.shape(tensor)[2]
        padded_tensor_width = tf.cast(tf.ceil(
            tf.cast(tensor_width, dtype=tf.float32) /
            tf.cast(multiple, dtype=tf.float32)),
                                      dtype=tf.int32) * multiple
    else:
        padded_tensor_width = int(
            math.ceil(float(tensor_width) / multiple) * multiple)

    if tensor_depth is None:
        tensor_depth = tf.shape(tensor)[3]

    # Use tf.concat instead of tf.pad to preserve static shape
    if padded_tensor_height != tensor_height:
        height_pad = tf.zeros([
            batch_size, padded_tensor_height - tensor_height, tensor_width,
            tensor_depth
        ])
        tensor = tf.concat([tensor, height_pad], 1)
    if padded_tensor_width != tensor_width:
        width_pad = tf.zeros([
            batch_size, padded_tensor_height,
            padded_tensor_width - tensor_width, tensor_depth
        ])
        tensor = tf.concat([tensor, width_pad], 2)

    return tensor
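A quick graph-mode check of the docstring's `[1, 3, 5, 1]` example (a sketch, assuming the `static_shape` helper used above is importable):

```python
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [1, 3, 5, 1])
padded = pad_to_multiple(x, multiple=4)

with tf.Session() as sess:
    out = sess.run(padded, feed_dict={x: np.ones([1, 3, 5, 1], np.float32)})
    print(out.shape)  # (1, 4, 8, 1), as described in the docstring
```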
Example #42
0
def fm(tensor):
    return tf.ceil(tf.math.subtract(tensor, MASK_THRESHOLD))
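`fm` acts as a hard threshold when the input values lie in `[0, 1]` and `MASK_THRESHOLD` is in `(0, 1)`: `ceil(x - t)` is 0 for `x <= t` and 1 for `x > t`. A tiny numeric check (the threshold value below is made up; the real constant is defined elsewhere in the module):

```python
import numpy as np

MASK_THRESHOLD = 0.5  # hypothetical value for the check
x = np.array([0.0, 0.3, 0.5, 0.51, 1.0])
print(np.ceil(x - MASK_THRESHOLD).astype(int))  # [0 0 0 1 1]
```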
Example #43
0
def main():

    # Placeholders
    learning_rate = tf.placeholder(tf.float32)
    feature_seq = tf.placeholder(
        tf.float32, [args.batch_size, args.max_seqlen, args.feature_size])
    labels = tf.placeholder(tf.float32, [args.batch_size, args.num_class])

    seq_len = tf.cast(
        tf.reduce_sum(tf.sign(tf.reduce_max(tf.abs(feature_seq), axis=2)),
                      axis=1), tf.int32)
    fseq = feature_seq[:, :tf.reduce_max(seq_len), :]

    sgn = tf.sign(tf.reduce_sum(tf.abs(fseq), keep_dims=True, axis=2))
    seq_len = tf.cast(
        tf.reduce_sum(tf.sign(tf.reduce_max(tf.abs(fseq), axis=2)), axis=1),
        tf.int32)
    k = tf.cast(tf.ceil(tf.cast(seq_len, tf.float32) / 8), tf.int32)

    # Model
    with tf.device('/gpu:0'):
        with tf.variable_scope('Fully_Connected'):
            fc_W = _variable_with_weight_decay(
                'fc_w', [args.feature_size, args.feature_size], 0.0005)
            fc_b = _variable_with_weight_decay('fc_b', [args.feature_size],
                                               0.0000)
            feature = tf.matmul(
                fseq, tf.tile(tf.expand_dims(fc_W, 0),
                              [args.batch_size, 1, 1])) + fc_b
            feature = tf.nn.relu(feature)
            feature = tf.nn.dropout(feature, 0.3)

        with tf.variable_scope('Attention') as an:
            atn_W = _variable_with_weight_decay(
                'atn_w', [args.feature_size, args.num_class], 0.0005)
            atn_b = _variable_with_weight_decay('atn_b', [args.num_class],
                                                0.0000)
            temporal_logits = tf.matmul(
                feature,
                tf.tile(tf.expand_dims(atn_W, 0),
                        [args.batch_size, 1, 1])) + atn_b

        # MILL
        logits = []
        for i in range(args.batch_size):
            tmp, _ = tf.nn.top_k(tf.transpose(
                temporal_logits[i, :seq_len[i], :], [1, 0]),
                                 k=k[i])
            logits.append(tf.reduce_mean(tf.transpose(tmp, [1, 0]), axis=0))
        logits = tf.stack(logits)
        mill = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                    logits=logits))
        tf.add_to_collection('losses', mill * args.Lambda)
        tf.summary.scalar('MILL', mill)

        # CASL
        tmp = tf.exp(temporal_logits) * sgn
        attention = tf.div(tmp, tf.reduce_sum(tmp, axis=1, keep_dims=True))
        attn_classwise_feat = tf.matmul(tf.transpose(feature, [0, 2, 1]),
                                        attention)
        norm_comp_attention = sgn * (1 - attention) / tf.cast(
            tf.expand_dims(tf.expand_dims(tf.maximum(seq_len - 1, 1), axis=1),
                           axis=1), tf.float32)
        comp_attn_classwise_feat = tf.matmul(tf.transpose(feature, [0, 2, 1]),
                                             norm_comp_attention)
        casl, n_tmp = 0., 0.
        for i in range(0, args.num_similar * 2, 2):
            f1 = attn_classwise_feat[i, :, :]
            f2 = attn_classwise_feat[i + 1, :, :]
            f3 = comp_attn_classwise_feat[i, :, :]
            f4 = comp_attn_classwise_feat[i + 1, :, :]
            d1 = 1 - tf.reduce_sum(
                f1 * f2, axis=0) / (tf.norm(f1, axis=0) * tf.norm(f2, axis=0))
            d2 = 1 - tf.reduce_sum(
                f1 * f4, axis=0) / (tf.norm(f1, axis=0) * tf.norm(f4, axis=0))
            d3 = 1 - tf.reduce_sum(
                f2 * f3, axis=0) / (tf.norm(f2, axis=0) * tf.norm(f3, axis=0))
            casl = casl + tf.reduce_sum(
                tf.maximum(0., d1 - d2 + 0.5) * 0.5 *
                tf.cast(tf.greater(labels[i, :], 0), tf.float32) *
                tf.cast(tf.greater(labels[i + 1, :], 0), tf.float32))
            casl = casl + tf.reduce_sum(
                tf.maximum(0., d1 - d3 + 0.5) * 0.5 *
                tf.cast(tf.greater(labels[i, :], 0), tf.float32) *
                tf.cast(tf.greater(labels[i + 1, :], 0), tf.float32))
            n_tmp = n_tmp + tf.reduce_sum(
                tf.cast(tf.greater(labels[i, :], 0), tf.float32) *
                tf.cast(tf.greater(labels[i + 1, :], 0), tf.float32))
        casl = casl / n_tmp
        tf.add_to_collection('losses', casl * (1 - args.Lambda))
        tf.summary.scalar('CASL', casl)

        total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
        tf.summary.scalar('Total Loss', total_loss)

        apply_gradient_op = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(total_loss)

    # Initialize tensorflow graph
    init = tf.global_variables_initializer()
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(init)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('./tensorboards/' + args.model_name,
                                         sess.graph)
    saver = tf.train.Saver(max_to_keep=200)

    # Start from scratch or load model
    if args.pretrained_ckpt is None:
        iter_num = 0
    else:
        iter_num = np.load('iter_num.npy')
        saver.restore(
            sess,
            tf.train.latest_checkpoint('./ckpt/' + args.pretrained_ckpt + '/'))

    # Initialize dataset
    dataset = Dataset(args)

    #Start training
    for i in range(iter_num, args.max_iter):

        # Train
        batch_feature_seq, batch_labels = dataset.load_data(
            n_similar=args.num_similar)
        batch_labels = batch_labels / np.sum(
            batch_labels, axis=1, keepdims=True)
        _, cost, sumry = sess.run(
            [apply_gradient_op, total_loss, merged],
            feed_dict={
                feature_seq: batch_feature_seq,
                labels: batch_labels,
                learning_rate: args.lr
            })
        train_writer.add_summary(sumry, i)

        print('Iteration: %d, Loss: %.5f' % (i, cost))

        if i % 500 == 0:
            #sumry = sess.run(merged, feed_dict={feature_seq: batch_feature_seq, labels:batch_labels, learning_rate: lr, keep_prob: None})
            #train_writer.add_summary(sumry, i)
            np.save('iter_num.npy', i)
            saver.save(sess,
                       './ckpt/' + args.model_name + '/model',
                       global_step=i)
            test(dataset, args, i)
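The MILL term above pools, for each class, the top `ceil(T / 8)` temporal logits into a single video-level logit before the softmax cross-entropy. A hedged NumPy restatement of that pooling (function name invented here):

```python
import numpy as np

def milled_logits(temporal_logits, seq_len):
    # Mean of the top ceil(T/8) temporal scores per class, as in the
    # tf.nn.top_k loop above.
    k = int(np.ceil(seq_len / 8.0))
    scores = temporal_logits[:seq_len]        # [T, num_class]
    topk = np.sort(scores, axis=0)[-k:]       # top-k along time, per class
    return topk.mean(axis=0)                  # [num_class]

print(milled_logits(np.random.randn(40, 20), seq_len=40).shape)  # (20,), with k = 5
```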
Example #44
0
    def refine_feature_op(self, points, feature_map, name):

        h, w = tf.cast(tf.shape(feature_map)[1],
                       tf.int32), tf.cast(tf.shape(feature_map)[2], tf.int32)

        xmin = tf.maximum(0.0, tf.floor(points[:, 0]))
        xmin = tf.minimum(tf.cast(w - 1, tf.float32), tf.ceil(xmin))

        ymin = tf.maximum(0.0, tf.floor(points[:, 1]))
        ymin = tf.minimum(tf.cast(h - 1, tf.float32), tf.ceil(ymin))

        xmax = tf.minimum(tf.cast(w - 1, tf.float32), tf.ceil(points[:, 0]))
        xmax = tf.maximum(0.0, tf.floor(xmax))

        ymax = tf.minimum(tf.cast(h - 1, tf.float32), tf.ceil(points[:, 1]))
        ymax = tf.maximum(0.0, tf.floor(ymax))

        left_top = tf.cast(tf.transpose(tf.stack([ymin, xmin], axis=0)),
                           tf.int32)
        right_bottom = tf.cast(tf.transpose(tf.stack([ymax, xmax], axis=0)),
                               tf.int32)
        left_bottom = tf.cast(tf.transpose(tf.stack([ymax, xmin], axis=0)),
                              tf.int32)
        right_top = tf.cast(tf.transpose(tf.stack([ymin, xmax], axis=0)),
                            tf.int32)

        feature_1x5 = slim.conv2d(
            inputs=feature_map,
            num_outputs=cfgs.FPN_CHANNEL,
            kernel_size=[1, 5],
            weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
            stride=1,
            activation_fn=None,
            scope='refine_1x5_{}'.format(name))

        feature5x1 = slim.conv2d(
            inputs=feature_1x5,
            num_outputs=cfgs.FPN_CHANNEL,
            kernel_size=[5, 1],
            weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
            stride=1,
            activation_fn=None,
            scope='refine_5x1_{}'.format(name))

        feature_1x1 = slim.conv2d(
            inputs=feature_map,
            num_outputs=cfgs.FPN_CHANNEL,
            kernel_size=[1, 1],
            weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
            stride=1,
            activation_fn=None,
            scope='refine_1x1_{}'.format(name))

        feature = feature5x1 + feature_1x1

        left_top_feature = tf.gather_nd(tf.squeeze(feature), left_top)
        right_bottom_feature = tf.gather_nd(tf.squeeze(feature), right_bottom)
        left_bottom_feature = tf.gather_nd(tf.squeeze(feature), left_bottom)
        right_top_feature = tf.gather_nd(tf.squeeze(feature), right_top)

        refine_feature = right_bottom_feature * tf.tile(
            tf.reshape((tf.abs((points[:, 0] - xmin) * (points[:, 1] - ymin))), [-1, 1]),
            [1, cfgs.FPN_CHANNEL]) \
                         + left_top_feature * tf.tile(
            tf.reshape((tf.abs((xmax - points[:, 0]) * (ymax - points[:, 1]))), [-1, 1]),
            [1, cfgs.FPN_CHANNEL]) \
                         + right_top_feature * tf.tile(
            tf.reshape((tf.abs((points[:, 0] - xmin) * (ymax - points[:, 1]))), [-1, 1]),
            [1, cfgs.FPN_CHANNEL]) \
                         + left_bottom_feature * tf.tile(
            tf.reshape((tf.abs((xmax - points[:, 0]) * (points[:, 1] - ymin))), [-1, 1]),
            [1, cfgs.FPN_CHANNEL])

        refine_feature = tf.reshape(
            refine_feature,
            [1,
             tf.cast(h, tf.int32),
             tf.cast(w, tf.int32), cfgs.FPN_CHANNEL])

        # refine_feature = tf.reshape(refine_feature, [1, tf.cast(feature_size[1], tf.int32),
        #                                              tf.cast(feature_size[0], tf.int32), 256])

        return refine_feature + feature
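The four `tf.gather_nd` terms above are the standard bilinear interpolation weights: each corner feature is scaled by the area of the rectangle opposite to it. A small NumPy sketch of the same weighting for a single point (names invented for illustration):

```python
import numpy as np

def bilinear(f_tl, f_tr, f_bl, f_br, x, y, xmin, ymin, xmax, ymax):
    # Corner weights mirror the tf.tile(tf.reshape(...)) factors above.
    w_br = abs((x - xmin) * (y - ymin))   # right_bottom
    w_tl = abs((xmax - x) * (ymax - y))   # left_top
    w_tr = abs((x - xmin) * (ymax - y))   # right_top
    w_bl = abs((xmax - x) * (y - ymin))   # left_bottom
    return f_br * w_br + f_tl * w_tl + f_tr * w_tr + f_bl * w_bl

print(bilinear(f_tl=1.0, f_tr=2.0, f_bl=3.0, f_br=4.0,
               x=10.25, y=20.25, xmin=10, ymin=20, xmax=11, ymax=21))  # 1.75
```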
Example #45
0
    def body1(self, num, object_num, loss, predict, labels, nilboy):
        """
    calculate loss
    Args:
      predict: 3-D tensor [cell_size, cell_size, 5 * boxes_per_cell]
      labels : [max_objects, 5]  (x_center, y_center, w, h, class)
    """
        label = labels[num:num + 1, :]
        label = tf.reshape(label, [-1])

        #calculate objects  tensor [CELL_SIZE, CELL_SIZE]
        min_x = (label[0] - label[2] / 2) / (self.image_size / self.cell_size)
        max_x = (label[0] + label[2] / 2) / (self.image_size / self.cell_size)

        min_y = (label[1] - label[3] / 2) / (self.image_size / self.cell_size)
        max_y = (label[1] + label[3] / 2) / (self.image_size / self.cell_size)

        min_x = tf.floor(min_x)
        min_y = tf.floor(min_y)

        max_x = tf.ceil(max_x)
        max_y = tf.ceil(max_y)

        temp = tf.cast(tf.stack([max_y - min_y, max_x - min_x]), dtype=tf.int32)
        objects = tf.ones(temp, tf.float32)

        temp = tf.cast(
            tf.stack(
                [min_y, self.cell_size - max_y, min_x,
                 self.cell_size - max_x]), tf.int32)
        temp = tf.reshape(temp, (2, 2))
        objects = tf.pad(objects, temp, "CONSTANT")

        #calculate objects  tensor [CELL_SIZE, CELL_SIZE]
        #calculate responsible tensor [CELL_SIZE, CELL_SIZE]
        center_x = label[0] / (self.image_size / self.cell_size)
        center_x = tf.floor(center_x)

        center_y = label[1] / (self.image_size / self.cell_size)
        center_y = tf.floor(center_y)

        response = tf.ones([1, 1], tf.float32)

        temp = tf.cast(
            tf.stack([
                center_y, self.cell_size - center_y - 1, center_x,
                self.cell_size - center_x - 1
            ]), tf.int32)
        temp = tf.reshape(temp, (2, 2))
        response = tf.pad(response, temp, "CONSTANT")
        #objects = response

        #calculate iou_predict_truth [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
        predict_boxes = predict[:, :, self.num_classes + self.boxes_per_cell:]

        predict_boxes = tf.reshape(
            predict_boxes,
            [self.cell_size, self.cell_size, self.boxes_per_cell, 4])

        predict_boxes = predict_boxes * [
            self.image_size / self.cell_size, self.image_size / self.cell_size,
            self.image_size, self.image_size
        ]

        base_boxes = np.zeros([self.cell_size, self.cell_size, 4])

        for y in range(self.cell_size):
            for x in range(self.cell_size):
                #nilboy
                base_boxes[y, x, :] = [
                    self.image_size / self.cell_size * x,
                    self.image_size / self.cell_size * y, 0, 0
                ]
        base_boxes = np.tile(
            np.resize(base_boxes, [self.cell_size, self.cell_size, 1, 4]),
            [1, 1, self.boxes_per_cell, 1])

        predict_boxes = base_boxes + predict_boxes

        iou_predict_truth = self.iou(predict_boxes, label[0:4])
        #calculate C [cell_size, cell_size, boxes_per_cell]
        C = iou_predict_truth * tf.reshape(response,
                                           [self.cell_size, self.cell_size, 1])

        #calculate I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
        I = iou_predict_truth * tf.reshape(response,
                                           (self.cell_size, self.cell_size, 1))

        max_I = tf.reduce_max(I, 2, keep_dims=True)

        I = tf.cast((I >= max_I), tf.float32) * tf.reshape(
            response, (self.cell_size, self.cell_size, 1))

        #calculate no_I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
        no_I = tf.ones_like(I, dtype=tf.float32) - I

        p_C = predict[:, :,
                      self.num_classes:self.num_classes + self.boxes_per_cell]

        #calculate truth x,y,sqrt_w,sqrt_h 0-D
        x = label[0]
        y = label[1]

        sqrt_w = tf.sqrt(tf.abs(label[2]))
        sqrt_h = tf.sqrt(tf.abs(label[3]))
        #sqrt_w = tf.abs(label[2])
        #sqrt_h = tf.abs(label[3])

        #calculate predict p_x, p_y, p_sqrt_w, p_sqrt_h 3-D [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
        p_x = predict_boxes[:, :, :, 0]
        p_y = predict_boxes[:, :, :, 1]

        #p_sqrt_w = tf.sqrt(tf.abs(predict_boxes[:, :, :, 2])) * ((tf.cast(predict_boxes[:, :, :, 2] > 0, tf.float32) * 2) - 1)
        #p_sqrt_h = tf.sqrt(tf.abs(predict_boxes[:, :, :, 3])) * ((tf.cast(predict_boxes[:, :, :, 3] > 0, tf.float32) * 2) - 1)
        #p_sqrt_w = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 2]))
        #p_sqrt_h = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 3]))
        #p_sqrt_w = predict_boxes[:, :, :, 2]
        #p_sqrt_h = predict_boxes[:, :, :, 3]
        p_sqrt_w = tf.sqrt(
            tf.minimum(self.image_size * 1.0,
                       tf.maximum(0.0, predict_boxes[:, :, :, 2])))
        p_sqrt_h = tf.sqrt(
            tf.minimum(self.image_size * 1.0,
                       tf.maximum(0.0, predict_boxes[:, :, :, 3])))
        #calculate truth p 1-D tensor [NUM_CLASSES]
        P = tf.one_hot(tf.cast(label[4], tf.int32),
                       self.num_classes,
                       dtype=tf.float32)

        #calculate predict p_P 3-D tensor [CELL_SIZE, CELL_SIZE, NUM_CLASSES]
        p_P = predict[:, :, 0:self.num_classes]

        #class_loss
        class_loss = tf.nn.l2_loss(
            tf.reshape(objects, (self.cell_size, self.cell_size, 1)) *
            (p_P - P)) * self.class_scale
        #class_loss = tf.nn.l2_loss(tf.reshape(response, (self.cell_size, self.cell_size, 1)) * (p_P - P)) * self.class_scale

        #object_loss
        object_loss = tf.nn.l2_loss(I * (p_C - C)) * self.object_scale
        #object_loss = tf.nn.l2_loss(I * (p_C - (C + 1.0)/2.0)) * self.object_scale

        #noobject_loss
        #noobject_loss = tf.nn.l2_loss(no_I * (p_C - C)) * self.noobject_scale
        noobject_loss = tf.nn.l2_loss(no_I * (p_C)) * self.noobject_scale

        #coord_loss
        coord_loss = (tf.nn.l2_loss(I * (p_x - x) /
                                    (self.image_size / self.cell_size)) +
                      tf.nn.l2_loss(I * (p_y - y) /
                                    (self.image_size / self.cell_size)) +
                      tf.nn.l2_loss(I *
                                    (p_sqrt_w - sqrt_w)) / self.image_size +
                      tf.nn.l2_loss(I * (p_sqrt_h - sqrt_h)) /
                      self.image_size) * self.coord_scale

        nilboy = I

        return num + 1, object_num, [
            loss[0] + class_loss, loss[1] + object_loss,
            loss[2] + noobject_loss, loss[3] + coord_loss
        ], predict, labels, nilboy
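A brief aside on the floor/ceil pair above: it converts a box given in pixel units into the range of grid cells it covers, and the tf.pad call then embeds that patch of ones into the full cell grid. The numpy sketch below reproduces the same arithmetic with hypothetical values for image_size, cell_size and the box; it is an illustration, not code from the repository.

import numpy as np

# Illustrative values only; image_size/cell_size/box are not taken from the source.
image_size, cell_size = 448.0, 7
x_c, y_c, w, h = 224.0, 224.0, 150.0, 100.0  # hypothetical box center/size in pixels

scale = image_size / cell_size
min_x, max_x = np.floor((x_c - w / 2) / scale), np.ceil((x_c + w / 2) / scale)
min_y, max_y = np.floor((y_c - h / 2) / scale), np.ceil((y_c + h / 2) / scale)

# Ones over the covered cells, zero elsewhere -- the dense equivalent of
# tf.ones(temp) followed by tf.pad(objects, temp, "CONSTANT").
objects = np.zeros((cell_size, cell_size), dtype=np.float32)
objects[int(min_y):int(max_y), int(min_x):int(max_x)] = 1.0
print(int(objects.sum()), "cells are marked as containing the object")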
Example #46
def cnn(model, config, scope, connect=None):
    with tf.variable_scope(scope), tf.name_scope(scope):
        with tf.variable_scope('inputs'), tf.name_scope('inputs'):
            sizes = {
                size: config.getint(scope, '%s_size' % size)
                for size in ['clength', 'cstep', 'plength', 'pstep']
            }
            if connect is None:
                model['%s_in0length' % scope] = config.getint(
                    'global', 'batch_size')
                model['%s_in1length' % scope] = config.getint(
                    'global', 'input_size')
                model['%s_in2length' % scope] = tf.placeholder(
                    tf.int32, [model['%s_in0length' % scope]],
                    '%s_in2length' % scope)
                model['%s_maxin2length' % scope] = config.getint(
                    'global', 'time_size')
                model['%s_inputs' % scope] = tf.placeholder(
                    tf.float32, [
                        model['%s_maxin2length' % scope],
                        model['%s_in0length' % scope],
                        model['%s_in1length' % scope]
                    ], '%s_inputs' % scope)
            else:
                model['%s_in0length' % scope] = model['%s_out0length' %
                                                      connect]
                model['%s_in1length' % scope] = model['%s_out1length' %
                                                      connect]
                model['%s_in2length' % scope] = model['%s_out2length' %
                                                      connect]
                model['%s_maxin2length' % scope] = model['%s_maxout2length' %
                                                         connect]
                model['%s_inputs' % scope] = model['%s_outputs' % connect]
            model['%s_transform' % scope] = tf.transpose(
                tf.reshape(model['%s_inputs' % scope], [
                    model['%s_maxin2length' % scope],
                    model['%s_in0length' % scope],
                    model['%s_in1length' % scope], 1
                ]), [1, 0, 2, 3], '%s_transform' % scope)
            model['%s_out0length' % scope] = model['%s_in0length' % scope]
            model['%s_out1length' % scope] = model['%s_in1length' % scope]
            model['%s_out2length' % scope] = model['%s_in2length' % scope]
            model['%s_maxout2length' % scope] = model['%s_maxin2length' %
                                                      scope]

        for _ in xrange(config.getint(scope, 'layer_size')):
            if _ == 0:
                model['%s_transform%i' % (scope, _)] = model['%s_transform' %
                                                             scope]
            else:
                model['%s_transform%i' % (scope, _)] = model['%s_pooling%i' %
                                                             (scope, _ - 1)]

            with tf.variable_scope('filter%i' % _), tf.name_scope('filter%s' %
                                                                  _):
                model['%s_filter%i' % (scope, _)] = tf.Variable(
                    tf.truncated_normal(
                        [sizes['clength'], sizes['clength'], 1, 1]))
                model['%s_stride%i' %
                      (scope, _)] = [1, sizes['cstep'], sizes['cstep'], 1]

            with tf.variable_scope('convolution%i' % _), tf.name_scope(
                    'convolution%i' % _):
                model['%s_convolution%i' % (scope, _)] = tf.nn.conv2d(
                    model['%s_transform%i' % (scope, _)],
                    model['%s_filter%i' % (scope, _)],
                    model['%s_stride%i' % (scope, _)], 'VALID')
                model['%s_out1length' % scope] = int(
                    math.ceil(
                        float(model['%s_out1length' % scope] -
                              sizes['clength'] + 1) / float(sizes['cstep'])))
                model['%s_out2length' % scope] = tf.to_int32(
                    tf.ceil(
                        tf.div(
                            tf.to_float(
                                tf.subtract(model['%s_out2length' % scope],
                                            sizes['clength'] - 1)),
                            tf.to_float(sizes['cstep']))))
                model['%s_maxout2length' % scope] = int(
                    math.ceil(
                        float(model['%s_maxout2length' % scope] -
                              sizes['clength'] + 1) / float(sizes['cstep'])))
                model['%s_pooling%i' % (scope, _)] = getattr(
                    tf.nn, '%s_pool' % config.get(scope, 'pool'))(
                        model['%s_convolution%i' % (scope, _)],
                        [1, sizes['plength'], sizes['plength'], 1],
                        [1, sizes['pstep'], sizes['pstep'], 1], 'VALID')
                model['%s_out1length' % scope] = int(
                    math.ceil(
                        float(model['%s_out1length' % scope] -
                              sizes['plength'] + 1) / float(sizes['pstep'])))
                model['%s_out2length' % scope] = tf.to_int32(
                    tf.ceil(
                        tf.div(
                            tf.to_float(
                                tf.subtract(model['%s_out2length' % scope],
                                            sizes['plength'] - 1)),
                            tf.to_float(sizes['pstep']))))
                model['%s_maxout2length' % scope] = int(
                    math.ceil(
                        float(model['%s_maxout2length' % scope] -
                              sizes['plength'] + 1) / float(sizes['pstep'])))

        with tf.variable_scope('outputs'), tf.name_scope('outputs'):
            model['%s_outputs' % scope] = tf.transpose(
                tf.squeeze(model['%s_pooling%i' % (scope, _)], [3],
                           '%s_outputs' % scope), [1, 0, 2])

    return model
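The length bookkeeping in cnn() applies the same rule twice per layer, once with math.ceil for the static sizes and once with tf.ceil for the dynamic per-example lengths: a VALID window of width k and stride s maps a length n to ceil((n - k + 1) / s). A small sketch with made-up sizes (not from any config file) shows the formula in isolation.

import math

def valid_out_len(n, k, s):
    # Output length of a VALID convolution/pooling window of width k and stride s.
    return int(math.ceil(float(n - k + 1) / float(s)))

# Hypothetical layer: 100 time steps, conv width 5 / stride 2, pool width 3 / stride 2.
length = 100
length = valid_out_len(length, 5, 2)  # 48 after the convolution
length = valid_out_len(length, 3, 2)  # 23 after the pooling
print(length)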
Example #47
def build_bow_cnn_custompool_from_options_dict(x, x_lengths, keep_prob,
                                               options_dict):
    cnn = blocks.build_cnn(x,
                           options_dict["input_shape"],
                           options_dict["filter_shapes"],
                           options_dict["pool_shapes"],
                           padding="VALID")

    # Create mask
    n_padded_after_cnn = cnn.get_shape().as_list()[-2]
    # def get_lengths_after_cnn():
    lengths_after_cnn = tf.cast(x_lengths, dtype=TF_DTYPE)
    for i_cnn_layer in xrange(len(options_dict["pool_shapes"])):

        lengths_after_cnn = tf.maximum(
            1.0, lengths_after_cnn -
            options_dict["filter_shapes"][i_cnn_layer][1] + 1)
        # assert False, "check this"

        if options_dict["pool_shapes"][i_cnn_layer] is not None:
            lengths_after_cnn = tf.ceil(
                lengths_after_cnn /
                options_dict["pool_shapes"][i_cnn_layer][1])
    # lengths_after_cnn = tf.cast(tf.minimum(float(n_padded_after_cnn), lengths_after_cnn), dtype=TF_ITYPE)
    mask = sequence_mask(lengths_after_cnn, n_padded_after_cnn)

    # Pooling
    with tf.variable_scope("pooling_final"):
        axis = 1
        if options_dict["pooling"] == "mean":
            assert cnn.get_shape().as_list()[axis] == 1
            cnn = tf.squeeze(cnn, [axis])
            frame_scores = cnn
            cnn = cnn * tf.cast(mask, dtype=TF_DTYPE)[:, :, None]
            cnn = tf.reduce_sum(cnn, reduction_indices=axis) / tf.cast(
                lengths_after_cnn, dtype=TF_DTYPE)[:, None]
            print "Average pool layer shape:", cnn.get_shape().as_list()
        elif options_dict["pooling"] == "max":
            assert cnn.get_shape().as_list()[axis] == 1
            cnn = tf.squeeze(cnn, [axis])
            frame_scores = cnn
            cnn = cnn * tf.cast(mask, dtype=TF_DTYPE)[:, :, None]
            cnn = tf.reduce_max(cnn, reduction_indices=axis)
            print "Max pool layer shape:", cnn.get_shape().as_list()
        elif options_dict["pooling"] == "logsumexp":
            assert "r" in options_dict
            assert cnn.get_shape().as_list()[axis] == 1

            # Logsumexp-trick to calculate logsumexp score
            cnn = tf.squeeze(cnn, [axis])
            # frame_scores = cnn
            add_mask = tf.select(
                mask, tf.zeros_like(mask, dtype=tf.float32),
                -np.inf * tf.ones_like(mask, dtype=tf.float32))
            frame_scores_masked = cnn + add_mask[:, :, None]
            max_vec = tf.reduce_max(options_dict["r"] * frame_scores_masked,
                                    reduction_indices=axis,
                                    keep_dims=True)
            sequence_score_logsumexp_trick = 1. / options_dict["r"] * (tf.log(
                1. / tf.cast(lengths_after_cnn[:, None, None], dtype=TF_DTYPE)
            ) + tf.log(
                tf.reduce_sum(
                    tf.exp(options_dict["r"] * frame_scores_masked - max_vec),
                    reduction_indices=axis,
                    keep_dims=True)) + max_vec)
            cnn = tf.squeeze(sequence_score_logsumexp_trick, [axis])
            print "Logsumexp pool layer shape:", cnn.get_shape().as_list()
        else:
            assert False

    # Fully-connected and output layers, if specified
    if "n_hiddens" in options_dict:
        cnn = blocks.build_feedforward(cnn,
                                       options_dict["n_hiddens"],
                                       keep_prob=keep_prob)
    if "d_out" in options_dict:
        with tf.variable_scope("ff_layer_final"):
            cnn = blocks.build_linear(cnn, options_dict["d_out"])
            print "Final linear layer shape:", cnn.get_shape().as_list()

    return cnn
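The loop over pool_shapes above keeps a per-example length alongside the padded CNN output so the final pooling can be masked correctly: each layer subtracts filter_width - 1 for the VALID convolution and then divides by the pool width with tf.ceil. The numpy sketch below walks the same update with invented filter and pool shapes.

import numpy as np

# Hypothetical lengths and shapes; only the update rule mirrors the code above.
lengths = np.array([50.0, 37.0, 12.0])      # per-example sequence lengths
filter_shapes = [(39, 9), (1, 8)]           # (height, width) per conv layer
pool_shapes = [(1, 3), None]                # pool width per layer, or no pooling

for f, p in zip(filter_shapes, pool_shapes):
    lengths = np.maximum(1.0, lengths - f[1] + 1)  # VALID conv shrinks by width - 1
    if p is not None:
        lengths = np.ceil(lengths / p[1])          # pooling divides and rounds up
print(lengths)                                     # [7. 3. 1.]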
Example #48
    def _inference(self):
        with tf.device('/cpu:0'):
            self.emb_sents = tf.nn.embedding_lookup(
                self.embeddings, self.sents)
            # Expand dimension to meet the input requirement of 2d-conv
            self.emb_expand = tf.expand_dims(self.emb_sents, -1)

        # Convolution network
        with tf.name_scope('cnn'):
            # After conv and pooling, the time dimension is reduced to ceil(max_length / max_pool_size)
            max_length = tf.reduce_max(self.sent_lengths)
            div_value = tf.div(tf.cast(max_length, tf.float32), self.paras.max_pool_size)
            reduced_size = tf.cast(tf.ceil(div_value), tf.int32) 
            pooled_concat = []
            for filter_size in self.paras.filter_sizes:
                with tf.name_scope('conv-pool-%s' % filter_size):
                    # Zero-pad so the conv output keeps the same length as the input
                    # shape is : [batch_size, sent_length, emb_size, channel]
                    num_prio = (filter_size - 1) // 2
                    num_post = (filter_size - 1) - num_prio
                    pad_prio = tf.concat([self.pad] * num_prio, 1)
                    pad_post = tf.concat([self.pad] * num_post, 1)
                    emb_pad = tf.concat([pad_prio, self.emb_expand, pad_post], 1)
                    # Prepare filter for conv
                    filter_ = tf.get_variable(
                        name = 'filter-%s' % filter_size,
                        shape = [filter_size, self.paras.embedding_size, 1, self.paras.num_filters])
                    # conv: [batch_size, sent_length, 1, num_filters]
                    conv = tf.nn.conv2d(
                        input = emb_pad,
                        filter = filter_,
                        strides = [1, 1, 1, 1],
                        padding = 'VALID',
                        name = 'conv')
                    # Bias
                    b = tf.get_variable(
                        name = 'bias-%s' % filter_size,
                        shape = [self.paras.num_filters])
                    h = tf.nn.relu(tf.nn.bias_add(conv, b))
                    # Max pooling over the outputs
                    pooled = tf.nn.max_pool(
                        value = h, 
                        ksize = [1, self.paras.max_pool_size, 1, 1],
                        strides = [1, self.paras.max_pool_size, 1, 1], 
                        padding ='SAME', 
                        name ='pool')
                    pooled = tf.reshape(pooled, [-1, reduced_size, self.paras.num_filters])
                    pooled_concat.append(pooled)
            # pooled_concat: (batch_size, reduced_size, filter_sizes * num_filters)
            self.pooled_concat = tf.concat(pooled_concat, 2)
            if self.mode == tf.contrib.learn.ModeKeys.TRAIN: 
                self.pooled_concat = tf.nn.dropout(self.pooled_concat, 1.0 - self.paras.cnn_dropout)

        # RNN network 
        with tf.name_scope('rnn'):
            cells_fw = model_helper.create_rnn_cell(
                'lstm', 
                self.paras.cell_num_units,
                self.paras.num_layers,
                self.paras.rnn_dropout,
                self.mode)
            cells_bw = model_helper.create_rnn_cell(
                'lstm', 
                self.paras.cell_num_units,
                self.paras.num_layers,
                self.paras.rnn_dropout,
                self.mode)
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
                cells_fw, 
                cells_bw,
                inputs = self.pooled_concat,
                dtype = tf.float32)
            # states_fw: (batch_size, reduced_size, cell_size)
            states_fw, states_bw = outputs 
            concat_states = tf.concat([states_fw, states_bw], axis = 2)
            # sent_states: (batch_size, 2 * cell_size)
            self.sent_states = tf.reduce_max(concat_states, axis = 1)

        with tf.name_scope('classify'):
            hidden1 = tf.contrib.layers.fully_connected(
                inputs = self.sent_states,
                num_outputs = 512)
            hidden2 = tf.contrib.layers.fully_connected(
                inputs = hidden1,
                num_outputs = 5)
            self.predicts = tf.reduce_max(tf.contrib.layers.fully_connected(
                inputs = hidden2,
                activation_fn = None,
                num_outputs = 1), axis = 1)
            self.mse = tf.reduce_mean(tf.cast(
                tf.squared_difference(
                    self.labels, 
                    tf.cast(tf.round(self.predicts), tf.int32)),
                tf.float32))

        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(self.labels, 
                tf.cast(tf.round(self.predicts), tf.int32))
            self.accuracy = tf.reduce_mean(tf.cast(
                correct_prediction, tf.float32))
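The reduced_size computed with tf.ceil above matches what 'SAME' max-pooling with stride equal to the pool size produces along the time axis, which is why the later tf.reshape is valid. A one-line sketch with hypothetical numbers:

import numpy as np

max_length, max_pool_size = 53, 4                       # hypothetical values
reduced_size = int(np.ceil(max_length / float(max_pool_size)))
print(reduced_size)                                     # 14 pooled positions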
Example #49
 def test_Ceil(self):
     t = tf.ceil(self.random(4, 3) - 0.5)
     self.check(t)
Example #50
def bernoulli_sample(x):
    """
    Return a tensor whose element y_i is turned "on"
    with probability x_i.
    """
    return tf.ceil(x - tf.random_uniform(tf.shape(x), minval=0, maxval=1))
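The trick here is that ceil(x - u) with u ~ Uniform(0, 1) equals 1 exactly when u < x, so each element fires independently with probability x. A numpy check of the rate (not part of the original code):

import numpy as np

x = np.full(100000, 0.3)                       # hypothetical "on" probabilities
u = np.random.uniform(0.0, 1.0, size=x.shape)
samples = np.ceil(x - u)                       # 1.0 iff u < x, else 0.0 (or -0.0)
print(samples.mean())                          # close to 0.3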
Example #51
        def __init__(self, session, num_actions, train_net):
            self.sess = session

            # Input
            self.x = tf.placeholder(name="state",
                                    dtype=tf.uint8,
                                    shape=(None, params.STATE_DIMENSIONS[0],
                                           params.STATE_DIMENSIONS[1],
                                           params.HISTORY_LEN))

            self.normalized_x = tf.cast(self.x, dtype=tf.float32) / 255.0

            with tf.variable_scope("common"):
                # Convolutional Layers
                self.conv_outputs = []
                for CONV_LAYER_SPEC in params.CONVOLUTIONAL_LAYERS_SPEC:
                    self.conv_outputs.append(
                        tf.layers.conv2d(
                            name="conv_layer_" +
                            str(len(self.conv_outputs) + 1),
                            inputs=self.normalized_x if len(self.conv_outputs)
                            == 0 else self.conv_outputs[-1],
                            filters=CONV_LAYER_SPEC["filters"],
                            kernel_size=CONV_LAYER_SPEC["kernel_size"],
                            strides=CONV_LAYER_SPEC["strides"],
                            activation=tf.nn.relu))

                # Flatten
                self.flattened_conv_output = tf.layers.flatten(
                    name="conv_output_flattener", inputs=self.conv_outputs[-1])

                # Hidden Layer
                self.dense_outputs = []
                for DENSE_LAYER_SPEC in params.DENSE_LAYERS_SPEC:
                    self.dense_outputs.append(
                        tf.layers.dense(name="dense_layer_" +
                                        str(len(self.dense_outputs) + 1),
                                        inputs=self.flattened_conv_output
                                        if len(self.dense_outputs) == 0 else
                                        self.dense_outputs[-1],
                                        units=DENSE_LAYER_SPEC,
                                        activation=tf.nn.relu))

                # State-Action-Value Distributions (as a flattened vector)
                self.flattened_q_dist = tf.layers.dense(
                    name="flattened_action_value_dist_logits",
                    inputs=self.dense_outputs[-1],
                    units=num_actions * params.NB_ATOMS)

                # Unflatten
                self.q_dist_logits = tf.reshape(
                    self.flattened_q_dist, [-1, num_actions, params.NB_ATOMS],
                    name="reshape_q_dist_logits")

                # Softmax State-Action-Value Distributions (per action)
                self.q_dist = tf.nn.softmax(self.q_dist_logits,
                                            name="action_value_dist",
                                            axis=-1)

                # Multiply bin probabilities by value
                self.delta_z = (params.V_MAX -
                                params.V_MIN) / (params.NB_ATOMS - 1)
                self.Z = tf.range(start=params.V_MIN,
                                  limit=params.V_MAX + self.delta_z,
                                  delta=self.delta_z)
                self.post_mul = self.q_dist * tf.reshape(
                    self.Z, [1, 1, params.NB_ATOMS])

                # Take sum to get the expected state-action values for each action
                self.actions = tf.reduce_sum(self.post_mul, axis=2)

                self.batch_size_range = tf.range(start=0,
                                                 limit=tf.shape(self.x)[0])

            if not train_net:
                self.targ_q_net_max = tf.summary.scalar(
                    "targ_q_net_max", tf.reduce_max(self.actions))
                self.targ_q_net_mean = tf.summary.scalar(
                    "targ_q_net_mean", tf.reduce_mean(self.actions))
                self.targ_q_net_min = tf.summary.scalar(
                    "targ_q_net_min", tf.reduce_min(self.actions))

                # Find argmax action given expected state-action values at next state
                self.argmax_action = tf.argmax(self.actions,
                                               axis=-1,
                                               output_type=tf.int32)

                # Get its corresponding distribution (this is the target distribution)
                self.argmax_action_distribution = tf.gather_nd(
                    self.q_dist,
                    tf.stack((self.batch_size_range, self.argmax_action),
                             axis=1))  # Axis = 1 => [N, 2]

                self.mean_argmax_next_state_value = tf.summary.scalar(
                    "mean_argmax_q_target",
                    tf.reduce_mean(self.Z * self.argmax_action_distribution))

                # Placeholder for reward
                self.r = tf.placeholder(name="reward",
                                        dtype=tf.float32,
                                        shape=(None, ))
                self.t = tf.placeholder(name="terminal",
                                        dtype=tf.uint8,
                                        shape=(None, ))

                # Compute Tz (Bellman Operator) on atom of expected state-action-value
                # r + gamma * z clipped to [V_min, V_max]
                self.Tz = tf.clip_by_value(
                    tf.reshape(self.r, [-1, 1]) + 0.99 *
                    tf.cast(tf.reshape(self.t, [-1, 1]), tf.float32) * self.Z,
                    clip_value_min=params.V_MIN,
                    clip_value_max=params.V_MAX)

                # Compute bin number (will be floating point).
                self.b = (self.Tz - params.V_MIN) / self.delta_z

                # Lower and Upper Bins.
                self.l = tf.floor(self.b)
                self.u = tf.ceil(self.b)

                # Split each atom's probability between the lower and upper bins
                # according to its distance from the fractional bin index b
                # (e.g. for b = 0.3, bin 0 gets p(Z = z_b) * 0.7 and bin 1 gets
                # p(Z = z_b) * 0.3).
                self.indexable_l = tf.stack(
                    (
                        tf.reshape(self.batch_size_range, [-1, 1]) * tf.ones(
                            (1, params.NB_ATOMS), dtype=tf.int32),
                        # BATCH_SIZE_RANGE x NB_ATOMS [[0, ...], [1, ...], ...]
                        tf.cast(self.l, dtype=tf.int32)),
                    axis=-1)
                self.m_l_vals = self.argmax_action_distribution * (self.u -
                                                                   self.b)
                self.m_l = tf.scatter_nd(tf.reshape(self.indexable_l, [-1, 2]),
                                         tf.reshape(self.m_l_vals, [-1]),
                                         tf.shape(self.l))

                # Add weight to the upper bin based on its distance from the
                # lower bin.
                self.indexable_u = tf.stack(
                    (
                        tf.reshape(self.batch_size_range, [-1, 1]) * tf.ones(
                            (1, params.NB_ATOMS), dtype=tf.int32),
                        # BATCH_SIZE_RANGE x NB_ATOMS [[0, ...], [1, ...], ...]
                        tf.cast(self.u, dtype=tf.int32)),
                    axis=-1)
                self.m_u_vals = self.argmax_action_distribution * (self.b -
                                                                   self.l)
                self.m_u = tf.scatter_nd(tf.reshape(self.indexable_u, [-1, 2]),
                                         tf.reshape(self.m_u_vals, [-1]),
                                         tf.shape(self.u))

                # Add Contributions of both upper and lower parts and
                # stop gradient to not update the target network.
                self.m = tf.stop_gradient(tf.squeeze(self.m_l + self.m_u))

                self.weighted_m = tf.clip_by_value(self.m * self.Z,
                                                   clip_value_min=params.V_MIN,
                                                   clip_value_max=params.V_MAX)

                self.weighted_m_mean = tf.summary.scalar(
                    "mean_q_target", tf.reduce_mean(self.weighted_m))

                self.targ_dist = tf.summary.histogram("target_distribution",
                                                      self.weighted_m)

                self.targn_summary = tf.summary.merge([
                    self.targ_dist, self.weighted_m_mean, self.targ_q_net_max,
                    self.targ_q_net_mean, self.targ_q_net_min,
                    self.mean_argmax_next_state_value
                ])
            else:
                self.trn_q_net_max = tf.summary.scalar(
                    "trn_q_net_max", tf.reduce_max(self.actions))
                self.trn_q_net_mean = tf.summary.scalar(
                    "trn_q_net_mean", tf.reduce_mean(self.actions))
                self.trn_q_net_min = tf.summary.scalar(
                    "trn_q_net_min", tf.reduce_min(self.actions))

                # Given you took this action.
                self.action_placeholder = tf.placeholder(name="action",
                                                         dtype=tf.int32,
                                                         shape=[
                                                             None,
                                                         ])

                # Compute Q-Dist. for the action.
                self.action_q_dist = tf.gather_nd(
                    self.q_dist,
                    tf.stack((self.batch_size_range, self.action_placeholder),
                             axis=1))

                self.weighted_q_dist = tf.clip_by_value(
                    self.action_q_dist * self.Z,
                    clip_value_min=params.V_MIN,
                    clip_value_max=params.V_MAX)

                tnd_summary = tf.summary.histogram("training_net_distribution",
                                                   self.weighted_q_dist)

                tnd_mean_summary = tf.summary.scalar(
                    "training_net_distribution_mean",
                    tf.reduce_mean(self.weighted_q_dist))

                # Get target distribution.
                self.m_placeholder = tf.placeholder(dtype=tf.float32,
                                                    shape=(None,
                                                           params.NB_ATOMS),
                                                    name="m_placeholder")
                self.loss_sum = -tf.reduce_sum(
                    self.m_placeholder * tf.log(self.action_q_dist + 1e-5),
                    axis=-1)

                self.loss = tf.reduce_mean(self.loss_sum)

                l_summary = tf.summary.scalar("loss", self.loss)

                self.optimizer = tf.train.AdamOptimizer(
                    learning_rate=params.LEARNING_RATE,
                    epsilon=params.EPSILON_ADAM)
                gradients, variables = zip(
                    *self.optimizer.compute_gradients(self.loss))
                grad_norm_summary = tf.summary.histogram(
                    "grad_norm", tf.global_norm(gradients))
                gradients, _ = tf.clip_by_global_norm(gradients,
                                                      params.GRAD_NORM_CLIP)
                self.train_step = self.optimizer.apply_gradients(
                    zip(gradients, variables))
                self.trnn_summary = tf.summary.merge([
                    tnd_mean_summary, tnd_summary, l_summary,
                    grad_norm_summary, self.trn_q_net_max, self.trn_q_net_mean,
                    self.trn_q_net_min
                ])
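The floor/ceil pair in this class implements the usual categorical projection from distributional RL: each back-projected atom Tz lands at a fractional bin index b, and its probability is split between floor(b) and ceil(b) in proportion to (u - b) and (b - l). The numpy sketch below runs the projection for a single transition with hypothetical support parameters; note that, as in the graph code, atoms for which b is already an integer contribute nothing to either bin.

import numpy as np

# Hypothetical support and transition; only the projection mirrors the code above.
V_MIN, V_MAX, NB_ATOMS = -10.0, 10.0, 51
delta_z = (V_MAX - V_MIN) / (NB_ATOMS - 1)
Z = np.linspace(V_MIN, V_MAX, NB_ATOMS)

p = np.full(NB_ATOMS, 1.0 / NB_ATOMS)          # predicted distribution for the argmax action
r, gamma, non_terminal = 1.0, 0.99, 1.0

Tz = np.clip(r + gamma * non_terminal * Z, V_MIN, V_MAX)
b = (Tz - V_MIN) / delta_z                     # fractional bin indices
l, u = np.floor(b).astype(int), np.ceil(b).astype(int)

m = np.zeros(NB_ATOMS)
np.add.at(m, l, p * (u - b))                   # weight assigned to the lower bin
np.add.at(m, u, p * (b - l))                   # weight assigned to the upper bin
print(m.sum())                                 # mass is lost only for atoms where l == u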
Example #52
def D_logistic_r1(G,
                  D,
                  opt,
                  training_set,
                  minibatch_size,
                  reals,
                  labels,
                  gamma=10.0):
    rotation_offset = 108

    _ = opt, training_set
    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fake_images_out = G.get_output_for(latents, labels, is_training=True)
    real_scores_out = D.get_output_for(reals, labels, is_training=True)
    fake_scores_out = D.get_output_for(fake_images_out,
                                       labels,
                                       is_training=True)

    fake_scores_out_without_rotation = tf.concat([
        fake_scores_out[:, :rotation_offset],
        fake_scores_out[:, rotation_offset + 2:]
    ],
                                                 axis=-1)

    real_scores_out_without_rotation = tf.concat([
        real_scores_out[:, :rotation_offset],
        real_scores_out[:, rotation_offset + 2:]
    ],
                                                 axis=-1)

    labels_rotation = labels[:, rotation_offset:rotation_offset + 2]
    real_rotations = real_scores_out[:, rotation_offset:rotation_offset + 2]

    fake_scores_out_sum = tf.reduce_sum(fake_scores_out_without_rotation,
                                        axis=1,
                                        keepdims=True)
    real_scores_out_sum = tf.reduce_sum(real_scores_out_without_rotation,
                                        axis=1,
                                        keepdims=True)

    rotation_distance = tf.norm(labels_rotation - real_rotations,
                                axis=-1,
                                keepdims=True)
    rotation_distance = rotation_distance * tf.reduce_max(
        tf.ceil(tf.abs(labels_rotation)),
        axis=-1)  # remove non set rotation labels

    real_scores_out_sum = autosummary('Loss/scores/real', real_scores_out_sum)
    fake_scores_out_sum = autosummary('Loss/scores/fake', fake_scores_out_sum)
    rotation_distance = autosummary('Loss/rotation_distance/real',
                                    rotation_distance)

    loss = tf.nn.softplus(
        fake_scores_out_sum)  # -log(1-sigmoid(fake_scores_out))
    loss += tf.nn.softplus(
        -real_scores_out_sum)  # -log(sigmoid(real_scores_out))
    loss = autosummary('Loss/discriminator_sum', loss)

    loss += tf.square(rotation_distance) * 10

    with tf.name_scope('GradientPenalty'):
        real_grads = tf.gradients(tf.reduce_sum(real_scores_out), [reals])[0]
        gradient_penalty = tf.reduce_sum(tf.square(real_grads), axis=[1, 2, 3])
        gradient_penalty = autosummary('Loss/gradient_penalty',
                                       gradient_penalty)
        reg = gradient_penalty * (gamma * 0.5)
    return loss, reg
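The "remove non set rotation labels" line relies on ceil(|v|) being 1 for any non-zero component and 0 for an exactly-zero label, so the row-wise maximum acts as a has-rotation-label mask. A tiny numpy illustration with made-up labels:

import numpy as np

labels_rotation = np.array([[0.7071, 0.7071],   # labelled rotation
                            [0.0,    0.0],      # rotation label not set
                            [-1.0,   0.0]])     # labelled rotation
mask = np.max(np.ceil(np.abs(labels_rotation)), axis=-1)
print(mask)                                     # [1. 0. 1.]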
Example #53
    def build(self, x):
        """Run the backprop version of the Circuit."""
        self.prepare_tensors()

        # Calculate l2 hidden state size
        x_shape = tf.cast(tf.shape(x), tf.float32)
        if self.include_pooling and len(self.intermediate_ff):
            # pooling_factor = (len(
            #     self.intermediate_ff)) * np.sum(self.pool_strides)
            array_pooling_factor = float(self.pool_strides[0]**len(
                self.intermediate_ff))
            pooling_factor = tf.constant(array_pooling_factor,
                                         dtype=tf.float32)
            l2_shape = tf.stack([
                x_shape[0],
                tf.ceil(x_shape[1] / pooling_factor),
                tf.ceil(x_shape[2] / pooling_factor),
                self.hgru_ids[1].values()[0]
            ])
        else:
            l2_shape = tf.identity(x_shape)
            self.pooling_factor = 1
            array_pooling_factor = 1
        x_shape = tf.cast(x_shape, tf.int32)
        l2_shape = tf.cast(l2_shape, tf.int32)
        np_xsh = np.array(x.get_shape().as_list()).astype(float)
        np_xsh[1:3] /= array_pooling_factor

        if len(self.hgru_ids) > 1:
            np_xsh[-1] = self.hgru_ids[1].values()[0]
            print '*' * 20
            print 'fgru embedding shape is: '
            print np_xsh
            print '*' * 20
        else:
            print '*' * 20
            print 'Horizontal only: '

        # Initialize hidden layer activities
        if self.hidden_init == 'identity':
            l1_h2 = tf.identity(x, dtype=self.dtype)
            l2_h2 = tf.zeros(l2_shape, dtype=self.dtype)
            fb_act_1 = tf.identity(x)
        elif self.hidden_init == 'random':
            l1_h2 = tf.random_normal(x_shape, dtype=self.dtype)
            l2_h2 = tf.random_normal(l2_shape, dtype=self.dtype)
            fb_act_1 = tf.random_normal(x_shape, dtype=self.dtype)
        elif self.hidden_init == 'zeros':
            l1_h2 = tf.zeros(x_shape, dtype=self.dtype)
            l2_h2 = tf.zeros(l2_shape, dtype=self.dtype)
            fb_act_1 = tf.zeros(x_shape, dtype=self.dtype)
        else:
            raise RuntimeError

        # While loop
        if self.while_loop:
            i0 = tf.constant(0)
            elems = [i0, x, l1_h2, l2_h2, fb_act_1]
            returned = tf.while_loop(self.condition,
                                     self.full,
                                     loop_vars=elems,
                                     back_prop=True,
                                     swap_memory=False)

            # Prepare output
            i0, x, l1_h2, l2_h2, fb_act_1 = returned

        else:
            i0 = 0
            for idx in range(self.timesteps):
                i0, x, l1_h2, l2_h2, fb_act_1 = self.full(i0=i0,
                                                          x=x,
                                                          l1_h2=l1_h2,
                                                          l2_h2=l2_h2,
                                                          fb_act_1=fb_act_1)
        if self.readout == 'fb':
            return fb_act_1
        else:
            raise NotImplementedError('Select an hGRU layer to readout from.')
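The l2_shape computation above shrinks the spatial dimensions by pool_strides[0] ** len(intermediate_ff) and rounds up with tf.ceil so odd sizes still produce a valid hidden-state shape. A numpy sketch with invented sizes (the channel count stands in for self.hgru_ids[1].values()[0]):

import numpy as np

x_shape = np.array([4.0, 150.0, 150.0, 24.0])   # hypothetical NHWC input shape
pool_stride, n_intermediate, l2_channels = 2, 2, 48
pooling_factor = float(pool_stride ** n_intermediate)   # 4.0

l2_shape = np.array([x_shape[0],
                     np.ceil(x_shape[1] / pooling_factor),
                     np.ceil(x_shape[2] / pooling_factor),
                     l2_channels]).astype(np.int32)
print(l2_shape)                                 # [ 4 38 38 48]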
Example #54
def G_logistic_ns_pathreg(G,
                          D,
                          opt,
                          training_set,
                          minibatch_size,
                          pl_minibatch_shrink=2,
                          pl_decay=0.01,
                          pl_weight=2.0):
    _ = opt
    rotation_offset = 108
    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    labels = training_set.get_random_labels_tf(minibatch_size)

    all_rotations = tf.constant(
        [[1.0, 0.0], [0.7071, 0.7071], [0.0, 1.0], [-0.7071, 0.7071],
         [-1.0, 0.0], [-0.7071, -0.7071], [0.0, -1.0], [0.7071, -0.7071]],
        dtype=tf.float32)

    indices = tf.cast(tf.floor(
        tf.random_uniform(shape=[minibatch_size], minval=0, maxval=8)),
                      dtype=tf.int32)
    random_rotation = tf.gather(all_rotations, indices)

    labels = tf.concat([
        labels[:, :rotation_offset], random_rotation,
        labels[:, rotation_offset + 2:]
    ],
                       axis=1)

    fake_images_out, fake_dlatents_out = G.get_output_for(latents,
                                                          labels,
                                                          is_training=True,
                                                          return_dlatents=True)
    fake_scores_out = D.get_output_for(fake_images_out,
                                       labels,
                                       is_training=True)

    fake_scores_out_without_rotation = tf.concat([
        fake_scores_out[:, :rotation_offset],
        fake_scores_out[:, rotation_offset + 2:]
    ],
                                                 axis=-1)

    labels_rotation = labels[:, rotation_offset:rotation_offset + 2]

    disc_pred_rotations = fake_scores_out[:,
                                          rotation_offset:rotation_offset + 2]

    loss = tf.nn.softplus(
        -tf.reduce_sum(fake_scores_out_without_rotation, axis=1,
                       keepdims=True))  # -log(1-sigmoid(fake_scores_out))
    loss = autosummary('Loss/generator', loss)

    rotation_distance = tf.norm(labels_rotation - disc_pred_rotations,
                                axis=-1,
                                keepdims=True)

    rotation_distance = rotation_distance * tf.reduce_max(
        tf.ceil(tf.abs(labels_rotation)),
        axis=-1)  # remove non set rotation labels
    rotation_distance = autosummary('Loss/rotation_distance/generator',
                                    rotation_distance)
    loss += tf.square(rotation_distance) * 10

    # Path length regularization.
    with tf.name_scope('PathReg'):
        # Evaluate the regularization term using a smaller minibatch to conserve memory.
        if pl_minibatch_shrink > 1:
            pl_minibatch = minibatch_size // pl_minibatch_shrink
            pl_latents = tf.random_normal([pl_minibatch] +
                                          G.input_shapes[0][1:])
            pl_labels = training_set.get_random_labels_tf(pl_minibatch)
            fake_images_out, fake_dlatents_out = G.get_output_for(
                pl_latents, pl_labels, is_training=True, return_dlatents=True)

        # Compute |J*y|.
        pl_noise = tf.random_normal(tf.shape(fake_images_out)) / np.sqrt(
            np.prod(G.output_shape[2:]))
        pl_grads = tf.gradients(tf.reduce_sum(fake_images_out * pl_noise),
                                [fake_dlatents_out])[0]
        pl_lengths = tf.sqrt(
            tf.reduce_mean(tf.reduce_sum(tf.square(pl_grads), axis=2), axis=1))
        pl_lengths = autosummary('Loss/pl_lengths', pl_lengths)

        # Track exponential moving average of |J*y|.
        with tf.control_dependencies(None):
            pl_mean_var = tf.Variable(name='pl_mean',
                                      trainable=False,
                                      initial_value=0.0,
                                      dtype=tf.float32)
        pl_mean = pl_mean_var + pl_decay * (tf.reduce_mean(pl_lengths) -
                                            pl_mean_var)
        pl_update = tf.assign(pl_mean_var, pl_mean)

        # Calculate (|J*y|-a)^2.
        with tf.control_dependencies([pl_update]):
            pl_penalty = tf.square(pl_lengths - pl_mean)
            pl_penalty = autosummary('Loss/pl_penalty', pl_penalty)

        # Apply weight.
        #
        # Note: The division in pl_noise decreases the weight by num_pixels, and the reduce_mean
        # in pl_lengths decreases it by num_affine_layers. The effective weight then becomes:
        #
        # gamma_pl = pl_weight / num_pixels / num_affine_layers
        # = 2 / (r^2) / (log2(r) * 2 - 2)
        # = 1 / (r^2 * (log2(r) - 1))
        # = ln(2) / (r^2 * (ln(r) - ln(2))
        #
        reg = pl_penalty * pl_weight

    return loss, reg
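The comment block above gives the effective path-length weight in several algebraic forms; the sketch below simply evaluates them for a hypothetical 1024x1024 generator to confirm they agree (r and pl_weight are assumptions, not values from the training config).

import math

r, pl_weight = 1024, 2.0                         # hypothetical resolution and weight
num_pixels = r * r
num_affine_layers = int(math.log2(r)) * 2 - 2

gamma_pl = pl_weight / num_pixels / num_affine_layers
closed_form = 1.0 / (r ** 2 * (math.log2(r) - 1))
print(gamma_pl, closed_form)                     # the two expressions match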
Example #55
def model_fn(features, labels, mode, params):
    """
    This is a function for creating a computational tensorflow graph.
    The function is in the format required by tf.estimator.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    def backbone(images, is_training):
        return mobilenet_v1(images,
                            is_training,
                            depth_multiplier=params['depth_multiplier'])

    subnet = KeypointSubnet(features['images'], is_training, backbone, params)

    if not is_training:
        predictions = subnet.get_predictions()

    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = tf.estimator.export.PredictOutput({
            name: tf.identity(tensor, name)
            for name, tensor in predictions.items()
        })
        return tf.estimator.EstimatorSpec(
            mode,
            predictions=predictions,
            export_outputs={'outputs': export_outputs})

    # add l2 regularization
    with tf.name_scope('weight_decay'):
        add_weight_decay(params['weight_decay'])
        regularization_loss = tf.losses.get_regularization_loss()
        tf.summary.scalar('regularization_loss', regularization_loss)

    with tf.name_scope('losses'):

        batch_size = tf.shape(labels['heatmaps'])[0]
        normalizer = tf.to_float(batch_size)

        heatmaps = labels['heatmaps']
        segmentation_masks = tf.expand_dims(labels['segmentation_masks'], 3)
        loss_masks = tf.expand_dims(labels['loss_masks'], 3)

        heatmaps = tf.concat([heatmaps, segmentation_masks], axis=3)
        losses = {
            'regression_loss': (1.0 / normalizer) *
            tf.nn.l2_loss(loss_masks * (subnet.heatmaps - heatmaps))
        }

        for level in range(2, 6):
            p = subnet.enriched_features['p' + str(level)]
            f = tf.expand_dims(p[:, :, :, 0], 3)
            losses['segmentation_loss_at_level_' + str(level)] = (
                2.0 / normalizer) * tf.nn.l2_loss(f - segmentation_masks)
            shape = tf.shape(segmentation_masks)
            height, width = shape[1], shape[2]
            new_size = [
                tf.to_int32(tf.ceil(height / 2)),
                tf.to_int32(tf.ceil(width / 2))
            ]
            segmentation_masks = tf.image.resize_images(segmentation_masks,
                                                        new_size,
                                                        align_corners=True)

    for n, v in losses.items():
        tf.losses.add_loss(v)
        tf.summary.scalar(n, v)

    total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

    with tf.name_scope('eval_metrics'):
        h = tf.shape(heatmaps)[1]
        w = tf.shape(heatmaps)[2]
        area = tf.to_float(h * w)
        per_pixel_reg_loss = tf.nn.l2_loss(
            loss_masks * (subnet.heatmaps - heatmaps)) / (normalizer * area)
        tf.summary.scalar('per_pixel_reg_loss', per_pixel_reg_loss)

    if mode == tf.estimator.ModeKeys.EVAL:

        eval_metric_ops = {
            'eval_regression_loss':
            tf.metrics.mean(losses['regression_loss']),
            'eval_per_pixel_reg_loss':
            tf.metrics.mean(per_pixel_reg_loss),
            'eval_segmentation_loss_at_level_2':
            tf.metrics.mean(losses['segmentation_loss_at_level_2'])
        }

        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          eval_metric_ops=eval_metric_ops)

    assert mode == tf.estimator.ModeKeys.TRAIN
    with tf.variable_scope('learning_rate'):
        global_step = tf.train.get_global_step()
        learning_rate = tf.train.piecewise_constant(global_step,
                                                    params['lr_boundaries'],
                                                    params['lr_values'])
        tf.summary.scalar('learning_rate', learning_rate)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops), tf.variable_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        grads_and_vars = optimizer.compute_gradients(total_loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step)

    for g, v in grads_and_vars:
        tf.summary.histogram(v.name[:-2] + '_hist', v)
        tf.summary.histogram(v.name[:-2] + '_grad_hist', g)

    with tf.control_dependencies([train_op]), tf.name_scope('ema'):
        ema = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY,
                                                num_updates=global_step)
        train_op = ema.apply(tf.trainable_variables())

    return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op)
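Inside the losses loop the segmentation mask is reused at FPN levels p2..p5 by halving its height and width with tf.ceil before each next level. The loop below prints the resulting size schedule for a hypothetical 193x257 mask; it mirrors the arithmetic only.

import numpy as np

height, width = 193, 257                         # hypothetical mask size at level 2
for level in range(2, 6):
    print('p%d uses a %dx%d mask' % (level, height, width))
    height = int(np.ceil(height / 2.0))          # size fed to the next level
    width = int(np.ceil(width / 2.0))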
Example #56
    def build(self, depth_input, boxes, box_indices, ref_depth_min,
              ref_depth_max, n_split, mask_size, img_size,
              mask_quantile_level):
        """
        Mask the feature if the median of the depth map inside the bounding box
        is less than ref_depth_min or larger than ref_depth_max.
        depth_input: H * W
        boxes: [num_boxes, 4]
        n_split: int that divides H and W
        ref_depth_min, ref_depth_max: [num_boxes] reference depth bounds
        """
        with tf.variable_scope("occ_mask"):
            self.n_split = n_split
            #self.n_batch = tf.cast(boxes.shape[0],tf.int32)
            self.n_batch = tf.shape(boxes)[0]
            sub_box = self.slice_box_gen(boxes)
            #duplicate the reference depth
            ref_depth_min = tf.expand_dims(ref_depth_min, -1)
            ref_depth_min_dup = tf.tile(ref_depth_min, [1, self.n_split**2])
            ref_depth_min_dup = tf.reshape(ref_depth_min_dup,
                                           [self.n_batch * self.n_split**2, 1],
                                           name='duplicated_depth_min')

            ref_depth_max = tf.expand_dims(ref_depth_max, -1)
            ref_depth_max_dup = tf.tile(ref_depth_max, [1, self.n_split**2])
            ref_depth_max_dup = tf.reshape(ref_depth_max_dup,
                                           [self.n_batch * self.n_split**2, 1],
                                           name='duplicated_depth_max')

            #duplicate the reference depth
            box_indices = tf.expand_dims(box_indices, -1)
            box_indices_dup = tf.tile(box_indices, [1, self.n_split**2])
            box_indices_dup = tf.reshape(box_indices_dup,
                                         [self.n_batch * self.n_split**2],
                                         name='duplicated_box_indices')

            # must use nearest neighbour method
            depth_size = mask_size[0] * mask_size[1]
            crop_depth = tf.image.crop_and_resize(depth_input,
                                                  sub_box,
                                                  box_indices_dup,
                                                  mask_size,
                                                  method='nearest')
            crop_depth = tf.reshape(
                crop_depth, [self.n_batch * self.n_split**2, depth_size])
            #map_params = (crop_depth,ref_depth_dup)

            method = 'median'
            #if method == 'median':
            #medidan
            # avoid empty case
            fill_in = tf.tile([[0.1, 100.0]],
                              [self.n_batch * self.n_split**2, 1])
            crop_depth = tf.concat([crop_depth, fill_in], axis=1)

            num_nonzero = tf.count_nonzero(crop_depth, axis=1)
            # ignore zero depths and take the requested quantile of the rest
            quantile_idx = tf.ceil(depth_size -
                                   tf.cast(num_nonzero, dtype=tf.float32) *
                                   mask_quantile_level)
            quantile_idx = tf.cast(quantile_idx, dtype=tf.int32)
            quantile_idx = tf.expand_dims(quantile_idx, -1)
            batch_range = tf.expand_dims(
                tf.range(0, self.n_batch * self.n_split**2), -1)
            cat_idx = tf.concat([batch_range, quantile_idx], axis=1)

            sorted_crop_depth = tf.contrib.framework.sort(crop_depth, axis=1)
            depth_val = tf.gather_nd(sorted_crop_depth, cat_idx)
            """
            f(x) = 1  if x_min <= x <= x_max
                   0  otherwise

            f(x) = g(x, x_min) + g(x_max, x) - 1

            where g(x, y) = 1   if x >= y
                            0   otherwise
            """
            occ = self.step_f(depth_val, tf.squeeze(
                ref_depth_min_dup, 1)) + self.step_f(
                    tf.squeeze(ref_depth_max_dup, 1), depth_val) - 1
            dep_zero = tf.cast(tf.less(depth_val, 0.2), dtype=tf.float32)
            occ += dep_zero

            occ = tf.expand_dims(occ, 0)
            occ = tf.reshape(occ, [self.n_batch, self.n_split * self.n_split],
                             name='occ_mask_base')
            num_masks = tf.count_nonzero(occ, axis=1)
            mask_weights = (tf.cast(num_masks, tf.float32) +
                            0.01) / (self.n_split * self.n_split)
            occ = occ / tf.expand_dims(mask_weights, -1)
            occ = tf.reshape(occ, [self.n_batch, self.n_split, self.n_split],
                             name='occ_mask_base')
            occ = tf.expand_dims(occ, -1)
            occ_mask = tf.image.resize_nearest_neighbor(occ,
                                                        img_size,
                                                        name='occ_mask')
            #occ_mask = tf.squeeze(occ_mask,axis=-1,name='occ_mask')
            return occ_mask
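Because zeros sort to the front, the index ceil(depth_size - num_nonzero * mask_quantile_level) picks the requested quantile of the non-zero depths from the sorted row. The numpy sketch below reproduces that arithmetic for one hypothetical patch (without the [0.1, 100.0] fill-in the graph version concatenates to guard against empty patches).

import numpy as np

crop_depth = np.array([0.0, 0.0, 3.2, 1.5, 2.8, 0.0, 4.1, 2.1])  # hypothetical patch
depth_size = crop_depth.size
mask_quantile_level = 0.5

num_nonzero = np.count_nonzero(crop_depth)
quantile_idx = int(np.ceil(depth_size - num_nonzero * mask_quantile_level))
depth_val = np.sort(crop_depth)[quantile_idx]    # zeros occupy the low indices
print(depth_val)                                 # 3.2 for these values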
Example #57
  def body1(self, num, object_num, loss, predict, labels, nilboy):
    """
    calculate loss
    Args:
      predict: 3-D tensor [cell_size, cell_size, 5 * boxes_per_cell]
      labels : [max_objects, 5]  (x_center, y_center, w, h, class)
    """
    label = labels[num:num+1, :]
    label = tf.reshape(label, [-1])

    #calculate objects  tensor [CELL_SIZE, CELL_SIZE]
    min_x = (label[0] - label[2] / 2) / (self.image_size / self.cell_size)
    max_x = (label[0] + label[2] / 2) / (self.image_size / self.cell_size)

    min_y = (label[1] - label[3] / 2) / (self.image_size / self.cell_size)
    max_y = (label[1] + label[3] / 2) / (self.image_size / self.cell_size)

    min_x = tf.floor(min_x)
    min_y = tf.floor(min_y)

    max_x = tf.ceil(max_x)
    max_y = tf.ceil(max_y)

    temp = tf.cast(tf.stack([max_y - min_y, max_x - min_x]), dtype=tf.int32)
    objects = tf.ones(temp, tf.float32)

    temp = tf.cast(tf.stack([min_y, self.cell_size - max_y, min_x, self.cell_size - max_x]), tf.int32)
    temp = tf.reshape(temp, (2, 2))
    objects = tf.pad(objects, temp, "CONSTANT")

    #calculate objects  tensor [CELL_SIZE, CELL_SIZE]
    #calculate responsible tensor [CELL_SIZE, CELL_SIZE]
    center_x = label[0] / (self.image_size / self.cell_size)
    center_x = tf.floor(center_x)

    center_y = label[1] / (self.image_size / self.cell_size)
    center_y = tf.floor(center_y)

    response = tf.ones([1, 1], tf.float32)

    temp = tf.cast(tf.stack([center_y, self.cell_size - center_y - 1, center_x, self.cell_size -center_x - 1]), tf.int32)
    temp = tf.reshape(temp, (2, 2))
    response = tf.pad(response, temp, "CONSTANT")
    #objects = response

    #calculate iou_predict_truth [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    predict_boxes = predict[:, :, self.num_classes + self.boxes_per_cell:]
    

    predict_boxes = tf.reshape(predict_boxes, [self.cell_size, self.cell_size, self.boxes_per_cell, 4])

    predict_boxes = predict_boxes * [self.image_size / self.cell_size, self.image_size / self.cell_size, self.image_size, self.image_size]

    base_boxes = np.zeros([self.cell_size, self.cell_size, 4])

    for y in range(self.cell_size):
      for x in range(self.cell_size):
        #nilboy
        base_boxes[y, x, :] = [self.image_size / self.cell_size * x, self.image_size / self.cell_size * y, 0, 0]
    base_boxes = np.tile(np.resize(base_boxes, [self.cell_size, self.cell_size, 1, 4]), [1, 1, self.boxes_per_cell, 1])

    predict_boxes = base_boxes + predict_boxes

    iou_predict_truth = self.iou(predict_boxes, label[0:4])
    #calculate C [cell_size, cell_size, boxes_per_cell]
    C = iou_predict_truth * tf.reshape(response, [self.cell_size, self.cell_size, 1])

    #calculate I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    I = iou_predict_truth * tf.reshape(response, (self.cell_size, self.cell_size, 1))
    
    max_I = tf.reduce_max(I, 2, keep_dims=True)

    I = tf.cast((I >= max_I), tf.float32) * tf.reshape(response, (self.cell_size, self.cell_size, 1))

    #calculate no_I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    no_I = tf.ones_like(I, dtype=tf.float32) - I 


    p_C = predict[:, :, self.num_classes:self.num_classes + self.boxes_per_cell]

    #calculate truth x,y,sqrt_w,sqrt_h 0-D
    x = label[0]
    y = label[1]

    sqrt_w = tf.sqrt(tf.abs(label[2]))
    sqrt_h = tf.sqrt(tf.abs(label[3]))
    #sqrt_w = tf.abs(label[2])
    #sqrt_h = tf.abs(label[3])

    #calculate predict p_x, p_y, p_sqrt_w, p_sqrt_h 3-D [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    p_x = predict_boxes[:, :, :, 0]
    p_y = predict_boxes[:, :, :, 1]

    #p_sqrt_w = tf.sqrt(tf.abs(predict_boxes[:, :, :, 2])) * ((tf.cast(predict_boxes[:, :, :, 2] > 0, tf.float32) * 2) - 1)
    #p_sqrt_h = tf.sqrt(tf.abs(predict_boxes[:, :, :, 3])) * ((tf.cast(predict_boxes[:, :, :, 3] > 0, tf.float32) * 2) - 1)
    #p_sqrt_w = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 2]))
    #p_sqrt_h = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 3]))
    #p_sqrt_w = predict_boxes[:, :, :, 2]
    #p_sqrt_h = predict_boxes[:, :, :, 3]
    p_sqrt_w = tf.sqrt(tf.minimum(self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 2])))
    p_sqrt_h = tf.sqrt(tf.minimum(self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 3])))
    #calculate truth p 1-D tensor [NUM_CLASSES]
    P = tf.one_hot(tf.cast(label[4], tf.int32), self.num_classes, dtype=tf.float32)

    #calculate predict p_P 3-D tensor [CELL_SIZE, CELL_SIZE, NUM_CLASSES]
    p_P = predict[:, :, 0:self.num_classes]

    #class_loss
    class_loss = tf.nn.l2_loss(tf.reshape(objects, (self.cell_size, self.cell_size, 1)) * (p_P - P)) * self.class_scale
    #class_loss = tf.nn.l2_loss(tf.reshape(response, (self.cell_size, self.cell_size, 1)) * (p_P - P)) * self.class_scale

    #object_loss
    object_loss = tf.nn.l2_loss(I * (p_C - C)) * self.object_scale
    #object_loss = tf.nn.l2_loss(I * (p_C - (C + 1.0)/2.0)) * self.object_scale

    #noobject_loss
    #noobject_loss = tf.nn.l2_loss(no_I * (p_C - C)) * self.noobject_scale
    noobject_loss = tf.nn.l2_loss(no_I * (p_C)) * self.noobject_scale

    #coord_loss
    coord_loss = (tf.nn.l2_loss(I * (p_x - x)/(self.image_size/self.cell_size)) +
                 tf.nn.l2_loss(I * (p_y - y)/(self.image_size/self.cell_size)) +
                 tf.nn.l2_loss(I * (p_sqrt_w - sqrt_w))/ self.image_size +
                 tf.nn.l2_loss(I * (p_sqrt_h - sqrt_h))/self.image_size) * self.coord_scale

    nilboy = I

    return num + 1, object_num, [loss[0] + class_loss, loss[1] + object_loss, loss[2] + noobject_loss, loss[3] + coord_loss], predict, labels, nilboy
Example #58
number_of_classes = 2
log_folder = os.path.expanduser('segment_log_folder')

vgg_checkpoint_path = os.path.join(checkpoints_dir, 'vgg_16.ckpt')

# Convert image to float32 before subtracting the
# mean pixel value
image_float = tf.to_float(image_tensor, name='ToFloat')

original_shape = tf.shape(image_float)[0:2]

# Subtract the mean pixel value from each pixel
mean_centered_image = _mean_image_subtraction(image_float,
                                              [_R_MEAN, _G_MEAN, _B_MEAN])

target_input_size_factor = tf.ceil(
    tf.div(tf.to_float(original_shape), tf.to_float(upsample_factor)))
target_input_size = tf.to_int32(
    tf.multiply(target_input_size_factor, upsample_factor))
padding_size = (target_input_size - original_shape) // 2
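# target_input_size is the original size rounded up to the nearest multiple of
# upsample_factor; padding_size (roughly) centers the image within it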

mean_centered_image = tf.image.pad_to_bounding_box(mean_centered_image,
                                                   padding_size[0],
                                                   padding_size[1],
                                                   target_input_size[0],
                                                   target_input_size[1])

processed_images = tf.expand_dims(mean_centered_image, 0)

upsample_filter_np = bilinear_upsample_weights(upsample_factor,
                                               number_of_classes)
Example #59
0
    def prediction_layers(
        self,
        features,
        end_points,
        input_shape,
        reuse=None,
        is_training=False,
        scope="pose",
    ):
        cfg = self.cfg
        if "resnet" in cfg.net_type:
            num_layers = re.findall("resnet_([0-9]*)", cfg.net_type)[0]
            layer_name = ("resnet_v1_{}".format(num_layers) +
                          "/block{}/unit_{}/bottleneck_v1")
            mid_pt = layer_name.format(2, 3)
        elif "mobilenet" in cfg.net_type:
            mid_pt = "layer_7"
        elif "efficientnet" in cfg.net_type:
            mid_pt = "block_" + parallel_layers[cfg.net_type.split('-')[1]]

        final_dims = tf.ceil(
            tf.divide(input_shape[1:3], tf.convert_to_tensor(16)))
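        # final_dims: spatial output size at the network's stride of 16, rounded up with tf.ceil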
        interim_dims = tf.scalar_mul(2, final_dims)
        interim_dims = tf.cast(interim_dims, tf.int32)
        bank_3 = end_points[mid_pt]
        bank_3 = tf.image.resize_images(bank_3, interim_dims)

        with slim.arg_scope(
            [slim.conv2d],
                padding="SAME",
                normalizer_fn=None,
                weights_regularizer=slim.l2_regularizer(cfg.weight_decay),
        ):
            with tf.variable_scope("decoder_filters"):
                bank_3 = slim.conv2d(bank_3,
                                     cfg.bank3,
                                     1,
                                     scope="decoder_parallel_1")

        with slim.arg_scope(
            [slim.conv2d_transpose],
                padding="SAME",
                normalizer_fn=None,
                weights_regularizer=slim.l2_regularizer(cfg.weight_decay),
        ):
            with tf.variable_scope("upsampled_features"):
                upsampled_features = slim.conv2d_transpose(features,
                                                           cfg.bank5,
                                                           kernel_size=[3, 3],
                                                           stride=2,
                                                           scope="block4")
        net = tf.concat([bank_3, upsampled_features], 3)

        out = {}
        with tf.variable_scope(scope, reuse=reuse):
            out["part_pred"] = prediction_layer(
                cfg, net, "part_pred",
                cfg.num_joints + cfg.get("num_idchannel", 0))
            if cfg.location_refinement:
                out["locref"] = prediction_layer(cfg, net, "locref_pred",
                                                 cfg.num_joints * 2)
            if cfg.pairwise_predict and "multi-animal" not in cfg.dataset_type:
                out["pairwise_pred"] = prediction_layer(
                    cfg, net, "pairwise_pred",
                    cfg.num_joints * (cfg.num_joints - 1) * 2)
            if cfg.partaffinityfield_predict and "multi-animal" in cfg.dataset_type:
                out["pairwise_pred"] = prediction_layer(
                    cfg, net, "pairwise_pred", cfg.num_limbs * 2)

            if cfg.intermediate_supervision and "efficientnet" not in cfg.net_type:
                if "mobilenet" in cfg.net_type:
                    out["part_pred_interm"] = prediction_layer(
                        cfg,
                        end_points["layer_" +
                                   str(cfg["intermediate_supervision_layer"])],
                        "intermediate_supervision",
                        cfg.num_joints,
                    )
                elif "resnet" in cfg.net_type:
                    interm_name = layer_name.format(
                        3, cfg.intermediate_supervision_layer)
                    block_interm_out = end_points[interm_name]
                    out["part_pred_interm"] = prediction_layer(
                        cfg,
                        block_interm_out,
                        "intermediate_supervision",
                        cfg.num_joints + cfg.get("num_idchannel", 0),
                    )

        return out
Example #60
0
def G_logistic_ns_pathreg(G,
                          D,
                          opt,
                          training_set,
                          minibatch_size,
                          pl_minibatch_shrink=2,
                          pl_decay=0.01,
                          pl_weight=2.0,
                          int_reg_clip=5.0,
                          rotation_step_size=0.01):
    _ = opt
    rotation_offset = 108
    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    labels = training_set.get_random_labels_tf(minibatch_size)

    # Mirror some labels to balance the rotations
    random_vector = tf.random_uniform([minibatch_size]) < 0.5
    rotation_cos = tf.expand_dims(labels[:, rotation_offset], axis=-1)
    rotation_sin = tf.expand_dims(labels[:, rotation_offset + 1], axis=-1)
    angle = tf.atan2(rotation_sin, rotation_cos)
    new_rotation_cos = tf.cos(angle)
    new_rotation_sin = tf.sin(angle) * -1
    mirrored_labels = tf.concat([
        labels[:, :rotation_offset], new_rotation_cos, new_rotation_sin,
        labels[:, rotation_offset + 2:]
    ],
                                axis=1)
    labels = tf.where(random_vector, labels, mirrored_labels)

    # Zero the rotation labels for half of the front-left and front-right samples to balance the rotation distribution
    zero_rotation = tf.expand_dims(tf.zeros([minibatch_size]), axis=-1)
    removed_labels = tf.concat([
        labels[:, :rotation_offset], zero_rotation, zero_rotation,
        labels[:, rotation_offset + 2:]
    ],
                               axis=1)
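    # 0.7071 ≈ cos(45°): the front-left / front-right views mentioned above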
    condition = tf.equal(labels[:, rotation_offset], 0.7071)
    random_vector = tf.random_uniform([minibatch_size]) < 0.5
    remove_condition = tf.logical_and(condition, random_vector)
    labels = tf.where(remove_condition, removed_labels, labels)

    fake_images_out, fake_dlatents_out = G.get_output_for(latents,
                                                          labels,
                                                          is_training=True,
                                                          return_dlatents=True)
    fake_scores_out = D.get_output_for(fake_images_out,
                                       labels,
                                       is_training=True)

    fake_scores_out_without_rotation = tf.concat([
        fake_scores_out[:, :rotation_offset],
        fake_scores_out[:, rotation_offset + 2:]
    ],
                                                 axis=-1)

    labels_rotation = labels[:, rotation_offset:rotation_offset + 2]

    disc_pred_rotations = fake_scores_out[:,
                                          rotation_offset:rotation_offset + 2]

    loss = tf.nn.softplus(
        -tf.reduce_sum(fake_scores_out_without_rotation, axis=1,
                       keepdims=True))  # -log(1-sigmoid(fake_scores_out))
    loss = autosummary('Loss/generator', loss)

    rotation_distance = tf.norm(labels_rotation - disc_pred_rotations,
                                axis=-1,
                                keepdims=True)

    rotation_distance = rotation_distance * tf.reduce_max(
        tf.ceil(tf.abs(labels_rotation)), axis=-1,
        keepdims=True)  # zero the distance for samples whose rotation labels are unset
    rotation_distance = autosummary('Loss/rotation_distance/generator',
                                    rotation_distance)
    loss += tf.square(rotation_distance) * 10

    # Path length regularization.
    with tf.name_scope('PathReg'):
        # Evaluate the regularization term using a smaller minibatch to conserve memory.
        if pl_minibatch_shrink > 1:
            pl_minibatch = minibatch_size // pl_minibatch_shrink
            pl_latents = tf.random_normal([pl_minibatch] +
                                          G.input_shapes[0][1:])
            pl_labels = training_set.get_random_labels_tf(pl_minibatch)
            fake_images_out, fake_dlatents_out = G.get_output_for(
                pl_latents, pl_labels, is_training=True, return_dlatents=True)

        # Compute |J*y|.
        pl_noise = tf.random_normal(tf.shape(fake_images_out)) / np.sqrt(
            np.prod(G.output_shape[2:]))
        pl_grads = tf.gradients(tf.reduce_sum(fake_images_out * pl_noise),
                                [fake_dlatents_out])[0]
        pl_lengths = tf.sqrt(
            tf.reduce_mean(tf.reduce_sum(tf.square(pl_grads), axis=2), axis=1))
        pl_lengths = autosummary('Loss/pl_lengths', pl_lengths)

        # Track exponential moving average of |J*y|.
        with tf.control_dependencies(None):
            pl_mean_var = tf.Variable(name='pl_mean',
                                      trainable=False,
                                      initial_value=0.0,
                                      dtype=tf.float32)
        pl_mean = pl_mean_var + pl_decay * (tf.reduce_mean(pl_lengths) -
                                            pl_mean_var)
        pl_update = tf.assign(pl_mean_var, pl_mean)

        # Calculate (|J*y|-a)^2.
        with tf.control_dependencies([pl_update]):
            pl_penalty = tf.square(pl_lengths - pl_mean)
            pl_penalty = autosummary('Loss/pl_penalty', pl_penalty)

        # Apply weight.
        #
        # Note: The division in pl_noise decreases the weight by num_pixels, and the reduce_mean
        # in pl_lengths decreases it by num_affine_layers. The effective weight then becomes:
        #
        # gamma_pl = pl_weight / num_pixels / num_affine_layers
        # = 2 / (r^2) / (log2(r) * 2 - 2)
        # = 1 / (r^2 * (log2(r) - 1))
        # = ln(2) / (r^2 * (ln(r) - ln(2)))
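        # e.g. for r = 1024: gamma_pl = 2 / 1024^2 / (2 * 10 - 2) ≈ 1.06e-7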
        #
        reg = pl_penalty * pl_weight

        # Interpolation Reg
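        # Penalize how sensitive the generator output is to a small change in the
        # rotation label, analogous to the path length term above but applied along
        # the rotation conditioning.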
        label_int_pl = labels[:1]
        random_angle = tf.random_uniform([1]) * 2 * np.pi
        interpolation_rotation_cos = tf.expand_dims(tf.cos(random_angle),
                                                    axis=-1)
        interpolation_rotation_sin = tf.expand_dims(tf.sin(random_angle),
                                                    axis=-1)
        label_int_pl_1 = tf.concat([
            label_int_pl[:, :rotation_offset], interpolation_rotation_cos,
            interpolation_rotation_sin, label_int_pl[:, rotation_offset + 2:]
        ],
                                   axis=1)

        random_sign = tf.where(
            tf.random_uniform([1], -1, 1) > 0, tf.ones([1]), -1 * tf.ones([1]))
        random_angle = random_angle + rotation_step_size * 2 * 2 * np.pi * random_sign
        interpolation_rotation_cos = tf.expand_dims(tf.cos(random_angle),
                                                    axis=-1)
        interpolation_rotation_sin = tf.expand_dims(tf.sin(random_angle),
                                                    axis=-1)
        label_int_pl_2 = tf.concat([
            label_int_pl[:, :rotation_offset], interpolation_rotation_cos,
            interpolation_rotation_sin, label_int_pl[:, rotation_offset + 2:]
        ],
                                   axis=1)

        label_interpolate = tfutil.slerp(label_int_pl_1, label_int_pl_2, 0.5)
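        # label_interpolate is the spherical midpoint between the two nearby rotation labels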

        pl_grads = tf.gradients(
            G.get_output_for(latents[:1],
                             label_interpolate,
                             randomize_noise=False), [label_int_pl_1])[0]
        int_pl_lengths = tf.norm(pl_grads, axis=-1, keepdims=True)

        with tf.control_dependencies(None):
            int_pl_mean_var = tf.Variable(name='int_pl_mean',
                                          trainable=False,
                                          initial_value=0.0,
                                          dtype=tf.float32)
        int_pl_mean = int_pl_mean_var + pl_decay * (
            tf.reduce_mean(int_pl_lengths) - int_pl_mean_var)
        int_pl_update = tf.assign(int_pl_mean_var, int_pl_mean)

        with tf.control_dependencies([int_pl_update]):
            int_pl_penalty = tf.square(int_pl_lengths - int_pl_mean)
            # clip penalty
            int_pl_penalty = tf.clip_by_value(int_pl_penalty, 0.0,
                                              int_reg_clip)
            int_pl_penalty = autosummary('Loss/int_pl_penalty', int_pl_penalty)

        reg += int_pl_penalty

    return loss, reg