def angle_to_theta(angles):
    """
    Build 2x3 rotation transform matrices (zero translation) from angles.

    :param angles: rotation angles, assumed to be in radians
    :return: theta; documented layout is [angleshapes, 2, 3]
    """
    cos_a = tf.cos(angles)
    sin_a = tf.sin(angles)
    zero = tf.zeros_like(angles)

    # [[cos, -sin, 0],
    #  [sin,  cos, 0]]  -- stacking puts the 2x3 axes in front of the angle axes
    top_row = [cos_a, -sin_a, zero]
    bottom_row = [sin_a, cos_a, zero]
    theta = tf.stack([top_row, bottom_row])

    # shiftdim(-2) is relied on to move the two leading (2, 3) axes behind the
    # angle axes, per the documented output layout -- TODO confirm against shiftdim
    return theta.shiftdim(-2)
def alpha_composite(images, order=None, **whileopt):
    """
    Alpha-composite RGBA layers into a single image by folding over the layers.

    example
        alpha_composite([fg1, ... bg])                  # each [B,H,W,C], C == 4
        alpha_composite([layered], order='BL')          # layered: [B, Layer, H, W, C]
        alpha_composite([layered], order='LB')          # layered: [Layer, B, H, W, C]
        alpha_composite([layered])                      # layered: [Layer, H, W, C]

    :param images: sequence of layers, documented order is fg to bg; the last axis
        is split as colour channels + one trailing alpha channel (RGBA expected).
        A sequence holding exactly one element is treated as one pre-layered tensor.
    :param order: meaning of the 2 leading axes of a 5-D layered tensor,
        'BL' (batch, layer) or 'LB' (layer, batch). Must be None for a 4-D
        layered tensor. Ignored when several separate layers are given.
    :param whileopt: extra options forwarded to tf.foldleft
    :return: composed images
    :raises ValueError: if the rank of a layered tensor and `order` do not match
        one of the combinations above
    """
    # https://en.wikipedia.org/wiki/Alpha_compositing
    # alpha_out = alpha_fg + alpha_bg * (1 - alpha_fg)
    # rgb_out = (rgb_fg * alpha_fg + rgb_bg * alpha_bg * (1 - alpha_fg)) / alpha_out
    #           (0 where alpha_out == 0)
    if len(images) == 1:
        # a single pre-layered tensor: bring it to layer-major layout
        layered = images[0]
        if layered.ndim == 4 and order is None:
            # [Layer, H, W, C]
            pass
        elif layered.ndim == 5 and order == 'LB':
            # [Layer, B, H, W, C] is already the layout the fold needs
            pass
        elif layered.ndim == 5 and order == 'BL':
            # [B, Layer, H, W, C] -> [Layer, B, H, W, C]
            layered = layered.transpose([1, 0, 2, 3, 4])
        else:
            raise ValueError('check image tensor and order arguments')
        images = layered
    else:
        # separate layers -> [Layer, B, H, W, C]
        images = tf.stack(images)

    def step(bg, fg):
        # NOTE(review): `bg` is the first callback argument; whether that agrees
        # with the documented fg-to-bg order depends on how tf.foldleft orders
        # its callback arguments -- confirm.
        alpha_fg = fg[..., -1:]
        alpha_bg = bg[..., -1:]
        rgb_fg = fg[..., :-1]
        rgb_bg = bg[..., :-1]

        alpha = alpha_fg + alpha_bg * (1. - alpha_fg)

        # avoid 0/0: divide by 1 where the result is fully transparent and
        # force the colour to 0 there
        visible = tf.not_equal(alpha, 0.)
        alphadiv = tf.select(visible, alpha, 1.)
        blended = rgb_fg * alpha_fg + rgb_bg * alpha_bg * (1. - alpha_fg)
        rgb = tf.select(visible, blended / alphadiv, 0.)

        return tf.concat(-1, [rgb, alpha])

    composed = tf.foldleft(step, images, **whileopt)
    return composed
def deconv(x, outdim, kernel, stride=1, padding='SAME', initializer=tf.he_uniform, bias=False, extra=None,
           **kwargs):
    """
    Transposed convolution layer for inputs with 2 or 3 spatial dimensions.

    :param x: input tensor; (rank - 2) is taken as the number of spatial dims
    :param outdim: number of output channels
    :param kernel: kernel size spec, forwarded to _kernel_shape
    :param stride: stride spec, forwarded to _stride_shape
    :param padding: padding mode handed to the transpose-conv op
    :param initializer: factory called with the kernel shape to build the weight initializer
    :param bias: add a bias of shape (outdim,) when true
    :param extra: forwarded to _deconv_outshape
    :param kwargs: forwarded to tf.get_bias only (used when bias is true)
    :return: output tensor, with its static shape set from the computed output shape
    :raises NotImplementedError: if the number of spatial dims is neither 2 nor 3
    """
    spatial_rank = x.ndim - 2

    target_shape = _deconv_outshape(spatial_rank, x.dims, outdim, kernel, stride, padding, extra)
    # keep the static shape before the unknown batch entry is replaced below
    static_shape = tf.TensorShape(target_shape)
    if target_shape[0] is None:
        target_shape[0] = tf.shape(x)[0]
    target_shape = tf.stack(target_shape)

    # out channel is passed before in channel (swapped w.r.t. a forward conv)
    filter_shape = _kernel_shape(spatial_rank, kernel, outdim, x.dims[-1])
    strides = _stride_shape(spatial_rank, stride)

    weights = tf.get_weight('W', shape=filter_shape, initializer=initializer(filter_shape))

    if spatial_rank not in (2, 3):
        raise NotImplementedError('not implementd for ndim [{0}]'.format(spatial_rank))
    if spatial_rank == 2:
        result = tf.nn.conv2d_transpose(x, weights, target_shape, strides=strides, padding=padding)
    else:
        result = tf.nn.conv3d_transpose(x, weights, target_shape, strides=strides, padding=padding)

    if bias:
        offset = tf.get_bias('b', shape=(outdim, ), initializer=tf.zeros_initializer(), **kwargs)
        result = tf.nn.bias_add(result, offset)

    result.set_shape(static_shape)
    return result
def rand_bbox(shape, count, starts=None, sizes=None, dtype=tf.bool):
    """
    Draw random boxes (xstart, ystart, xsize, ysize), each value uniform in its range.

    :param shape: only shape[0] is read; it is the leading dimension of the result
    :param count: number of boxes drawn per leading entry
    :param starts: [(xmin, xmax), (ymin, ymax)] ranges for the start coords,
        defaults to (0.0, 1.0) for both
    :param sizes: [(xmin, xmax), (ymin, ymax)] ranges for the sizes,
        defaults to (0.0, 1.0) for both
    :param dtype: not used by this function
    :return: tensor whose static shape is set to [shape[0], None, 4]
    """
    starts = [(0.0, 1.0), (0.0, 1.0)] if starts is None else starts
    sizes = [(0.0, 1.0), (0.0, 1.0)] if sizes is None else sizes

    # one value per (leading entry, box); the trailing 1 is the axis concatenated on
    draw_shape = tf.stack([shape[0], count, 1], axis=0).to_int32()

    def draw(bounds):
        return tf.random_uniform(shape=draw_shape, minval=bounds[0], maxval=bounds[1])

    # draw order is kept as xstart, ystart, xsize, ysize
    columns = [draw(bounds) for bounds in (starts[0], starts[1], sizes[0], sizes[1])]

    boxes = tf.concat(-1, columns)
    boxes.set_shape([shape[0], None, 4])
    return boxes
def composite(images, order=None, **whileopt):
    """
    Composite layers without an alpha channel: pixels whose channels are all
    exactly 0 are treated as transparent.

    example
        composite([fg1, ... bg])                  # each [B,H,W,C]
        composite([layered], order='BL')          # layered: [B, Layer, H, W, C]
        composite([layered], order='LB')          # layered: [Layer, B, H, W, C]
        composite([layered])                      # layered: [Layer, H, W, C]

    :param images: sequence of layers, documented order is fg to bg.
        A sequence holding exactly one element is treated as one pre-layered tensor.
    :param order: meaning of the 2 leading axes of a 5-D layered tensor,
        'BL' (batch, layer) or 'LB' (layer, batch). Must be None for a 4-D
        layered tensor. Ignored when several separate layers are given.
    :param whileopt: extra options forwarded to tf.foldleft
    :return: composed images
    :raises ValueError: if the rank of a layered tensor and `order` do not match
        one of the combinations above
    """
    if len(images) == 1:
        # a single pre-layered tensor: bring it to layer-major layout
        layered = images[0]
        if layered.ndim == 4 and order is None:
            # [Layer, H, W, C]
            pass
        elif layered.ndim == 5 and order == 'LB':
            # [Layer, B, H, W, C] is already the layout the fold needs
            pass
        elif layered.ndim == 5 and order == 'BL':
            # [B, Layer, H, W, C] -> [Layer, B, H, W, C]
            layered = layered.transpose([1, 0, 2, 3, 4])
        else:
            raise ValueError('check image tensor and order arguments')
        images = layered
    else:
        # separate layers -> [Layer, B, H, W, C]
        images = tf.stack(images)

    def step(bg, fg):
        # NOTE(review): `bg` is the first callback argument; whether that agrees
        # with the documented fg-to-bg order depends on how tf.foldleft orders
        # its callback arguments -- confirm.
        # a pixel of `fg` with every channel equal to 0 lets `bg` show through
        transparent = tf.equal(fg, 0.).all(axis=-1, keepdims=True)
        return tf.select(transparent, bg, fg)

    composed = tf.foldleft(step, images, **whileopt)
    return composed
def sparsemax(logits, axis=-1, name=None):
    """
    Sparsemax along one axis, following the TensorFlow contrib implementation.

    :param logits: tf.Tensor
    :param axis: axis to normalize over
    :param name: not used by this function
    :return: tensor reshaped and shifted back to the layout of `logits`
    """
    # https://github.com/tensorflow/tensorflow/blob/r1.1/tensorflow/contrib/sparsemax/python/ops/sparsemax.py

    # rotate the target axis to the last position, then flatten to [obs, dims]
    logits = tf.shiftdim(logits, -axis - 1)
    tshape = tf.shape(logits)
    # after the shift the target axis is the last one whatever `axis` was, so
    # its length must be read from the last entry (tshape[axis] is only right
    # when axis already points at the last dimension)
    dims = tshape[-1]
    logits = tf.reshape(logits, (-1, dims))
    obs = tf.shape(logits)[0]

    # sort z
    # NOTE(review): the reference algorithm needs z sorted in descending order
    # (it uses top_k); confirm tf.sort sorts descending along the last axis.
    z = logits - tf.mean(logits, axis=1, keepdims=True)
    z_sorted = tf.sort(z)

    # calculate k(z): number of sorted entries satisfying 1 + k * z_k > cumsum_k
    z_cumsum = tf.cumsum(z_sorted, axis=1)
    k = tf.range(1, dims + 1).astype(dtype=logits.dtype)
    z_check = 1 + k * z_sorted > z_cumsum
    k_z = tf.sum(z_check.astype(tf.int32), axis=1)

    # calculate tau(z): threshold from the cumulative sum at position k(z) - 1
    indices = tf.stack([tf.range(0, obs), k_z - 1], axis=1)
    tau_sum = tf.gather_nd(z_cumsum, indices)
    tau_z = (tau_sum - 1) / k_z.astype(logits.dtype)

    res = tf.maximum(tf.cast(0, logits.dtype), z - tau_z[:, tf.newaxis])

    # undo the flatten and the axis rotation
    res = tf.reshape(res, tshape)
    res = tf.shiftdim(res, axis + 1)

    return res
def transform_3r(img, theta, outsize=None, oob=None):
    """
    Warp a single rank-3 image with a 2x3 transform and resample it.

    :param img: source image [HWC]
    :param theta: transform [2,3], applied to target coords [x; y; 1]
    :param outsize: output size [H',W']; the source [H,W] when None
    :param oob: out of bound fill value, forwarded to sampling_xy_3r
    :return: [H'W'C]
    """
    assert img.ndim == 3

    if outsize is None:
        outsize = img.shapes[:2]  # leading two of HWC
    out_h, out_w = outsize[0], outsize[1]
    _src_h, _src_w, _channels = img.shapes

    # target grid, height and width normalized to (-.5, .5)
    grid_x = tf.linspace(-0.5, 0.5, out_w)
    grid_y = tf.linspace(-0.5, 0.5, out_h)
    target_x, target_y = tf.meshgrid(grid_x, grid_y)

    # homogeneous target coords, rows are x, y, 1
    homogeneous = tf.stack([target_x.flat(), target_y.flat(), tf.ones((out_w * out_h, ))])

    # matching source coords [x; y], shape [2, pixels]
    source_xy = theta.dot(homogeneous)

    return sampling_xy_3r(img, source_xy, outsize, oob=oob)
def sampling_xy_3r(img, xys, outsize=None, oob=None):
    """
    differentiable image sampling (with bilinear-style interpolation of the
    4 neighbouring pixels)

    :param img: source image [HWC]
    :param xys: source coord, rows are x and y; [2, H'*W'] if outsize given.
        Coordinates are expected in the normalized range where -0.5..0.5
        spans the image (they are shifted by 0.5 and scaled by [W, H]).
    :param outsize: [H',W'] or None, xys must has rank3
    :param oob: fill value for samples whose neighbours are out of bound;
        0. when None
    :return: [H',W',C]
    """
    assert img.ndim == 3
    oobv = oob
    if oobv is None:
        # default fill value is a scalar zero, broadcast over channels later
        oobv = 0.
    oobv = tf.convert_to_tensor(oobv)

    if outsize is None:
        # take the output size from the trailing dims of xys, then flatten
        # NOTE(review): assumes flat2d keeps the leading axis of size 2 -- confirm
        outsize = tf.shape(xys)[1:]
        xys = xys.flat2d()

    H, W, C = img.shapes
    WH = tf.stack([W, H]).to_float().reshape((2, 1))

    # scale normalized coords to pixel coords: (xy + 0.5) * [W; H]
    XYf = (xys + 0.5) * WH
    XYS = tf.ceil(XYf)  # upper (right / bottom) neighbour, still float here

    # prepare weights
    # w00: distance up to the upper neighbour, used as weight of the lower one
    # w11: the complement, used as weight of the upper neighbour
    w00 = XYS - XYf  # [2, p]
    w11 = 1. - w00  # [2, p]

    # get near 4 pixels per pixel
    XYS = XYS.to_int32()  # [2, p]
    # todo check xy order
    XYs = XYS - 1  # lower (left / top) neighbour

    # index 0: lower neighbour, index 1: upper neighbour
    Xs = tf.stack([XYs[0], XYS[0]])
    Ys = tf.stack([XYs[1], XYS[1]])

    # get mask of out of bound
    # indices are clipped into the image; a corner is in bound when clipping
    # changed neither its x nor its y
    Xi = Xs.clip_by_value(0, W - 1)
    Yi = Ys.clip_by_value(0, H - 1)

    inb = tf.logical_and(Xi.equal(Xs), Yi.equal(Ys))  # [2, p]
    # a sample counts as in bound if either (x0, y0) or (x1, y1) is in bound
    inb = tf.reduce_any(inb, axis=0, keepdims=True)  # [1, p]
    inb = inb.reshape((-1, 1)).to_float()  # [p, 1] 1 for channel

    # get 4 pixels [p, C]; getpixel is given (y, x) index pairs
    p00 = getpixel(img, tf.stack([Yi[0], Xi[0]]).T)
    p01 = getpixel(img, tf.stack([Yi[0], Xi[1]]).T)
    p10 = getpixel(img, tf.stack([Yi[1], Xi[0]]).T)
    p11 = getpixel(img, tf.stack([Yi[1], Xi[1]]).T)

    # stacked nearest : [4, p, C]
    near4 = tf.stack([p00, p01, p10, p11], axis=0)

    # 4 near point weights [4, pixel], same order as near4
    w4 = tf.stack([
        w00[1] * w00[0],  # left top
        w00[1] * w11[0],  # right top
        w11[1] * w00[0],  # left bottom
        w11[1] * w11[0]   # right bottom
    ])

    # weighted sum of 4 nearest pixels, weights broadcast over channels
    w4 = w4.reshape((4, -1, 1))
    interpolated = tf.sum(w4 * near4.to_float(), axis=0)  # [p, C]

    # assign oob value
    # fill oob by broadcasting the fill value over pixels
    oobv = oobv.reshape((1, -1))
    interpolated = interpolated * inb + oobv * (1. - inb)  # [p, C]

    # reshape [p, C] => [H', W', C]
    output = interpolated.reshape((outsize[0], outsize[1], C))

    return output