# Common imports shared by the snippets below. Helpers such as conv_unit,
# conv2d, pcd_align, lstm, etc. are defined in their respective source
# repositories and are not repeated here.
import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF


def cnn_model_003(ctx, x, act=F.elu, do=True, test=False):
    with nn.context_scope(ctx):
        # Convblock0
        h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 32 -> 16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)
        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 16 -> 8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)
        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 8 -> 6
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        h_branch = h
        # Convblock 3
        h = conv_unit(h_branch, "conv23", 10, k=1, s=1, p=0, act=act, test=test)
        h = F.average_pooling(h, (6, 6))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        pred = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))
        # Uncertainty
        u0 = conv_unit(h_branch, "u0", 10, k=1, s=1, p=0, act=act, test=test)
        u0 = F.average_pooling(u0, (6, 6))
        with nn.parameter_scope("u0bn"):
            u0 = PF.batch_normalization(u0, batch_stat=not test)
        log_var = F.reshape(u0, (u0.shape[0], np.prod(u0.shape[1:])))
        # Uncertainty for uncertainty
        u1 = conv_unit(h_branch, "u1", 10, k=1, s=1, p=0, act=act, test=test)
        u1 = F.average_pooling(u1, (6, 6))
        with nn.parameter_scope("u1bn"):
            u1 = PF.batch_normalization(u1, batch_stat=not test)
        log_s = F.reshape(u1, (u1.shape[0], np.prod(u1.shape[1:])))
        return pred, log_var, log_s

def cnn_model_003(ctx, h, act=F.elu, do=True, test=False):
    with nn.context_scope(ctx):
        if not test:
            b, c, s, s = h.shape
            h = F.image_augmentation(h, (c, s, s),
                                     min_scale=1.0, max_scale=1.5,
                                     angle=0.5, aspect_ratio=1.3,
                                     distortion=0.2, flip_lr=True)
        # Convblock0
        h = conv_unit(h, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 32 -> 16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)
        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 16 -> 8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)
        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 8 -> 6
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        u = h
        # Convblock 3
        h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)
        h = F.average_pooling(h, (6, 6))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        pred = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))
        # Uncertainty
        u = conv_unit(u, "u0", 10, k=1, s=1, p=0, act=act, test=test)
        u = F.average_pooling(u, (6, 6))
        with nn.parameter_scope("u0bn"):
            u = PF.batch_normalization(u, batch_stat=not test)
        log_var = F.reshape(u, (u.shape[0], np.prod(u.shape[1:])))
        return pred, log_var

def cnn_model_003(ctx, x, act=F.relu, test=False):
    with nn.context_scope(ctx):
        # Convblock0
        h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 32 -> 16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 16 -> 8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 8 -> 6
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)
        # Convblock 3
        h = F.average_pooling(h, (6, 6))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        h = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))
        return h

def attention(k, q, v, div_dim=True, softmax=True):
    v_shape = v.shape
    k = F.identity(k)
    q = F.identity(q)
    k = F.reshape(k, (k.shape[0], np.prod(k.shape[1:])))
    q = F.reshape(q, (q.shape[0], np.prod(q.shape[1:])))
    v = q  # F.reshape is inplace
    cf = F.affine(q, F.transpose(k, (1, 0)))
    if div_dim:
        dim = np.prod(v_shape[1:])
        cf /= np.sqrt(dim)
    h = cf
    if softmax:
        h = F.softmax(h)
    h = F.affine(h, v)
    h = F.reshape(h, v_shape)
    return h

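# A hedged usage sketch of attention() with hypothetical shapes. Note that v
# is overwritten with the flattened q inside the function (see the
# "F.reshape is inplace" comment), so the v argument effectively only fixes
# the output shape.
k = nn.Variable((4, 8, 6, 6))
q = nn.Variable((4, 8, 6, 6))
out = attention(k, q, q)
print(out.shape)  # (4, 8, 6, 6), same as v_shape
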
def cnn_model_003(ctx, x, act=F.relu, do=True, test=False):
    with nn.context_scope(ctx):
        # Convblock0
        h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 32 -> 16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)
        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 16 -> 8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)
        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 8 -> 6
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        u = h
        # Convblock 3
        h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)
        h = F.average_pooling(h, (6, 6))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        pred = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))
        # Uncertainty
        u = conv_unit(u, "u0", 10, k=1, s=1, p=0, act=act, test=test)
        u = F.average_pooling(u, (6, 6))
        with nn.parameter_scope("u0bn"):
            u = PF.batch_normalization(u, batch_stat=not test)
        log_var = F.reshape(u, (u.shape[0], np.prod(u.shape[1:])))
        return pred, log_var

def _spectral_norm_outer_most_dim_backward(dw_sn, w, u, itr=1, eps=1e-12):
    # Forward recomputation
    w_shape = w.shape
    d0 = np.prod(w.shape[0:-1])  # In
    d1 = w.shape[-1]             # Out
    w = F.reshape(w, [d0, d1])
    u = F.reshape(u, [d1, 1])
    # Power method
    for _ in range(itr):
        # v
        v = F.affine(w, u)
        v = v / ((F.sum(v**2.0, keepdims=True) + eps)**0.5)
        v = F.reshape(v, [1, d0])
        # u
        u = F.affine(v, w)
        u = u / ((F.sum(u**2.0, keepdims=True) + eps)**0.5)
        u = F.reshape(u, [d1, 1])
    # No grad
    u = no_grad(u)
    v = no_grad(v)
    # Spectral normalization
    vw = F.affine(v, w)
    sigma = F.affine(vw, u)
    w_sn = w / sigma
    # The following reshape is not necessary for gradient calculation
    # w_sn = F.reshape(w_sn, w_shape)

    # Backward for spectral norm
    dw_sn = dw_sn.reshape(w.shape)
    # Sum for broadcast backward
    S = sum_for_arithmetics(dw_sn * w_sn, sigma)
    # Add batch axis
    S = S.reshape((1, ) + S.shape)
    u = u.reshape((1, ) + u.shape)
    v = v.reshape((1, ) + v.shape)
    m = F.batch_matmul(v, S, transpose_a=True)
    m = F.batch_matmul(m, u, transpose_b=True)
    # Remove batch axis
    m = m.reshape((m.shape[1], m.shape[2]))
    dw = (dw_sn - m) / sigma
    dw = dw.reshape(w_shape)
    return dw, None

def cnn_model_003(ctx, x, act=F.elu, do=True, test=False):
    with nn.context_scope(ctx):
        # Convblock0
        h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 28 -> 14
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)
        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 14 -> 7
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)
        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 7 -> 5
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        u = h
        # Convblock 3
        h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)
        h = F.average_pooling(h, (5, 5))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        pred = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))
        # Uncertainty
        u = conv_unit(u, "u0", 10, k=1, s=1, p=0, act=act, test=test)
        u = F.average_pooling(u, (5, 5))
        with nn.parameter_scope("u0bn"):
            u = PF.batch_normalization(u, batch_stat=not test)
        log_var = F.reshape(u, (u.shape[0], np.prod(u.shape[1:])))
        return pred, log_var

def equivariance_jacobian_loss(kp_driving_jacobian, arithmetic_jacobian,
                               trans_kp_jacobian, weight):
    jacobian_transformed = F.batch_matmul(arithmetic_jacobian,
                                          trans_kp_jacobian)
    normed_driving = F.reshape(
        F.batch_inv(
            F.reshape(kp_driving_jacobian,
                      (-1, ) + kp_driving_jacobian.shape[-2:])),
        kp_driving_jacobian.shape)
    normed_transformed = jacobian_transformed
    value = F.batch_matmul(normed_driving, normed_transformed)
    eye = nn.Variable.from_numpy_array(np.reshape(np.eye(2), (1, 1, 2, 2)))
    jacobian_loss = F.mean(F.absolute_error(eye, value))
    loss = weight * jacobian_loss
    return loss

def sum_backward(inputs, axes=None, keep_dims=False):
    dy = inputs[0]
    x0 = inputs[1]
    axes = [i for i in range(x0.ndim)] if axes is None else force_list(axes)
    if keep_dims:
        dx0 = F.broadcast(dy, x0.shape)
    else:
        shape = [1 if i in axes else s for i, s in enumerate(x0.shape)]
        dx0 = F.broadcast(F.reshape(dy, shape), x0.shape)
    return dx0

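# A hedged sanity check for sum_backward above. force_list is assumed to be a
# small helper that wraps a scalar axis into a list; shapes are hypothetical.
dy = nn.Variable((2, 4))   # upstream gradient of F.sum(x0, axis=1)
x0 = nn.Variable((2, 3, 4))
dx0 = sum_backward([dy, x0], axes=1)
print(dx0.shape)  # (2, 3, 4): dy is reshaped to (2, 1, 4) and broadcast
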
def get_d_data(conf, flow_hr, gen_outputs, r_targets, rnn_length):
    """
    Prepare data for the temporal discriminators.
    """
    # 3 frames form one entry; the trailing frames that do not fill a group
    # of 3 are discarded.
    t_size = int(3 * (rnn_length // 3))
    t_gen_output = F.reshape(
        gen_outputs[:, :t_size, :, :, :],
        (conf.train.batch_size * t_size,
         conf.train.crop_size * 4, conf.train.crop_size * 4, 3),
        inplace=False)
    t_targets = F.reshape(
        r_targets[:, :t_size, :, :, :],
        (conf.train.batch_size * t_size,
         conf.train.crop_size * 4, conf.train.crop_size * 4, 3),
        inplace=False)
    t_batch = conf.train.batch_size * t_size // 3
    t_inputs_v_pre_batch = F.identity(
        flow_hr[:, 0:t_size:3, :, :, :])  # forward motion reused
    t_inputs_v_batch = nn.Variable(t_inputs_v_pre_batch.shape)
    # no motion for middle frames
    t_inputs_v_batch.data.zero()
    t_inputs_v_nxt_batch = F.identity(
        flow_hr[:, -2:-1 - t_size:-3, :, :, :])  # backward motion
    t_vel = F.stack(
        *[t_inputs_v_pre_batch, t_inputs_v_batch, t_inputs_v_nxt_batch],
        axis=2)
    # (batch, t_size//3, 3, crop_size*4, crop_size*4, 2)
    t_vel = F.reshape(
        t_vel,
        (conf.train.batch_size * t_size,
         conf.train.crop_size * 4, conf.train.crop_size * 4, 2),
        inplace=False)
    # Stop gradients from the discriminator to fnet; see the TecoGAN
    # supplemental paper for details.
    t_vel.need_grad = False
    disc_data = collections.namedtuple(
        'disc_data', 't_vel, t_gen_output, t_batch, t_targets, t_size')
    return disc_data(t_vel=t_vel, t_gen_output=t_gen_output, t_batch=t_batch,
                     t_targets=t_targets, t_size=t_size)

def spectral_normalization_for_affine(w, itr=1, eps=1e-12, input_axis=1,
                                      test=False):
    W_sn = get_parameter_or_create("W_sn", w.shape, ConstantInitializer(0),
                                   False)
    if test:
        return W_sn

    d0 = np.prod(w.shape[0:-1])  # In
    d1 = np.prod(w.shape[-1])    # Out
    u0 = get_parameter_or_create("singular-vector", [d1],
                                 NormalInitializer(), False)
    u = F.reshape(u0, [d1, 1])
    # Power method
    for _ in range(itr):
        # v
        v = F.affine(w, u)
        v = F.div2(
            v,
            F.pow_scalar(F.sum(F.pow_scalar(v, 2.), keepdims=True) + eps,
                         0.5))
        v = F.reshape(v, [1, d0])
        # u
        u = F.affine(v, w)
        u = F.div2(
            u,
            F.pow_scalar(F.sum(F.pow_scalar(u, 2.), keepdims=True) + eps,
                         0.5))
        u = F.reshape(u, [d1, 1])
    # Iterate
    u = F.identity(u, outputs=[u0.data])
    u.persistent = True
    # No grad
    u.need_grad = False
    v.need_grad = False
    # Spectral normalization
    wv = F.affine(v, w)
    sigma = F.affine(wv, u)
    sigma = F.broadcast(F.reshape(sigma, [1 for _ in range(len(w.shape))]),
                        w.shape)
    w_sn = F.div2(w, sigma, outputs=[W_sn.data])
    w_sn.persistent = True
    return w_sn

def easy_pcd(feature_p1, feature_p2, n_filt, name):
    """
    Easy 3-level pyramid cascade alignment.

    input: features (feature_p1, feature_p2)
    feature size: f1 = f2 = [B, N, C, H, W]
    """
    with nn.parameter_scope(name):
        # L1: level 1, original spatial size
        l1_fea = F.stack(*[feature_p1, feature_p2], axis=1)
        batch, num_frames, channels, height, width = l1_fea.shape
        l1_fea = l1_fea.reshape((-1, channels, height, width))
        # L2: level 2, 1/2 spatial size
        l2_fea = F.leaky_relu(
            conv2d(l1_fea, n_filt, 3, 2, 1, bias=True, name='fea_l2_conv1'))
        l2_fea = F.leaky_relu(
            conv2d(l2_fea, n_filt, 3, 1, 1, bias=True, name='fea_l2_conv2'))
        # L3: level 3, 1/4 spatial size
        l3_fea = F.leaky_relu(
            conv2d(l2_fea, n_filt, 3, 2, 1, bias=True, name='fea_l3_conv1'))
        l3_fea = F.leaky_relu(
            conv2d(l3_fea, n_filt, 3, 1, 1, bias=True, name='fea_l3_conv2'))

        l1_fea = F.reshape(l1_fea, (batch, num_frames, -1, height, width),
                           inplace=False)
        l2_fea = F.reshape(l2_fea,
                           (batch, num_frames, -1, height // 2, width // 2),
                           inplace=False)
        l3_fea = F.reshape(l3_fea,
                           (batch, num_frames, -1, height // 4, width // 4),
                           inplace=False)

        fea1 = [l1_fea[:, 0, :, :, :],
                l2_fea[:, 0, :, :, :],
                l3_fea[:, 0, :, :, :]]
        fea2 = [l1_fea[:, 1, :, :, :],
                l2_fea[:, 1, :, :, :],
                l3_fea[:, 1, :, :, :]]

        aligned_fea = pcd_align(fea1, fea2)
        fusion_fea = conv2d(aligned_fea, n_filt, 1, 1, 0, bias=True,
                            name='fusion')
    return fusion_fea

def forward(self, output, inds, gt, reg_mask, channel_last=False):
    # TODO: refactor loss implementation for channel_last without transposing
    if channel_last:
        output = F.transpose(output, (0, 3, 1, 2))
    b = inds.shape[0]
    c = output.shape[1]
    max_objs = inds.shape[1]
    # normalizer: number of valid targets (each object counted per coordinate)
    num_objs = F.sum(reg_mask) * 2
    f_map_size = output.shape[2] * output.shape[3]
    output = F.reshape(output, (-1, f_map_size))
    inds = F.broadcast(inds.reshape((b, 1, max_objs)), (b, c, max_objs))
    inds = inds.reshape((-1, max_objs))
    y = output[F.broadcast(F.reshape(F.arange(0, b * c), (b * c, 1)),
                           (b * c, max_objs)), inds].reshape((b, c, max_objs))
    y = F.transpose(y, (0, 2, 1))
    loss = F.sum(reg_mask * F.absolute_error(y, gt))
    loss = loss / (num_objs + 1e-4)
    return loss

def __call__(self, z, m):
    # m has target image shape: (N, emb, H, W)
    # z: (N, z_dim)
    N = m.shape[0]
    H, W = self.image_shape
    sh = H // (2 ** self.num_upsample)
    sw = W // (2 ** self.num_upsample)

    with ps("spade_generator"):
        with ps("z_embedding"):
            x = PF.affine(z, 16 * self.nf * sh * sw,
                          w_init=w_init(z, 16 * self.nf * sh * sw))
            x = F.reshape(x, (N, 16 * self.nf, sh, sw))

        with ps("head"):
            x = self.head_0(x, m)

        with ps("middle0"):
            x = self.up(x)
            x = self.G_middle_0(x, m)

        with ps("middel1"):
            if self.num_upsample > 5:
                x = self.up(x)
            x = self.G_middle_1(x, m)

        with ps("up0"):
            x = self.up(x)
            x = self.up_0(x, m)

        with ps("up1"):
            x = self.up(x)
            x = self.up_1(x, m)

        with ps("up2"):
            x = self.up(x)
            x = self.up_2(x, m)

        with ps("up3"):
            x = self.up(x)
            x = self.up_3(x, m)

        if self.num_upsample > 6:
            with ps("up4"):
                x = self.up(x)
                x = self.up_4(x, m)

        with ps("last_conv"):
            x = PF.convolution(F.leaky_relu(x, 2e-1), 3,
                               kernel=(3, 3), pad=(1, 1),
                               w_init=w_init(x, 3))
            x = F.tanh(x)

    return x

def __call__(self, conv_in, h=None):
    v_stack_in = conv_in
    h_stack_in = conv_in
    features = []
    with nn.parameter_scope('ConditionalPixelCNN'):
        for i in range(self.num_layers):
            if i == 0:
                kernel_shape = (7, 7)
                mask_type = self.mask_type_A
                residual = False
            else:
                kernel_shape = (3, 3)
                mask_type = self.mask_type_B
                residual = True
            v_stack_gated, v_stack_conv = self.gated_conv(
                v_stack_in, kernel_shape, h, mask_type=mask_type,
                return_payload=True,
                scope_name='vertical_stack_gated_' + str(i))
            h_stack_gated = self.gated_conv(
                h_stack_in, (1, kernel_shape[0]), h, mask_type=mask_type,
                payload=v_stack_conv,
                scope_name='horizontal_stack_gated_' + str(i))
            h_stack_conv = self.gated_conv(
                h_stack_gated, (1, 1), h, mask_type=mask_type, gated=False,
                scope_name='horizontal_stack_conv_' + str(i))
            if residual:
                h_stack_conv += h_stack_in
            v_stack_in = v_stack_gated
            h_stack_in = h_stack_conv

        fc_1 = self.gated_conv(h_stack_in, (1, 1), gated=False,
                               scope_name='fc_1')
        fc_2 = PF.convolution(fc_1, self.out_channels, (1, 1),
                              apply_w=self.mask_type_B, name='fc_2')
        fc_2 = F.transpose(fc_2, (0, 2, 3, 1))
        fc_2 = F.reshape(fc_2, (-1, fc_2.shape[-1]), inplace=True)
    return fc_2

def get_fnet_output(conf, rnn_length, frame_t_pre, frame_t, scope_name):
    """
    Return the flow estimations for LR and HR from the flow-estimator network.
    """
    fnet_input = F.concatenate(frame_t_pre, frame_t)
    fnet_input = F.reshape(fnet_input,
                           (conf.train.batch_size * (rnn_length - 1),
                            conf.train.crop_size, conf.train.crop_size,
                            2 * 3))
    with nn.parameter_scope(scope_name + "fnet"):
        flow_lr = flow_estimator(fnet_input)
    flow_hr = upscale_four(flow_lr * 4.0)  # a linear up-sampling
    flow_hr = F.reshape(flow_hr,
                        (conf.train.batch_size, (rnn_length - 1),
                         conf.train.crop_size * 4, conf.train.crop_size * 4,
                         2),
                        inplace=False)
    fnet_output = collections.namedtuple('fnet_output', 'flow_lr, flow_hr')
    return fnet_output(flow_lr=flow_lr, flow_hr=flow_hr)

def spectral_normalization_for_conv(w, itr=1, eps=1e-12, test=False):
    w_shape = w.shape
    W_sn = get_parameter_or_create("W_sn", w_shape, ConstantInitializer(0),
                                   False)
    if test:
        return W_sn

    d0 = w.shape[0]            # Out
    d1 = np.prod(w.shape[1:])  # In
    w = F.reshape(w, [d0, d1], inplace=False)
    u0 = get_parameter_or_create("singular-vector", [d0],
                                 NormalInitializer(), False)
    u = F.reshape(u0, [1, d0])
    # Power method
    for _ in range(itr):
        # v
        v = F.affine(u, w)
        v = F.div2(
            v,
            F.pow_scalar(F.sum(F.pow_scalar(v, 2.), keepdims=True) + eps,
                         0.5))
        v = F.reshape(v, [d1, 1])
        # u
        u = F.affine(w, v)
        u = F.div2(
            u,
            F.pow_scalar(F.sum(F.pow_scalar(u, 2.), keepdims=True) + eps,
                         0.5))
        u = F.reshape(u, [1, d0])
    # Iterate
    u = F.identity(u, outputs=[u0.data])
    u.persistent = True
    # No grad
    u.need_grad = False
    v.need_grad = False
    # Spectral normalization
    wv = F.affine(w, v)
    sigma = F.affine(u, wv)
    w_sn = F.div2(w, sigma)
    w_sn = F.reshape(w_sn, w_shape)
    w_sn = F.identity(w_sn, outputs=[W_sn.data])
    w_sn.persistent = True
    return w_sn

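# A hedged usage sketch: spectrally normalize a conv weight. The helper
# creates the "W_sn" and "singular-vector" parameters inside the current
# parameter scope; get_parameter_or_create and NormalInitializer are the
# nnabla.parameter / nnabla.initializer utilities assumed to be imported
# by this module. The scope name is hypothetical.
with nn.parameter_scope("discriminator/conv1"):
    w = get_parameter_or_create("W", (64, 3, 3, 3), NormalInitializer(), True)
    w_sn = spectral_normalization_for_conv(w, itr=1)
    print(w_sn.shape)  # (64, 3, 3, 3), same as w
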
def get_preds_fromhm(hm, center=None, scale=None):
    """Obtain (x,y) coordinates given a set of N heatmaps. If the center
    and the scale is provided the function will return the points also
    in the original coordinate frame.

    Arguments:
        hm {numpy.array} -- the predicted heatmaps, of shape [B, N, W, H]

    Keyword Arguments:
        center {numpy.array} -- the center of the bounding box (default: {None})
        scale {float} -- face scale (default: {None})
    """
    idx = F.max(
        F.reshape(hm, (hm.shape[0], hm.shape[1],
                       hm.shape[2] * hm.shape[3])),
        axis=2, only_index=True)
    idx.d += 1
    idx = F.reshape(idx, (1, 68, 1))
    preds = F.concatenate(idx, idx, axis=2)
    preds.d[..., 0] = preds[..., 0].apply(
        d=(preds[..., 0].d - 1) % hm.shape[3] + 1).d
    preds.d[..., 1] = preds[..., 1].apply(
        d=(preds[..., 1].d + 1) // hm.shape[2] + 1).d

    for i in range(preds.shape[0]):
        for j in range(preds.shape[1]):
            hm_ = hm[i, j, :]
            pX, pY = int(preds[i, j, 0].d) - 1, int(preds[i, j, 1].d) - 1
            if pX > 0 and pX < 63 and pY > 0 and pY < 63:
                preds.d[i, j] += \
                    np.sign(hm_.d[pY, pX + 1] - hm_.d[pY, pX - 1]) * .25, \
                    np.sign(hm_.d[pY + 1, pX] - hm_.d[pY - 1, pX]) * .25

    preds.d -= .5
    preds_orig = F.constant(shape=preds.shape)
    if center is not None and scale is not None:
        for i in range(hm.shape[0]):
            for j in range(hm.shape[1]):
                d = transform(list(preds.d[i][j]), center, scale,
                              hm.shape[2], True)
                preds_orig.d[i, j] = d[0], d[1]
    return preds, preds_orig

def mse(x, y, mask=None, eps=1e-5):
    # l2 distance, reduced to the mean
    se = F.squared_error(x, y)
    if mask is not None:
        assert se.shape[:2] == mask.shape[:2]
        se *= F.reshape(mask, se.shape)
        return F.sum(se) / (F.sum(mask) + eps)
    return F.mean(se)

def mae(x, y, mask=None, eps=1e-5):
    # l1 distance, reduced to the mean
    ae = F.absolute_error(x, y)
    if mask is not None:
        assert ae.shape[:2] == mask.shape[:2]
        ae *= F.reshape(mask, ae.shape)
        return F.sum(ae) / (F.sum(mask) + eps)
    return F.mean(ae)

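# A hedged usage example for the mse/mae helpers above; with mask=None both
# reduce to a plain mean over all elements. Shapes are hypothetical.
x = nn.Variable.from_numpy_array(np.random.randn(4, 3).astype(np.float32))
y = nn.Variable.from_numpy_array(np.random.randn(4, 3).astype(np.float32))
loss = 0.5 * mse(x, y) + 0.5 * mae(x, y)
loss.forward()
print(loss.d)
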
def position_encoding(x: nn.Variable) -> nn.Variable:
    batch_size, sequence_length, dim = x.shape

    position = F.reshape(F.arange(0, sequence_length),
                         shape=(sequence_length, 1))
    # -> (sequence_length, 1)
    div_term = F.exp(F.arange(0, dim, 2) * -(np.log(10000.0) / dim))
    # -> (dim//2, )
    sin_val = F.sin(position * F.reshape(div_term, shape=(1, dim // 2)))
    # -> (sequence_length, dim//2)
    cos_val = F.cos(position * F.reshape(div_term, shape=(1, dim // 2)))
    # -> (sequence_length, dim//2)

    ret = []
    for i in range(dim):
        if i % 2 == 0:
            ret.append(sin_val[:, i // 2:i // 2 + 1])
        else:
            ret.append(cos_val[:, i // 2:i // 2 + 1])
    pe = F.reshape(F.concatenate(*ret, axis=1),
                   shape=(1, sequence_length, dim))
    return x + F.broadcast(pe, shape=x.shape)

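# A hedged usage sketch for position_encoding; dim must be even so the
# sin/cos halves interleave cleanly. Shapes are hypothetical.
x = nn.Variable((2, 16, 8))  # (batch_size, sequence_length, dim)
x.d = np.zeros(x.shape)
y = position_encoding(x)
y.forward()  # with zero input, y.d holds the raw sinusoidal table per batch
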
def positional_encoding(x, N=6, include_input=True):
    """
    Args:
        x: Input (B, R, 3)
        N: Number of bands, N=6 for implicit network and N=4 for rendering
           network.
    """
    gamma = [x] if include_input else []
    bands = 2**np.arange(0, N + 1)
    data_holder = nn.Variable if isinstance(x, nn.Variable) else nn.NdArray
    bands = data_holder.from_numpy_array(bands)
    bands = F.reshape(bands, tuple([1] * x.ndim) + (N + 1, )) \
        * F.reshape(x, x.shape + (1, ))
    bands = F.reshape(bands, bands.shape[:-2] + (-1, ))
    cos_x = F.cos(bands)
    sin_x = F.sin(bands)
    gamma += [cos_x, sin_x]
    gamma = F.concatenate(*gamma, axis=-1)
    return gamma

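# A hedged shape check for positional_encoding: with include_input=True the
# last axis grows from 3 to 3 * (2 * (N + 1) + 1), e.g. 45 for N=6. NdArray
# inputs are computed eagerly, so no forward() call is needed.
pts = nn.NdArray.from_numpy_array(
    np.random.rand(2, 128, 3).astype(np.float32))
gamma = positional_encoding(pts, N=6)
print(gamma.shape)  # (2, 128, 45)
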
def test_nnp_graph_reshape(tmpdir, variable_batch_size, batch_size, shape):
    x = nn.Variable([10, 2, 10, 10])
    h = PF.convolution(x, 4, kernel=(3, 3), stride=(1, 1))
    y = F.reshape(h, shape=shape)
    x2, y2 = check_nnp_graph_save_load(tmpdir, x, y, batch_size,
                                       variable_batch_size)
    if not variable_batch_size:
        return
    shape2 = list(y.shape)
    shape2[0] = batch_size
    x2.d = np.random.randn(*x2.shape)
    y2.forward()

def vgg_pre_process(x):
    x_bgr = F.concatenate(x[:, 2:3, :, :], x[:, 1:2, :, :], x[:, 0:1, :, :],
                          axis=1)  # tensor_bgr = tensor[:, [2, 1, 0], ...]
    x_sub = F.reshape(
        nn.Variable.from_numpy_array(
            np.array([0.40760392, 0.45795686, 0.48501961])),
        (1, 3, 1, 1))
    x_bgr_ml = x_bgr - x_sub
    x_rst = x_bgr_ml * 255
    return x_rst

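# A hedged usage example: map an RGB batch in [0, 1] to the mean-subtracted
# BGR convention (scaled toward [0, 255]) that the pretrained VGG expects.
# Shapes are hypothetical.
x = nn.Variable.from_numpy_array(
    np.random.rand(2, 3, 224, 224).astype(np.float32))
y = vgg_pre_process(x)
y.forward()
print(y.shape)  # (2, 3, 224, 224)
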
def pf_affine(r, num_classes=1000, channel_last=False):
    # The initializer assumes this is the final classification layer.
    fan_in = int(np.prod(r.shape[1:]))
    k = 1 / np.sqrt(fan_in)
    init = I.UniformInitializer((-k, k), rng=RNG)
    r = PF.convolution(r, num_classes, (1, 1), channel_last=channel_last,
                       w_init=init, b_init=init, name='fc')
    return F.reshape(r, (r.shape[0], -1), inplace=False)

def test_nnp_graph_reshape(tmpdir, variable_batch_size, batch_size, shape):
    x = nn.Variable([10, 1, 28, 28, 10, 10])
    y = F.reshape(x, shape=shape)
    x2, y2 = check_nnp_graph_save_load(tmpdir, x, y, batch_size,
                                       variable_batch_size)
    if not variable_batch_size:
        return
    shape2 = list(y.shape)
    shape2[0] = batch_size
    x2.d = np.random.randn(*x2.shape)
    y2.forward()
    assert np.allclose(y2.d, x2.d.reshape(shape2))

def create_network(batchsize, imheight, imwidth, args, seen):
    import gc
    gc.collect()
    nnabla_ext.cuda.clear_memory_cache()

    anchors = args.num_anchors
    classes = args.num_classes
    yolo_x = nn.Variable((batchsize, 3, imheight, imwidth))
    target = nn.Variable((batchsize, 50 * 5))
    yolo_features = yolov2.yolov2(yolo_x, anchors, classes, test=False)

    nB = yolo_features.shape[0]
    nA = args.num_anchors
    nC = args.num_classes
    nH = yolo_features.shape[2]
    nW = yolo_features.shape[3]

    # Bounding box regression loss
    # pred.shape = [nB, nA, 4, nH, nW]
    output = F.reshape(yolo_features, (nB, nA, (5 + nC), nH, nW))
    xy = F.sigmoid(output[:, :, :2, ...])
    wh = output[:, :, 2:4, ...]
    bbox_pred = F.concatenate(xy, wh, axis=2)
    conf_pred = F.sigmoid(output[:, :, 4:5, ...])
    cls_pred = output[:, :, 5:, ...]
    region_loss_targets = RegionLossTargets(
        nC, args.anchors, seen, args.coord_scale, args.noobject_scale,
        args.object_scale, args.class_scale, args.thresh)
    tcoord, mcoord, tconf, mconf, tcls, mcls = region_loss_targets(
        bbox_pred, target)
    for v in tcoord, mcoord, tconf, mconf, tcls, mcls:
        v.need_grad = False

    # Bounding box regression
    bbox_loss = F.sum(F.squared_error(bbox_pred, tcoord) * mcoord)
    # Conf (IoU) regression loss
    conf_loss = F.sum(F.squared_error(conf_pred, tconf) * mconf)
    # Class probability regression loss
    cls_loss = F.sum(F.softmax_cross_entropy(cls_pred, tcls, axis=2) * mcls)
    # Note: the loss is divided by 2.0 because the original darknet code
    # doesn't multiply the derivative of square functions by 2.0 in
    # region_layer.c.
    loss = (bbox_loss + conf_loss) / 2.0 + cls_loss
    return yolo_x, target, loss, region_loss_targets

def kp2gaussian(kp, spatial_size, kp_variance):
    mean = kp['value']
    coordinate_grid = make_coordinate_grid(spatial_size)
    number_of_leading_dimensions = len(mean.shape) - 1
    shape = (1, ) * number_of_leading_dimensions + coordinate_grid.shape
    coordinate_grid = F.reshape(coordinate_grid, shape)
    coordinate_grid = F.broadcast(
        coordinate_grid,
        mean.shape[:number_of_leading_dimensions] +
        coordinate_grid.shape[number_of_leading_dimensions:])
    # Preprocess kp shape
    shape = mean.shape[:number_of_leading_dimensions] + (1, 1, 2)
    mean = F.reshape(mean, shape, inplace=False)
    mean_sub = coordinate_grid - mean
    out = F.exp(-0.5 * F.sum((mean_sub**2), axis=mean_sub.ndim - 1)
                / kp_variance)
    return out

def upscale_four(inputs, scope='upscale_four'):
    """
    Mimic the tensorflow bilinear upscaling for a fixed ratio of 4.
    """
    with nn.parameter_scope(scope):
        b, h, w, c = inputs.shape
        p_inputs = F.concatenate(inputs, inputs[:, -1:, :, :],
                                 axis=1)  # pad bottom
        p_inputs = F.concatenate(p_inputs, p_inputs[:, :, -1:, :],
                                 axis=2)  # pad right

        hi_res_bin = [
            [
                inputs,                   # top-left
                p_inputs[:, :-1, 1:, :]   # top-right
            ],
            [
                p_inputs[:, 1:, :-1, :],  # bottom-left
                p_inputs[:, 1:, 1:, :]    # bottom-right
            ]
        ]

        hi_res_array = []
        for hi in range(4):
            for wj in range(4):
                hi_res_array.append(
                    hi_res_bin[0][0] * (1.0 - 0.25 * hi) * (1.0 - 0.25 * wj)
                    + hi_res_bin[0][1] * (1.0 - 0.25 * hi) * (0.25 * wj)
                    + hi_res_bin[1][0] * (0.25 * hi) * (1.0 - 0.25 * wj)
                    + hi_res_bin[1][1] * (0.25 * hi) * (0.25 * wj))

        hi_res = F.stack(*hi_res_array, axis=3)  # shape (b, h, w, 16, c)
        hi_res_reshape = F.reshape(hi_res, (b, h, w, 4, 4, c))
        hi_res_reshape = F.transpose(hi_res_reshape, (0, 1, 3, 2, 4, 5))
        hi_res_reshape = F.reshape(hi_res_reshape, (b, h * 4, w * 4, c))
    return hi_res_reshape

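# A hedged shape check for upscale_four (NHWC layout, as in the snippet above);
# shapes are hypothetical.
x = nn.Variable((1, 8, 8, 3))
x.d = np.random.rand(*x.shape)
y = upscale_four(x)
y.forward()
print(y.shape)  # (1, 32, 32, 3)
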
def build_model():
    x = nn.Variable((batch_size, sentence_length_source))
    mask = get_mask(x)
    y = nn.Variable((batch_size, sentence_length_target))

    enc_input = time_distributed(PF.embed)(
        x, vocab_size_source, embedding_size, name='enc_embeddings') * mask
    # -> (batch_size, sentence_length_source, embedding_size)

    dec_input = F.concatenate(
        F.constant(w2i_target['<bos>'], shape=(batch_size, 1)),
        y[:, :sentence_length_target - 1],
        axis=1)
    dec_input = time_distributed(PF.embed)(
        dec_input, vocab_size_target, embedding_size, name='dec_embeddings')
    # -> (batch_size, sentence_length_target, embedding_size)

    # encoder
    with nn.parameter_scope('encoder'):
        enc_output, c, h = lstm(enc_input, hidden, mask=mask,
                                return_sequences=True, return_state=True)
        # -> (batch_size, sentence_length_source, hidden),
        #    (batch_size, hidden), (batch_size, hidden)

    # decoder
    with nn.parameter_scope('decoder'):
        dec_output = lstm(dec_input, hidden, initial_state=(c, h),
                          return_sequences=True)
        # -> (batch_size, sentence_length_target, hidden)

    output = time_distributed(PF.affine)(
        dec_output, vocab_size_target, name='output')
    # -> (batch_size, sentence_length_target, vocab_size_target)

    t = F.reshape(y, (batch_size, sentence_length_target, 1))
    entropy = time_distributed_softmax_cross_entropy(output, t)

    mask = F.sum(F.sign(t), axis=2)  # do not predict 'pad'.
    count = F.sum(mask, axis=1)
    entropy *= mask
    loss = F.mean(F.sum(entropy, axis=1) / count)
    return x, y, loss

def __init__(self, model="alex", params_dir="./converted_weights", spatial=False, apply_scale=True): """ Args: model(str): network used for feature extractor. "alex"(AlexNet) or "vgg"(VGG16). AlexNet is reported to work best. params_dir(str): directory containing the weights. Note that the weights must be the same as the one used for the paper. spatial(bool): if True, returns the distance map instead of single value. False by default. apply_scale(bool): if True, the input values will be scaled. True by default. using version 0.1 requires this scaling. """ super(LPIPS, self).__init__() self.model = model params_path = os.path.join(params_dir, f"{model}_with_LPIPS.h5") if self.model == "alex": print("Use AlexNet's features") load_parameters(params_path) self.feat_extractor = get_alex_feat elif self.model == "vgg": print("Use VGG's features") load_parameters(params_path) self.feat_extractor = get_vgg_feat else: # currently only vgg and alexnet are supported. return NotImplementedError self.spatial = spatial self.apply_scale = apply_scale self._shift = F.reshape( nn.Variable.from_numpy_array([-.030, -.088, -.188]), (1, 3, 1, 1)) self._scale = F.reshape( nn.Variable.from_numpy_array([.458, .448, .450]), (1, 3, 1, 1))
def top_k_error(target_action, target_action_type, target_action_mask,
                rule_prob, terminal_gen_action_prob, token_prob, copy_prob,
                k=5):
    batch_size, max_action_length, _ = target_action.shape
    _, _, rule_num = rule_prob.shape
    _, _, token_num = token_prob.shape
    _, _, max_query_length = copy_prob.shape

    # (batch_size, max_action_length)
    rule_mask, token_mask, copy_mask = F.split(target_action_type, axis=2)
    # (batch_size, max_action_length)
    target_rule, target_token, target_copy = F.split(target_action, axis=2)
    target_rule = F.reshape(target_rule, (batch_size, max_action_length, 1))

    # (batch_size, max_action_length)
    gen_token_prob, copy_token_prob = F.split(terminal_gen_action_prob,
                                              axis=2)
    gen_token_prob = F.reshape(gen_token_prob,
                               (batch_size, max_action_length, 1))
    gen_token_prob = F.broadcast(gen_token_prob,
                                 (batch_size, max_action_length, token_num))
    copy_token_prob = F.reshape(copy_token_prob,
                                (batch_size, max_action_length, 1))
    copy_token_prob = F.broadcast(
        copy_token_prob, (batch_size, max_action_length, max_query_length))

    # (batch_size, max_action_length, token_num)
    token_prob = gen_token_prob * token_prob
    # (batch_size, max_action_length, max_query_length)
    copy_prob = copy_token_prob * copy_prob
    # (batch_size, max_action_length, token_num + max_query_length)
    gen_or_copy = F.concatenate(token_prob, copy_prob, axis=2)

    # (batch_size, max_action_length)
    token_label = token_mask * target_token + \
        (copy_mask * (target_copy + token_num))
    token_label = F.reshape(token_label, (batch_size, max_action_length, 1))

    # (batch_size, max_action_length, 1)
    rule_err = F.top_n_error(rule_prob, target_rule, axis=2, n=k)
    rule_err = F.reshape(rule_err, (batch_size, max_action_length))
    # (batch_size, max_action_length, 1)
    token_err = F.top_n_error(gen_or_copy, token_label, axis=2, n=k)
    token_err = F.reshape(token_err, (batch_size, max_action_length))

    # (batch_size, max_action_length)
    err = rule_mask * rule_err + (token_mask + copy_mask) * token_err
    # (batch_size,)
    num = F.sum(rule_mask, axis=1) + F.sum(token_mask, axis=1) + \
        F.sum(copy_mask, axis=1)
    # (batch_size,)
    err = F.sum(err, axis=1)
    # (batch_size,)
    err = err / (num + 1e-7)
    return F.mean(err)

def sigmoid_ce(logits, value, mask=None, eps=1e-5):
    # sigmoid cross entropy, reduced to the mean
    sce = F.sigmoid_cross_entropy(
        logits, F.constant(val=value, shape=logits.shape))
    if mask is not None:
        assert sce.shape[:2] == mask.shape[:2]
        sce *= F.reshape(mask, sce.shape)
        return F.sum(sce) / (F.sum(mask) + eps)
    return F.mean(sce)

def attnblock(h, r=8, fix_parameters=False, sn=True, test=False):
    """Attention block"""
    x = h
    # 1x1 convolutions
    b, c, s0, s1 = h.shape
    c_r = c // r
    assert c_r > 0
    f_x = convolution(h, c_r, kernel=(1, 1), pad=(0, 0), stride=(1, 1),
                      name="f", with_bias=False, sn=sn, test=test)
    g_x = convolution(h, c_r, kernel=(1, 1), pad=(0, 0), stride=(1, 1),
                      name="g", with_bias=False, sn=sn, test=test)
    h_x = convolution(h, c, kernel=(1, 1), pad=(0, 0), stride=(1, 1),
                      name="h", with_bias=False, sn=sn, test=test)
    # Attend
    attn = F.batch_matmul(f_x.reshape([b, c_r, -1]),
                          g_x.reshape([b, c_r, -1]),
                          transpose_a=True)
    attn = F.softmax(attn, 1)
    h_x = h_x.reshape([b, c, -1])
    o = F.batch_matmul(h_x, attn)
    o = F.reshape(o, [b, c, s0, s1])
    # Shortcut
    gamma = get_parameter_or_create("gamma", [1, 1, 1, 1],
                                    ConstantInitializer(0.),
                                    not fix_parameters)
    y = gamma * o + x
    return y

def anti_alias_interpolate(input, channels, scale):
    # no trainable parameters exist.
    if scale == 1.0:
        # no interpolation executed
        return F.identity(input)

    sigma = (1 / scale - 1) / 2
    kernel_size = 2 * round(sigma * 4) + 1
    ka = kernel_size // 2
    if kernel_size % 2 == 0:
        kb = ka - 1
    else:
        kb = ka
    kernel_size = [kernel_size, kernel_size]
    sigma = [sigma, sigma]
    kernel = 1

    xa = F.reshape(F.arange(0, kernel_size[0]), (-1, 1))
    ya = F.reshape(F.arange(0, kernel_size[1]), (1, -1))
    meshgrids = (F.tile(xa, (1, kernel_size[1])),
                 F.tile(ya, (kernel_size[0], 1)))
    for size, std, mgrid in zip(kernel_size, sigma, meshgrids):
        mean = (size - 1) / 2
        kernel *= F.exp(-(mgrid - mean)**2 / (2 * std**2))
    kernel = kernel / F.sum(kernel, keepdims=True)

    # Reshape to depthwise convolutional weight
    kernel = F.reshape(kernel, (1, 1) + kernel.shape)
    kernel = F.broadcast(kernel, (channels, 1) + tuple(kernel_size))
    # if using the pre-computed kernel, no need to compute here.

    out = F.pad(input, (ka, kb, ka, kb))
    out = F.convolution(out, weight=kernel, group=channels)
    out = F.interpolate(out, scale=(scale, scale), mode="nearest")
    return out

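# A hedged usage example: Gaussian-blur then nearest-downsample a feature map
# to half resolution (NCHW layout). Shapes are hypothetical.
x = nn.Variable((1, 3, 64, 64))
x.d = np.random.rand(*x.shape)
y = anti_alias_interpolate(x, channels=3, scale=0.5)
y.forward()
print(y.shape)  # (1, 3, 32, 32)
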
def cnn_model_003_with_cross_attention(ctx, x_list, act=F.relu, test=False):
    """With attention before pooling
    """
    with nn.context_scope(ctx):
        # Convblock0
        h0_list = []
        for x in x_list:
            h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
            h0_list.append(h)

        # Cross attention
        ca0 = attention(h0_list[0], h0_list[1], h0_list[1],
                        div_dim=True, softmax=True)
        ca1 = attention(h0_list[1], h0_list[0], h0_list[0],
                        div_dim=True, softmax=True)

        # Max pooling, batch normalization, dropout
        h0_list = []
        for h in [ca0, ca1]:
            h = F.max_pooling(h, (2, 2))  # 32 -> 16
            with nn.parameter_scope("bn0"):
                h = PF.batch_normalization(h, batch_stat=not test)
            if not test:
                h = F.dropout(h)
            h0_list.append(h)

        # Convblock 1
        h1_list = []
        for h in h0_list:
            h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
            h1_list.append(h)

        # Cross attention
        ca0 = attention(h1_list[0], h1_list[1], h1_list[1],
                        div_dim=True, softmax=True)
        ca1 = attention(h1_list[1], h1_list[0], h1_list[0],
                        div_dim=True, softmax=True)

        # Max pooling, batch normalization, dropout
        h1_list = []
        for h in [ca0, ca1]:
            h = F.max_pooling(h, (2, 2))  # 16 -> 8
            with nn.parameter_scope("bn1"):
                h = PF.batch_normalization(h, batch_stat=not test)
            if not test:
                h = F.dropout(h)
            h1_list.append(h)

        # Convblock 2
        h2_list = []
        for h in h1_list:
            h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 8 -> 6
            h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
            h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
            h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)
            h2_list.append(h)

        # Cross attention
        ca0 = attention(h2_list[0], h2_list[1], h2_list[1],
                        div_dim=True, softmax=True)
        ca1 = attention(h2_list[1], h2_list[0], h2_list[0],
                        div_dim=True, softmax=True)

        # Convblock 3
        h3_list = []
        for h in [ca0, ca1]:
            h = F.average_pooling(h, (6, 6))
            with nn.parameter_scope("bn2"):
                h = PF.batch_normalization(h, batch_stat=not test)
            h = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))
            h3_list.append(h)
        return h3_list