def backward_impl(self, inputs, outputs, prop_down, accum):
    # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
    # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

    # Args
    epsilon = self.forward_func.info.args["epsilon"]

    # Inputs
    x0 = inputs[0].data
    x1 = inputs[1].data
    dy = inputs[2].data
    # Outputs
    dx0 = outputs[0].data
    dx1 = outputs[1].data
    # Grads of inputs
    g_x0 = inputs[0].grad
    g_x1 = inputs[1].grad
    g_dy = inputs[2].grad
    # Grads of outputs
    g_dx0 = outputs[0].grad
    g_dx1 = outputs[1].grad

    # Computation
    if prop_down[2]:
        # Simply using " / dy" causes numerical instability
        diff = x0 - x1
        mask = F.greater_scalar(F.abs(diff), epsilon)
        maskp = F.greater_scalar(diff, 0.0)
        maskn = 1.0 - maskp
        g_dy_ = (g_dx0 - g_dx1) * (maskp - maskn) * mask
        if accum[2]:
            g_dy += g_dy_
        else:
            g_dy.copy_from(g_dy_)
def epsilon_insensitive_loss_backward(inputs, epsilon):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    x1 = inputs[2]
    d = x0 - x1
    m0 = F.greater_scalar(F.abs(d), epsilon)
    m1 = 1 - m0
    mg = F.greater(x0, x1)
    ml = 1 - mg
    m0 = no_grad(m0)
    mg = no_grad(mg)
    ml = no_grad(ml)
    t0 = m0 * mg
    t1 = -m0 * ml
    dx0 = dy * (t0 + t1)
    dx1 = -dx0
    return dx0, dx1
def norm_normalization_backward(inputs, p=None, axes=None, eps=1e-12):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]

    if p is None:
        p = 2.0
    axes = list(range(x0.ndim)) if axes is None else force_list(axes)

    x_abs = F.abs(x0)
    x_pow = F.pow_scalar(x_abs, p)
    x_sum = F.sum(x_pow, axes, keepdims=True)
    # x_norm = x_sum ** (1./p)

    # Div2 backward
    dx = dy * x_sum ** (-1. / p)
    dx_norm = -dy * x0 * x_sum ** (-2. / p)
    dx_norm = sum_for_arithmetics(dx_norm, x_sum)

    # Norm backward
    x_sign = no_grad(F.sign(x0))
    dx += dx_norm * x_sum ** (1. / p - 1.) * x_abs ** (p - 1.) * x_sign

    return dx
def norm_backward(inputs, p=None, axes=None, keep_dims=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]

    if p is None:
        p = 2.0
    axes = list(range(x0.ndim)) if axes is None else force_list(axes)

    x_abs = F.abs(x0)
    x_pow = F.pow_scalar(x_abs, p)
    x_sum = F.sum(x_pow, axes, keepdims=True)

    # Add axis for mul2
    if not keep_dims:
        shape = list(x0.shape)
        for a in axes:
            shape[a] = 1
        dy = dy.reshape(shape)

    x_sign = no_grad(F.sign(x0))
    dx = dy * x_sum ** (1. / p - 1.) * x_abs ** (p - 1.) * x_sign

    return dx
def huber_loss_backward(inputs, delta=1.0):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    x1 = inputs[2]
    d = x0 - x1
    m0 = F.less_scalar(F.abs(d), delta)
    m1 = 1 - m0
    mg = F.greater(x0, x1)
    ml = 1 - mg
    m0 = no_grad(m0)
    m1 = no_grad(m1)
    mg = no_grad(mg)
    ml = no_grad(ml)
    t0 = 2 * d * m0
    t1 = 2 * delta * m1 * mg
    t2 = -2 * delta * m1 * ml
    dx0 = dy * (t0 + t1 + t2)
    dx1 = -dx0
    return dx0, dx1
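# A minimal usage sketch of `huber_loss_backward` above. It assumes the module-level
# helpers the snippets in this file already rely on (`no_grad` and `F` as
# `nnabla.functions`); the shapes and the all-ones upstream gradient `dy` are
# illustrative assumptions, not part of the original code.
def _example_huber_loss_backward():
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    x0 = nn.Variable.from_numpy_array(np.random.randn(4, 3).astype(np.float32))
    x1 = nn.Variable.from_numpy_array(np.random.randn(4, 3).astype(np.float32))
    dy = nn.Variable.from_numpy_array(np.ones((4, 3), dtype=np.float32))  # assumed upstream grad

    dx0, dx1 = huber_loss_backward([dy, x0, x1], delta=1.0)
    F.sink(dx0, dx1).forward()  # dx1 is simply -dx0
    # Inside |x0 - x1| < delta the gradient is 2 * (x0 - x1) * dy (terms t0);
    # outside that region it saturates at +/- 2 * delta * dy (terms t1, t2).
    print(dx0.d)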
def warp_coordinates(self, coordinates):
    theta = self.theta
    theta = F.reshape(
        theta, theta.shape[:1] + (1,) + theta.shape[1:], inplace=False)

    if coordinates.shape[0] == self.bs:
        transformed = F.batch_matmul(
            F.tile(theta[:, :, :, :2], (1, coordinates.shape[1], 1, 1)),
            F.reshape(coordinates, coordinates.shape + (1,),
                      inplace=False)) + theta[:, :, :, 2:]
    else:
        transformed = F.batch_matmul(
            F.tile(theta[:, :, :, :2], (1, coordinates.shape[1], 1, 1)),
            F.tile(
                F.reshape(coordinates, coordinates.shape + (1,), inplace=False),
                (self.bs / coordinates.shape[0], 1, 1, 1))) + theta[:, :, :, 2:]
    transformed = F.reshape(
        transformed, transformed.shape[:-1], inplace=False)

    if self.tps:
        control_points = self.control_points
        control_params = self.control_params
        distances = F.reshape(
            coordinates, (coordinates.shape[0], -1, 1, 2), inplace=False) \
            - F.reshape(control_points, (1, 1, -1, 2))
        distances = F.sum(F.abs(distances), axis=distances.ndim - 1)

        result = distances ** 2
        result = result * F.log(distances + 1e-6)
        result = result * control_params
        result = F.sum(result, axis=2)
        result = F.reshape(
            result, (self.bs, coordinates.shape[1], 1), inplace=False)
        transformed = transformed + result

    return transformed
def sample_noise(inpt_size, out_size):
    _f = lambda x: F.sign(x) * F.pow_scalar(F.abs(x), 0.5)
    noise = _f(F.randn(shape=(inpt_size + out_size, )))
    eps_w = F.batch_matmul(F.reshape(noise[:inpt_size], (1, -1)),
                           F.reshape(noise[inpt_size:], (1, -1)), True)
    eps_b = noise[inpt_size:]
    return eps_w, eps_b
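# A short, hedged usage sketch of `sample_noise` above (factorized Gaussian noise of
# the kind used in NoisyNet-style layers). The sizes are illustrative assumptions; the
# import is local so the sketch stays self-contained.
def _example_sample_noise():
    import nnabla.functions as F

    eps_w, eps_b = sample_noise(inpt_size=4, out_size=2)
    F.sink(eps_w, eps_b).forward()  # materialize both noise tensors in one pass
    print(eps_w.shape, eps_b.shape)  # weight-noise matrix and bias-noise vector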
def forward(self, x):
    N, C, H, W = x.shape
    log_abs = F.log(F.abs(self.scale))
    logdet = H * W * F.sum(log_abs)
    if self.logdet:
        return self.scale * (x + self.loc), logdet
    else:
        return self.scale * (x + self.loc)
def forward_impl(self, inputs, outputs):
    x = inputs[0].data
    M = inputs[1].data
    y = outputs[0].data
    y.copy_from(x)
    if not self.training:
        return
    Mb = F.max(F.abs(x), keepdims=True)
    F.maximum2(M, Mb, outputs=[M])
def softsign_backward(inputs):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    dx0 = dy * (1 / (1 + F.abs(x0)) ** 2)
    return dx0
def binary_tanh_backward(inputs):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    m0 = F.less_scalar(F.abs(x0), 1.0)
    m0 = no_grad(m0)
    dx0 = dy * m0
    return dx0
def backward_impl(self, inputs, outputs, prop_down, accum):
    # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
    # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

    # Args
    delta = self.forward_func.info.args["delta"]

    # Inputs
    x0 = inputs[0].data
    x1 = inputs[1].data
    dy = inputs[2].data
    # Outputs
    dx0 = outputs[0].data
    dx1 = outputs[1].data
    # Grads of inputs
    g_x0 = inputs[0].grad
    g_x1 = inputs[1].grad
    g_dy = inputs[2].grad
    # Grads of outputs
    g_dx0 = outputs[0].grad
    g_dx1 = outputs[1].grad

    # Computation
    if prop_down[0] or prop_down[1] or prop_down[2]:
        mask = F.less_scalar(F.abs(x0 - x1), delta)

    if prop_down[0]:
        if accum[0]:
            g_x0 += mask * 2 * dy * (g_dx0 - g_dx1)
        else:
            g_x0.copy_from(mask * 2 * dy * (g_dx0 - g_dx1))
    if prop_down[1]:
        if accum[1]:
            g_x1 += mask * 2 * dy * (g_dx1 - g_dx0)
        else:
            g_x1.copy_from(mask * 2 * dy * (g_dx1 - g_dx0))
    if prop_down[2]:
        # Simply using " / dy" causes numerical instability
        diff = x0 - x1
        pmask = F.greater_scalar(diff, 0.0)
        nmask = (1.0 - pmask)
        omask = (1.0 - mask)
        g_dx_diff = g_dx0 - g_dx1
        g_dy_ = 2.0 * g_dx_diff * \
            (diff * mask + delta * omask * (pmask - nmask))
        if accum[2]:
            g_dy += g_dy_
        else:
            g_dy.copy_from(g_dy_)
def secant(x0, x1, implicit_function, max_post_itr, eps=1e-16):
    f0 = implicit_function(x0)  # > 0
    f1 = implicit_function(x1)  # < 0
    for i in range(max_post_itr):
        nu = f0 * (x1 - x0)
        de = f1 - f0
        mask0 = F.greater_scalar(F.abs(de), eps)
        mask1 = 1 - mask0
        nu = mask0 * nu + mask1 * 0
        de = mask0 * de + mask1 * 1
        xm = x0 - nu / de
        fm = implicit_function(xm)

        mp = F.greater_equal_scalar(fm, 0)
        mn = 1 - mp
        x0 = mp * xm + mn * x0
        f0 = mp * fm + mn * f0
        x1 = mn * xm + mp * x1
        f1 = mn * fm + mp * f1
    return x0, x1
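# A tiny driver for the `secant` refinement above. The implicit function
# f(x) = 1 - x^2 (root at 1.0) and the bracketing points are illustrative
# assumptions; the sketch only shows how the routine narrows a sign-change bracket.
def _example_secant():
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    x_pos = nn.Variable.from_numpy_array(np.array([0.0], dtype=np.float32))  # f > 0 here
    x_neg = nn.Variable.from_numpy_array(np.array([2.0], dtype=np.float32))  # f < 0 here
    lo, hi = secant(x_pos, x_neg, lambda x: 1.0 - x ** 2, max_post_itr=8)
    F.sink(lo, hi).forward()
    print(lo.d, hi.d)  # lo approaches the root at 1.0; hi stays on the f < 0 side of the bracket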
def invertible_conv(x, reverse, rng, scope):
    r"""Invertible 1x1 Convolution Layer.

    Args:
        x (nn.Variable): Input variable.
        reverse (bool): Whether it's a reverse direction.
        rng (numpy.random.RandomState): A random generator.
        scope (str): The scope.

    Returns:
        nn.Variable: The output variable.
    """
    batch_size, c, n_groups = x.shape
    with nn.parameter_scope(scope):
        # initialize w by an orthonormal matrix
        w_init = np.linalg.qr(rng.randn(c, c))[0][None, ...]
        W_var = get_parameter_or_create("W", (1, c, c), w_init, True, True)
        W = F.batch_inv(W_var) if reverse else W_var
        x = F.convolution(x, F.reshape(W, (c, c, 1)), None, stride=(1, ))
    if reverse:
        return x
    log_det = batch_size * n_groups * F.log(F.abs(F.batch_det(W)))
    return x, log_det
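# A hedged round-trip sketch of the invertible 1x1 convolution above. The shapes and
# the scope name "invconv" are illustrative assumptions; applying the layer forward
# and then in reverse (which reuses the same "W" parameter under the scope) should
# recover the input up to numerical error.
def _example_invertible_conv():
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    rng = np.random.RandomState(0)
    x = nn.Variable.from_numpy_array(
        rng.randn(2, 8, 16).astype(np.float32))  # (batch, channels, groups)
    y, log_det = invertible_conv(x, reverse=False, rng=rng, scope="invconv")
    x_rec = invertible_conv(y, reverse=True, rng=rng, scope="invconv")
    F.sink(x_rec, log_det).forward()
    print(np.abs(x_rec.d - x.d).max())  # close to zero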
def get_d_layer(real_layers, fake_layers):
    """ discriminator layer loss """
    fix_range = 0.02  # hard coded, all layers are roughly scaled to this value
    sum_layer_loss = 0  # adds-on for generator
    layer_loss_list = []
    layer_n = len(real_layers)
    # hard coded, an overall average of all layers
    layer_norm = [12.0, 14.0, 24.0, 100.0]
    for layer_i in range(layer_n):
        real_layer = real_layers[layer_i]
        false_layer = fake_layers[layer_i]
        layer_diff = real_layer - false_layer
        layer_loss = F.mean(F.sum(F.abs(layer_diff), axis=[3]))  # an l1 loss
        layer_loss_list += [layer_loss]
        scaled_layer_loss = fix_range * layer_loss / layer_norm[layer_i]
        sum_layer_loss += scaled_layer_loss
    return sum_layer_loss
def train(generator, discriminator, patch_gan, solver_gen, solver_dis,
          weight_l1, train_iterator, val_iterator, epoch, monitor, interval):
    # Create Network Graph
    # for training
    im, la = train_iterator.next()  # for checking image shape
    real = nn.Variable(im.shape)  # real
    x = nn.Variable(la.shape)  # x
    # for validation
    real_val = nn.Variable(im.shape)  # real
    x_val = nn.Variable(la.shape)  # x

    # Generator
    fake = generator(x, test=False)
    # pix2pix infers just like training mode.
    fake_val = generator(x_val, test=False)
    fake_val.persistent = True  # Keep to visualize
    # Discriminator
    fake_y = discriminator(x, fake, patch_gan=patch_gan, test=False)
    real_y = discriminator(x, real, patch_gan=patch_gan, test=False)

    real_target = nn.Variable(fake_y.shape)
    real_target.data.fill(1)
    fake_target = nn.Variable(real_y.shape)
    fake_target.data.zero()

    loss_gen = F.mean(weight_l1 * F.abs(real - fake)) + \
        F.mean(F.sigmoid_cross_entropy(fake_y, real_target))
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(real_y, real_target) +
        F.sigmoid_cross_entropy(fake_y, fake_target))

    # Setting Solvers
    with nn.parameter_scope('generator'):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope('discriminator'):
        solver_dis.set_parameters(nn.get_parameters())

    # Create Monitors
    monitors = {
        'loss_gen': nm.MonitorSeries("Generator loss", monitor, interval=interval),
        'loss_dis': nm.MonitorSeries("Discriminator loss", monitor, interval=interval),
        'time': nm.MonitorTimeElapsed("Training time", monitor, interval=interval),
        'fake': nm.MonitorImageTile(
            "Fake images", monitor, interval=interval, num_images=2,
            normalize_method=lambda x: np.clip(np.divide(x, 255.0), 0.0, 1.0)),
    }

    i = 0
    for e in range(epoch):
        logger.info('Epoch = {}'.format(e))
        # Training
        while e == train_iterator.epoch:
            # forward / backward process
            real.d, x.d = train_iterator.next()
            solver_dis.zero_grad()
            solver_gen.zero_grad()
            # Discriminator
            loss_dis.forward(clear_no_need_grad=True)
            loss_dis.backward(clear_buffer=True)
            solver_dis.update()
            # Generator
            loss_gen.forward(clear_no_need_grad=True)
            loss_gen.backward(clear_buffer=True)
            solver_gen.update()
            monitors['time'].add(i)
            monitors['loss_gen'].add(i, loss_gen.d.copy())
            monitors['loss_dis'].add(i, loss_dis.d.copy())
            # Validation
            real_val.d, x_val.d = val_iterator.next()
            fake_val.forward()
            pix2pix_vis = np.stack(
                [label_to_image(x_val.d), normalize_image(fake_val.d)],
                axis=1).reshape((-1, ) + fake.shape[1:])
            monitors['fake'].add(i, pix2pix_vis)
            i += 1

    # save parameters of generator
    save_path = os.path.join(monitor._save_path,
                             'generator_model_{}.h5'.format(i))
    with nn.parameter_scope('generator'):
        nn.save_parameters(save_path)
    return save_path
def main():
    random.seed(args.seed)
    np.random.seed(args.seed)
    # Prepare for CUDA.
    ctx = get_extension_context('cudnn', device_id=args.gpus)
    nn.set_default_context(ctx)

    start_full_time = time.time()
    from iterator import data_iterator

    # Data list for sceneflow data set
    train_list = "./dataset/sceneflow_train.csv"
    test_list = "./dataset/sceneflow_test.csv"
    train = True
    validation = True

    # Set monitor path.
    monitor_path = './nnmonitor' + str(datetime.now().strftime("%Y%m%d%H%M%S"))

    img_left, img_right, disp_img = read_csv(train_list)
    img_left_test, img_right_test, disp_img_test = read_csv(test_list)
    train_samples = len(img_left)
    test_samples = len(img_left_test)
    train_size = int(len(img_left) / args.batchsize_train)
    test_size = int(len(img_left_test) / args.batchsize_test)

    # Create data iterator.
    data_iterator_train = data_iterator(
        train_samples, args.batchsize_train, img_left, img_right, disp_img,
        train=True, shuffle=True, dataset=args.dataset)
    data_iterator_test = data_iterator(
        test_samples, args.batchsize_test, img_left_test, img_right_test,
        disp_img_test, train=False, shuffle=False, dataset=args.dataset)

    # Set data size
    print(train_size, test_size)

    # Define data shape for training.
    var_left = nn.Variable(
        (args.batchsize_train, 3, args.crop_height, args.crop_width))
    var_right = nn.Variable(
        (args.batchsize_train, 3, args.crop_height, args.crop_width))
    var_disp = nn.Variable(
        (args.batchsize_train, 1, args.crop_height, args.crop_width))

    # Define data shape for testing.
    var_left_test = nn.Variable(
        (args.batchsize_test, 3, args.im_height, args.im_width))
    var_right_test = nn.Variable(
        (args.batchsize_test, 3, args.im_height, args.im_width))
    var_disp_test = nn.Variable(
        (args.batchsize_test, 1, args.im_height, args.im_width))
    mask_test = nn.Variable(
        (args.batchsize_test, 1, args.im_height, args.im_width))

    if args.loadmodel is not None:
        # Loading CNN pretrained parameters.
        nn.load_parameters(args.loadmodel)

    # === for Training ===
    # Definition of pred
    pred1, pred2, pred3 = psm_net(var_left, var_right, args.maxdisp, True)
    mask_train = F.less_scalar(var_disp, args.maxdisp)
    sum_mask = F.maximum_scalar(F.sum(mask_train), 1)
    # Definition of loss
    loss = 0.5 * (0.5 * F.sum(F.huber_loss(pred1, var_disp) * mask_train) / sum_mask
                  + 0.7 * F.sum(F.huber_loss(pred2, var_disp) * mask_train) / sum_mask
                  + F.sum(F.huber_loss(pred3, var_disp) * mask_train) / sum_mask)

    # === for Testing ===
    # Definition of pred
    mask_test = F.less_scalar(var_disp_test, args.maxdisp)
    sum_mask_test = F.maximum_scalar(F.sum(mask_test), 1)
    pred_test = psm_net(var_left_test, var_right_test, args.maxdisp, False)
    test_loss = F.sum(F.abs(pred_test - var_disp_test) * mask_test) / sum_mask_test

    # Prepare monitors.
    monitor = Monitor(monitor_path)
    monitor_train = MonitorSeries('Training loss', monitor, interval=1)
    monitor_test = MonitorSeries('Validation loss', monitor, interval=1)
    monitor_time_train = MonitorTimeElapsed(
        "Training time/epoch", monitor, interval=1)

    # Create a solver (parameter updater)
    solver = S.Adam(alpha=0.001, beta1=0.9, beta2=0.999)

    # Set Parameters
    params = nn.get_parameters()
    solver.set_parameters(params)
    params2 = nn.get_parameters(grad_only=False)
    solver.set_parameters(params2)

    for epoch in range(1, args.epochs + 1):
        print('This is %d-th epoch' % (epoch))

        if validation:
            ## testing ##
            total_test_loss = 0
            index_test = 0
            while index_test < test_size:
                var_left_test.d, var_right_test.d, var_disp_test.d = \
                    data_iterator_test.next()
                test_loss.forward(clear_no_need_grad=True)
                total_test_loss += test_loss
                print('Iter %d test loss = %.3f' % (index_test, test_loss.d))
                index_test += 1
            test_error = total_test_loss / test_size
            print('epoch %d total 3-px error in val = %.3f' %
                  (epoch, test_error.d))
            # Pass validation loss to a monitor.
            monitor_test.add(epoch, test_error)

        if train:
            ## training ##
            total_train_loss = 0
            index = 0
            while index < train_size:
                # Get mini batch
                # Preprocess
                var_left.d, var_right.d, var_disp.d = data_iterator_train.next()
                loss.forward(clear_no_need_grad=True)
                # Initialize gradients
                solver.zero_grad()
                # Backward execution
                loss.backward(clear_buffer=True)
                # Update parameters by computed gradients
                solver.update()
                print('Iter %d training loss = %.3f' % (index, loss.d))
                total_train_loss += loss.d
                index += 1
            train_error = total_train_loss / train_size
            monitor_time_train.add(epoch)
            print('epoch %d total training loss = %.3f' % (epoch, train_error))
            # Pass training loss to a monitor.
            monitor_train.add(epoch, train_error)

    print('full training time = %.2f HR' %
          ((time.time() - start_full_time) / 3600))

    # Save Parameter
    out_param_file = os.path.join(
        args.savemodel, 'psmnet_trained_param_' + str(epoch) + '.h5')
    nn.save_parameters(out_param_file)
def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        r = F.mean(F.abs(h - one))
        return r
def sr_loss(ctx, pred0, pred1):
    with nn.context_scope(ctx):
        loss_sr = F.mean(F.abs(pred0 - pred1))
        return loss_sr
def reconstruction_loss(imgA, imgB):
    return F.mean(F.abs(imgA - imgB))
def parametric_pow2_quantize(x, sign=True, with_zero=True,
                             n_init=8, n_min=1, n_max=16,
                             m_init=1, m_min=-8, m_max=8,
                             fix_parameters=False):
    """Parametric version of `pow2_quantize` where the bitwidth `n` and
    dynamic range `m` are learnable parameters.

    Args:
        x(~nnabla.Variable): N-D array as input
        sign (bool): keep sign information during quantization.
        with_zero (bool): quantize small weights to zero.
        n_init (:obj:`nnabla.initializer.BaseInitializer` or :obj:`numpy.ndarray`): Initializer for bitwidth parameter.
        n_min (int): lower bound for bitwidth.
        n_max (int): upper bound for bitwidth.
        m_init (:obj:`nnabla.initializer.BaseInitializer` or :obj:`numpy.ndarray`): Initializer for dynamic range.
        m_min (float): lower bound for dynamic range.
        m_max (float): upper bound for dynamic range.
        fix_parameters (bool): When set to `True`, the parameters `n` and `m` will not be updated.

    Returns:
        ~nnabla.Variable: N-D array.
    """
    def clip_scalar(v, min_value, max_value):
        return F.minimum_scalar(F.maximum_scalar(v, min_value), max_value)

    def broadcast_scalar(v, shape):
        return F.broadcast(F.reshape(v, (1,) * len(shape), inplace=False), shape=shape)

    def quantize_pow2(v):
        return 2 ** F.round(F.log(F.abs(v)) / np.log(2.))

    n = get_parameter_or_create("n", (),
                                ConstantInitializer(n_init),
                                need_grad=True,
                                as_need_grad=not fix_parameters)
    m = get_parameter_or_create("m", (),
                                ConstantInitializer(m_init),
                                need_grad=True,
                                as_need_grad=not fix_parameters)

    # ensure that bitwidth is in specified range and an integer
    n_q = F.round(clip_scalar(n, n_min, n_max))
    if sign:
        n_q = n_q - 1
    if with_zero:
        n_q = n_q - 1

    # ensure that dynamic range is in specified range and an integer
    m_q = F.round(clip_scalar(m, m_min, m_max))

    # compute min/max value that we can represent
    x_max = 2 ** m_q
    x_min = 2 ** (m_q - (2 ** n_q) + 1)

    # broadcast variables to correct size
    x_min = broadcast_scalar(x_min, shape=x.shape)
    x_max = broadcast_scalar(x_max, shape=x.shape)

    # if unsigned, then quantize all negative values to zero
    if not sign:
        x = F.relu(x)

    # compute absolute value/sign of input
    ax = F.abs(x)
    sx = F.sign(x)

    if with_zero:
        # prune smallest elements (in magnitude) to zero if they are smaller
        # than `x_min / \sqrt(2)`
        x_threshold = x_min / np.sqrt(2)

        idx1 = F.greater_equal(ax, x_threshold) * F.less(ax, x_min)
        idx2 = F.greater_equal(ax, x_min) * F.less(ax, x_max)
        idx3 = F.greater_equal(ax, x_max)
    else:
        idx1 = F.less(ax, x_min)
        idx2 = F.greater_equal(ax, x_min) * F.less(ax, x_max)
        idx3 = F.greater_equal(ax, x_max)

    # do not backpropagate gradient through indices
    idx1.need_grad = False
    idx2.need_grad = False
    idx3.need_grad = False

    # do not backpropagate gradient through sign
    sx.need_grad = False

    # take care of values outside of dynamic range
    return sx * (x_min * idx1 + quantize_pow2(ax) * idx2 + x_max * idx3)
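# A minimal usage sketch for the parametric power-of-two quantizer above. It assumes
# the same imports the function body already needs (`numpy as np`, `nnabla as nn`,
# `nnabla.functions as F`, `get_parameter_or_create`, `ConstantInitializer`); the
# scope name "pow2_q" and the input shape are illustrative assumptions.
def _example_parametric_pow2_quantize():
    import numpy as np
    import nnabla as nn

    x = nn.Variable.from_numpy_array(np.random.randn(4, 8).astype(np.float32))
    with nn.parameter_scope("pow2_q"):  # holds the learnable scalars "n" and "m"
        x_q = parametric_pow2_quantize(x, sign=True, with_zero=True)
    x_q.forward()
    print(x_q.d)  # signed powers of two (or zero), clipped to the representable range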
def parametric_pow2_quantize_xmin_xmax(x, sign=True, with_zero=True,
                                       xmin_init=2**-7, xmin_min=2**-15, xmin_max=256,
                                       xmax_init=2**0, xmax_min=2**-8, xmax_max=256,
                                       fix_parameters=False):
    """Parametric version of `pow2_quantize` where the min value `xmin` and
    max value `xmax` are learnable parameters.

    Returns:
        ~nnabla.Variable: N-D array.
    """
    def clip_scalar(v, min_value, max_value):
        return F.minimum_scalar(F.maximum_scalar(v, min_value), max_value)

    def broadcast_scalar(v, shape):
        return F.broadcast(F.reshape(v, (1,) * len(shape), inplace=False), shape=shape)

    def quantize_pow2(v):
        return 2. ** F.round(F.log(F.abs(v)) / np.log(2.))

    xmin = get_parameter_or_create("xmin", (),
                                   ConstantInitializer(xmin_init),
                                   need_grad=True,
                                   as_need_grad=not fix_parameters)
    xmax = get_parameter_or_create("xmax", (),
                                   ConstantInitializer(xmax_init),
                                   need_grad=True,
                                   as_need_grad=not fix_parameters)

    # ensure that minimum dynamic range is in specified range and a power-of-two
    xmin = quantize_pow2(clip_scalar(xmin, xmin_min, xmin_max))

    # ensure that maximum dynamic range is in specified range and a power-of-two
    xmax = quantize_pow2(clip_scalar(xmax, xmax_min, xmax_max))

    # broadcast variables to correct size
    xmin = broadcast_scalar(xmin, shape=x.shape)
    xmax = broadcast_scalar(xmax, shape=x.shape)

    # if unsigned, then quantize all negative values to zero
    if not sign:
        x = F.relu(x)

    # compute absolute value/sign of input
    ax = F.abs(x)
    sx = F.sign(x)

    if with_zero:
        # prune smallest elements (in magnitude) to zero if they are smaller
        # than `x_min / \sqrt(2)`
        x_threshold = xmin / np.sqrt(2)

        idx1 = F.greater_equal(ax, x_threshold) * F.less(ax, xmin)
        idx2 = F.greater_equal(ax, xmin) * F.less(ax, xmax)
        idx3 = F.greater_equal(ax, xmax)
    else:
        idx1 = F.less(ax, xmin)
        idx2 = F.greater_equal(ax, xmin) * F.less(ax, xmax)
        idx3 = F.greater_equal(ax, xmax)

    # do not backpropagate gradient through indices
    idx1.need_grad = False
    idx2.need_grad = False
    idx3.need_grad = False

    # do not backpropagate gradient through sign
    sx.need_grad = False

    # take care of values outside of dynamic range
    return sx * (xmin * idx1 + quantize_pow2(ax) * idx2 + xmax * idx3)
def quantize_pow2(v):
    return 2. ** F.round(F.log(F.abs(v)) / np.log(2.))
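# A quick numeric check of the helper above, with local imports so it runs on its own
# (the input values are illustrative): each element is rounded to the nearest power
# of two in log2 space.
def _example_quantize_pow2():
    import numpy as np
    import nnabla as nn

    v = nn.Variable.from_numpy_array(np.array([0.3, 1.5, 6.0], dtype=np.float32))
    q = quantize_pow2(v)
    q.forward()
    print(q.d)  # approximately [0.25, 2., 8.]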
def bidirectional_sphere_trace(self, camloc, raydir, t_start, t_finish):
    t_f = F.identity(t_start)
    x_f = camloc + t_f * raydir
    s_f = self.sdf(x_f)
    mask_hit_eps_f = 0 * F.identity(t_f)

    t_b = F.identity(t_finish)
    x_b = camloc + t_b * raydir
    s_b = self.sdf(x_b)
    mask_hit_eps_b = 0 * F.identity(t_b)

    for i in range(self.sphere_trace_itr - 1):
        # Forward direction
        mask_hit_eps_f_i = F.less_equal_scalar(F.abs(s_f), self.eps)
        mask_hit_eps_f += (1 - mask_hit_eps_f) * mask_hit_eps_f_i
        t_f += (1 - mask_hit_eps_f) * s_f
        x_f = camloc + t_f * raydir

        s_f_prev = F.identity(s_f)
        s_f = self.sdf(x_f)
        mask_pos_f_prev = (1 - mask_hit_eps_f) * \
            F.greater_scalar(s_f_prev, 0)
        mask_neg_f = (1 - mask_hit_eps_f) * F.less_scalar(s_f, 0)
        mask_revert_f = mask_pos_f_prev * mask_neg_f
        t_f -= mask_revert_f * s_f_prev
        s_f = mask_revert_f * s_f_prev + (1 - mask_revert_f) * s_f

        # Backward direction
        mask_hit_eps_b_i = F.less_equal_scalar(F.abs(s_b), self.eps)
        mask_hit_eps_b += (1 - mask_hit_eps_b) * mask_hit_eps_b_i
        t_b -= (1 - mask_hit_eps_b) * s_b
        x_b = camloc + t_b * raydir

        s_b_prev = F.identity(s_b)
        s_b = self.sdf(x_b)
        mask_pos_b_prev = (1 - mask_hit_eps_b) * \
            F.greater_scalar(s_b_prev, 0)
        mask_neg_b = (1 - mask_hit_eps_b) * F.less_scalar(s_b, 0)
        mask_revert_b = mask_pos_b_prev * mask_neg_b
        t_b += mask_revert_b * s_b_prev
        s_b = mask_revert_b * s_b_prev + (1 - mask_revert_b) * s_b

        ## print("s_f neg", np.sum(s_f.data < 0))
        ## print("s_b neg", np.sum(s_b.data < 0))

    # Fine grained start/finish points
    t_f0 = t_f
    t_f1 = t_f + mask_revert_f * s_f_prev

    x_hit_st0 = camloc + t_f0 * raydir
    ## x0, x1 = self.post_method(x_hit_st0, camloc + t_f1 * raydir)
    ## t_f0 = F.norm((x0 - camloc), axis=(x0.ndim - 1), keepdims=True)
    ## t_f1 = F.norm((x1 - camloc), axis=(x1.ndim - 1), keepdims=True)

    mask_hit_f1b = mask_revert_f * F.less(t_f1, t_b)
    t_b = t_f1 * mask_hit_f1b + t_b * (1 - mask_hit_f1b)

    # Reverse the opposite case
    mask_fb = F.less(t_f, t_b)
    t_f = t_f * mask_fb + t_start * (1 - mask_fb)
    t_b = t_b * mask_fb + t_finish * (1 - mask_fb)

    return x_hit_st0, t_f, t_b, mask_hit_eps_f
def get_tecogan_model(conf, r_inputs, r_targets, scope_name, tecogan=True):
    """
    Create computation graph and variables for TecoGAN.
    """
    # r_inputs, r_targets : shape (batch, conf.train.rnn_n, h, w, c)
    rnn_length = conf.train.rnn_n
    if tecogan:
        r_inputs, r_targets = get_tecogan_inputs(r_inputs, r_targets)
        rnn_length = rnn_length * 2 - 1

    # get the consecutive frame sequences from the input sequence
    frame_t_pre, frame_t = r_inputs[:, 0:-1, :, :, :], r_inputs[:, 1:, :, :, :]

    # Get flow estimations
    fnet_output = get_fnet_output(conf, rnn_length, frame_t_pre, frame_t, scope_name)

    # Get the generated HR output frames
    gen_outputs = get_generator_output(conf, rnn_length, r_inputs,
                                       fnet_output.flow_hr, scope_name)

    s_gen_output = F.reshape(
        gen_outputs,
        (conf.train.batch_size * rnn_length,
         conf.train.crop_size * 4, conf.train.crop_size * 4, 3),
        inplace=False)
    s_targets = F.reshape(
        r_targets,
        (conf.train.batch_size * rnn_length,
         conf.train.crop_size * 4, conf.train.crop_size * 4, 3),
        inplace=False)

    # Content loss (l2 loss)
    content_loss = F.mean(
        F.sum(F.squared_error(s_gen_output, s_targets), axis=[3]))
    # Warp loss (l2 loss)
    warp_loss = get_warp_loss(
        conf, rnn_length, frame_t, frame_t_pre, fnet_output.flow_lr)

    if tecogan:
        d_data = get_d_data(conf, fnet_output.flow_hr, gen_outputs,
                            r_targets, rnn_length)
        # Build the tempo discriminator for the real part and fake part
        t_d = get_t_d(conf, r_inputs, d_data)

        # Discriminator layer loss:
        d_layer_loss = get_d_layer(t_d.real_layers, t_d.fake_layers)
        # vgg loss (cosine similarity)
        loss_vgg = get_vgg_loss(s_gen_output, s_targets)
        # ping pong loss (an l1 loss)
        gen_out_first = gen_outputs[:, 0:conf.train.rnn_n - 1, :, :, :]
        gen_out_last_rev = gen_outputs[:, -1:-conf.train.rnn_n:-1, :, :, :]
        pp_loss = F.mean(F.abs(gen_out_first - gen_out_last_rev))
        # adversarial loss
        t_adversarial_loss = F.mean(
            -F.log(t_d.tdiscrim_fake_output + conf.train.eps))

        # Overall generator loss
        gen_loss = content_loss + pp_loss * conf.gan.pp_scaling + conf.gan.ratio * \
            t_adversarial_loss + conf.gan.vgg_scaling * loss_vgg + \
            conf.gan.dt_ratio_0 * d_layer_loss

        # Discriminator loss
        t_discrim_fake_loss = F.log(1 - t_d.tdiscrim_fake_output + conf.train.eps)
        t_discrim_real_loss = F.log(t_d.tdiscrim_real_output + conf.train.eps)
        t_discrim_loss = F.mean(-(t_discrim_fake_loss + t_discrim_real_loss))

        fnet_loss = gen_loss + warp_loss
        set_persistent_all(r_targets, r_inputs, loss_vgg, gen_out_first,
                           gen_out_last_rev, pp_loss, d_layer_loss, content_loss,
                           warp_loss, gen_loss, t_adversarial_loss, t_discrim_loss,
                           t_discrim_real_loss, d_data.t_vel, d_data.t_gen_output,
                           s_gen_output, s_targets)

        Network = collections.namedtuple(
            'Network', 'content_loss, warp_loss, fnet_loss, vgg_loss,'
            'gen_loss, pp_loss, sum_layer_loss, t_adversarial_loss,'
            't_discrim_loss, t_gen_output, t_discrim_real_loss')
        return Network(
            content_loss=content_loss,
            warp_loss=warp_loss,
            fnet_loss=fnet_loss,
            vgg_loss=loss_vgg,
            gen_loss=gen_loss,
            pp_loss=pp_loss,
            sum_layer_loss=d_layer_loss,
            t_adversarial_loss=t_adversarial_loss,
            t_discrim_loss=t_discrim_loss,
            t_gen_output=d_data.t_gen_output,
            t_discrim_real_loss=t_discrim_real_loss)

    gen_loss = content_loss
    fnet_loss = gen_loss + warp_loss
    set_persistent_all(content_loss, s_gen_output, warp_loss, gen_loss, fnet_loss)

    Network = collections.namedtuple(
        'Network', 'content_loss, warp_loss, fnet_loss, gen_loss')
    return Network(
        content_loss=content_loss,
        warp_loss=warp_loss,
        fnet_loss=fnet_loss,
        gen_loss=gen_loss,
    )
def train():
    parser = argparse.ArgumentParser()
    parser.add_argument("--num-train-examples", type=int, default=1600)
    parser.add_argument("--num-valid-examples", type=int, default=100)
    parser.add_argument("--accum-grad", type=int, default=32)
    parser.add_argument("--max-iter", type=int, default=6400)
    parser.add_argument("--valid-interval", type=int, default=100)
    parser.add_argument("--context", type=str, default="cpu")
    parser.add_argument("--device-id", type=int, default=0)
    args = parser.parse_args()

    from nnabla.ext_utils import get_extension_context
    extension_module = args.context
    ctx = get_extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # prepare dataset
    tdataset = []
    for i in range(args.num_train_examples):
        V, E = random_graph(rng)
        deg = degrees(V, E)
        tdataset.append(([V], [utils.from_adjacency_list(E)], [deg]))

    vdataset = []
    for i in range(args.num_valid_examples):
        V, E = random_graph(rng)
        deg = degrees(V, E)
        vdataset.append(([V], [utils.from_adjacency_list(E)], [deg]))

    # prepare data iterator
    tdata = data_iterator(SimpleDataSource2(tdataset, shuffle=True), 1,
                          False, False, False)
    vdata = data_iterator(SimpleDataSource2(vdataset, shuffle=False), 1,
                          False, False, False)

    # prepare monitors
    monitor = M.Monitor("./degree")
    tloss = M.MonitorSeries("Training Loss", monitor, interval=10)
    verror = M.MonitorSeries("Validation Error", monitor, interval=10)

    # prepare solver
    solver = S.Adam()

    # training loop
    for i in range(args.max_iter):
        l = 0
        for b in range(args.accum_grad):
            # read data
            V, E, degree = tdata.next()
            V = V[0][0]
            E = E[0][0]
            degree = degree[0][0]

            # predict
            output = predict(V, E)

            # initialize solver
            if i == 0 and b == 0:
                solver.set_parameters(nn.get_parameters())

            # calculate loss
            label = nn.Variable(degree.shape)
            label.data.data = degree
            label = F.reshape(label, (len(V), 1))
            loss = F.mean(F.squared_error(output, label))

            # training
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            l += loss.data.data
        solver.update()
        tloss.add(i, l / args.accum_grad)
        l = 0

        if i % args.valid_interval == 0:
            # validation
            # read data
            e = 0
            n = 0
            for b in range(vdata.size):
                V, E, degree = vdata.next()
                V = V[0][0]
                E = E[0][0]
                degree = degree[0][0]

                output = predict(V, E)
                label = nn.Variable(degree.shape)
                label.data.data = degree
                label = F.reshape(label, (len(V), 1))
                error = F.sum(F.less_scalar(F.abs(F.sub2(output, label)), 0.5))
                error.forward()
                e += error.data.data
                n += len(V)
            verror.add(i, e / n)
def calculate_alpha(
    parameter_list,
    X,
    Y,
    Y_label,
    feature_valid,
    solver,
    output_valid,
    pred,
    loss,
    phi,
    l2,
):
    min_loss = 10000.0
    feature_valid.d = X
    output_valid.d = Y
    for epoch in range(args.epoch):
        phi_loss = 0
        loss.forward()
        solver.zero_grad()
        loss.backward()
        phi_loss = phi.d / len(X)
        temp_W = parameter_list
        grad_loss = F.add_n(
            *[F.mean(F.abs(p.grad)) for p in nn.get_parameters().values()])
        grad_norm = F.add_n(
            *[F.norm(p.grad) for p in nn.get_parameters().values()])
        if grad_loss.data < min_loss:
            if epoch == 0:
                init_grad = grad_loss.data
            min_loss = grad_loss.data
            best_W = temp_W
        if min_loss < init_grad / 200:
            print("stopping criteria reached in epoch :{}".format(epoch))
            break
        parameter_list = backtracking_line_search(grad_norm, X, Y, loss,
                                                  len(X), loss.d, l2)
        if epoch % 100 == 0:
            print("Epoch:{:4d}\tloss:{}\tphi_loss:{}\tl2(lmbd):{}\tgrad:{}".format(
                epoch, loss.d, phi_loss, args.lmbd * l2.d, grad_loss.data))

    for weight, param in zip(nn.get_parameters().values(), best_W):
        weight.data.copy_from(param.data)

    softmax_value = F.softmax(pred)
    softmax_value.forward()
    # derivative of softmax cross entropy
    weight_matrix = softmax_value.d - Y
    weight_matrix = np.divide(weight_matrix, (-2.0 * args.lmbd * len(Y)))
    np.save(os.path.join(data_dir, "weight_matrix.npy"), weight_matrix)

    # compute alpha
    alpha = []
    for ind, label in enumerate(Y_label.reshape(-1)):
        alpha.append(float(weight_matrix[ind, int(label)]))
    alpha = np.abs(np.array(alpha))
    np.save(os.path.join(data_dir, "alpha_vgg_nnabla_score.npy"), alpha)

    # calculate correlation
    w = np.matmul(X.T, weight_matrix)
    temp = np.matmul(X, w)
    softmax_value = F.softmax(nn.Variable.from_numpy_array(temp))
    softmax_value.forward()
    y_p = softmax_value.d
    print(
        "L1 difference between ground truth prediction and prediction by "
        "representer theorem decomposition")
    print(np.mean(np.abs(Y - y_p)))

    from scipy.stats.stats import pearsonr
    print(
        "pearson correlation between ground truth prediction and prediction "
        "by representer theorem")
    corr, _ = pearsonr(Y.reshape(-1), y_p.reshape(-1))
    print(corr)