def siamese_loss(e0, e1, t, margin=1.0, eps=1e-4):
    """Mean contrastive loss for a batch of embedding pairs.

    Args:
        e0: First batch of embeddings.
        e1: Second batch of embeddings, compared element-wise with ``e0``.
        t: Pair labels. The squared distance is weighted by ``t`` and the
            margin term by ``1 - t``.
        margin: Margin subtracted from in the hinge term.
        eps: Constant added to the squared distance before the square root.

    Returns:
        ``F.mean`` of the per-pair losses.
    """
    # Squared distance, summed over axis 1.
    sq_dist = F.sum(F.squared_error(e0, e1), axis=1)

    # Term weighted by t: the squared distance itself.
    pull_term = t * sq_dist

    # Term weighted by (1 - t): squared hinge on margin - sqrt(distance).
    root_dist = (sq_dist + eps)**(0.5)
    hinge = F.maximum_scalar(margin - root_dist, 0)
    push_term = (1 - t) * (hinge**2)

    return F.mean(pull_term + push_term)
def build_train_graph(self, batch):
    """Construct the training graph and solver from an example batch.

    Only the ``.shape`` of each batch element is used, to size the input
    variables. The resulting variables are stored on ``self.v``.

    Args:
        batch: Sequence unpacked as
            ``(obs, action, reward, terminal, newobs)``.
    """
    self.solver = S.Adam(self.learning_rate)
    obs, action, reward, terminal, newobs = batch

    # Input placeholders, one per batch element.
    s = nn.Variable(obs.shape)
    a = nn.Variable(action.shape)
    r = nn.Variable(reward.shape)
    t = nn.Variable(terminal.shape)
    snext = nn.Variable(newobs.shape)

    # Network built under `name_q`; its parameters are the ones handed to
    # the solver.
    with nn.parameter_scope(self.name_q):
        q = self.q_builder(s, self.num_actions, test=False)
        self.solver.set_parameters(nn.get_parameters())

    # Network built under `name_qnext`, in test mode and excluded from
    # backprop.
    with nn.parameter_scope(self.name_qnext):
        qnext = self.q_builder(snext, self.num_actions, test=True)
    qnext.need_grad = False

    # Clip rewards to [-clip_reward, clip_reward].
    lower_clipped = F.maximum_scalar(r, -self.clip_reward)
    clipped_r = F.minimum_scalar(lower_clipped, self.clip_reward)

    # Select the Q-value of the taken action through a one-hot mask.
    action_column = F.reshape(a, (-1, 1), inplace=False)
    action_mask = F.one_hot(action_column, (q.shape[1],))
    q_a = F.sum(q * action_mask, axis=1)

    # Target: reward plus discounted max next-Q, masked by (1 - t).
    discount = self.gamma * (1 - t)
    target = clipped_r + discount * F.max(qnext, axis=1)
    loss = F.mean(F.huber_loss(q_a, target))

    Variables = namedtuple(
        'Variables', ['s', 'a', 'r', 't', 'snext', 'q', 'loss'])
    self.v = Variables(s, a, r, t, snext, q, loss)
    self.sync_models()
    self.built = True
def net(n_class, xs, xq, init_type='nnabla', embedding='conv4',
        net_type='prototypical', distance='euclid', test=False):
    '''
    Similarity net function
        This function implements the network with settings as specified.

        Args:
            n_class (int): number of classes. Typical setting is 5 or 20.
            xs (~nnabla.Variable): support images.
            xq (~nnabla.Variable): query images.
            init_type (str, optional): initialization type for weights and bias parameters. See conv_initializer function.
            embedding (str, optional): embedding network. Only 'conv4' is handled here.
            net_type (str, optional): 'matching' or 'prototypical'.
            distance (str, optional): similarity metric to use. See similarity function.
            test (bool, optional): switch flag for training dataset and test dataset

        Returns:
            h (~nnabla.Variable): output variable indicating similarity between support and query.

        Raises:
            ValueError: if `embedding` or `net_type` is not one of the supported values.
    '''

    # Shots per class. Integer division is required: the value is used as a
    # dimension in the reshape calls below.
    n_shot = xs.shape[0] // n_class

    # feature embedding for supports and queries
    if embedding != 'conv4':
        raise ValueError("Unknown embedding {}".format(embedding))
    fs = conv4(xs, test, init_type)  # tensor of (n_support, fdim)
    fq = conv4(xq, test, init_type)  # tensor of (n_query, fdim)

    if net_type == 'matching':
        # This example does not include the full-context-embedding of matching networks.
        fs = F.reshape(fs, (1, ) + fs.shape)  # (1, n_way, fdim)
        # (n_way*n_query, 1, fdim)
        fq = F.reshape(fq, (fq.shape[0], 1) + fq.shape[1:])
        h = similarity(fq, fs, distance)
        h = h - F.mean(h, axis=1, keepdims=True)
        if 1 < n_shot:
            # Clamp before softmax, then average probabilities over the shots
            # of each class.
            h = F.minimum_scalar(F.maximum_scalar(h, -35), 35)
            h = F.softmax(h)
            h = F.reshape(h, (h.shape[0], n_class, n_shot))
            h = F.mean(h, axis=2)
            # Reverse to logit to use same softmax cross entropy
            h = F.log(h)
    elif net_type == 'prototypical':
        if 1 < n_shot:
            # Average the support features of each class.
            fs = F.reshape(fs, (n_class, n_shot) + fs.shape[1:])
            fs = F.mean(fs, axis=1)
        fs = F.reshape(fs, (1, ) + fs.shape)  # (1, n_way, fdim)
        # (n_way*n_query, 1, fdim)
        fq = F.reshape(fq, (fq.shape[0], 1) + fq.shape[1:])
        h = similarity(fq, fs, distance)
        h = h - F.mean(h, axis=1, keepdims=True)
    else:
        raise ValueError("Unknown net_type {}".format(net_type))

    return h
def sample_pdf(bins, weights, N_samples, det=False):
    """Draw samples from the piecewise distribution defined by bins and weights.

    The weights are normalised along the last axis into a PDF and accumulated
    into a CDF with a leading zero. Values ``u`` in [0, 1) are located in the
    CDF and linearly interpolated between the neighbouring ``bins`` entries.

    Args:
        bins: Bin positions, gathered along the last axis. Indices run from 0
            to ``weights.shape[-1]``, so the last axis needs
            ``weights.shape[-1] + 1`` entries.
        weights: Unnormalised weights; the last axis is the distribution axis.
            Not modified by this function.
        N_samples (int): Number of samples drawn per distribution.
        det (bool): If True use evenly spaced ``u`` values from ``F.arange``;
            otherwise draw ``u`` with ``F.rand``.

    Returns:
        samples: Interpolated positions with shape
        ``weights.shape[:-1] + (N_samples,)``.
    """
    # Out-of-place add: `weights += 1e-5` would write into the caller's
    # buffer when `weights` supports in-place arithmetic.
    weights = weights + 1e-5
    pdf = weights / F.sum(weights, axis=-1, keepdims=True)

    # CDF with a leading zero, so it has one more entry than `weights`.
    cdf = F.cumsum(pdf, axis=-1)
    cdf = F.concatenate(F.constant(0, cdf[..., :1].shape), cdf, axis=-1)

    if det:
        u = F.arange(0., 1., 1 / N_samples)
        u = F.broadcast(u[None, :], cdf.shape[:-1] + (N_samples, ))
        # Match the container type of `cdf` (NdArray vs. Variable).
        u = u.data if isinstance(cdf, nn.NdArray) else u
    else:
        u = F.rand(shape=cdf.shape[:-1] + (N_samples, ))

    indices = F.searchsorted(cdf, u, right=True)

    # Neighbouring CDF entries around each u, clamped to valid indices.
    below = F.maximum_scalar(indices - 1, 0)
    above = F.minimum_scalar(indices, cdf.shape[-1] - 1)
    indices_g = F.stack(below, above, axis=below.ndim)
    cdf_g = F.gather(cdf, indices_g, axis=-1,
                     batch_dims=len(indices_g.shape) - 2)
    bins_g = F.gather(bins, indices_g, axis=-1,
                      batch_dims=len(indices_g.shape) - 2)

    # Guard against (near-)zero-width CDF intervals before dividing.
    denom = (cdf_g[..., 1] - cdf_g[..., 0])
    denom = F.where(F.less_scalar(denom, 1e-5),
                    F.constant(1, denom.shape), denom)
    t = (u - cdf_g[..., 0]) / denom
    samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0])

    return samples
def contrastive_loss(sd, l, margin=1.0, eps=1e-4):
    """
    Contrastive loss computed from the squared difference `sd` and
    labels `l` in {0, 1}.

    f(sd, l) = l * sd + (1 - l) * max(0, margin - sqrt(sd))^2

    `eps` is added to `sd` before the square root. The loss is composed
    from basic NNabla arithmetic functions; a dedicated C++ Function
    would perform better.
    """
    # Distance from the squared difference; eps is added under the root.
    distance = (sd + eps) ** (0.5)
    hinge = F.maximum_scalar(margin - distance, 0)

    similar_term = l * sd
    dissimilar_term = (1 - l) * (hinge ** 2)
    return similar_term + dissimilar_term
def contrastive_loss(sd, l, margin=1.0, eps=1e-4):
    """
    Element-wise contrastive loss for squared differences `sd` and
    labels `l` in {0, 1}.

    f(sd, l) = l * sd + (1 - l) * max(0, margin - sqrt(sd))^2

    The square root is taken of `sd + eps`. Built by composing NNabla's
    arithmetic functions, which is convenient but slower than a
    Function implemented in C++.
    """
    margin_gap = margin - (sd + eps) ** (0.5)
    clipped_gap = F.maximum_scalar(margin_gap, 0)

    # The label selects between the two terms.
    cost_same = l * sd
    cost_diff = (1 - l) * (clipped_gap ** 2)
    return cost_same + cost_diff
def _focal_loss(pred, gt):
    '''Modified focal loss, in the CornerNet formulation.

    Uses log_sigmoid on the logits instead of log(sigmoid(.)) for stability.

    Arguments:
        pred (batch x c x h x w): logit (must be values before sigmoid activation)
        gt (batch x c x h x w): target map; entries >= 1 are treated as positives
    Returns:
        Summed positive and negative terms, negated and divided by the
        number of positives (floored at 1).
    '''
    alpha = 2
    beta = 4

    # Masks for positive (gt >= 1) and all remaining locations.
    positive_mask = F.greater_equal_scalar(gt, 1)
    negative_mask = 1 - positive_mask

    # Negatives are down-weighted by (1 - gt)^beta.
    negative_weight = F.pow_scalar(1.0 - gt, beta)
    prob = F.sigmoid(pred)

    positive_term = F.log_sigmoid(pred) * F.pow_scalar(
        1.0 - prob, alpha) * positive_mask
    positive_sum = F.sum(positive_term)

    negative_term = F.log_sigmoid(-pred) * F.pow_scalar(
        prob, alpha) * negative_weight * negative_mask
    negative_sum = F.sum(negative_term)

    # Floor at 1 so the division is defined when there are no positives.
    positive_count = F.maximum_scalar(F.sum(positive_mask), 1)
    return -(1 / positive_count) * (positive_sum + negative_sum)
def network_size_activations():
    """
    Returns total number of activations
    and size in KBytes (NNabla variable using `max` or `sum` operator)

    Every parameter whose name contains "Asize" is visited. Its value is
    added to the activation count and multiplied by the bitwidth `n` implied
    by `cfg.a_quantize` (32 when `cfg.a_quantize` is None) to get a size in
    KBytes. The per-parameter sizes are reduced with `F.max` or `F.sum`
    according to `cfg.target_activation_type`.

    Returns:
        (num_activations, _kbytes)

    Raises:
        ValueError: if `cfg.a_quantize` or `cfg.target_activation_type` is
            not one of the handled values.
    """
    kbytes = []
    num_activations = 0

    # get all parameters
    ps = nn.get_parameters(grad_only=False)
    for p in ps:
        if "Asize" not in p:
            continue

        print(f"{p}\t{ps[p].d}")
        num_activations += ps[p].d

        if cfg.a_quantize is not None:
            # Scope holding the quantizer parameters for this activation.
            quant_scope = "/Aquant/" + cfg.a_quantize.replace("_relu", "")

            if cfg.a_quantize in ['fp_relu', 'pow2_relu']:
                # fixed quantization
                n = nn.Variable((), need_grad=False)
                n.d = cfg.a_bitwidth
            elif cfg.a_quantize in [
                    'parametric_fp_relu', 'parametric_fp_b_xmax_relu',
                    'parametric_fp_d_b_relu', 'parametric_pow2_b_xmax_relu',
                    'parametric_pow2_b_xmin_relu'
            ]:
                # parametric quantization
                s = p.replace("/Asize", quant_scope + "/n")
                n = F.round(
                    clip_scalar(ps[s], cfg.a_bitwidth_min,
                                cfg.a_bitwidth_max))
            elif cfg.a_quantize in ['parametric_fp_d_xmax_relu']:
                # these quantization methods do not have n, so we need to compute it!
                # parametric quantization
                d = ps[p.replace("/Asize", quant_scope + "/d")]
                xmax = ps[p.replace("/Asize", quant_scope + "/xmax")]

                # ensure that stepsize is in specified range and a power of two
                d_q = quantize_pow2(
                    clip_scalar(d, cfg.a_stepsize_min, cfg.a_stepsize_max))

                # ensure that dynamic range is in specified range
                xmax = clip_scalar(xmax, cfg.a_xmax_min, cfg.a_xmax_max)

                # compute real `xmax`
                xmax = F.round(xmax / d_q) * d_q

                n = F.maximum_scalar(F.ceil(log2(xmax / d_q + 1.0)),
                                     cfg.a_bitwidth_min)
            elif cfg.a_quantize in ['parametric_pow2_xmin_xmax_relu']:
                # these quantization methods do not have n, so we need to compute it!
                # parametric quantization
                xmin = ps[p.replace("/Asize", quant_scope + "/xmin")]
                xmax = ps[p.replace("/Asize", quant_scope + "/xmax")]

                # ensure that dynamic ranges are in specified range and a power-of-two
                xmin = quantize_pow2(
                    clip_scalar(xmin, cfg.a_xmin_min, cfg.a_xmin_max))
                xmax = quantize_pow2(
                    clip_scalar(xmax, cfg.a_xmax_min, cfg.a_xmax_max))

                # use ceil rounding
                n = F.maximum_scalar(
                    F.ceil(log2(log2(xmax / xmin) + 1.) + 1.),
                    cfg.a_bitwidth_min)
            else:
                raise ValueError("Unknown quantization method {}".format(
                    cfg.a_quantize))
        else:
            # float precision
            n = nn.Variable((), need_grad=False)
            n.d = 32.

        # bits -> bytes -> KBytes, kept as a length-1 tensor for concatenation
        kbytes.append(
            F.reshape(n * ps[p].d / 8. / 1024., (1, ), inplace=False))

    if cfg.target_activation_type == 'max':
        _kbytes = F.max(F.concatenate(*kbytes))
    elif cfg.target_activation_type == 'sum':
        _kbytes = F.sum(F.concatenate(*kbytes))
    else:
        # Previously this fell through to an unbound `_kbytes` at return.
        raise ValueError("Unknown target activation type {}".format(
            cfg.target_activation_type))
    return num_activations, _kbytes
def clip_scalar(v, min_value, max_value):
    """Clamp `v` from below by `min_value`, then from above by `max_value`."""
    floored = F.maximum_scalar(v, min_value)
    return F.minimum_scalar(floored, max_value)
def network_size_weights():
    """
    Return total number of weights and network size (for weights) in KBytes

    Only parameters whose names end with the W/b suffix of a
    `quantized_conv` or `quantized_affine` layer are counted. The bitwidth
    `n` per parameter follows from `cfg.w_quantize` (32 when it is None).
    Both return values are None when no parameter matches.
    """
    counted_suffixes = ("quantized_conv/W", "quantized_conv/b",
                        "quantized_affine/W", "quantized_affine/b")
    methods_with_n = ('parametric_fp_b_xmax', 'parametric_fp_d_b',
                      'parametric_pow2_b_xmax', 'parametric_pow2_b_xmin')

    kbytes = None
    num_params = None

    # get all parameters
    ps = nn.get_parameters()
    for p in ps:
        if not p.endswith(counted_suffixes):
            continue

        _num_params = np.prod(ps[p].shape)
        print(f"{p}\t{ps[p].shape}\t{_num_params}")

        method = cfg.w_quantize
        if method is None:
            # float precision
            n = nn.Variable((), need_grad=False)
            n.d = 32.
        elif method in methods_with_n:
            # parametric quantization: bitwidth is a learned parameter
            n_p = p + "quant/" + method + "/n"
            n = F.round(
                clip_scalar(ps[n_p], cfg.w_bitwidth_min,
                            cfg.w_bitwidth_max))
        elif method == 'parametric_fp_d_xmax':
            # this method has no `n`, so it is derived from d and xmax
            quant_scope = p + "quant/" + method
            d = ps[quant_scope + "/d"]
            xmax = ps[quant_scope + "/xmax"]

            # ensure that stepsize is in specified range and a power of two
            d_q = quantize_pow2(
                clip_scalar(d, cfg.w_stepsize_min, cfg.w_stepsize_max))

            # ensure that dynamic range is in specified range
            xmax = clip_scalar(xmax, cfg.w_xmax_min, cfg.w_xmax_max)

            # compute real `xmax`
            xmax = F.round(xmax / d_q) * d_q

            # we do not clip to `cfg.w_bitwidth_max` as xmax/d_q could correspond to more than 8 bit
            bits = F.ceil(log2(xmax / d_q + 1.0) + 1.0)
            n = F.maximum_scalar(bits, cfg.w_bitwidth_min)
        elif method == 'parametric_pow2_xmin_xmax':
            # this method has no `n`, so it is derived from xmin and xmax
            quant_scope = p + "quant/" + method
            xmin = ps[quant_scope + "/xmin"]
            xmax = ps[quant_scope + "/xmax"]

            # ensure that minimum dynamic range is in specified range and a power-of-two
            xmin = quantize_pow2(
                clip_scalar(xmin, cfg.w_xmin_min, cfg.w_xmin_max))

            # ensure that maximum dynamic range is in specified range and a power-of-two
            xmax = quantize_pow2(
                clip_scalar(xmax, cfg.w_xmax_min, cfg.w_xmax_max))

            # use ceil to determine bitwidth
            bits = F.ceil(log2(log2(xmax / xmin) + 1.0) + 1.)
            n = F.maximum_scalar(bits, cfg.w_bitwidth_min)
        elif method in ('fp', 'pow2'):
            # fixed quantization
            n = nn.Variable((), need_grad=False)
            n.d = cfg.w_bitwidth
        else:
            raise ValueError(
                f'Unknown quantization method {cfg.w_quantize}')

        # bits -> bytes -> KBytes for this parameter
        param_kbytes = n * _num_params / 8. / 1024.
        if kbytes is None:
            kbytes = param_kbytes
            num_params = _num_params
        else:
            kbytes += param_kbytes
            num_params += _num_params

    return num_params, kbytes
def main():
    """Train and validate the PSMNet graph on the SceneFlow file lists.

    Reads the train/test CSV lists, builds separate training and test graphs
    with `psm_net`, and for every epoch runs validation followed by training.
    Losses are reported to the monitors and the parameters are saved under
    `args.savemodel` after each epoch. All settings come from the
    module-level `args`.
    """
    random.seed(args.seed)
    np.random.seed(args.seed)

    # Prepare for CUDA.
    ctx = get_extension_context('cudnn', device_id=args.gpus)
    nn.set_default_context(ctx)

    start_full_time = time.time()
    from iterator import data_iterator

    # Data list for sceneflow data set
    train_list = "./dataset/sceneflow_train.csv"
    test_list = "./dataset/sceneflow_test.csv"
    train = True
    validation = True

    # Set monitor path.
    monitor_path = './nnmonitor' + \
        str(datetime.now().strftime("%Y%m%d%H%M%S"))

    img_left, img_right, disp_img = read_csv(train_list)
    img_left_test, img_right_test, disp_img_test = read_csv(test_list)
    train_samples = len(img_left)
    test_samples = len(img_left_test)
    # Number of full batches per epoch.
    train_size = int(len(img_left) / args.batchsize_train)
    test_size = int(len(img_left_test) / args.batchsize_test)

    # Create data iterator.
    data_iterator_train = data_iterator(
        train_samples, args.batchsize_train, img_left, img_right, disp_img,
        train=True, shuffle=True, dataset=args.dataset)
    data_iterator_test = data_iterator(
        test_samples, args.batchsize_test, img_left_test, img_right_test,
        disp_img_test, train=False, shuffle=False, dataset=args.dataset)

    # Set data size
    print(train_size, test_size)

    # Define data shape for training.
    var_left = nn.Variable(
        (args.batchsize_train, 3, args.crop_height, args.crop_width))
    var_right = nn.Variable(
        (args.batchsize_train, 3, args.crop_height, args.crop_width))
    var_disp = nn.Variable(
        (args.batchsize_train, 1, args.crop_height, args.crop_width))

    # Define data shape for testing.
    var_left_test = nn.Variable(
        (args.batchsize_test, 3, args.im_height, args.im_width))
    var_right_test = nn.Variable(
        (args.batchsize_test, 3, args.im_height, args.im_width))
    var_disp_test = nn.Variable(
        (args.batchsize_test, 1, args.im_height, args.im_width))

    if args.loadmodel is not None:
        # Loading CNN pretrained parameters.
        nn.load_parameters(args.loadmodel)

    # === for Training ===
    # Definition of pred
    pred1, pred2, pred3 = psm_net(var_left, var_right, args.maxdisp, True)
    # Only pixels with disparity below maxdisp contribute; the count is
    # floored at 1 to avoid dividing by zero.
    mask_train = F.less_scalar(var_disp, args.maxdisp)
    sum_mask = F.maximum_scalar(F.sum(mask_train), 1)
    # Definition of loss
    loss = 0.5 * (0.5 * F.sum(F.huber_loss(pred1, var_disp)*mask_train)/(sum_mask) + 0.7 * F.sum(F.huber_loss(
        pred2, var_disp)*mask_train)/(sum_mask) + F.sum(F.huber_loss(pred3, var_disp)*mask_train)/(sum_mask))

    # === for Testing ===
    # Definition of pred
    mask_test = F.less_scalar(var_disp_test, args.maxdisp)
    sum_mask_test = F.maximum_scalar(F.sum(mask_test), 1)
    pred_test = psm_net(var_left_test, var_right_test, args.maxdisp, False)
    test_loss = F.sum(F.abs(pred_test - var_disp_test)*mask_test)/sum_mask_test

    # Prepare monitors.
    monitor = Monitor(monitor_path)
    monitor_train = MonitorSeries('Training loss', monitor, interval=1)
    monitor_test = MonitorSeries('Validation loss', monitor, interval=1)
    monitor_time_train = MonitorTimeElapsed(
        "Training time/epoch", monitor, interval=1)

    # Create a solver (parameter updater)
    solver = S.Adam(alpha=0.001, beta1=0.9, beta2=0.999)

    # Set Parameters
    params = nn.get_parameters()
    solver.set_parameters(params)
    params2 = nn.get_parameters(grad_only=False)
    solver.set_parameters(params2)

    for epoch in range(1, args.epochs+1):
        print('This is %d-th epoch' % (epoch))

        if validation:
            ## testing ##
            total_test_loss = 0

            index_test = 0
            while index_test < test_size:
                var_left_test.d, var_right_test.d, var_disp_test.d = data_iterator_test.next()
                test_loss.forward(clear_no_need_grad=True)
                # Accumulate the computed value, not the graph Variable:
                # adding the Variable builds new nodes that are never
                # forwarded.
                total_test_loss += test_loss.d
                print('Iter %d test loss = %.3f' % (index_test, test_loss.d))
                index_test += 1
            test_error = total_test_loss/test_size
            print('epoch %d total 3-px error in val = %.3f' %
                  (epoch, test_error))
            # Pass validation loss to a monitor.
            monitor_test.add(epoch, test_error)

        if train:
            ## training ##
            total_train_loss = 0
            index = 0

            while index < train_size:

                # Get mini batch
                # Preprocess
                var_left.d, var_right.d, var_disp.d = data_iterator_train.next()
                loss.forward(clear_no_need_grad=True)
                # Initialize gradients
                solver.zero_grad()
                # Backward execution
                loss.backward(clear_buffer=True)
                # Update parameters by computed gradients
                solver.update()
                print('Iter %d training loss = %.3f' % (index, loss.d))
                total_train_loss += loss.d
                index += 1
            train_error = total_train_loss/train_size
            monitor_time_train.add(epoch)
            print('epoch %d total training loss = %.3f' % (epoch, train_error))

            # Pass training loss to a monitor.
            monitor_train.add(epoch, train_error)
            print('full training time = %.2f HR' %
                  ((time.time() - start_full_time)/3600))

            # Save Parameter
            out_param_file = os.path.join(
                args.savemodel, 'psmnet_trained_param_' + str(epoch) + '.h5')
            nn.save_parameters(out_param_file)
def srelus(x):
    """Return the element-wise maximum of `x` and -1."""
    lower_bound = -1
    return F.maximum_scalar(x, lower_bound)
def clip_by_value(x, minimum, maximum):
    """Clamp `x` from below by `minimum`, then from above by `maximum`."""
    raised = F.maximum_scalar(x, minimum)
    clipped = F.minimum_scalar(raised, maximum)
    return clipped