Example No. 1
def siamese_loss(e0, e1, t, margin=1.0, eps=1e-4):
    dist = F.sum(F.squared_error(e0, e1), axis=1)  # Squared distance
    # Contrastive loss
    sim_cost = t * dist
    dissim_cost = (1 - t) * (F.maximum_scalar(margin -
                                              (dist + eps)**(0.5), 0)**2)
    return F.mean(sim_cost + dissim_cost)
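A minimal usage sketch for the loss above, assuming `F` is `nnabla.functions` and that the embeddings are batched (batch, dim) Variables; the toy values are hypothetical:

import numpy as np
import nnabla as nn
import nnabla.functions as F

# Hypothetical toy embeddings: 4 pairs of 2-D points with pair labels t in {0, 1}.
e0 = nn.Variable.from_numpy_array(np.random.randn(4, 2).astype(np.float32))
e1 = nn.Variable.from_numpy_array(np.random.randn(4, 2).astype(np.float32))
t = nn.Variable.from_numpy_array(np.array([1, 0, 1, 0], dtype=np.float32))

loss = siamese_loss(e0, e1, t, margin=1.0)
loss.forward()
print(loss.d)  # scalar mean contrastive loss over the batch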
Example No. 2
    def build_train_graph(self, batch):
        self.solver = S.Adam(self.learning_rate)

        obs, action, reward, terminal, newobs = batch
        # Create input variables
        s = nn.Variable(obs.shape)
        a = nn.Variable(action.shape)
        r = nn.Variable(reward.shape)
        t = nn.Variable(terminal.shape)
        snext = nn.Variable(newobs.shape)
        with nn.parameter_scope(self.name_q):
            q = self.q_builder(s, self.num_actions, test=False)
            self.solver.set_parameters(nn.get_parameters())
        with nn.parameter_scope(self.name_qnext):
            qnext = self.q_builder(snext, self.num_actions, test=True)
        qnext.need_grad = False
        clipped_r = F.minimum_scalar(F.maximum_scalar(
            r, -self.clip_reward), self.clip_reward)
        q_a = F.sum(
            q * F.one_hot(F.reshape(a, (-1, 1), inplace=False), (q.shape[1],)), axis=1)
        target = clipped_r + self.gamma * (1 - t) * F.max(qnext, axis=1)
        loss = F.mean(F.huber_loss(q_a, target))
        Variables = namedtuple(
            'Variables', ['s', 'a', 'r', 't', 'snext', 'q', 'loss'])
        self.v = Variables(s, a, r, t, snext, q, loss)
        self.sync_models()
        self.built = True
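The core of the graph above is the clipped-reward Q-learning target combined with one-hot action selection. The following standalone sketch reproduces just that part; the names `q`, `qnext`, `a`, `r`, `t` and all sizes are hypothetical and only mirror the pattern used above:

import numpy as np
import nnabla as nn
import nnabla.functions as F

batch, num_actions, gamma = 4, 3, 0.99
q = nn.Variable.from_numpy_array(np.random.randn(batch, num_actions).astype(np.float32))
qnext = nn.Variable.from_numpy_array(np.random.randn(batch, num_actions).astype(np.float32))
# Integer action indices stored as floats, mirroring the graph above.
a = nn.Variable.from_numpy_array(np.random.randint(0, num_actions, size=(batch,)).astype(np.float32))
r = nn.Variable.from_numpy_array(np.random.randn(batch).astype(np.float32))
t = nn.Variable.from_numpy_array(np.zeros(batch, dtype=np.float32))  # terminal flags

clipped_r = F.minimum_scalar(F.maximum_scalar(r, -1.0), 1.0)
q_a = F.sum(q * F.one_hot(F.reshape(a, (-1, 1), inplace=False), (num_actions,)), axis=1)
target = clipped_r + gamma * (1 - t) * F.max(qnext, axis=1)
loss = F.mean(F.huber_loss(q_a, target))
loss.forward()
print(loss.d)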
Example No. 3
def net(n_class,
        xs,
        xq,
        init_type='nnabla',
        embedding='conv4',
        net_type='prototypical',
        distance='euclid',
        test=False):
    '''
    Similarity net function
        This function implements the network with settings as specified.

        Args:
            n_class (int): number of classes. Typical setting is 5 or 20.
            xs (~nnabla.Variable): support images.
            xq (~nnabla.Variable): query images.
            init_type (str, optional): initialization type for weights and bias parameters. See conv_initializer function.
            embedding (str, optional): embedding network.
            net_type (str, optional): network type, either 'matching' or 'prototypical'.
            distance (str, optional): similarity metric to use. See similarity function.
            test (bool, optional): switch flag for training dataset and test dataset.
        Returns:
            h (~nnabla.Variable): output variable indicating similarity between support and query.
    '''

    # feature embedding for supports and queries
    n_shot = xs.shape[0] // n_class
    n_query = xq.shape[0] // n_class
    if embedding == 'conv4':
        fs = conv4(xs, test, init_type)  # tensor of (n_support, fdim)
        fq = conv4(xq, test, init_type)  # tensor of (n_query, fdim)

    if net_type == 'matching':
        # This example does not include the full-context-embedding of matching networks.
        fs = F.reshape(fs, (1, ) + fs.shape)  # (1, n_way, fdim)
        # (n_way*n_query, 1, fdim)
        fq = F.reshape(fq, (fq.shape[0], 1) + fq.shape[1:])
        h = similarity(fq, fs, distance)
        h = h - F.mean(h, axis=1, keepdims=True)
        if 1 < n_shot:
            h = F.minimum_scalar(F.maximum_scalar(h, -35), 35)
            h = F.softmax(h)
            h = F.reshape(h, (h.shape[0], n_class, n_shot))
            h = F.mean(h, axis=2)
            # Reverse to logit to use same softmax cross entropy
            h = F.log(h)
    elif net_type == 'prototypical':
        if 1 < n_shot:
            fs = F.reshape(fs, (n_class, n_shot) + fs.shape[1:])
            fs = F.mean(fs, axis=1)
        fs = F.reshape(fs, (1, ) + fs.shape)  # (1, n_way, fdim)
        # (n_way*n_query, 1, fdim)
        fq = F.reshape(fq, (fq.shape[0], 1) + fq.shape[1:])
        h = similarity(fq, fs, distance)
        h = h - F.mean(h, axis=1, keepdims=True)

    return h
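The prototypical branch relies on one key trick: support embeddings are reshaped to (n_class, n_shot, fdim) and averaged over the shot axis to form per-class prototypes. A self-contained sketch of just that step, with hypothetical sizes:

import numpy as np
import nnabla as nn
import nnabla.functions as F

# Toy setting: 3 classes, 2 support shots, feature dimension 4.
n_class, n_shot, fdim = 3, 2, 4
fs = nn.Variable.from_numpy_array(
    np.arange(n_class * n_shot * fdim, dtype=np.float32).reshape(n_class * n_shot, fdim))

# Class prototypes: mean of the support embeddings of each class.
proto = F.mean(F.reshape(fs, (n_class, n_shot, fdim)), axis=1)
proto.forward()
print(proto.shape)  # (3, 4)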
Example No. 4
def sample_pdf(bins, weights, N_samples, det=False):
    """Sample additional points for training fine network

    Args:
      bins: int. Height in pixels.
      weights: int. Width in pixels.
      N_samples: float. Focal length of pinhole camera.
      det

    Returns:
      samples: array of shape [batch_size, 3]. Depth samples for fine network
    """
    weights += 1e-5
    pdf = weights / F.sum(weights, axis=-1, keepdims=True)

    cdf = F.cumsum(pdf, axis=-1)
    cdf = F.concatenate(F.constant(0, cdf[..., :1].shape), cdf, axis=-1)

    if det:
        u = F.arange(0., 1., 1 / N_samples)
        u = F.broadcast(u[None, :], cdf.shape[:-1] + (N_samples, ))
        u = u.data if isinstance(cdf, nn.NdArray) else u
    else:
        u = F.rand(shape=cdf.shape[:-1] + (N_samples, ))

    indices = F.searchsorted(cdf, u, right=True)
    below = F.maximum_scalar(indices - 1, 0)
    above = F.minimum_scalar(indices, cdf.shape[-1] - 1)
    indices_g = F.stack(below, above, axis=below.ndim)
    cdf_g = F.gather(cdf,
                     indices_g,
                     axis=-1,
                     batch_dims=len(indices_g.shape) - 2)
    bins_g = F.gather(bins,
                      indices_g,
                      axis=-1,
                      batch_dims=len(indices_g.shape) - 2)

    denom = (cdf_g[..., 1] - cdf_g[..., 0])
    denom = F.where(F.less_scalar(denom, 1e-5), F.constant(1, denom.shape),
                    denom)
    t = (u - cdf_g[..., 0]) / denom
    samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0])

    return samples
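A usage sketch for sample_pdf with hypothetical ray data; it assumes an NNabla version recent enough to provide the F.cumsum, F.searchsorted and F.gather functions used above:

import numpy as np
import nnabla as nn
import nnabla.functions as F

# Two hypothetical rays, 5 bin edges and 4 coarse weights per ray.
bins = nn.Variable.from_numpy_array(
    np.tile(np.linspace(2.0, 6.0, 5, dtype=np.float32), (2, 1)))
weights = nn.Variable.from_numpy_array(
    np.random.rand(2, 4).astype(np.float32))

samples = sample_pdf(bins, weights, N_samples=8, det=True)
samples.forward()
print(samples.shape)  # (2, 8): 8 fine depth samples per ray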
Example No. 5
def contrastive_loss(sd, l, margin=1.0, eps=1e-4):
    """
    This implements the contrastive loss function given the squared difference `sd` and labels `l` in {0, 1}.

    f(sd, l) = l * sd + (1 - l) * max(0, margin - sqrt(sd))^2

    NNabla implements various basic arithmetic operations, which makes it easy to write custom
    operations by composition, as done here. This is handy, but implementing an NNabla Function
    in C++ still gives a performance advantage.
    """
    sim_cost = l * sd
    dissim_cost = (1 - l) * \
        (F.maximum_scalar(margin - (sd + eps) ** (0.5), 0) ** 2)
    return sim_cost + dissim_cost
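A small worked example of the two branches with hypothetical squared distances: a similar pair (l=1) simply pays `sd`, while a dissimilar pair (l=0) pays `max(0, margin - sqrt(sd))^2`, which vanishes once the pair is farther apart than the margin:

import numpy as np
import nnabla as nn
import nnabla.functions as F

# Three hypothetical pairs: squared distances and similarity labels.
sd = nn.Variable.from_numpy_array(np.array([0.04, 0.25, 1.44], dtype=np.float32))
l = nn.Variable.from_numpy_array(np.array([1., 0., 0.], dtype=np.float32))

loss = contrastive_loss(sd, l, margin=1.0)
loss.forward()
# Per-pair costs: ~0.04 (similar pair), ~(1 - 0.5)^2 = 0.25, and ~0 because sqrt(1.44) > margin.
print(loss.d)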
Example No. 7
def _focal_loss(pred, gt):
    '''Modified focal loss. Exactly the same as CornerNet.

    Modified for more stability by using log_sigmoid function

      Arguments:
        pred (batch x c x h x w): logit (must be values before sigmoid activation)
        gt (batch x c x h x w): ground-truth heatmap with values in [0, 1] (1 at positive locations)
    '''
    alpha = 2
    beta = 4
    pos_inds = F.greater_equal_scalar(gt, 1)
    neg_inds = 1 - pos_inds
    neg_weights = F.pow_scalar(1.0 - gt, beta)
    prob_pred = F.sigmoid(pred)
    pos_loss = F.log_sigmoid(pred) * F.pow_scalar(1.0 - prob_pred,
                                                  alpha) * pos_inds
    pos_loss = F.sum(pos_loss)
    neg_loss = F.log_sigmoid(-pred) * F.pow_scalar(
        prob_pred, alpha) * neg_weights * neg_inds
    neg_loss = F.sum(neg_loss)
    num_pos = F.maximum_scalar(F.sum(pos_inds), 1)
    loss = -(1 / num_pos) * (pos_loss + neg_loss)
    return loss
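A quick sanity-check sketch with a hypothetical 2x2 heatmap containing a single positive location; `pred` holds raw logits, as the docstring requires:

import numpy as np
import nnabla as nn
import nnabla.functions as F

# Hypothetical 1x1x2x2 ground-truth heatmap with one positive at (0, 0).
gt = nn.Variable.from_numpy_array(
    np.array([[[[1.0, 0.3], [0.1, 0.0]]]], dtype=np.float32))
pred = nn.Variable.from_numpy_array(
    np.zeros((1, 1, 2, 2), dtype=np.float32))  # raw logits (before sigmoid)

loss = _focal_loss(pred, gt)
loss.forward()
print(loss.d)  # scalar focal loss, normalized by the number of positives (1 here)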
Example No. 8
def network_size_activations():
    """
    Returns the total number of activations and their total size in KBytes
    (as an NNabla variable, aggregated with the `max` or `sum` operator
    selected by `cfg.target_activation_type`)
    """
    kbytes = []
    num_activations = 0

    # get all parameters
    ps = nn.get_parameters(grad_only=False)
    for p in ps:
        if "Asize" in p:
            print(f"{p}\t{ps[p].d}")

            num_activations += ps[p].d

            if cfg.a_quantize is not None:
                if cfg.a_quantize in ['fp_relu', 'pow2_relu']:
                    # fixed quantization
                    n = nn.Variable((), need_grad=False)
                    n.d = cfg.a_bitwidth
                elif cfg.a_quantize in [
                        'parametric_fp_relu', 'parametric_fp_b_xmax_relu',
                        'parametric_fp_d_b_relu',
                        'parametric_pow2_b_xmax_relu',
                        'parametric_pow2_b_xmin_relu'
                ]:
                    # parametric quantization
                    s = p.replace(
                        "/Asize", "/Aquant/" +
                        cfg.a_quantize.replace("_relu", "") + "/n")
                    n = F.round(
                        clip_scalar(ps[s], cfg.a_bitwidth_min,
                                    cfg.a_bitwidth_max))
                elif cfg.a_quantize in ['parametric_fp_d_xmax_relu']:
                    # this quantization method does not have n, so we need to compute it
                    # parametric quantization
                    d = ps[p.replace(
                        "/Asize", "/Aquant/" +
                        cfg.a_quantize.replace("_relu", "") + "/d")]
                    xmax = ps[p.replace(
                        "/Asize", "/Aquant/" +
                        cfg.a_quantize.replace("_relu", "") + "/xmax")]

                    # ensure that stepsize is in specified range and a power of two
                    d_q = quantize_pow2(
                        clip_scalar(d, cfg.a_stepsize_min, cfg.a_stepsize_max))

                    # ensure that dynamic range is in specified range
                    xmax = clip_scalar(xmax, cfg.a_xmax_min, cfg.a_xmax_max)

                    # compute real `xmax`
                    xmax = F.round(xmax / d_q) * d_q

                    n = F.maximum_scalar(F.ceil(log2(xmax / d_q + 1.0)),
                                         cfg.a_bitwidth_min)
                elif cfg.a_quantize in ['parametric_pow2_xmin_xmax_relu']:
                    # this quantization method does not have n, so we need to compute it
                    # parametric quantization
                    xmin = ps[p.replace(
                        "/Asize", "/Aquant/" +
                        cfg.a_quantize.replace("_relu", "") + "/xmin")]
                    xmax = ps[p.replace(
                        "/Asize", "/Aquant/" +
                        cfg.a_quantize.replace("_relu", "") + "/xmax")]

                    # ensure that dynamic ranges are in specified range and a power-of-two
                    xmin = quantize_pow2(
                        clip_scalar(xmin, cfg.a_xmin_min, cfg.a_xmin_max))
                    xmax = quantize_pow2(
                        clip_scalar(xmax, cfg.a_xmax_min, cfg.a_xmax_max))

                    # use ceil rounding
                    n = F.maximum_scalar(
                        F.ceil(log2(log2(xmax / xmin) + 1.) + 1.),
                        cfg.a_bitwidth_min)
                else:
                    raise ValueError("Unknown quantization method {}".format(
                        cfg.a_quantize))
            else:
                # float precision
                n = nn.Variable((), need_grad=False)
                n.d = 32.

            kbytes.append(
                F.reshape(n * ps[p].d / 8. / 1024., (1, ), inplace=False))

    if cfg.target_activation_type == 'max':
        _kbytes = F.max(F.concatenate(*kbytes))
    elif cfg.target_activation_type == 'sum':
        _kbytes = F.sum(F.concatenate(*kbytes))
    else:
        raise ValueError("Unknown target activation type {}".format(
            cfg.target_activation_type))
    return num_activations, _kbytes
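For the parametric fixed-point case, the bitwidth is derived from the stepsize and the (rounded) dynamic range. A small worked example of that arithmetic, with hypothetical numbers:

import numpy as np

# Hypothetical fixed-point quantizer: stepsize d and dynamic range xmax.
d, xmax = 0.125, 7.875
# The number of representable levels is xmax / d + 1; the bitwidth is its log2, rounded up.
n = np.ceil(np.log2(xmax / d + 1.0))
print(n)  # 6.0 bits for the unsigned (ReLU) activation case; the weight case below adds 1 sign bit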
Example No. 9
def clip_scalar(v, min_value, max_value):
    return F.minimum_scalar(F.maximum_scalar(v, min_value), max_value)
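clip_scalar simply composes the two scalar clamps; a tiny check with hypothetical values:

import numpy as np
import nnabla as nn
import nnabla.functions as F

x = nn.Variable.from_numpy_array(np.array([-2.0, 0.5, 3.0], dtype=np.float32))
y = clip_scalar(x, 0.0, 1.0)
y.forward()
print(y.d)  # [0.  0.5 1. ]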
Example No. 10
def network_size_weights():
    """
    Return total number of weights and network size (for weights) in KBytes
    """
    kbytes = None
    num_params = None

    # get all parameters
    ps = nn.get_parameters()
    for p in ps:
        if ((p.endswith("quantized_conv/W") or p.endswith("quantized_conv/b")
             or p.endswith("quantized_affine/W")
             or p.endswith("quantized_affine/b"))):
            _num_params = np.prod(ps[p].shape)
            print(f"{p}\t{ps[p].shape}\t{_num_params}")

            if cfg.w_quantize is not None:
                if cfg.w_quantize in [
                        'parametric_fp_b_xmax', 'parametric_fp_d_b',
                        'parametric_pow2_b_xmax', 'parametric_pow2_b_xmin'
                ]:
                    # parametric quantization
                    n_p = p + "quant/" + cfg.w_quantize + "/n"
                    n = F.round(
                        clip_scalar(ps[n_p], cfg.w_bitwidth_min,
                                    cfg.w_bitwidth_max))
                elif cfg.w_quantize == 'parametric_fp_d_xmax':
                    # this quantization method does not have n, so we need to compute it
                    d = ps[p + "quant/" + cfg.w_quantize + "/d"]
                    xmax = ps[p + "quant/" + cfg.w_quantize + "/xmax"]

                    # ensure that stepsize is in specified range and a power of two
                    d_q = quantize_pow2(
                        clip_scalar(d, cfg.w_stepsize_min, cfg.w_stepsize_max))

                    # ensure that dynamic range is in specified range
                    xmax = clip_scalar(xmax, cfg.w_xmax_min, cfg.w_xmax_max)

                    # compute real `xmax`
                    xmax = F.round(xmax / d_q) * d_q

                    # we do not clip to `cfg.w_bitwidth_max` as xmax/d_q could correspond to more than 8 bit
                    n = F.maximum_scalar(F.ceil(log2(xmax / d_q + 1.0) + 1.0),
                                         cfg.w_bitwidth_min)
                elif cfg.w_quantize == 'parametric_pow2_xmin_xmax':
                    # this quantization method does not have n, so we need to compute it
                    xmin = ps[p + "quant/" + cfg.w_quantize + "/xmin"]
                    xmax = ps[p + "quant/" + cfg.w_quantize + "/xmax"]

                    # ensure that minimum dynamic range is in specified range and a power-of-two
                    xmin = quantize_pow2(
                        clip_scalar(xmin, cfg.w_xmin_min, cfg.w_xmin_max))

                    # ensure that maximum dynamic range is in specified range and a power-of-two
                    xmax = quantize_pow2(
                        clip_scalar(xmax, cfg.w_xmax_min, cfg.w_xmax_max))

                    # use ceil to determine bitwidth
                    n = F.maximum_scalar(
                        F.ceil(log2(log2(xmax / xmin) + 1.0) + 1.),
                        cfg.w_bitwidth_min)
                elif cfg.w_quantize == 'fp' or cfg.w_quantize == 'pow2':
                    # fixed quantization
                    n = nn.Variable((), need_grad=False)
                    n.d = cfg.w_bitwidth
                else:
                    raise ValueError(
                        f'Unknown quantization method {cfg.w_quantize}')
            else:
                # float precision
                n = nn.Variable((), need_grad=False)
                n.d = 32.

            if kbytes is None:
                kbytes = n * _num_params / 8. / 1024.
                num_params = _num_params
            else:
                kbytes += n * _num_params / 8. / 1024.
                num_params += _num_params
    return num_params, kbytes
Example No. 11
def main():

    random.seed(args.seed)
    np.random.seed(args.seed)

    # Prepare for CUDA.
    ctx = get_extension_context('cudnn', device_id=args.gpus)
    nn.set_default_context(ctx)

    start_full_time = time.time()
    from iterator import data_iterator

    # Data list for sceneflow data set
    train_list = "./dataset/sceneflow_train.csv"
    test_list = "./dataset/sceneflow_test.csv"
    train = True
    validation = True

    # Set monitor path.
    monitor_path = './nnmonitor' + str(datetime.now().strftime("%Y%m%d%H%M%S"))

    img_left, img_right, disp_img = read_csv(train_list)
    img_left_test, img_right_test, disp_img_test = read_csv(test_list)
    train_samples = len(img_left)
    test_samples = len(img_left_test)
    train_size = int(len(img_left) / args.batchsize_train)
    test_size = int(len(img_left_test) / args.batchsize_test)

    # Create data iterator.
    data_iterator_train = data_iterator(
        train_samples, args.batchsize_train, img_left, img_right, disp_img, train=True, shuffle=True, dataset=args.dataset)
    data_iterator_test = data_iterator(
        test_samples, args.batchsize_test, img_left_test, img_right_test, disp_img_test, train=False, shuffle=False, dataset=args.dataset)

    # Set data size

    print(train_size, test_size)

    # Define data shape for training.
    var_left = nn.Variable(
        (args.batchsize_train, 3, args.crop_height, args.crop_width))
    var_right = nn.Variable(
        (args.batchsize_train, 3, args.crop_height, args.crop_width))
    var_disp = nn.Variable(
        (args.batchsize_train, 1, args.crop_height, args.crop_width))
    # Define data shape for testing.
    var_left_test = nn.Variable(
        (args.batchsize_test, 3, args.im_height, args.im_width))
    var_right_test = nn.Variable(
        (args.batchsize_test, 3, args.im_height, args.im_width))
    var_disp_test = nn.Variable(
        (args.batchsize_test, 1, args.im_height, args.im_width))
    mask_test = nn.Variable(
        (args.batchsize_test, 1, args.im_height, args.im_width))

    if args.loadmodel is not None:
        # Loading CNN pretrained parameters.
        nn.load_parameters(args.loadmodel)

    # === for Training ===
    # Definition of pred
    pred1, pred2, pred3 = psm_net(var_left, var_right, args.maxdisp, True)
    mask_train = F.less_scalar(var_disp, args.maxdisp)
    sum_mask = F.maximum_scalar(F.sum(mask_train), 1)
    # Definition of loss
    loss = 0.5 * (0.5 * F.sum(F.huber_loss(pred1, var_disp)*mask_train)/(sum_mask) + 0.7 * F.sum(F.huber_loss(
        pred2, var_disp)*mask_train)/(sum_mask) + F.sum(F.huber_loss(pred3, var_disp)*mask_train)/(sum_mask))

    # === for Testing ===
    # Definition of pred
    mask_test = F.less_scalar(var_disp_test, args.maxdisp)
    sum_mask_test = F.maximum_scalar(F.sum(mask_test), 1)
    pred_test = psm_net(var_left_test, var_right_test, args.maxdisp, False)
    test_loss = F.sum(F.abs(pred_test - var_disp_test)*mask_test)/sum_mask_test

    # Prepare monitors.
    monitor = Monitor(monitor_path)
    monitor_train = MonitorSeries('Training loss', monitor, interval=1)
    monitor_test = MonitorSeries('Validation loss', monitor, interval=1)
    monitor_time_train = MonitorTimeElapsed(
        "Training time/epoch", monitor, interval=1)

    # Create a solver (parameter updater)
    solver = S.Adam(alpha=0.001, beta1=0.9, beta2=0.999)

    # Set Parameters
    params = nn.get_parameters()
    solver.set_parameters(params)
    params2 = nn.get_parameters(grad_only=False)
    solver.set_parameters(params2)

    for epoch in range(1, args.epochs+1):
        print('This is %d-th epoch' % (epoch))

        if validation:
            ## testing ##
            total_test_loss = 0

            index_test = 0
            while index_test < test_size:
                var_left_test.d, var_right_test.d, var_disp_test.d = data_iterator_test.next()
                test_loss.forward(clear_no_need_grad=True)
                total_test_loss += test_loss.d

                print('Iter %d test loss = %.3f' % (index_test, test_loss.d))
                index_test += 1
            test_error = total_test_loss/test_size
            print('epoch %d total 3-px error in val = %.3f' %
                  (epoch, test_error))
            # Pass validation loss to a monitor.
            monitor_test.add(epoch, test_error)

        if train:
            ## training ##
            total_train_loss = 0
            index = 0

            while index < train_size:

                # Get mini batch
                # Preprocess
                var_left.d, var_right.d, var_disp.d = data_iterator_train.next()
                loss.forward(clear_no_need_grad=True)
                # Initialize gradients
                solver.zero_grad()
                # Backward execution
                loss.backward(clear_buffer=True)
                # Update parameters by computed gradients
                solver.update()
                print('Iter %d training loss = %.3f' %
                      (index, loss.d))
                total_train_loss += loss.d
                index += 1
            train_error = total_train_loss/train_size
            monitor_time_train.add(epoch)
            print('epoch %d total training loss = %.3f' % (epoch, train_error))

            # Pass training loss to a monitor.
            monitor_train.add(epoch, train_error)
            print('full training time = %.2f HR' %
                  ((time.time() - start_full_time)/3600))

            # Save Parameter
            out_param_file = os.path.join(
                args.savemodel, 'psmnet_trained_param_' + str(epoch) + '.h5')
            nn.save_parameters(out_param_file)
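The training and test losses above both use the same masked-average pattern: pixels whose ground-truth disparity exceeds `args.maxdisp` are excluded, and the sum of the mask is clamped to at least 1 to avoid division by zero. A standalone sketch of that pattern with hypothetical shapes:

import numpy as np
import nnabla as nn
import nnabla.functions as F

# Hypothetical predicted and ground-truth disparity maps.
maxdisp = 192
pred = nn.Variable.from_numpy_array(
    (np.random.rand(1, 1, 4, 4) * 200).astype(np.float32))
gt = nn.Variable.from_numpy_array(
    (np.random.rand(1, 1, 4, 4) * 200).astype(np.float32))

mask = F.less_scalar(gt, maxdisp)            # 1 where the pixel is valid, 0 otherwise
sum_mask = F.maximum_scalar(F.sum(mask), 1)  # avoid dividing by zero
loss = F.sum(F.huber_loss(pred, gt) * mask) / sum_mask
loss.forward()
print(loss.d)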
Example No. 12
def srelus(x):
    return F.maximum_scalar(x, -1)
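srelus is a shifted ReLU that clamps activations from below at -1; a tiny check with hypothetical inputs:

import numpy as np
import nnabla as nn
import nnabla.functions as F

x = nn.Variable.from_numpy_array(np.array([-3.0, -0.5, 2.0], dtype=np.float32))
y = srelus(x)
y.forward()
print(y.d)  # [-1.  -0.5  2. ]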
Example No. 13
def clip_by_value(x, minimum, maximum):
    return F.minimum_scalar(F.maximum_scalar(x, minimum), maximum)