Example 1
def test_graph_logreg(seed):
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4], need_grad=True)
    w = nn.Variable([12, 5], need_grad=True)
    b = nn.Variable([5], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    w.d = rng.randn(*w.shape)
    b.d = rng.randn(*b.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    with nn.auto_forward():
        z = F.affine(x, w, b, 1)
        l = F.softmax_cross_entropy(z, t, 1)
        L = F.mean(l)

    # Backprop
    # Diff should be initialized since they are always accumulated
    x.g = 0
    w.g = 0
    b.g = 0
    L.backward(clear_buffer=True)
    x.g = rng.randn(*x.shape)

    inputs = [x, w, b]

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L, inputs, 1e-3)
    assert np.allclose(ngrad, agrad, atol=1e-2)
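
For reference, the grads helper compares backprop (analytical) gradients against numerical ones. A minimal central-difference sketch of the numerical side, assuming a scalar-output Variable L and a single input Variable x (the actual nbla_test_utils implementation may differ):

def numerical_grad(L, x, eps=1e-3):
    # Central finite differences of L with respect to every element of x.
    g = np.zeros(x.d.size)
    for i in range(x.d.size):
        d = x.d.copy()
        orig = d.flat[i]
        d.flat[i] = orig + eps
        x.d = d
        L.forward()
        f_plus = float(L.d)
        d.flat[i] = orig - eps
        x.d = d
        L.forward()
        f_minus = float(L.d)
        d.flat[i] = orig
        x.d = d
        g[i] = (f_plus - f_minus) / (2 * eps)
    return g.reshape(x.shape)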
Example 2
def ce_loss_with_uncertainty(ctx, pred, y_l, log_var):
    r = F.randn(0., 1., log_var.shape)
    r = F.pow_scalar(F.exp(log_var), 0.5) * r
    h = pred + r
    with nn.context_scope(ctx):
        loss_ce = F.mean(F.softmax_cross_entropy(h, y_l))
    return loss_ce
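
The noise r is drawn via the reparameterization trick: a standard normal sample scaled by exp(log_var / 2), so the perturbed logits h carry the predicted variance. A hypothetical usage sketch (shapes and the context are assumptions):

import nnabla as nn

pred = nn.Variable((8, 10))      # class logits
y_l = nn.Variable((8, 1))        # integer labels
log_var = nn.Variable((8, 10))   # predicted log-variance of the logits
loss = ce_loss_with_uncertainty(nn.Context(), pred, y_l, log_var)
loss.forward()  # after filling pred.d, y_l.d and log_var.d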
Example 3
def test_graph_model(model, seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    nn.clear_parameters()
    if model == "mlp":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
        z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    elif model == "recurrent":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
            z2 = F.relu(z, inplace=True)
        h = z2
        for _ in range(2):
            with nn.parameter_scope('fc2'):
                h = PF.affine(h, 3)
                h = F.relu(h, inplace=True)
        with nn.parameter_scope('fc3'):
            z3 = PF.affine(h, 5)
    elif model == "convolution":
        with nn.parameter_scope('conv1'):
            z = PF.convolution(x, 3, (2, 2))
            z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    else:
        raise ValueError("Unknown model: {}".format(model))
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    L.forward(clear_no_need_grad=True)

    # Backprop
    # Diff should be initialized since they are always accumulated
    x.grad.zero()
    L.backward(clear_buffer=True)
    x.g = rng.randn(*x.shape)
    parameters = nn.get_parameters()
    for param in parameters.values():
        param.grad.zero()
    inputs = [x] + list(parameters.values())

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L, inputs, 1e-3)
    assert np.allclose(ngrad, agrad, atol=1.05e-2)
Example 4
def test_forward_backward():
    batch_size, m, h, w = 4, 3, 32, 32
    extension_module = "cpu"
    device_id = 0
    ctx = extension_context(extension_module, device_id=device_id)

    x_l_data = np.random.randn(batch_size, m, h, w)
    y_l_data = (np.random.rand(batch_size, 1) * 10).astype(np.int32)
    x_l = nn.Variable(x_l_data.shape)
    y_l = nn.Variable(y_l_data.shape)
    x_l.d = x_l_data
    y_l.d = y_l_data
    pred = cnn_model_003(ctx, x_l)
    with nn.context_scope(ctx):
        loss = F.mean(F.softmax_cross_entropy(pred, y_l))

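    # Note: in nnabla, gradients accumulate across backward() calls;
    # zero parameter gradients (e.g. p.grad.zero()) before another pass.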
    loss.forward()
    loss.backward()
Example 5
def get_model(args, num_classes, test=False, tiny=False):
    """
    Create computation graph and variables.

    Args:

        tiny: Tiny ImageNet mode if True.
    """
    data_size = 320
    nn_in_size = 224
    if tiny:
        data_size = 64
        nn_in_size = 56
    image = nn.Variable([args.batch_size, 3, data_size, data_size])
    label = nn.Variable([args.batch_size, 1])
    pimage = image_preprocess(image, nn_in_size)
    pred, hidden = model_resnet.resnet_imagenet(
        pimage, num_classes, args.num_layers, args.shortcut_type, test=test, tiny=tiny)
    loss = F.mean(F.softmax_cross_entropy(pred, label))
    Model = namedtuple('Model', ['image', 'label', 'pred', 'loss', 'hidden'])
    return Model(image, label, pred, loss, hidden)
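
A sketch of how the returned namedtuple is typically consumed in a training step (the data iterator here is a hypothetical placeholder):

model = get_model(args, num_classes=1000)
model.image.d, model.label.d = data.next()  # 'data' is assumed to exist
model.loss.forward(clear_no_need_grad=True)
model.loss.backward(clear_buffer=True)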
Example 6
def test_graph_clear_buffer(seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4])
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    # Network definition
    nn.set_default_context(nn.Context())
    nn.clear_parameters()
    x1 = x + 1
    x2 = x1 - 1
    with nn.parameter_scope('conv1'):
        z = PF.convolution(x2, 3, (2, 2))
        z2 = F.relu(z, inplace=True)
    with nn.parameter_scope('fc2'):
        z3 = PF.affine(z2, 5)
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    import tempfile
    import os
    tmpd = tempfile.mkdtemp()
    nn.save_parameters(os.path.join(tmpd, 'parameter.h5'))
    first = False
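    # Run forward/backward under all four combinations of clear_no_need_grad
    # (cnng) and clear_buffer (cb), reloading parameters each time, and check
    # that the first parameter's gradient is identical in every configuration.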
    for cnng in [False, True]:
        for cb in [False, True]:
            _ = nn.load_parameters(os.path.join(tmpd, 'parameter.h5'))
            for v in nn.get_parameters().values():
                v.grad.zero()
            L.forward(clear_no_need_grad=cnng)
            L.backward(clear_buffer=cb)
            if not first:
                first = True
                g = list(nn.get_parameters().values())[0].g.copy()
            else:
                g2 = list(nn.get_parameters().values())[0].g.copy()
                assert np.all(g == g2)
Example 7
def loss_function(pred, label, label_smoothing=0.1):
    l = F.softmax_cross_entropy(pred, label)
    if label_smoothing <= 0:
        return l
    return (1 - label_smoothing) * l - label_smoothing * F.mean(
        F.log_softmax(pred), axis=1, keepdims=True)
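
This is standard label smoothing: (1 - label_smoothing) times the usual cross entropy plus label_smoothing times the cross entropy against the uniform distribution, the latter obtained as the negative mean of the log-softmax. A sketch of a call (shapes are assumptions); the result is per-sample, so reduce it before backprop:

import nnabla as nn
import nnabla.functions as F

pred = nn.Variable((4, 1000))  # logits
label = nn.Variable((4, 1))    # integer class ids
loss = F.mean(loss_function(pred, label, label_smoothing=0.1))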
Example 8
def ce_loss(ctx, pred, y_l):
    with nn.context_scope(ctx):
        loss_ce = F.mean(F.softmax_cross_entropy(pred, y_l))
    return loss_ce
Example 9
def train_and_eval():

    # Settings
    args = get_args()
    n_class = args.n_class
    n_shot = args.n_shot
    n_query = args.n_query
    n_class_tr = args.n_class_tr
    n_shot_tr = args.n_shot_tr
    if n_shot_tr == 0:
        n_shot_tr = n_shot
    n_query_tr = args.n_query_tr
    if n_query_tr == 0:
        n_query_tr = n_query

    dataset = args.dataset
    dataset_root = args.dataset_root

    init_type = args.init_type
    embedding = args.embedding
    net_type = args.net_type
    metric = args.metric

    max_iteration = args.max_iteration
    lr_decay_interval = args.lr_decay_interval
    lr_decay = args.lr_decay
    iter_per_epoch = args.iter_per_epoch
    iter_per_valid = args.iter_per_valid
    n_episode_for_valid = args.n_episode_for_valid
    n_episode_for_test = args.n_episode_for_test
    work_dir = args.work_dir

    # Set context
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Monitor outputs
    from nnabla.monitor import Monitor, MonitorSeries
    monitor = Monitor(args.work_dir)
    monitor_loss = MonitorSeries(
        "Training loss", monitor, interval=iter_per_epoch)
    monitor_valid_err = MonitorSeries(
        "Validation error", monitor, interval=iter_per_valid)
    monitor_test_err = MonitorSeries("Test error", monitor)
    monitor_test_conf = MonitorSeries("Test error confidence", monitor)

    # Output files
    param_file = os.path.join(work_dir, "params.h5")
    tsne_file = os.path.join(work_dir, "tsne.png")

    # Load data
    shape_x = (1, 28, 28)
    train_data, valid_data, test_data = load_omniglot(
        dataset_root + "/omniglot/data/")
    train_episode_generator = EpisodeGenerator(
        n_class_tr, n_shot_tr, n_query_tr, shape_x, train_data)
    valid_episode_generator = EpisodeGenerator(
        n_class, n_shot, n_query, shape_x, valid_data)
    test_episode_generator = EpisodeGenerator(
        n_class, n_shot, n_query, shape_x, test_data)

    # Build training model
    xs_t = nn.Variable((n_class_tr * n_shot_tr, ) + shape_x)
    xq_t = nn.Variable((n_class_tr * n_query_tr, ) + shape_x)
    hq_t = net(n_class_tr, xs_t, xq_t, init_type,
               embedding, net_type, metric, False)
    yq_t = nn.Variable((n_class_tr * n_query_tr, 1))
    loss_t = F.mean(F.softmax_cross_entropy(hq_t, yq_t))

    # Build evaluation model
    xs_v = nn.Variable((n_class * n_shot, ) + shape_x)
    xq_v = nn.Variable((n_class * n_query, ) + shape_x)
    hq_v = net(n_class, xs_v, xq_v, init_type,
               embedding, net_type, metric, True)
    yq_v = nn.Variable((n_class * n_query, 1))
    err_v = F.mean(F.top_n_error(hq_v, yq_v, n=1))

    # Setup solver
    solver = S.Adam(1.0e-3)
    solver.set_parameters(nn.get_parameters())
    learning_rate_decay_activate = True

    # Training loop
    train_losses = []
    best_err = 1.0
    for i in range(max_iteration):

        # Decay learning rate
        if learning_rate_decay_activate and ((i + 1) % lr_decay_interval == 0):
            solver.set_learning_rate(solver.learning_rate() * lr_decay)

        # Create an episode
        xs_t.d, xq_t.d, yq_t.d = train_episode_generator.next()

        # Training by the episode
        solver.zero_grad()
        loss_t.forward(clear_no_need_grad=True)
        loss_t.backward(clear_buffer=True)
        solver.update()
        train_losses.append(loss_t.d.copy())

        # Evaluation
        if (i + 1) % iter_per_valid == 0:
            train_loss = np.mean(train_losses)
            train_losses = []
            valid_errs = []
            for k in range(n_episode_for_valid):
                xs_v.d, xq_v.d, yq_v.d = valid_episode_generator.next()
                err_v.forward(clear_no_need_grad=True, clear_buffer=True)
                valid_errs.append(float(err_v.d.copy()))
            valid_err = np.mean(valid_errs)

            monitor_loss.add(i + 1, train_loss)
            monitor_valid_err.add(i + 1, valid_err * 100)
            if valid_err < best_err:
                best_err = valid_err
                nn.save_parameters(param_file)

    # Final evaluation
    nn.load_parameters(param_file)
    v_errs = []
    for k in range(n_episode_for_test):
        xs_v.d, xq_v.d, yq_v.d = test_episode_generator.next()
        err_v.forward(clear_no_need_grad=True, clear_buffer=True)
        v_errs.append(float(err_v.d.copy()))
    v_err = np.mean(v_errs)
    v_err_conf = 1.96 * np.std(v_errs) / np.sqrt(n_episode_for_test)
    monitor_test_err.add(0, v_err * 100)
    monitor_test_conf.add(0, v_err_conf)

    # Visualization
    n_class = 50
    n_sample = 20
    batch = test_data[:n_class].reshape(n_class * n_sample, 1, 28, 28)
    label = []
    for i in range(n_class):
        label.extend(np.ones(n_sample) * (i % 50))
    u = get_embeddings(batch, conv4)
    v = get_tsne(u)
    plot_tsne(v[:, 0], v[:, 1], label, tsne_file)
Example 10
def train():
    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == "cifar10_resnet23_prediction":
        model_prediction = cifar10_resnet23_prediction
    elif args.net == 'cifar10_binary_connect_resnet23_prediction':
        model_prediction = cifar10_binary_connect_resnet23_prediction
    elif args.net == 'cifar10_binary_net_resnet23_prediction':
        model_prediction = cifar10_binary_net_resnet23_prediction
    elif args.net == 'cifar10_binary_weight_resnet23_prediction':
        model_prediction = cifar10_binary_weight_resnet23_prediction
    elif args.net == 'cifar10_fp_connect_resnet23_prediction':
        model_prediction = functools.partial(
            cifar10_fp_connect_resnet23_prediction,
            n=args.bit_width,
            delta=args.delta)
    elif args.net == 'cifar10_fp_net_resnet23_prediction':
        model_prediction = functools.partial(
            cifar10_fp_net_resnet23_prediction,
            n=args.bit_width,
            delta=args.delta)
    elif args.net == 'cifar10_pow2_connect_resnet23_prediction':
        model_prediction = functools.partial(
            cifar10_pow2_connect_resnet23_prediction,
            n=args.bit_width,
            m=args.upper_bound)
    elif args.net == 'cifar10_pow2_net_resnet23_prediction':
        model_prediction = functools.partial(
            cifar10_pow2_net_resnet23_prediction,
            n=args.bit_width,
            m=args.upper_bound)
    elif args.net == 'cifar10_inq_resnet23_prediction':
        model_prediction = functools.partial(cifar10_inq_resnet23_prediction,
                                             num_bits=args.bit_width)
    else:
        raise ValueError("Unknown network: {}".format(args.net))

    # TRAIN
    maps = 64
    data_iterator = data_iterator_cifar10
    c = 3
    h = w = 32
    n_train = 50000
    n_valid = 10000

    # Create input variables.
    image = nn.Variable([args.batch_size, c, h, w])
    label = nn.Variable([args.batch_size, 1])
    # Create model_prediction graph.
    pred = model_prediction(image, maps=maps, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, c, h, w])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = model_prediction(vimage, maps=maps, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator
    data = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)
    best_ve = 1.0
    ve = 1.0
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(int(n_valid / args.batch_size)):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            ve /= int(n_valid / args.batch_size)
            monitor_verr.add(i, ve)
        if ve < best_ve:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
            best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(int(n_valid / args.batch_size)):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    ve /= int(n_valid / args.batch_size)
    monitor_verr.add(i, ve)

    parameter_file = os.path.join(args.model_save_path,
                                  'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
Example 11
def loss_function(pred, label, reduction='mean'):
    loss_dict = {
        'mean': F.mean(F.softmax_cross_entropy(pred, label)),
        'sum': F.sum(F.softmax_cross_entropy(pred, label))
    }
    return loss_dict[reduction]
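
Both entries of the dict are added to the graph even though only one is returned. A lazier variant that builds just the requested reduction (an illustration, not from the source):

def loss_function_lazy(pred, label, reduction='mean'):
    ce = F.softmax_cross_entropy(pred, label)
    if reduction == 'mean':
        return F.mean(ce)
    if reduction == 'sum':
        return F.sum(ce)
    raise KeyError(reduction)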
Example 12
def ce_loss(ctx, pred, y_l):
    with nn.context_scope(ctx):
        loss_ce = F.mean(F.softmax_cross_entropy(pred, y_l))
    return loss_ce
Example 13
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for CIFAR10.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Training loop
      * Compute error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop on the training graph.
      * Compute training error
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
    """

    # define training parameters
    augmented_shift = True
    augmented_flip = True
    batch_size = 128
    vbatch_size = 100
    num_classes = 10
    weight_decay = 0.0002
    momentum = 0.9
    learning_rates = (cfg.initial_learning_rate,)*80 + \
        (cfg.initial_learning_rate / 10.,)*40 + \
        (cfg.initial_learning_rate / 100.,)*40
    print('lr={}'.format(learning_rates))
    print('weight_decay={}'.format(weight_decay))
    print('momentum={}'.format(momentum))

    # Create nnabla context
    from nnabla.ext_utils import get_extension_context
    ctx = get_extension_context('cudnn', device_id=args.gpu)
    nn.set_default_context(ctx)

    # Initialize DataIterator for CIFAR10.
    logger.info("Get CIFAR10 Data ...")
    data = cifar_data.DataIterator(batch_size,
                                   augmented_shift=augmented_shift,
                                   augmented_flip=augmented_flip)
    vdata = cifar_data.DataIterator(vbatch_size, val=True)

    if cfg.weightfile is not None:
        logger.info(f"Loading weights from {cfg.weightfile}")
        nn.load_parameters(cfg.weightfile)

    # TRAIN
    # Create input variables.
    image = nn.Variable([batch_size, 3, 32, 32])
    label = nn.Variable([batch_size, 1])

    # Create prediction graph.
    pred, hidden = resnet_cifar10(image,
                                  num_classes=num_classes,
                                  cfg=cfg,
                                  test=False)
    pred.persistent = True

    # Compute initial network size
    num_weights, kbytes_weights = network_size_weights()
    kbytes_weights.forward()
    print(f"Initial network size (weights) is {float(kbytes_weights.d):.3f}KB "
          f"(total number of weights: {int(num_weights):d}).")

    num_activations, kbytes_activations = network_size_activations()
    kbytes_activations.forward()
    print(
        f"Initial network size (activations) is {float(kbytes_activations.d):.3f}KB "
        f"(total number of activations: {int(num_activations):d}).")

    # Create loss function.
    cost_lambda2 = nn.Variable(())
    cost_lambda2.d = cfg.initial_cost_lambda2
    cost_lambda2.persistent = True
    cost_lambda3 = nn.Variable(())
    cost_lambda3.d = cfg.initial_cost_lambda3
    cost_lambda3.persistent = True

    loss1 = F.mean(F.softmax_cross_entropy(pred, label))
    loss1.persistent = True

    if cfg.target_weight_kbytes > 0:
        loss2 = F.relu(kbytes_weights - cfg.target_weight_kbytes)**2
        loss2.persistent = True
    else:
        loss2 = nn.Variable(())
        loss2.d = 0
        loss2.persistent = True
    if cfg.target_activation_kbytes > 0:
        loss3 = F.relu(kbytes_activations - cfg.target_activation_kbytes)**2
        loss3.persistent = True
    else:
        loss3 = nn.Variable(())
        loss3.d = 0
        loss3.persistent = True

    loss = loss1 + cost_lambda2 * loss2 + cost_lambda3 * loss3

    # VALID
    # Create input variables.
    vimage = nn.Variable([vbatch_size, 3, 32, 32])
    vlabel = nn.Variable([vbatch_size, 1])
    # Create prediction graph.
    vpred, vhidden = resnet_cifar10(vimage,
                                    num_classes=num_classes,
                                    cfg=cfg,
                                    test=True)
    vpred.persistent = True

    # Create Solver.
    if cfg.optimizer == "adam":
        solver = S.Adam(alpha=learning_rates[0])
    else:
        solver = S.Momentum(learning_rates[0], momentum)

    solver.set_parameters(nn.get_parameters())

    # Training loop (epochs)
    logger.info("Start Training ...")
    i = 0
    best_v_err = 1.0

    # logs of the results
    iters = []
    res_train_err = []
    res_train_loss = []
    res_val_err = []

    # print all variables that exist
    for k in nn.get_parameters():
        print(k)

    res_n_b = collections.OrderedDict()
    res_n_w = collections.OrderedDict()
    res_n_a = collections.OrderedDict()
    res_d_b = collections.OrderedDict()
    res_d_w = collections.OrderedDict()
    res_d_a = collections.OrderedDict()
    res_xmin_b = collections.OrderedDict()
    res_xmin_w = collections.OrderedDict()
    res_xmin_a = collections.OrderedDict()
    res_xmax_b = collections.OrderedDict()
    res_xmax_w = collections.OrderedDict()
    res_xmax_a = collections.OrderedDict()

    # Collect per-epoch logs for each quantization parameter (n, d, xmin,
    # xmax) of the bias ('bquant'), weight ('Wquant') and activation
    # ('Aquant') quantizers.
    res_by_key = {
        ('n', 'bquant'): res_n_b, ('n', 'Wquant'): res_n_w,
        ('n', 'Aquant'): res_n_a,
        ('d', 'bquant'): res_d_b, ('d', 'Wquant'): res_d_w,
        ('d', 'Aquant'): res_d_a,
        ('xmin', 'bquant'): res_xmin_b, ('xmin', 'Wquant'): res_xmin_w,
        ('xmin', 'Aquant'): res_xmin_a,
        ('xmax', 'bquant'): res_xmax_b, ('xmax', 'Wquant'): res_xmax_w,
        ('xmax', 'Aquant'): res_xmax_a,
    }
    for k in nn.get_parameters():
        parts = k.split('/')
        if len(parts) >= 3 and (parts[-1], parts[-3]) in res_by_key:
            res_by_key[(parts[-1], parts[-3])][k] = []

    for epoch in range(len(learning_rates)):
        train_loss = list()
        train_loss1 = list()
        train_loss2 = list()
        train_loss3 = list()
        train_err = list()

        # check whether we need to adapt the learning rate
        if epoch > 0 and learning_rates[epoch - 1] != learning_rates[epoch]:
            solver.set_learning_rate(learning_rates[epoch])

        # Training loop (iterations)
        start_epoch = True
        while data.current != 0 or start_epoch:
            start_epoch = False
            # Next batch
            image.d, label.d = data.next()

            # Training forward/backward
            solver.zero_grad()

            loss.forward()
            loss.backward()

            if weight_decay is not None:
                solver.weight_decay(weight_decay)

            # scale gradients
            if cfg.target_weight_kbytes > 0 or cfg.target_activation_kbytes > 0:
                clip_quant_grads()

            solver.update()
            e = categorical_error(pred.d, label.d)
            train_loss += [loss.d]
            train_loss1 += [loss1.d]
            train_loss2 += [loss2.d]
            train_loss3 += [loss3.d]
            train_err += [e]

            # make sure that parametric values are clipped to correct values (if outside)
            clip_quant_vals()

            # Intermediate Validation (when constraint is set and fulfilled)
            kbytes_weights.forward()
            kbytes_activations.forward()
            if (cfg.target_weight_kbytes > 0
                    and float(kbytes_weights.d) <= cfg.target_weight_kbytes
                    and (cfg.target_activation_kbytes <= 0
                         or float(kbytes_activations.d) <= cfg.target_activation_kbytes)):

                ve = list()
                start_epoch_ = True
                while vdata.current != 0 or start_epoch_:
                    start_epoch_ = False
                    vimage.d, vlabel.d = vdata.next()
                    vpred.forward()
                    ve += [categorical_error(vpred.d, vlabel.d)]

                v_err = np.array(ve).mean()
                if v_err < best_v_err:
                    best_v_err = v_err
                    nn.save_parameters(
                        os.path.join(cfg.params_dir, 'params_best.h5'))
                    print(
                        f'Best validation error (fulfilling constraints): {best_v_err}'
                    )
                    save_nnp(os.path.join(cfg.params_dir,
                                          nnp_out_name), 'resnet20',
                             batch_size, image, vimage, pred, vpred, loss)
                    sys.stdout.flush()
                    sys.stderr.flush()

            i += 1

        # Validation
        ve = list()
        start_epoch = True
        while vdata.current != 0 or start_epoch:
            start_epoch = False
            vimage.d, vlabel.d = vdata.next()
            vpred.forward()
            ve += [categorical_error(vpred.d, vlabel.d)]

        v_err = np.array(ve).mean()
        kbytes_weights.forward()
        kbytes_activations.forward()
        if ((v_err < best_v_err
             and (cfg.target_weight_kbytes <= 0
                  or float(kbytes_weights.d) <= cfg.target_weight_kbytes) and
             (cfg.target_activation_kbytes <= 0 or
              float(kbytes_activations.d) <= cfg.target_activation_kbytes))):
            best_v_err = v_err
            nn.save_parameters(os.path.join(cfg.params_dir, 'params_best.h5'))
            save_nnp(os.path.join(cfg.params_dir, nnp_out_name), 'resnet20',
                     batch_size, image, vimage, pred, vpred, loss)
            sys.stdout.flush()
            sys.stderr.flush()

        if cfg.target_weight_kbytes > 0:
            print(
                f"Current network size (weights) is {float(kbytes_weights.d):.3f}KB "
                f"(#params: {int(num_weights)}, "
                f"avg. bitwidth: {8. * 1024. * kbytes_weights.d / num_weights})"
            )
            sys.stdout.flush()
            sys.stderr.flush()
        if cfg.target_activation_kbytes > 0:
            print(
                f"Current network size (activations) is {float(kbytes_activations.d):.3f}KB"
            )
            sys.stdout.flush()
            sys.stderr.flush()

        params = nn.get_parameters()
        for k in params:
            parts = k.split('/')
            if parts[-1] not in ('n', 'd', 'xmin', 'xmax'):
                continue
            print(f'{k}', f'{params[k].d}', f'{params[k].g}')
            sys.stdout.flush()
            sys.stderr.flush()
            key = (parts[-1], parts[-3]) if len(parts) >= 3 else None
            if key in res_by_key:
                # np.asscalar was removed from NumPy; float() is equivalent
                # for these scalar parameters.
                res_by_key[key][k].append(float(params[k].d))

        # Print
        logger.info(f'epoch={epoch}(iter={i}); '
                    f'overall cost={np.array(train_loss).mean()}; '
                    f'cross-entropy cost={np.array(train_loss1).mean()}; '
                    f'weight-size cost={np.array(train_loss2).mean()}; '
                    f'activations-size cost={np.array(train_loss3).mean()}; '
                    f'TrainErr={np.array(train_err).mean()}; '
                    f'ValidErr={v_err}; BestValidErr={best_v_err}')
        sys.stdout.flush()
        sys.stderr.flush()

        # update the logs
        iters.append(i)
        res_train_err.append(np.array(train_err).mean())
        res_train_loss.append([
            np.array(train_loss).mean(),
            np.array(train_loss1).mean(),
            np.array(train_loss2).mean(),
            np.array(train_loss3).mean()
        ])
        res_val_err.append(np.array(v_err).mean())
        res_ges = np.concatenate([
            np.array(iters)[:, np.newaxis],
            np.array(res_train_err)[:, np.newaxis],
            np.array(res_val_err)[:, np.newaxis],
            np.array(res_train_loss)
        ],
                                 axis=-1)

        # save the results
        np.savetxt(cfg.params_dir + '/results.csv',
                   np.array(res_ges),
                   fmt='%10.8f',
                   header='iter,train_err,val_err,loss,loss1,loss2,loss3',
                   comments='',
                   delimiter=',')

        for rs, res in zip([
                'res_n_b.csv', 'res_n_w.csv', 'res_n_a.csv', 'res_d_b.csv',
                'res_d_w.csv', 'res_d_a.csv', 'res_min_b.csv', 'res_min_w.csv',
                'res_min_a.csv', 'res_max_b.csv', 'res_max_w.csv',
                'res_max_a.csv'
        ], [
                res_n_b, res_n_w, res_n_a, res_d_b, res_d_w, res_d_a,
                res_xmin_b, res_xmin_w, res_xmin_a, res_xmax_b, res_xmax_w,
                res_xmax_a
        ]):
            res_mat = np.array([res[key] for key in res])
            if res_mat.shape[0] > 1 and res_mat.shape[1] > 1:
                np.savetxt(
                    cfg.params_dir + '/' + rs,
                    np.array([[i, j, res_mat[i, j]] for i, j in product(
                        range(res_mat.shape[0]), range(res_mat.shape[1]))]),
                    fmt='%10.8f',
                    comments='',
                    delimiter=',')
Example 14
    def __init__(self,
                 solver,
                 tinput=None,
                 tlabel=None,
                 tpred=None,
                 tdata=None,
                 vinput=None,
                 vlabel=None,
                 vpred=None,
                 vdata=None,
                 monitor_path=None,
                 model_save_path=None,
                 max_epoch=1,
                 iter_per_epoch=None,
                 val_iter=None):
        # Monitors
        monitor = Monitor(monitor_path)
        monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
        monitor_err = MonitorSeries("Training error", monitor, interval=10)
        monitor_vloss = MonitorSeries("Valid loss", monitor, interval=1)
        monitor_verr = MonitorSeries("Valid error", monitor, interval=1)
        monitor_time = MonitorTimeElapsed("Training time",
                                          monitor,
                                          interval=10)

        # Loss and error
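        # top_n_error is computed on an unlinked copy of the prediction so
        # that the error evaluation stays outside the loss's backward graph.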
        tpred = tpred.apply(persistent=True)
        tloss = F.mean(F.softmax_cross_entropy(tpred, tlabel))
        terror = F.mean(F.top_n_error(tpred.get_unlinked_variable(), tlabel))
        vpred = vpred.apply(persistent=True)
        vloss = F.mean(F.softmax_cross_entropy(vpred, vlabel))
        verror = F.mean(F.top_n_error(vpred.get_unlinked_variable(), vlabel))

        # Updater
        def tdata_feeder():
            tinput.d, tlabel.d = tdata.next()

        def forward_callback_on_finish(i):
            terror.forward()

        def update_callback_on_finish(i):
            monitor_loss.add(i, tloss.d)
            monitor_err.add(i, terror.d)
            monitor_time.add(i)

        updater = Updater(
            solver,
            tloss,
            data_feeder=tdata_feeder,
            forward_callback_on_finish=forward_callback_on_finish,
            update_callback_on_finish=update_callback_on_finish)

        # Evaluator
        def vdata_feeder():
            vinput.d, vlabel.d = vdata.next()

        def vloss_callback_on_finish(i, v):
            monitor_vloss.add(i, v)

        def verror_callback_on_finish(i, v):
            monitor_verr.add(i, v)

        val_iter = val_iter if val_iter is not None else vdata.size // vdata.batch_size
        evaluator = Evaluator([vloss, verror],
                              data_feeder=vdata_feeder,
                              val_iter=val_iter,
                              callback_on_finish=[
                                  vloss_callback_on_finish,
                                  verror_callback_on_finish
                              ])

        # Trainer
        iter_per_epoch = iter_per_epoch if iter_per_epoch is not None \
            else tdata.size // tdata.batch_size
        self.trainer = Trainer(updater,
                               evaluator,
                               model_save_path,
                               max_epoch=max_epoch,
                               iter_per_epoch=iter_per_epoch)
Example 15
def train():
    '''
    Run D3Net Semantic Segmentation Training
    '''
    # Check NNabla version
    if get_nnabla_version_integer() < 12100:
        raise ValueError(
            'This code does not work with nnabla versions below v1.21.0, since [ignore index less than 0](https://github.com/sony/nnabla/pull/945) was added in v1.21.0. Please update nnabla.')

    args = get_args()
    # Load D3Net Hyper parameters (D3Net-L or D3Net-S)
    with open(args.config_file) as file:
        hparams = yaml.load(file, Loader=yaml.FullLoader)

    # Get context.
    ctx = get_extension_context(args.context, device_id=0)
    comm = CommunicatorWrapper(ctx)
    nn.set_default_context(comm.ctx)

    # Change max_iter, learning rate and weight decay according to the number of GPU devices for multi-GPU training.
    default_batch_size = 8
    train_scale_factor = comm.n_procs * \
        (hparams['batch_size'] / default_batch_size)
    hparams['max_iter'] = int(hparams['max_iter'] // train_scale_factor)
    hparams['lr'] = hparams['lr'] * train_scale_factor
    hparams['min_lr'] = hparams['min_lr'] * train_scale_factor
    hparams['weight_decay'] = hparams['weight_decay'] * comm.n_procs

    # ---------------------
    # Create data iterators
    # ---------------------
    rng = np.random.RandomState()
    data = data_iterator_cityscapes(
        hparams['batch_size'], args.data_dir, rng=rng, train=True)

    if comm.n_procs > 1:
        data = data.slice(rng=rng, num_of_slices=comm.n_procs,
                          slice_pos=comm.rank)

    if comm.rank == 0:
        if not os.path.isdir(args.output_dir):
            os.makedirs(args.output_dir)

    # Create monitors
    monitor = M.Monitor(args.output_dir)
    monitor_training_loss = M.MonitorSeries(
        'Training loss', monitor, interval=args.log_interval)
    monitor_lr = M.MonitorSeries(
        'Learning rate', monitor, interval=args.log_interval)
    monitor_time = M.MonitorTimeElapsed(
        "Training time per iteration", monitor, interval=args.log_interval)

    # ---------------------
    # Create Training Graph
    # ---------------------
    # Create input variables
    image = nn.Variable(
        (hparams['batch_size'], 3, hparams['image_height'], hparams['image_width']))
    seg_gt = nn.Variable(
        (hparams['batch_size'], 1, hparams['image_height'], hparams['image_width']))

    # D3Net prediction/output
    seg_pred = d3net_segmentation(image, hparams, recompute=args.recompute)

    # Configure loss
    loss = F.mean(F.softmax_cross_entropy(seg_pred, seg_gt, axis=1))
    loss.persistent = True

    # Create Solver
    solver = S.Momentum(hparams['lr'], hparams['momentum'])
    solver.set_parameters(nn.get_parameters())

    # Initialize LR Scheduler
    lr_scheduler = PolynomialScheduler(hparams)

    if args.pretrained is not None:
        # Initialize the D3Net backbone weights
        with nn.parameter_scope('backbone'):
            nn.load_parameters(args.pretrained)

    # -------------
    # Training loop
    # -------------
    for i in range(hparams['max_iter']):
        image.d, seg_gt.d = data.next()
        solver.zero_grad()
        lr = lr_scheduler.get_learning_rate(i)
        solver.set_learning_rate(lr)
        loss.forward(clear_no_need_grad=True)

        if comm.n_procs > 1:
            all_reduce_callback = comm.get_all_reduce_callback()
            loss.backward(clear_buffer=True,
                          communicator_callbacks=all_reduce_callback)
        else:
            loss.backward(clear_buffer=True)
        solver.weight_decay(hparams['weight_decay'])
        solver.update()

        if comm.rank == 0:
            # Log monitors
            monitor_training_loss.add(i, loss.d.copy())
            monitor_lr.add(i, lr)
            monitor_time.add(i)

            if (i % hparams['save_interval']) == 0:
                # Save intermediate model parameters
                nn.save_parameters(os.path.join(
                    args.output_dir, "model_param_%08d.h5" % i))
                solver.save_states(os.path.join(
                    args.output_dir, "solver_states.h5"))

    if comm.rank == 0:
        # save final model parameters
        nn.save_parameters(os.path.join(args.output_dir, "final.h5"))
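
PolynomialScheduler is defined elsewhere in this repository; below is a minimal sketch of the polynomial decay such a scheduler conventionally implements (the power exponent and field names are assumptions):

class PolynomialScheduler:
    def __init__(self, hparams):
        self.base_lr = hparams['lr']
        self.min_lr = hparams['min_lr']
        self.max_iter = hparams['max_iter']
        self.power = hparams.get('power', 0.9)

    def get_learning_rate(self, it):
        # Decay from base_lr down to min_lr along a polynomial curve.
        decay = (1.0 - float(it) / self.max_iter) ** self.power
        return self.min_lr + (self.base_lr - self.min_lr) * decay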
Example 16
def test_graph_model(model, seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    nn.clear_parameters()
    if model == "mlp":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
        z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
            z4 = PF.affine(z2, 5)
    elif model == "recurrent":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 4)
            z2 = F.relu(z, inplace=True)
        h = z2
        for _ in range(2):
            with nn.parameter_scope('fc2'):
                h = PF.affine(h, 4)
                h = F.relu(h, inplace=True)
        with nn.parameter_scope('fc3'):
            z3 = PF.affine(h, 5)
            z4 = PF.affine(h, 5)
    elif model == "convolution":
        with nn.parameter_scope('conv1'):
            z = PF.convolution(x, 3, (2, 2))
            z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
            z4 = PF.affine(z2, 5)
    else:
        raise ValueError("Unknown model: {}".format(model))
    l1 = F.softmax_cross_entropy(z3, t, 1)
    L1 = F.mean(l1)
    l2 = F.softmax_cross_entropy(z4, t, 1)
    L2 = F.mean(l2)

    # Forwardprop
    nn.forward_all([L1, L2])

    parameters = nn.get_parameters()

    # Backprop for L1
    # Diff should be initialized since they are always accumulated
    x.grad.zero()
    initialize_grad(parameters)
    L1.backward(clear_buffer=True)
    inputs = [x] + list(parameters.values())

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L1, inputs, 1e-3, False)
    assert_allclose(ngrad, agrad, atol=1.05e-2)

    # Backprop for L2
    # Diff should be initialized since they are always accumulated
    x.grad.zero()
    initialize_grad(parameters)
    L2.backward(clear_buffer=True)
    inputs = [x] + list(parameters.values())

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L2, inputs, 1e-3, False)
    assert_allclose(ngrad, agrad, atol=1.05e-2)
Example 17
def loss_func(pred, label):
    return F.mean(F.softmax_cross_entropy(pred, label))
Example 18
def cifar100_resnet32_loss(pred, label):
    loss = F.mean(F.softmax_cross_entropy(pred, label))
    return loss
Example 19
def logreg_loss(y, t):
    loss_f = F.mean(F.softmax_cross_entropy(y, t))
    return loss_f
Example 20
def loss_function(pred, label):
    loss = F.mean(F.softmax_cross_entropy(pred, label))
    return loss
Example 21
def sample_from_controller(args):
    """
        2-layer RNN(LSTM) based controller which outputs an architecture of CNN, 
        represented as a sequence of integers and its list.
        Given the number of layers, for each layer, 
        it executes 2 types of computation, one for sampling the operation at that layer,
        another for sampling the skip connection patterns.
    """

    entropys = nn.Variable([1, 1], need_grad=True)
    log_probs = nn.Variable([1, 1], need_grad=True)

    entropys.d = log_probs.d = 0.0  # initialize them all

    num_cells = args.num_cells
    num_nodes = args.num_nodes
    lstm_size = args.lstm_size
    state_size = args.state_size
    lstm_num_layers = args.lstm_layers
    temperature = args.temperature
    tanh_constant = args.tanh_constant
    op_tanh_reduce = args.op_tanh_reduce
    num_branch = args.num_ops

    both_archs = [list(), list()]
    initializer = I.UniformInitializer((-0.1, 0.1))

    prev_h = [
        nn.Variable([1, lstm_size], need_grad=True)
        for _ in range(lstm_num_layers)
    ]
    prev_c = [
        nn.Variable([1, lstm_size], need_grad=True)
        for _ in range(lstm_num_layers)
    ]

    for i in range(len(prev_h)):
        prev_h[i].d = 0  # initialize.
        prev_c[i].d = 0

    inputs = nn.Variable([1, lstm_size])
    inputs.d = np.random.normal(0, 0.5, [1, lstm_size])

    g_emb = nn.Variable([1, lstm_size])
    g_emb.d = np.random.normal(0, 0.5, [1, lstm_size])

    for ind in range(2):
        # First create the conv cell, then the reduction cell.
        idx_seq = list()
        ops_seq = list()
        for node_id in range(num_nodes):
            if node_id == 0:
                anchors = nn.parameter.get_parameter_or_create("anchors",
                                                               [2, lstm_size],
                                                               initializer,
                                                               need_grad=False)
                anchors_w_1 = nn.parameter.get_parameter_or_create(
                    "anchors_w_1", [2, lstm_size],
                    initializer,
                    need_grad=False)
            else:
                assert anchors.shape[0] == node_id + \
                    2, "Something wrong with anchors."
                assert anchors_w_1.shape[0] == node_id + \
                    2, "Something wrong with anchors_w_1."

            # for each node, get the index used as inputs
            for i in range(2):
                # One-step stacked LSTM.
                with nn.parameter_scope("controller_lstm"):
                    next_h, next_c = stack_lstm(inputs, prev_h, prev_c,
                                                state_size)
                prev_h, prev_c = next_h, next_c  # shape:(1, lstm_size)
                query = anchors_w_1

                with nn.parameter_scope("skip_affine_1"):
                    query = F.tanh(
                        F.add2(
                            query,
                            PF.affine(next_h[-1],
                                      lstm_size,
                                      w_init=initializer,
                                      with_bias=False)))
                    #            (node_id + 2, lstm_size)   +   (1, lstm_size)
                    # Broadcasting occurs here; the resulting shape is (node_id + 2, lstm_size).

                with nn.parameter_scope("skip_affine_2"):
                    # (node_id + 2, 1)
                    logit = PF.affine(query,
                                      1,
                                      w_init=initializer,
                                      with_bias=False)

                if temperature is not None:
                    logit = F.mul_scalar(logit, (1 / temperature))
                if tanh_constant is not None:
                    logit = F.mul_scalar(F.tanh(logit), tanh_constant)

                index = F.exp(logit)
                index = F.mul_scalar(index, (1 / index.d.sum()))

                # Sampling input indices from multinomial distribution.
                index = np.random.multinomial(
                    1,
                    np.reshape(index.d, (1, index.d.size))[0], 1)
                idx_seq.append(index.nonzero()[1])

                label = nn.Variable.from_numpy_array(
                    index.transpose())  # (node_id + 2, 1)
                log_prob = F.softmax_cross_entropy(logit, label)
                log_probs = F.add2(log_probs, F.sum(log_prob, keepdims=True))

                curr_ent = F.softmax_cross_entropy(logit, F.softmax(logit))
                entropy = F.sum(curr_ent, keepdims=True)
                entropys = F.add2(entropys, entropy)
                taking_ind = int(index.nonzero()[1][0])

                # (1, lstm_size)
                inputs = F.reshape(anchors[taking_ind], (1, anchors.shape[1]))

            # ops
            for j in range(2):
                with nn.parameter_scope("controller_lstm"):
                    next_h, next_c = stack_lstm(inputs, prev_h, prev_c,
                                                state_size)
                prev_h, prev_c = next_h, next_c  # shape:(1, lstm_size)

                # Compute for operation.
                with nn.parameter_scope("ops"):
                    logit = PF.affine(next_h[-1],
                                      num_branch,
                                      w_init=initializer,
                                      with_bias=False)

                # shape of logit : (1, num_branch)
                if temperature is not None:
                    logit = F.mul_scalar(logit, (1 / temperature))

                if tanh_constant is not None:
                    op_tanh = tanh_constant / op_tanh_reduce
                    logit = F.mul_scalar(F.tanh(logit), op_tanh)

                # normalizing logits.
                normed_logit = np.e**logit.d
                normed_logit = normed_logit / np.sum(normed_logit)

                # Sampling operation id from multinomial distribution.
                branch_id = np.random.multinomial(1, normed_logit[0],
                                                  1).nonzero()[1]
                branch_id = nn.Variable.from_numpy_array(branch_id)
                ops_seq.append(branch_id.d)

                # log policy for operation.
                log_prob = F.softmax_cross_entropy(
                    logit, F.reshape(branch_id, shape=(1, 1)))
                # accumulate log policy as log probs
                log_probs = F.add2(log_probs, log_prob)

                logit = F.transpose(logit, axes=(1, 0))
                curr_ent = F.softmax_cross_entropy(logit, F.softmax(logit))
                entropy = F.sum(curr_ent, keepdims=True)
                entropys = F.add2(entropys, entropy)

                w_emb = nn.parameter.get_parameter_or_create(
                    "w_emb", [num_branch, lstm_size],
                    initializer,
                    need_grad=False)
                # (1, lstm_size)
                inputs = F.reshape(w_emb[int(branch_id.d)],
                                   (1, w_emb.shape[1]))

                with nn.parameter_scope("controller_lstm"):
                    next_h, next_c = stack_lstm(inputs, prev_h, prev_c,
                                                lstm_size)
                prev_h, prev_c = next_h, next_c

                with nn.parameter_scope("skip_affine_3"):
                    adding_w_1 = PF.affine(next_h[-1],
                                           lstm_size,
                                           w_init=initializer,
                                           with_bias=False)

            # (node_id + 2 + 1, lstm_size)
            anchors = F.concatenate(anchors, next_h[-1], axis=0)
            # (node_id + 2 + 1, lstm_size)
            anchors_w_1 = F.concatenate(anchors_w_1, adding_w_1, axis=0)

        for idx, ops in zip(idx_seq, ops_seq):
            both_archs[ind].extend([int(idx), int(ops)])

    return both_archs, log_probs, entropys
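
The returned log_probs and entropys typically feed a REINFORCE-style update of the controller. A sketch of that objective (args, val_accuracy and the entropy coefficient are hypothetical):

import numpy as np
import nnabla as nn

both_archs, log_probs, entropys = sample_from_controller(args)
reward = nn.Variable.from_numpy_array(
    np.array([[val_accuracy]], dtype=np.float32))
# Minimize the negative reward-weighted log-probability, with an entropy
# bonus that encourages exploration.
objective = -(log_probs * reward + 1e-4 * entropys)
objective.forward()
objective.backward()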
Example 22
def train():
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == "cifar10_resnet2rnn_10_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[10])
    if args.net == "cifar10_resnet2rnn_15_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[15])
    if args.net == "cifar10_resnet2rnn_3x3x4_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[3, 3, 4])
    if args.net == "cifar10_resnet2rnn_5x5_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[5, 5])
    if args.net == "cifar10_resnet2rnn_5_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[5])
    if args.net == "cifar10_bresnet2rnn_10_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[10],
                                             res_unit=res_unit_bottleneck)
    if args.net == "cifar10_bresnet2rnn_15_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[15],
                                             res_unit=res_unit_bottleneck)
    if args.net == "cifar10_bresnet2rnn_3x3x4_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[3, 3, 4],
                                             res_unit=res_unit_bottleneck)
    if args.net == "cifar10_bresnet2rnn_5x5_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[5, 5],
                                             res_unit=res_unit_bottleneck)
    if args.net == "cifar10_bresnet2rnn_5_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[5],
                                             res_unit=res_unit_bottleneck)

    # TRAIN
    maps = 64
    data_iterator = data_iterator_cifar10
    c = 3
    h = w = 32
    n_train = 50000
    n_valid = 10000
    # Create input variables.
    image = nn.Variable([args.batch_size, c, h, w])
    label = nn.Variable([args.batch_size, 1])
    # Create `teacher` model_prediction graph.
    pred = model_prediction(image, maps=maps, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, c, h, w])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create teacher prediction graph.
    vpred = model_prediction(vimage, maps=maps, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)

    # Initialize DataIterator
    data = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)
    best_ve = 1.0
    ve = 1.0
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(int(n_valid / args.batch_size)):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            ve /= int(n_valid / args.batch_size)
            monitor_verr.add(i, ve)
        if ve < best_ve:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
            best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(int(n_valid / args.batch_size)):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    ve /= int(n_valid / args.batch_size)
    monitor_verr.add(i, ve)

    parameter_file = os.path.join(args.model_save_path,
                                  'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
Example 23
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Compute error rate for validation data (periodically)
      * Get a next minibatch.
      * Set parameter gradients zero
      * Execute forwardprop on the training graph.
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    args = get_args(monitor_path='tmp.monitor.bnn')

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_binary_connect_lenet_prediction
    if args.net == 'bincon':
        mnist_cnn_prediction = mnist_binary_connect_lenet_prediction
    elif args.net == 'binnet':
        mnist_cnn_prediction = mnist_binary_net_lenet_prediction
    elif args.net == 'bwn':
        mnist_cnn_prediction = mnist_binary_weight_lenet_prediction
    elif args.net == 'bincon_resnet':
        mnist_cnn_prediction = mnist_binary_connect_resnet_prediction
    elif args.net == 'binnet_resnet':
        mnist_cnn_prediction = mnist_binary_net_resnet_prediction
    elif args.net == 'bwn_resnet':
        mnist_cnn_prediction = mnist_binary_weight_resnet_prediction

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image / 255, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage / 255, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())
    start_point = 0

    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=10)

    # save_nnp
    contents = save_nnp({'x': vimage}, {'y': vpred}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           '{}_result_epoch0.nnp'.format(args.net)), contents)

    # Training loop.
    for i in range(start_point, args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            # save checkpoint file
            save_checkpoint(args.model_save_path, i, solver)
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        # Training backward & update
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        # Monitor
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    parameter_file = os.path.join(
        args.model_save_path, 'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)

    # save_nnp_lastepoch
    contents = save_nnp({'x': vimage}, {'y': vpred}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           '{}_result.nnp'.format(args.net)), contents)
Example No. 24
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Compute the error rate on validation data (periodically).
      * Get the next minibatch.
      * Execute forwardprop on the training graph.
      * Compute the training error.
      * Set parameter gradients to zero.
      * Execute backprop.
      * Update parameters with the solver, using the gradients computed by backprop.
    """
    args = get_args()

    from numpy.random import seed
    seed(0)

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == 'lenet':
        mnist_cnn_prediction = mnist_lenet_prediction
    elif args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction
    else:
        raise ValueError("Unknown network type {}".format(args.net))

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image, test=False, aug=args.augment_train)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage, test=True, aug=args.augment_test)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    from numpy.random import RandomState
    data = data_iterator_mnist(args.batch_size, True, rng=RandomState(1223))
    vdata = data_iterator_mnist(args.batch_size, False)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
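                # Cast the output to float32 so categorical_error computes on
                # full-precision values (matters when type_config is half).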
                vpred.data.cast(np.float32, ctx)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        loss.data.cast(np.float32, ctx)
        pred.data.cast(np.float32, ctx)
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    parameter_file = os.path.join(
        args.model_save_path,
        '{}_params_{:06}.h5'.format(args.net, args.max_iter))
    nn.save_parameters(parameter_file)
Example No. 25
def main():
    """
    Main script.

    Steps:
    * Get and set context.
    * Load Dataset
    * Initialize DataIterator.
    * Create Networks
      * Net for Labeled Data
      * Net for Unlabeled Data
      * Net for Test Data
    * Create Solver.
    * Training Loop.
      * Test
      * Training
        * by Labeled Data
          * Calculate Cross-Entropy Loss
        * by Unlabeled Data
          * Estimate Adversarial Direction
          * Calculate LDS Loss
    """

    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    shape_x = (1, 28, 28)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNIST dataset
    from mnist_data import MnistDataSource
    with MnistDataSource(train=True) as d:
        x_t = d.images
        t_t = d.labels
    with MnistDataSource(train=False) as d:
        x_v = d.images
        t_v = d.labels
    x_t = np.array(x_t / 256.0).astype(np.float32)
    x_t, t_t = x_t[:args.n_train], t_t[:args.n_train]
    x_v, t_v = x_v[:args.n_valid], t_v[:args.n_valid]

    # Create Semi-supervised Datasets
    x_l, t_l, x_u, _ = split_dataset(x_t, t_t, args.n_labeled, args.n_class)
    x_u = np.r_[x_l, x_u]
    x_v = np.array(x_v / 256.0).astype(np.float32)

    # Create DataIterators for datasets of labeled, unlabeled and validation
    di_l = DataIterator(args.batchsize_l, [x_l, t_l])
    di_u = DataIterator(args.batchsize_u, [x_u])
    di_v = DataIterator(args.batchsize_v, [x_v, t_v])

    # Create networks
    # feed-forward-net building function
    def forward(x, test=False):
        return mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data
    xl = nn.Variable((args.batchsize_l, ) + shape_x, need_grad=False)
    hl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(hl, tl))

    # Net for learning unlabeled data
    xu = nn.Variable((args.batchsize_u, ) + shape_x, need_grad=False)
    r = nn.Variable((args.batchsize_u, ) + shape_x, need_grad=True)
    eps = nn.Variable((args.batchsize_u, ) + shape_x, need_grad=False)
    loss_u, yu = vat(xu, r, eps, forward, distance)

    # Net for evaluating validation data
    xv = nn.Variable((args.batchsize_v, ) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor training and validation stats.
    import nnabla.monitor as M
    monitor = M.Monitor(args.model_save_path)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240)

    # Training Loop.
    t0 = time.time()

    for i in range(args.max_iter):

        # Validation Test
        if i % args.val_interval == 0:
            n_error = calc_validation_error(di_v, xv, tv, hv, args.val_iter)
            monitor_verr.add(i, n_error)

        #################################
        ## Training by Labeled Data #####
        #################################

        # input minibatch of labeled data into variables
        xl.d, tl.d = di_l.next()

        # initialize gradients
        solver.zero_grad()

        # forward, backward and update
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        #################################
        ## Training by Unlabeled Data ###
        #################################

        # input minibatch of unlabeled data into variables
        xu.d, = di_u.next()

        ##### Calculate Adversarial Noise #####

        # Sample random noise
        n = np.random.normal(size=xu.shape).astype(np.float32)

        # Normalize the noise vector and feed it into the variable
        r.d = get_direction(n)

        # Set xi, the power-method scaling parameter.
        eps.data.fill(args.xi_for_vat)

        # Calculate y without noise, only once.
        yu.forward(clear_buffer=True)

        # Do power method iteration
        for k in range(args.n_iter_for_power_method):
            # Initialize gradient to receive value
            r.grad.zero()

            # forward, backward, without update
            loss_u.forward(clear_no_need_grad=True)
            loss_u.backward(clear_buffer=True)

            # Normalize the gradient vector and feed it into the variable
            r.d = get_direction(r.g)

        ##### Calculate loss for unlabeled data #####

        # Clear remained gradients
        solver.zero_grad()

        # Set epsilon, the adversarial noise scaling parameter.
        eps.data.fill(args.eps_for_vat)

        # forward, backward and update
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        ##### Learning rate update #####
        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(solver.learning_rate() *
                                     args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = calc_validation_error(di_v, xv, tv, hv, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)

    # Save the model.
    parameter_file = os.path.join(args.model_save_path,
                                  'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)
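
The VAT loop above relies on a helper `get_direction` defined elsewhere in this project. A minimal sketch of such a normalizer, assuming it rescales each sample of the tensor to unit L2 norm (the name `get_direction_sketch` and the epsilon are illustrative):

import numpy as np

def get_direction_sketch(d):
    # Flatten each sample, scale it to unit L2 norm, restore the shape.
    u = d.reshape((d.shape[0], -1))
    u = u / (np.sqrt(np.sum(u ** 2, axis=1, keepdims=True)) + 1e-12)
    return u.reshape(d.shape).astype(np.float32)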
Example No. 26
def meta_train(args, train_data, valid_data, test_data):

    # Build episode generators
    shape_x = (1, 28, 28)
    train_episode_generator = EpisodeGenerator(args.n_class_tr, args.n_shot_tr,
                                               args.n_query_tr, shape_x,
                                               train_data)
    valid_episode_generator = EpisodeGenerator(args.n_class, args.n_shot,
                                               args.n_query, shape_x,
                                               valid_data)
    test_episode_generator = EpisodeGenerator(args.n_class, args.n_shot,
                                              args.n_query, shape_x, test_data)

    # Build training model
    xs_t = nn.Variable((args.n_class_tr * args.n_shot_tr, ) + shape_x)
    xq_t = nn.Variable((args.n_class_tr * args.n_query_tr, ) + shape_x)
    hq_t = net(args.n_class_tr, xs_t, xq_t, args.embedding, args.net_type,
               args.metric, False)
    yq_t = nn.Variable((args.n_class_tr * args.n_query_tr, 1))
    loss_t = F.mean(F.softmax_cross_entropy(hq_t, yq_t))

    # Build evaluation model
    xs_v = nn.Variable((args.n_class * args.n_shot, ) + shape_x)
    xq_v = nn.Variable((args.n_class * args.n_query, ) + shape_x)
    hq_v = net(args.n_class, xs_v, xq_v, args.embedding, args.net_type,
               args.metric, True)
    yq_v = nn.Variable((args.n_class * args.n_query, 1))
    err_v = F.mean(F.top_n_error(hq_v, yq_v, n=1))

    # Setup solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor outputs
    monitor = Monitor(args.work_dir)
    monitor_loss = MonitorSeries("Training loss",
                                 monitor,
                                 interval=args.iter_per_epoch)
    monitor_valid_err = MonitorSeries("Validation error",
                                      monitor,
                                      interval=args.iter_per_valid)
    monitor_test_err = MonitorSeries("Test error", monitor)
    monitor_test_conf = MonitorSeries("Test error confidence", monitor)

    # Output files
    param_file = os.path.join(args.work_dir, "params.h5")
    tsne_file = os.path.join(args.work_dir, "tsne.png")

    # Training loop
    train_losses = []
    best_err = 1.0
    for i in range(args.max_iteration):

        # Decay learning rate
        if (i + 1) % args.lr_decay_interval == 0:
            solver.set_learning_rate(solver.learning_rate() * args.lr_decay)

        # Create an episode
        xs_t.d, xq_t.d, yq_t.d = train_episode_generator.next()

        # Training by the episode
        solver.zero_grad()
        loss_t.forward(clear_no_need_grad=True)
        loss_t.backward(clear_buffer=True)
        solver.update()
        train_losses.append(loss_t.d.copy())

        # Evaluation
        if (i + 1) % args.iter_per_valid == 0:
            train_loss = np.mean(train_losses)
            train_losses = []
            valid_errs = []
            for k in range(args.n_episode_for_valid):
                xs_v.d, xq_v.d, yq_v.d = valid_episode_generator.next()
                err_v.forward(clear_no_need_grad=True, clear_buffer=True)
                valid_errs.append(float(err_v.d.copy()))
            valid_err = np.mean(valid_errs)

            monitor_valid_err.add(i + 1, valid_err * 100)
            if valid_err < best_err:
                best_err = valid_err
                nn.save_parameters(param_file)

    # Final evaluation
    nn.load_parameters(param_file)
    v_errs = []
    for k in range(args.n_episode_for_test):
        xs_v.d, xq_v.d, yq_v.d = test_episode_generator.next()
        err_v.forward(clear_no_need_grad=True, clear_buffer=True)
        v_errs.append(float(err_v.d.copy()))
    v_err_mean = np.mean(v_errs)
    v_err_std = np.std(v_errs)
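    # 1.96 standard errors gives the half-width of a 95% normal-approximation
    # confidence interval for the mean test error.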
    v_err_conf = 1.96 * v_err_std / np.sqrt(args.n_episode_for_test)
    monitor_test_err.add(0, v_err_mean * 100)
    monitor_test_conf.add(0, v_err_conf * 100)

    # Visualization
    n_class = 50
    n_sample = 20
    batch = test_data[:n_class].reshape(n_class * n_sample, 1, 28, 28)
    label = []
    for i in range(n_class):
        label.extend(np.ones(n_sample) * (i % 50))
    u = get_embeddings(batch, conv4)
    v = get_tsne(u)
    plot_tsne(v[:, 0], v[:, 1], label, tsne_file)
Example No. 27
def loss_function(pred, label):
    """
        Compute loss.
    """
    loss = F.mean(F.softmax_cross_entropy(pred, label))
    return loss
Example No. 28
def ce_loss(pred, label):
    return F.mean(F.softmax_cross_entropy(pred, label))
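
A minimal usage sketch for this helper; the shapes and values below are illustrative, not from this project:

import numpy as np
import nnabla as nn

pred = nn.Variable([8, 10])   # hypothetical logits: 8 samples, 10 classes
label = nn.Variable([8, 1])   # integer class labels
loss = ce_loss(pred, label)
pred.d = np.random.randn(8, 10)
label.d = np.random.randint(0, 10, size=(8, 1))
loss.forward()
print(loss.d)                 # scalar mean cross-entropy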
Example No. 29
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Compute the error rate on validation data (periodically).
      * Get the next minibatch.
      * Execute forwardprop on the training graph.
      * Compute the training error.
      * Set parameter gradients to zero.
      * Execute backprop.
      * Update parameters with the solver, using the gradients computed by backprop.
    """
    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_lenet_prediction
    if args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    parameter_file = os.path.join(
        args.model_save_path, '{}_params_{:06}.h5'.format(args.net, args.max_iter))
    nn.save_parameters(parameter_file)
Example No. 30
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Compute the error rate on validation data (periodically).
      * Get the next minibatch.
      * Set parameter gradients to zero.
      * Execute forwardprop on the training graph.
      * Execute backprop.
      * Update parameters with the solver, using the gradients computed by backprop.
      * Compute the training error.
    """
    args = get_args(monitor_path='tmp.monitor.bnn')

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_binary_connect_lenet_prediction
    if args.net == 'bincon':
        mnist_cnn_prediction = mnist_binary_connect_lenet_prediction
    elif args.net == 'binnet':
        mnist_cnn_prediction = mnist_binary_net_lenet_prediction
    elif args.net == 'bwn':
        mnist_cnn_prediction = mnist_binary_weight_lenet_prediction
    elif args.net == 'bincon_resnet':
        mnist_cnn_prediction = mnist_binary_connect_resnet_prediction
    elif args.net == 'binnet_resnet':
        mnist_cnn_prediction = mnist_binary_net_resnet_prediction
    elif args.net == 'bwn_resnet':
        mnist_cnn_prediction = mnist_binary_weight_resnet_prediction

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image / 255, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage / 255, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=10)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        # Training backward & update
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        # Monitor
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    nnp_file = os.path.join(args.model_save_path,
                            '{}_{:06}.nnp'.format(args.net, args.max_iter))
    runtime_contents = {
        'networks': [{
            'name': 'Validation',
            'batch_size': args.batch_size,
            'outputs': {
                'y': vpred
            },
            'names': {
                'x': vimage
            }
        }],
        'executors': [{
            'name': 'Runtime',
            'network': 'Validation',
            'data': ['x'],
            'output': ['y']
        }]
    }
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [vimage.d], [vimage], vpred,
                      nnp_file)
Example No. 31
def main():

    # Read envvar `NNABLA_EXAMPLES_ROOT` to identify the path to your local
    # nnabla-examples directory.
    HERE = os.path.dirname(__file__)
    nnabla_examples_root = os.environ.get(
        'NNABLA_EXAMPLES_ROOT',
        os.path.join(HERE, '../../../../nnabla-examples'))
    mnist_examples_root = os.path.realpath(
        os.path.join(nnabla_examples_root, 'mnist-collection'))
    sys.path.append(mnist_examples_root)
    nnabla_examples_git_url = 'https://github.com/sony/nnabla-examples'

    # Check if nnabla-examples found.
    try:
        from args import get_args
    except ImportError:
        print('The environment variable `NNABLA_EXAMPLES_ROOT`'
              ' must point to your local clone of the'
              ' [nnabla-examples]({}) repository.'.format(
                  nnabla_examples_git_url),
              file=sys.stderr)
        raise

    # Import MNIST data
    from mnist_data import data_iterator_mnist
    from classification import mnist_lenet_prediction, mnist_resnet_prediction

    args = get_args(description=__doc__)

    mnist_cnn_prediction = mnist_lenet_prediction
    if args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction

    # Create a computation graph to be saved.
    x = nn.Variable([args.batch_size, 1, 28, 28])
    h = mnist_cnn_prediction(x, test=False, aug=False)
    t = nn.Variable([args.batch_size, 1])
    loss = F.mean(F.softmax_cross_entropy(h, t))
    y = mnist_cnn_prediction(x, test=True, aug=False)

    # Save NNP file (used in C++ inference later).
    nnp_file = '{}_initialized.nnp'.format(args.net)
    runtime_contents = {
        'networks': [{
            'name': 'training',
            'batch_size': args.batch_size,
            'outputs': {
                'loss': loss
            },
            'names': {
                'x': x,
                't': t
            }
        }, {
            'name': 'runtime',
            'batch_size': args.batch_size,
            'outputs': {
                'y': y
            },
            'names': {
                'x': x
            }
        }],
        'executors': [{
            'name': 'runtime',
            'network': 'runtime',
            'data': ['x'],
            'output': ['y']
        }]
    }
    nn.utils.save.save(nnp_file, runtime_contents)
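
To sanity-check what was saved, the NNP file can be loaded back with nnabla's NnpLoader; a short sketch (the filename assumes args.net == 'lenet'):

from nnabla.utils.nnp_graph import NnpLoader

nnp = NnpLoader('lenet_initialized.nnp')
net = nnp.get_network('runtime', batch_size=1)
x = net.inputs['x']   # input variable of the saved 'runtime' network
y = net.outputs['y']  # set x.d, then call y.forward() to run inference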
Example No. 32
train_data_iter = data_iterator_simple(load_train_func,
                                       len(x_train),
                                       batch_size,
                                       shuffle=True,
                                       with_file_cache=False)
valid_data_iter = data_iterator_simple(load_valid_func,
                                       len(x_valid),
                                       batch_size,
                                       shuffle=True,
                                       with_file_cache=False)

x = nn.Variable([batch_size, window_size * 2])
with nn.parameter_scope('W_in'):
    h = PF.embed(x, vocab_size, embedding_size)
h = F.mean(h, axis=1)
with nn.parameter_scope('W_out'):
    y = PF.affine(h, vocab_size, with_bias=False)
t = nn.Variable((batch_size, 1))
entropy = F.softmax_cross_entropy(y, t)
loss = F.mean(entropy)

# Create solver.
solver = S.Adam()
solver.set_parameters(nn.get_parameters())

trainer = Trainer(inputs=[x, t],
                  loss=loss,
                  metrics=dict(PPL=np.e**loss),
                  solver=solver)

trainer.run(train_data_iter, valid_data_iter, epochs=max_epoch)

with open('vectors.txt', 'w') as f:
    f.write('{} {}\n'.format(vocab_size - 1, embedding_size))
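
The PPL metric handed to the Trainer above is perplexity: the exponential of the mean cross-entropy, so np.e ** loss is equivalent to np.exp(loss). A tiny NumPy sketch of the relationship, with a hypothetical loss value:

import numpy as np

mean_ce = 4.2                 # hypothetical mean softmax cross-entropy (in nats)
ppl = np.e ** mean_ce         # perplexity, as in the metric above
assert np.isclose(ppl, np.exp(mean_ce))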
Example No. 33
def cifar10_resnet23_loss(pred, label):
    loss = F.mean(F.softmax_cross_entropy(pred, label))
    return loss
Example No. 34
def classification_svd():
    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_lenet_prediction_slim

    # TRAIN
    reference = "reference"
    slim = "slim"
    rrate = 0.5  # reduction rate
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create the `slim` prediction graph (its parameters are derived from the
    # `reference` model below).
    model_load_path = args.model_load_path
    pred = mnist_cnn_prediction(image, scope=slim, rrate=rrate, test=False)
    pred.persistent = True

    # Decompose and set parameters
    decompose_network_and_set_params(model_load_path, reference, slim, rrate)
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create the `slim` prediction graph for testing.
    vpred = mnist_cnn_prediction(vimage, scope=slim, rrate=rrate, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    with nn.parameter_scope(slim):
        solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)
    best_ve = 1.0
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if ve < best_ve:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
            best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    parameter_file = os.path.join(args.model_save_path,
                                  'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
Example No. 35
File: vat.py  Project: zwsong/nnabla
def main():
    """
    Main script.

    Steps:
    * Get and set context.
    * Load Dataset
    * Initialize DataIterator.
    * Create Networks
      * Net for Labeled Data
      * Net for Unlabeled Data
      * Net for Test Data
    * Create Solver.
    * Training Loop.
      * Test
      * Training
        * by Labeled Data
          * Calculate Cross-Entropy Loss
        * by Unlabeled Data
          * Estimate Adversarial Direction
          * Calculate LDS Loss
    """

    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    shape_x = (1, 28, 28)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNIST dataset
    from mnist_data import MnistDataSource
    with MnistDataSource(train=True) as d:
        x_t = d.images
        t_t = d.labels
    with MnistDataSource(train=False) as d:
        x_v = d.images
        t_v = d.labels
    x_t = np.array(x_t / 256.0).astype(np.float32)
    x_t, t_t = x_t[:args.n_train], t_t[:args.n_train]
    x_v, t_v = x_v[:args.n_valid], t_v[:args.n_valid]

    # Create Semi-supervised Datasets
    x_l, t_l, x_u, _ = split_dataset(x_t, t_t, args.n_labeled, args.n_class)
    x_u = np.r_[x_l, x_u]
    x_v = np.array(x_v / 256.0).astype(np.float32)

    # Create DataIterators for datasets of labeled, unlabeled and validation
    di_l = DataIterator(args.batchsize_l, [x_l, t_l])
    di_u = DataIterator(args.batchsize_u, [x_u])
    di_v = DataIterator(args.batchsize_v, [x_v, t_v])

    # Create networks
    # feed-forward-net building function
    def forward(x, test=False):
        return mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data
    xl = nn.Variable((args.batchsize_l,) + shape_x, need_grad=False)
    hl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(hl, tl))

    # Net for learning unlabeled data
    xu = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    r = nn.Variable((args.batchsize_u,) + shape_x, need_grad=True)
    eps = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    loss_u, yu = vat(xu, r, eps, forward, distance)

    # Net for evaluating validation data
    xv = nn.Variable((args.batchsize_v,) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor training and validation stats.
    import nnabla.monitor as M
    monitor = M.Monitor(args.model_save_path)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240)

    # Training Loop.
    t0 = time.time()

    for i in range(args.max_iter):

        # Validation Test
        if i % args.val_interval == 0:
            n_error = calc_validation_error(
                di_v, xv, tv, hv, args.val_iter)
            monitor_verr.add(i, n_error)

        #################################
        ## Training by Labeled Data #####
        #################################

        # input minibatch of labeled data into variables
        xl.d, tl.d = di_l.next()

        # initialize gradients
        solver.zero_grad()

        # forward, backward and update
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        #################################
        ## Training by Unlabeled Data ###
        #################################

        # input minibatch of unlabeled data into variables
        xu.d, = di_u.next()

        ##### Calculate Adversarial Noise #####

        # Sample random noise
        n = np.random.normal(size=xu.shape).astype(np.float32)

        # Normalize the noise vector and feed it into the variable
        r.d = get_direction(n)

        # Set xi, the power-method scaling parameter.
        eps.data.fill(args.xi_for_vat)

        # Calculate y without noise, only once.
        yu.forward(clear_buffer=True)

        # Do power method iteration
        for k in range(args.n_iter_for_power_method):
            # Initialize gradient to receive value
            r.grad.zero()

            # forward, backward, without update
            loss_u.forward(clear_no_need_grad=True)
            loss_u.backward(clear_buffer=True)

            # Normalize the gradient vector and feed it into the variable
            r.d = get_direction(r.g)

        ##### Calculate loss for unlabeled data #####

        # Clear remained gradients
        solver.zero_grad()

        # Set epsilon, the adversarial noise scaling parameter.
        eps.data.fill(args.eps_for_vat)

        # forward, backward and update
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        ##### Learning rate update #####
        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(
                solver.learning_rate() * args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = calc_validation_error(di_v, xv, tv, hv, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)

    # Save the model.
    nnp_file = os.path.join(
        args.model_save_path, 'vat_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': args.batchsize_v,
             'outputs': {'y': hv},
             'names': {'x': xv}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x'],
             'output': ['y']}]}
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [xv.d], [xv], hv, nnp_file)
Example No. 36
def distil():
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == "cifar10_resnet23_prediction":
        model_prediction = cifar10_resnet23_prediction
        data_iterator = data_iterator_cifar10
        c = 3
        h = w = 32
        n_train = 50000
        n_valid = 10000

    # TRAIN
    teacher = "teacher"
    student = "student"
    maps = args.maps
    rrate = args.reduction_rate
    # Create input variables.
    image = nn.Variable([args.batch_size, c, h, w])
    image.persistent = True  # do not clear this intermediate buffer; it is re-used
    label = nn.Variable([args.batch_size, 1])
    label.persistent = True  # do not clear this intermediate buffer; it is re-used
    # Create `teacher` and `student` prediction graphs.
    model_load_path = args.model_load_path
    nn.load_parameters(model_load_path)
    pred_label = model_prediction(
        image, net=teacher, maps=maps, test=not args.use_batch)
    pred_label.need_grad = False  # no need to backprop through the teacher graph
    pred = model_prediction(image, net=student, maps=int(
        maps * (1. - rrate)), test=False)
    pred.persistent = True  # do not clear this intermediate buffer; it is used later
    loss_ce = F.mean(F.softmax_cross_entropy(pred, label))
    loss_ce_soft = ce_soft(pred, pred_label)
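    # Total loss: weighted sum of hard-label CE and soft-target (distillation) CE.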
    loss = args.weight_ce * loss_ce + args.weight_ce_soft * loss_ce_soft

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, c, h, w])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create the `student` prediction graph for testing.
    vpred = model_prediction(vimage, net=student, maps=int(
        maps * (1. - rrate)), test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    with nn.parameter_scope(student):
        solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)

    # Initialize DataIterator for CIFAR-10.
    data = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)
    best_ve = 1.0
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(int(n_valid / args.batch_size)):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            ve /= int(n_valid / args.batch_size)
            monitor_verr.add(i, ve)
        if ve < best_ve:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
            best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(int(n_valid / args.batch_size)):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    ve /= int(n_valid / args.batch_size)
    monitor_verr.add(i, ve)

    parameter_file = os.path.join(
        args.model_save_path, 'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
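
The helper ce_soft above is defined elsewhere in this project. A minimal sketch of a soft-target cross-entropy of this kind, under the assumption that it matches the student's softmax to the teacher's (the exact definition may differ):

import nnabla.functions as F

def ce_soft_sketch(pred, pred_label):
    # Teacher probabilities act as soft targets; no gradient flows into
    # pred_label because its need_grad flag is disabled above.
    p_teacher = F.softmax(pred_label)
    log_p_student = F.log_softmax(pred)
    return F.mean(F.sum(-p_teacher * log_p_student, axis=1))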
Example No. 37
def train():
    args = get_args()

    # Set context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in {}:{}".format(args.context, args.type_config))
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    data_iterator = data_iterator_librispeech(args.batch_size, args.data_dir)
    _data_source = data_iterator._data_source  # dirty hack...

    # model
    x = nn.Variable(shape=(args.batch_size, data_config.duration,
                           1))  # (B, T, 1)
    onehot = F.one_hot(x, shape=(data_config.q_bit_len, ))  # (B, T, C)
    wavenet_input = F.transpose(onehot, (0, 2, 1))  # (B, C, T)

    # speaker embedding
    if args.use_speaker_id:
        s_id = nn.Variable(shape=(args.batch_size, 1))
        with nn.parameter_scope("speaker_embedding"):
            s_emb = PF.embed(s_id,
                             n_inputs=_data_source.n_speaker,
                             n_features=WavenetConfig.speaker_dims)
            s_emb = F.transpose(s_emb, (0, 2, 1))
    else:
        s_emb = None

    net = WaveNet()
    wavenet_output = net(wavenet_input, s_emb)

    pred = F.transpose(wavenet_output, (0, 2, 1))

    # (B, T, 1)
    t = nn.Variable(shape=(args.batch_size, data_config.duration, 1))

    loss = F.mean(F.softmax_cross_entropy(pred, t))

    # for generation
    prob = F.softmax(pred)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # load checkpoint
    start_point = 0
    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitor.
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)

    # setup save env.
    audio_save_path = os.path.join(os.path.abspath(args.model_save_path),
                                   "audio_results")
    if audio_save_path and not os.path.exists(audio_save_path):
        os.makedirs(audio_save_path)

    # save_nnp
    contents = save_nnp({'x': x}, {'y': wavenet_output}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path,
                     'Speechsynthesis_result_epoch0.nnp'), contents)
    # Training loop.
    for i in range(start_point, args.max_iter):
        # todo: validation

        x.d, _speaker, t.d = data_iterator.next()
        if args.use_speaker_id:
            s_id.d = _speaker.reshape(-1, 1)

        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.update()

        loss.data.cast(np.float32, ctx)
        monitor_loss.add(i, loss.d.copy())

        if i % args.model_save_interval == 0:
            prob.forward()
            audios = mu_law_decode(np.argmax(prob.d, axis=-1),
                                   quantize=data_config.q_bit_len)  # (B, T)
            save_audio(audios, i, audio_save_path)
            # save checkpoint file
            save_checkpoint(audio_save_path, i, solver)

    # save_nnp
    contents = save_nnp({'x': x}, {'y': wavenet_output}, args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Speechsynthesis_result.nnp'),
              contents)
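
Audio generation above depends on mu_law_decode, defined elsewhere in this project. A sketch of the standard mu-law expansion it presumably implements, with mu = quantize - 1 (the function name is illustrative):

import numpy as np

def mu_law_decode_sketch(quantized, quantize=256):
    # Map integer bins [0, quantize) back to [-1, 1], then invert the
    # mu-law companding curve.
    mu = quantize - 1
    y = 2.0 * quantized.astype(np.float32) / mu - 1.0
    return np.sign(y) * ((1.0 + mu) ** np.abs(y) - 1.0) / mu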
def main():

    # Read envvar `NNABLA_EXAMPLES_ROOT` to identify the path to your local
    # nnabla-examples directory.
    HERE = os.path.dirname(__file__)
    nnabla_examples_root = os.environ.get('NNABLA_EXAMPLES_ROOT', os.path.join(
        HERE, '../../../../nnabla-examples'))
    mnist_examples_root = os.path.realpath(
        os.path.join(nnabla_examples_root, 'mnist-collection'))
    sys.path.append(mnist_examples_root)
    nnabla_examples_git_url = 'https://github.com/sony/nnabla-examples'

    # Check if nnabla-examples found.
    try:
        from args import get_args
    except ImportError:
        print(
            'The environment variable `NNABLA_EXAMPLES_ROOT`'
            ' must point to your local clone of the'
            ' [nnabla-examples]({}) repository.'.format(
                nnabla_examples_git_url),
            file=sys.stderr)
        raise

    # Import MNIST data
    from mnist_data import data_iterator_mnist
    from classification import mnist_lenet_prediction, mnist_resnet_prediction

    import argparse
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--max_epoch", "-me", type=int, default=100)
    parser.add_argument("--iter_per_epoch", "-ipe", type=int, default=937)
    parser.add_argument("--cache_dir", "-cd", type=str, default='cache')
    parser.add_argument("--batch-size", "-b", type=int, default=128)
    parser.add_argument("--learning-rate", "-l", type=float, default=1e-3)
    parser.add_argument("--weight-decay", "-w", type=float, default=0)
    parser.add_argument("--device-id", "-d", type=str, default='0')
    parser.add_argument("--type-config", "-t", type=str, default='float')
    parser.add_argument("--net", "-n", type=str, default='lenet')
    parser.add_argument('--context', '-c', type=str,
                        default='cpu', help="Extension modules. ex) 'cpu', 'cudnn'.")
    args = parser.parse_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    mnist_cnn_prediction = mnist_lenet_prediction
    if args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction

    # Create a computation graph to be saved.
    x = nn.Variable([args.batch_size, 1, 28, 28])
    t = nn.Variable([args.batch_size, 1])
    h_t = mnist_cnn_prediction(x, test=False, aug=False)
    loss_t = F.mean(F.softmax_cross_entropy(h_t, t))
    h_v = mnist_cnn_prediction(x, test=True, aug=False)
    loss_v = F.mean(F.softmax_cross_entropy(h_v, t))

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Save NNP file (used in C++ inference later).
    nnp_file = '{}_initialized.nnp'.format(args.net)
    training_contents = {
        'global_config': {'default_context': ctx},
        'training_config':
            {'max_epoch': args.max_epoch,
             'iter_per_epoch': args.iter_per_epoch,
             'save_best': True},
        'networks': [
            {'name': 'training',
             'batch_size': args.batch_size,
             'outputs': {'loss': loss_t},
             'names': {'x': x, 'y': t, 'loss': loss_t}},
            {'name': 'validation',
             'batch_size': args.batch_size,
             'outputs': {'loss': loss_v},
             'names': {'x': x, 'y': t, 'loss': loss_v}}],
        'optimizers': [
            {'name': 'optimizer',
             'solver': solver,
             'network': 'training',
             'dataset': 'mnist_training',
             'weight_decay': 0,
             'lr_decay': 1,
             'lr_decay_interval': 1,
             'update_interval': 1}],
        'datasets': [
            {'name': 'mnist_training',
             'uri': 'MNIST_TRAINING',
             'cache_dir': args.cache_dir + '/mnist_training.cache/',
             'variables': {'x': x, 'y': t},
             'shuffle': True,
             'batch_size': args.batch_size,
             'no_image_normalization': True},
            {'name': 'mnist_validation',
             'uri': 'MNIST_VALIDATION',
             'cache_dir': args.cache_dir + '/mnist_test.cache/',
             'variables': {'x': x, 'y': t},
             'shuffle': False,
             'batch_size': args.batch_size,
             'no_image_normalization': True
             }],
        'monitors': [
            {'name': 'training_loss',
             'network': 'validation',
             'dataset': 'mnist_training'},
            {'name': 'validation_loss',
             'network': 'validation',
             'dataset': 'mnist_validation'}],
    }
    nn.utils.save.save(nnp_file, training_contents)