Exemplo n.º 1
0
def test_nnp_graph_reshape(tmpdir, variable_batch_size, batch_size, shape):
    x = nn.Variable([10, 1, 28, 28, 10, 10])
    y = F.reshape(x, shape=shape)
    contents = {
        'networks': [
            {'name': 'graph',
             'batch_size': 1,
             'outputs': {'y': y},
             'names': {'x': x}}]}
    from nnabla.utils.save import save
    tmppath = tmpdir.join('tmp_reshape.nnp')
    tmppath.ensure()
    nnp_file = tmppath.strpath
    save(nnp_file, contents,
         variable_batch_size=variable_batch_size)

    from nnabla.utils import nnp_graph
    nnp = nnp_graph.NnpLoader(nnp_file)
    graph = nnp.get_network('graph', batch_size=batch_size)
    x2 = graph.inputs['x']
    y2 = graph.outputs['y']
    if not variable_batch_size:
        assert x2.shape == x.shape
        assert y2.shape == y.shape
        return

    assert x2.shape[0] == batch_size
    assert y2.shape[0] == batch_size
    x2.d = np.random.randn(*x2.shape)
    shape2 = list(shape)
    shape2[0] = batch_size
    shape2[1:] = y.shape[1:]

    y2.forward()
    assert np.all(y2.d == x2.d.reshape(shape2))
Exemplo n.º 2
0
def save_siamese_nnp(args):
    image = nn.Variable([1, 1, 28, 28])
    feature = mnist_lenet_feature(image, test=True)
    contents = save_nnp({'x': image}, {'y': feature}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path, '{}_result.nnp'.format(args.net)),
        contents)
Exemplo n.º 3
0
def check_nnp_graph_save_load(tmpdir, x, y, batch_size, variable_batch_size):

    # Save
    contents = {
        'networks': [{
            'name': 'graph',
            'batch_size': 1,
            'outputs': {
                'y': y
            },
            'names': {
                'x': x
            }
        }]
    }
    from nnabla.utils.save import save
    tmpdir.ensure(dir=True)
    tmppath = tmpdir.join('tmp.nnp')
    nnp_file = tmppath.strpath
    save(nnp_file, contents, variable_batch_size=variable_batch_size)

    # Load
    from nnabla.utils import nnp_graph
    nnp = nnp_graph.NnpLoader(nnp_file)
    graph = nnp.get_network('graph', batch_size=batch_size)
    x2 = graph.inputs['x']
    y2 = graph.outputs['y']
    if not variable_batch_size:
        assert x2.shape == x.shape
        assert y2.shape == y.shape
        return x2, y2

    assert x2.shape[0] == batch_size
    assert y2.shape[0] == batch_size
    return x2, y2
Exemplo n.º 4
0
def test_nnp_graph(seed):

    rng = np.random.RandomState(seed)

    def unit(i, prefix):
        c1 = PF.convolution(i, 4, (3, 3), pad=(1, 1), name=prefix + '-c1')
        c2 = PF.convolution(F.relu(c1),
                            4, (3, 3),
                            pad=(1, 1),
                            name=prefix + '-c2')
        c = F.add2(c2, c1, inplace=True)
        return c

    x = nn.Variable([2, 3, 4, 4])
    c1 = unit(x, 'c1')
    c2 = unit(x, 'c2')
    y = PF.affine(c2, 5, name='fc')

    runtime_contents = {
        'networks': [{
            'name': 'graph',
            'batch_size': 2,
            'outputs': {
                'y': y
            },
            'names': {
                'x': x
            }
        }],
    }
    import tempfile
    tmpdir = tempfile.mkdtemp()
    import os
    nnp_file = os.path.join(tmpdir, 'tmp.nnp')
    try:
        from nnabla.utils.save import save
        save(nnp_file, runtime_contents)
        from nnabla.utils import nnp_graph
        nnp = nnp_graph.NnpLoader(nnp_file)
    finally:
        import shutil
        shutil.rmtree(tmpdir)
    graph = nnp.get_network('graph')
    x2 = graph.inputs['x']
    y2 = graph.outputs['y']

    d = rng.randn(*x.shape).astype(np.float32)
    x.d = d
    x2.d = d
    y.forward(clear_buffer=True)
    y2.forward(clear_buffer=True)
    from nbla_test_utils import ArrayDiffStats
    assert np.allclose(y.d, y2.d), str(ArrayDiffStats(y.d, y2.d))
Exemplo n.º 5
0
def test_nnp_graph(seed, tmpdir):

    rng = np.random.RandomState(seed)

    def unit(i, prefix):
        c1 = PF.convolution(i, 4, (3, 3), pad=(1, 1), name=prefix + '-c1')
        c2 = PF.convolution(F.relu(c1),
                            4, (3, 3),
                            pad=(1, 1),
                            name=prefix + '-c2')
        c = F.add2(c2, c1, inplace=True)
        return c

    x = nn.Variable([2, 3, 4, 4])
    c1 = unit(x, 'c1')
    c2 = unit(x, 'c2')
    y = PF.affine(c2, 5, name='fc')

    runtime_contents = {
        'networks': [{
            'name': 'graph',
            'batch_size': 2,
            'outputs': {
                'y': y
            },
            'names': {
                'x': x
            }
        }],
    }
    tmpdir.ensure(dir=True)
    nnp_file = tmpdir.join('tmp.nnp').strpath

    from nnabla.utils.save import save
    save(nnp_file, runtime_contents)
    from nnabla.utils import nnp_graph
    nnp = nnp_graph.NnpLoader(nnp_file)

    graph = nnp.get_network('graph')
    x2 = graph.inputs['x']
    y2 = graph.outputs['y']

    d = rng.randn(*x.shape).astype(np.float32)
    x.d = d
    x2.d = d
    y.forward(clear_buffer=True)
    y2.forward(clear_buffer=True)
    assert_allclose(y.d, y2.d)
Exemplo n.º 6
0
def save_nnp(args):
    image = nn.Variable([1, 1, 28, 28])
    feature = mnist_lenet_feature(image, test=True)
    runtime_contents = {
        'networks': [
            {'name': 'Embedding',
             'batch_size': 1,
             'outputs': {'f': feature},
             'names': {'image': image}}],
        'executors': [
            {'name': 'Executor',
             'network': 'Embedding',
             'data': ['image'],
             'output': ['f']}]}
    import nnabla.utils.save as save
    save.save(os.path.join(args.monitor_path,
                           'embedding.nnp'), runtime_contents)
Exemplo n.º 7
0
def get_nnp(contents, tmpdir, need_file_object, file_type):
    import io
    from nnabla.utils.save import save
    from nnabla.utils import nnp_graph

    if file_type == '.nntxt' or file_type == '.prototxt':
        include_params = True
    else:
        include_params = False

    if need_file_object:
        nnp_object = io.BytesIO() if file_type == '.nnp' else io.StringIO()
        save(nnp_object, contents, extension=file_type,
             include_params=include_params)
        nnp_object.seek(0)
        nnp = nnp_graph.NnpLoader(nnp_object, extension=file_type)
    else:
        tmpdir.ensure(dir=True)
        nnp_file = tmpdir.join('tmp'+file_type).strpath
        save(nnp_file, contents, include_params=include_params)
        nnp = nnp_graph.NnpLoader(nnp_file)
    return nnp
Exemplo n.º 8
0
def save_model_from_utils_save(nnp_file, model_def, input_shape,
                               variable_batch_size):
    x = nn.Variable(input_shape)
    y = model_def(x)
    contents = {
        'networks': [{
            'name': 'model',
            'batch_size': 1,
            'outputs': {
                'y': y
            },
            'names': {
                'x': x
            }
        }],
        'executors': [{
            'name': 'runtime',
            'network': 'model',
            'data': ['x'],
            'output': ['y']
        }]
    }
    save.save(nnp_file, contents, variable_batch_size=variable_batch_size)
Exemplo n.º 9
0
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    fake_dis = fake.get_unlinked_variable(need_grad=True)
    fake_dis.need_grad = True  # TODO: Workaround until v1.0.2
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())
    start_point = 0

    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint files.
        start_point = load_checkpoint(args.checkpoint, {
            "gen": solver_gen,
            "dis": solver_dis
        })

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss",
                                       monitor,
                                       interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile("Fake images",
                                      monitor,
                                      normalize_method=lambda x: (x + 1) / 2.)

    data = data_iterator_mnist(args.batch_size, True)

    # Save_nnp
    contents = save_nnp({'x': z}, {'y': fake}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path, 'Generator_result_epoch0.nnp'),
        contents)
    contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path, 'Discriminator_result_epoch0.nnp'),
        contents)

    # Training loop.
    for i in range(start_point, args.max_iter):
        if i % args.model_save_interval == 0:
            save_checkpoint(args.model_save_path, i, {
                "gen": solver_gen,
                "dis": solver_dis
            })

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-1, 1]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % i))

    # Save_nnp
    contents = save_nnp({'x': z}, {'y': fake}, args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Generator_result.nnp'),
              contents)
    contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Discriminator_result.nnp'),
              contents)
Exemplo n.º 10
0
def train():
    """
    Main script.
    """

    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Dataset
    # We use Tiny ImageNet from Stanford CS231N class.
    # https://tiny-imagenet.herokuapp.com/
    # Tiny ImageNet consists of 200 categories, each category has 500 images
    # in training set. The image size is 64x64. To adapt ResNet into 64x64
    # image inputs, the input image size of ResNet is set as 56x56, and
    # the stride in the first conv and the first max pooling are removed.
    data = data_iterator_tiny_imagenet(args.batch_size, 'train')
    vdata = data_iterator_tiny_imagenet(args.batch_size, 'val')

    num_classes = 200
    tiny = True  # TODO: Switch ILSVRC2012 dataset and TinyImageNet.
    t_model = get_model(
        args, num_classes, test=False, tiny=tiny)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward
    v_model = get_model(
        args, num_classes, test=True, tiny=tiny)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward

    # Create Solver.
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)

    # Training loop.
    for i in range(args.max_iter):
        # Save parameters
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'param_%06d.h5' % i))

        # Validation
        if i % args.val_interval == 0:

            # Clear all intermediate memory to save memory.
            # t_model.loss.clear_recursive()

            l = 0.0
            e = 0.0
            for j in range(args.val_iter):
                images, labels = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                v_model.image.data.cast(np.uint8, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                l += v_model.loss.d
                e += categorical_error(v_model.pred.d, v_model.label.d)
            monitor_vloss.add(i, l / args.val_iter)
            monitor_verr.add(i, e / args.val_iter)

            # Clear all intermediate memory to save memory.
            # v_model.loss.clear_recursive()

        # Training
        l = 0.0
        e = 0.0
        solver.zero_grad()

        # Gradient accumulation loop
        for j in range(args.accum_grad):
            images, labels = data.next()
            t_model.image.d = images
            t_model.label.d = labels
            t_model.image.data.cast(np.uint8, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            l += t_model.loss.d
            e += categorical_error(t_model.pred.d, t_model.label.d)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, l / args.accum_grad)
        monitor_err.add(i, e / args.accum_grad)
        monitor_time.add(i)

        # Learning rate decay at scheduled iter
        if i in args.learning_rate_decay_at:
            solver.set_learning_rate(solver.learning_rate() * 0.1)
    nn.save_parameters(os.path.join(args.model_save_path,
                                    'param_%06d.h5' % args.max_iter))

    nnp_file = os.path.join(
        args.model_save_path, 'resnet_%06d.nnp' % (args.max_iter))
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': v_model.pred.shape[0],
             'outputs': {'y': v_model.pred},
             'names': {'x': v_model.image}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x'],
             'output': ['y']}]}
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [v_model.image.d], [
                      v_model.image], v_model.pred, nnp_file)
Exemplo n.º 11
0
pprint.pprint(nn.get_parameters()['affine/W'].d)

x.d = range(5)
# Execute a forward pass
y.forward()
# Showing results
print(
    "**** nnabla (python runtime): predictions: {}".format(y.d)
)  # we will later check this output with the output of the runtime engine

contents = {
    'networks': [{
        'name': 'main',
        'batch_size': 1,
        'outputs': {
            'y': y
        },
        'names': {
            'x': x
        }
    }],
    'executors': [{
        'name': 'runtime',
        'network': 'main',
        'data': ['x'],
        'output': ['y']
    }]
}

save('net.nnp', contents)  # save the network
Exemplo n.º 12
0
    def save(self, fname, inputs, batch_size=1, net_name='net', deploy=False):
        """
        Save QAT network model to NNP file as default.

        Args:
          fname (str): NNP file name.
          inputs (:obj:`nnabla.Variable` or list of :obj:`nnabla.Variable`): Network inputs variables.
          batch_size (int): batch size.
          net_name (str): network name.
          deploy (bool): Whether to apply QNN deployment conversion. deploy=True is not supported yet.

        Returns:
          None
        """

        def _force_list(o):
            if isinstance(o, (tuple)):
                return list(o)
            if not isinstance(o, (list)):
                return [o]
            return o

        for i, elm in enumerate(self.registry):
            pred, training = elm
            if deploy:
                assert self.state == QNNState.training
                # TODO: Convert the training graph to deployment graph
                # TODO: Save as nnp (we have to define nicely)
            else:
                if training:
                    continue

                from collections import defaultdict

                inps = defaultdict(list)
                otps = defaultdict(list)
                ec_data = []
                ec_otps = []

                inputs = _force_list(inputs)
                for i, inp in enumerate(inputs):
                    key = 'x{}'.format(i)
                    inps[key] = inp
                    ec_data.append(key)

                outputs = _force_list(pred)
                for i, otp in enumerate(outputs):
                    key = 'y{}'.format(i)
                    otps[key] = otp
                    ec_otps.append(key)

                contents = {
                    'networks': [
                        {'name': net_name,
                         'batch_size': batch_size,
                         'outputs': otps,
                         'names': inps
                         }],
                    'executors': [
                        {'name': 'runtime',
                         'network': net_name,
                         'data': ec_data,
                         'outputs': ec_otps
                         }]
                }

                from nnabla.utils.save import save
                save(fname, contents)
Exemplo n.º 13
0
def train():
    '''
    Main script.
    '''
    args = get_args()

    from numpy.random import seed
    seed(0)

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # TRAIN
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    x = image / 255.0
    t_onehot = F.one_hot(label, (10, ))
    with nn.parameter_scope("capsnet"):
        c1, pcaps, u_hat, caps, pred = model.capsule_net(
            x,
            test=False,
            aug=True,
            grad_dynamic_routing=args.grad_dynamic_routing)
    with nn.parameter_scope("capsnet_reconst"):
        recon = model.capsule_reconstruction(caps, t_onehot)
    loss_margin, loss_reconst, loss = model.capsule_loss(
        pred, t_onehot, recon, x)
    pred.persistent = True

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    vx = vimage / 255.0
    with nn.parameter_scope("capsnet"):
        _, _, _, _, vpred = model.capsule_net(vx, test=True, aug=False)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    train_iter = int(60000 / args.batch_size)
    val_iter = int(10000 / args.batch_size)
    logger.info("#Train: {} #Validation: {}".format(train_iter, val_iter))
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=1)
    monitor_mloss = MonitorSeries("Training margin loss", monitor, interval=1)
    monitor_rloss = MonitorSeries("Training reconstruction loss",
                                  monitor,
                                  interval=1)
    monitor_err = MonitorSeries("Training error", monitor, interval=1)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=1)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)
    monitor_lr = MonitorSeries("Learning rate", monitor, interval=1)

    # To_save_nnp
    m_image, m_label, m_noise, m_recon = model_tweak_digitscaps(
        args.batch_size)
    contents = save_nnp({
        'x1': m_image,
        'x2': m_label,
        'x3': m_noise
    }, {'y': m_recon}, args.batch_size)
    save.save(os.path.join(args.monitor_path, 'capsnet_epoch0_result.nnp'),
              contents)

    # Initialize DataIterator for MNIST.
    from numpy.random import RandomState
    data = data_iterator_mnist(args.batch_size, True, rng=RandomState(1223))
    vdata = data_iterator_mnist(args.batch_size, False)
    start_point = 0

    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)
    # Training loop.
    for e in range(start_point, args.max_epochs):

        # Learning rate decay
        learning_rate = solver.learning_rate()
        if e != 0:
            learning_rate *= 0.9
        solver.set_learning_rate(learning_rate)
        monitor_lr.add(e, learning_rate)

        # Training
        train_error = 0.0
        train_loss = 0.0
        train_mloss = 0.0
        train_rloss = 0.0
        for i in range(train_iter):
            image.d, label.d = data.next()
            solver.zero_grad()
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            solver.update()
            train_error += categorical_error(pred.d, label.d)
            train_loss += loss.d
            train_mloss += loss_margin.d
            train_rloss += loss_reconst.d
        train_error /= train_iter
        train_loss /= train_iter
        train_mloss /= train_iter
        train_rloss /= train_iter

        # Validation
        val_error = 0.0
        for j in range(val_iter):
            vimage.d, vlabel.d = vdata.next()
            vpred.forward(clear_buffer=True)
            val_error += categorical_error(vpred.d, vlabel.d)
        val_error /= val_iter

        # Monitor
        monitor_time.add(e)
        monitor_loss.add(e, train_loss)
        monitor_mloss.add(e, train_mloss)
        monitor_rloss.add(e, train_rloss)
        monitor_err.add(e, train_error)
        monitor_verr.add(e, val_error)
        save_checkpoint(args.monitor_path, e, solver)

    # To_save_nnp
    contents = save_nnp({
        'x1': m_image,
        'x2': m_label,
        'x3': m_noise
    }, {'y': m_recon}, args.batch_size)
    save.save(os.path.join(args.monitor_path, 'capsnet_result.nnp'), contents)
Exemplo n.º 14
0
def main():
    """
    Main script.

    Steps:
    * Get and set context.
    * Load Dataset
    * Initialize DataIterator.
    * Create Networks
    *   Net for Labeled Data
    *   Net for Unlabeled Data
    *   Net for Test Data
    * Create Solver.
    * Training Loop.
    *   Test
    *   Training
    *     by Labeled Data
    *       Calculate Supervised Loss
    *     by Unlabeled Data
    *       Calculate Virtual Adversarial Noise
    *       Calculate Unsupervised Loss
    """

    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    shape_x = (1, 28, 28)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNIST Dataset
    from mnist_data import load_mnist, data_iterator_mnist
    images, labels = load_mnist(train=True)
    rng = np.random.RandomState(706)
    inds = rng.permutation(len(images))

    def feed_labeled(i):
        j = inds[i]
        return images[j], labels[j]

    def feed_unlabeled(i):
        j = inds[i]
        return images[j], labels[j]

    di_l = data_iterator_simple(feed_labeled,
                                args.n_labeled,
                                args.batchsize_l,
                                shuffle=True,
                                rng=rng,
                                with_file_cache=False)
    di_u = data_iterator_simple(feed_unlabeled,
                                args.n_train,
                                args.batchsize_u,
                                shuffle=True,
                                rng=rng,
                                with_file_cache=False)
    di_v = data_iterator_mnist(args.batchsize_v, train=False)

    # Create networks
    # feed-forward-net building function
    def forward(x, test=False):
        return mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data
    xl = nn.Variable((args.batchsize_l, ) + shape_x, need_grad=False)
    yl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(yl, tl))

    # Net for learning unlabeled data
    xu = nn.Variable((args.batchsize_u, ) + shape_x, need_grad=False)
    yu = forward(xu, test=False)
    y1 = yu.get_unlinked_variable()
    y1.need_grad = False

    noise = nn.Variable((args.batchsize_u, ) + shape_x, need_grad=True)
    r = noise / (F.sum(noise**2, [1, 2, 3], keepdims=True))**0.5
    r.persistent = True
    y2 = forward(xu + args.xi_for_vat * r, test=False)
    y3 = forward(xu + args.eps_for_vat * r, test=False)
    loss_k = F.mean(distance(y1, y2))
    loss_u = F.mean(distance(y1, y3))

    # Net for evaluating validation data
    xv = nn.Variable((args.batchsize_v, ) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)
    err = F.mean(F.top_n_error(hv, tv, n=1))

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor training and validation stats.
    import nnabla.monitor as M
    monitor = M.Monitor(args.model_save_path)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240)

    contents = save_nnp({'x': xv}, {'y': hv}, 1)
    save.save(os.path.join(args.model_save_path, 'result_epoch0.nnp'),
              contents)

    # Training Loop.
    t0 = time.time()

    for i in range(args.max_iter):

        # Validation Test
        if i % args.val_interval == 0:
            valid_error = calc_validation_error(di_v, xv, tv, err,
                                                args.val_iter)
            monitor_verr.add(i, valid_error)

        #################################
        ## Training by Labeled Data #####
        #################################

        # forward, backward and update
        xl.d, tl.d = di_l.next()
        xl.d = xl.d / 255
        solver.zero_grad()
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        #################################
        ## Training by Unlabeled Data ###
        #################################

        # Calculate y without noise, only once.
        xu.d, _ = di_u.next()
        xu.d = xu.d / 255
        yu.forward(clear_buffer=True)

        ##### Calculate Adversarial Noise #####
        # Do power method iteration
        noise.d = np.random.normal(size=xu.shape).astype(np.float32)
        for k in range(args.n_iter_for_power_method):
            r.grad.zero()
            loss_k.forward(clear_no_need_grad=True)
            loss_k.backward(clear_buffer=True)
            noise.data.copy_from(r.grad)

        ##### Calculate loss for unlabeled data #####
        # forward, backward and update
        solver.zero_grad()
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        ##### Learning rate update #####
        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(solver.learning_rate() *
                                     args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = calc_validation_error(di_v, xv, tv, err, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)

    # Save the model.
    parameter_file = os.path.join(args.model_save_path,
                                  'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)

    contents = save_nnp({'x': xv}, {'y': hv}, 1)
    save.save(os.path.join(args.model_save_path, 'result.nnp'), contents)
Exemplo n.º 15
0
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop on the training graph.
      * Compute training error
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
    """
    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_lenet_prediction
    if args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create predition graph.
    vpred = mnist_cnn_prediction(vimage, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
        # Training forward
        image.d, label.d = data.next()
        #image.d /= 255.0
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    nnp_file = os.path.join(args.model_save_path,
                            '{}_{:06}.nnp'.format(args.net, args.max_iter))
    runtime_contents = {
        'networks': [{
            'name': 'Validation',
            'batch_size': args.batch_size,
            'outputs': {
                'y': vpred
            },
            'names': {
                'x': vimage
            }
        }],
        'executors': [{
            'name': 'Runtime',
            'network': 'Validation',
            'data': ['x'],
            'output': ['y']
        }]
    }
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [vimage.d], [vimage], vpred,
                      nnp_file)
Exemplo n.º 16
0
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Set parameter gradients zero
      * Execute forwardprop on the training graph.
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    args = get_args(monitor_path='tmp.monitor.bnn')

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_binary_connect_lenet_prediction
    if args.net == 'bincon':
        mnist_cnn_prediction = mnist_binary_connect_lenet_prediction
    elif args.net == 'binnet':
        mnist_cnn_prediction = mnist_binary_net_lenet_prediction
    elif args.net == 'bwn':
        mnist_cnn_prediction = mnist_binary_weight_lenet_prediction
    elif args.net == 'bincon_resnet':
        mnist_cnn_prediction = mnist_binary_connect_resnet_prediction
    elif args.net == 'binnet_resnet':
        mnist_cnn_prediction = mnist_binary_net_resnet_prediction
    elif args.net == 'bwn_resnet':
        mnist_cnn_prediction = mnist_binary_weight_resnet_prediction

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image / 255, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage / 255, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())
    start_point = 0

    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=10)

    # save_nnp
    contents = save_nnp({'x': vimage}, {'y': vpred}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path,
                     '{}_result_epoch0.nnp'.format(args.net)), contents)

    # Training loop.
    for i in range(start_point, args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            # save checkpoint file
            save_checkpoint(args.model_save_path, i, solver)
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        # Training backward & update
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        # Monitor
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    parameter_file = os.path.join(args.model_save_path,
                                  'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)

    # save_nnp_lastepoch
    contents = save_nnp({'x': vimage}, {'y': vpred}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path, '{}_result.nnp'.format(args.net)),
        contents)
Exemplo n.º 17
0
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Set parameter gradients zero
      * Execute forwardprop on the training graph.
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    args = get_args(monitor_path='tmp.monitor.bnn')

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_binary_connect_lenet_prediction
    if args.net == 'bincon':
        mnist_cnn_prediction = mnist_binary_connect_lenet_prediction
    elif args.net == 'binnet':
        mnist_cnn_prediction = mnist_binary_net_lenet_prediction
    elif args.net == 'bwn':
        mnist_cnn_prediction = mnist_binary_weight_lenet_prediction
    elif args.net == 'bincon_resnet':
        mnist_cnn_prediction = mnist_binary_connect_resnet_prediction
    elif args.net == 'binnet_resnet':
        mnist_cnn_prediction = mnist_binary_net_resnet_prediction
    elif args.net == 'bwn_resnet':
        mnist_cnn_prediction = mnist_binary_weight_resnet_prediction

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create predition graph.
    pred = mnist_cnn_prediction(image / 255, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create predition graph.
    vpred = mnist_cnn_prediction(vimage / 255, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=10)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        # Training backward & update
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        # Monitor
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    nnp_file = os.path.join(
        args.model_save_path, '{}_{:06}.nnp'.format(args.net, args.max_iter))
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': args.batch_size,
             'outputs': {'y': vpred},
             'names': {'x': vimage}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x'],
             'output': ['y']}]}
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [vimage.d], [
                      vimage], vpred, nnp_file)
Exemplo n.º 18
0
def train():
    bs_train, bs_valid = args.train_batch_size, args.val_batch_size
    extension_module = args.context
    ctx = get_extension_context(
        extension_module, device_id=args.device_id, type_config=args.type_config
    )
    nn.set_default_context(ctx)

    if args.input:
        train_loader, val_loader, n_train_samples, n_val_samples = load_data(
            bs_train, bs_valid
        )

    else:
        train_data_source = data_source_cifar10(
            train=True, shuffle=True, label_shuffle=True
        )
        val_data_source = data_source_cifar10(train=False, shuffle=False)
        n_train_samples = len(train_data_source.labels)
        n_val_samples = len(val_data_source.labels)
        # Data Iterator
        train_loader = data_iterator(
            train_data_source, bs_train, None, False, False)
        val_loader = data_iterator(
            val_data_source, bs_valid, None, False, False)

        if args.shuffle_label:
            if not os.path.exists(args.output):
                os.makedirs(args.output)
            np.save(os.path.join(args.output, "x_train.npy"),
                    train_data_source.images)
            np.save(
                os.path.join(args.output, "y_shuffle_train.npy"),
                train_data_source.labels,
            )
            np.save(os.path.join(args.output, "y_train.npy"),
                    train_data_source.raw_label)
            np.save(os.path.join(args.output, "x_val.npy"),
                    val_data_source.images)
            np.save(os.path.join(args.output, "y_val.npy"),
                    val_data_source.labels)

    if args.model == "resnet23":
        model_prediction = resnet23_prediction
    elif args.model == "resnet56":
        model_prediction = resnet56_prediction
    prediction = functools.partial(
        model_prediction, ncls=10, nmaps=64, act=F.relu, seed=args.seed)

    # Create training graphs
    test = False
    image_train = nn.Variable((bs_train, 3, 32, 32))
    label_train = nn.Variable((bs_train, 1))
    pred_train, _ = prediction(image_train, test)

    loss_train = loss_function(pred_train, label_train)

    # Create validation graph
    test = True
    image_valid = nn.Variable((bs_valid, 3, 32, 32))
    label_valid = nn.Variable((bs_valid, 1))
    pred_valid, _ = prediction(image_valid, test)
    loss_val = loss_function(pred_valid, label_valid)

    for param in nn.get_parameters().values():
        param.grad.zero()

    cfg = read_yaml("./learning_rate.yaml")
    print(cfg)
    lr_sched = create_learning_rate_scheduler(cfg.learning_rate_config)
    solver = S.Momentum(momentum=0.9, lr=lr_sched.get_lr())
    solver.set_parameters(nn.get_parameters())
    start_point = 0

    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitor
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed

    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=1)
    monitor_err = MonitorSeries("Training error", monitor, interval=1)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=1)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)
    monitor_vloss = MonitorSeries("Test loss", monitor, interval=1)

    # save_nnp
    contents = save_nnp({"x": image_valid}, {"y": pred_valid}, bs_valid)
    save.save(
        os.path.join(args.model_save_path,
                     (args.model+"_epoch0_result.nnp")), contents
    )

    train_iter = math.ceil(n_train_samples / bs_train)
    val_iter = math.ceil(n_val_samples / bs_valid)

    # Training-loop
    for i in range(start_point, args.train_epochs):
        lr_sched.set_epoch(i)
        solver.set_learning_rate(lr_sched.get_lr())
        print("Learning Rate: ", lr_sched.get_lr())
        # Validation
        ve = 0.0
        vloss = 0.0
        print("## Validation")
        for j in range(val_iter):
            image, label = val_loader.next()
            image_valid.d = image
            label_valid.d = label
            loss_val.forward()
            vloss += loss_val.data.data.copy() * bs_valid
            ve += categorical_error(pred_valid.d, label)
        ve /= args.val_iter
        vloss /= n_val_samples

        monitor_verr.add(i, ve)
        monitor_vloss.add(i, vloss)

        if int(i % args.model_save_interval) == 0:
            # save checkpoint file
            save_checkpoint(args.model_save_path, i, solver)

        # Forward/Zerograd/Backward
        print("## Training")
        e = 0.0
        loss = 0.0
        for k in range(train_iter):

            image, label = train_loader.next()
            image_train.d = image
            label_train.d = label
            loss_train.forward()
            solver.zero_grad()
            loss_train.backward()
            solver.update()
            e += categorical_error(pred_train.d, label_train.d)
            loss += loss_train.data.data.copy() * bs_train
        e /= train_iter
        loss /= n_train_samples

        e = categorical_error(pred_train.d, label_train.d)
        monitor_loss.add(i, loss)
        monitor_err.add(i, e)
        monitor_time.add(i)

    nn.save_parameters(
        os.path.join(args.model_save_path, "params_%06d.h5" %
                     (args.train_epochs))
    )

    # save_nnp_lastepoch
    contents = save_nnp({"x": image_valid}, {"y": pred_valid}, bs_valid)
    save.save(os.path.join(args.model_save_path,
              (args.model+"_result.nnp")), contents)
Exemplo n.º 19
0
def train():
    """
    Main script.
    """

    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = get_extension_context(extension_module,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    if args.tiny_mode:
        # We use Tiny ImageNet from Stanford CS231N class.
        # (Tiny ImageNet, https://tiny-imagenet.herokuapp.com/)
        # Tiny ImageNet consists of 200 categories, each category has 500 images
        # in training set. The image size is 64x64. To adapt ResNet into 64x64
        # image inputs, the input image size of ResNet is set as 56x56, and
        # the stride in the first conv and the first max pooling are removed.
        # Please check README.
        data = data_iterator_tiny_imagenet(args.batch_size, 'train')
        vdata = data_iterator_tiny_imagenet(args.batch_size, 'val')
        num_classes = 200
    else:
        # We use ImageNet.
        # (ImageNet, https://imagenet.herokuapp.com/)
        # ImageNet consists of 1000 categories, each category has 1280 images
        # in training set. The image size is various. To adapt ResNet into
        # 320x320 image inputs, the input image size of ResNet is set as
        # 224x224. We need to get tar file and create cache file(320x320 images).
        # Please check README.
        data = data_iterator_imagenet(args.batch_size,
                                      args.train_cachefile_dir)
        vdata = data_iterator_imagenet(args.batch_size, args.val_cachefile_dir)
        num_classes = 1000
    t_model = get_model(args, num_classes, test=False, tiny=args.tiny_mode)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward

    # TODO: need_grad should be passed to get_unlinked_variable after v1.0.3 fix.
    t_pred2 = t_model.pred.get_unlinked_variable()
    t_pred2.need_grad = False

    t_e = F.mean(F.top_n_error(t_pred2, t_model.label))
    v_model = get_model(args, num_classes, test=True, tiny=args.tiny_mode)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward

    # TODO: need_grad should be passed to get_unlinked_variable after v1.0.3 fix.
    v_pred2 = v_model.pred.get_unlinked_variable()
    v_pred2.need_grad = False

    v_e = F.mean(F.top_n_error(v_pred2, v_model.label))

    # Save_nnp_Epoch0
    contents = save_nnp({'x': v_model.image}, {'y': v_model.pred},
                        args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Imagenet_result_epoch0.nnp'),
              contents)

    # Create Solver.
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    start_point = 0
    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_vtime = M.MonitorTimeElapsed("Validation time",
                                         monitor,
                                         interval=10)

    # Training loop.
    for i in range(start_point, args.max_iter):
        # Save parameters
        if i % args.model_save_interval == 0:
            # save checkpoint file
            save_checkpoint(args.model_save_path, i, solver)

        # Validation
        if i % args.val_interval == 0 and i != 0:

            # Clear all intermediate memory to save memory.
            # t_model.loss.clear_recursive()

            l = 0.0
            e = 0.0
            for j in range(args.val_iter):
                images, labels = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                v_model.image.data.cast(np.uint8, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                v_e.forward(clear_buffer=True)
                l += v_model.loss.d
                e += v_e.d
            monitor_vloss.add(i, l / args.val_iter)
            monitor_verr.add(i, e / args.val_iter)
            monitor_vtime.add(i)

            # Clear all intermediate memory to save memory.
            # v_model.loss.clear_recursive()

        # Training
        l = 0.0
        e = 0.0
        solver.zero_grad()

        def accumulate_error(l, e, t_model, t_e):
            l += t_model.loss.d
            e += t_e.d
            return l, e

        # Gradient accumulation loop
        for j in range(args.accum_grad):
            images, labels = data.next()
            t_model.image.d = images
            t_model.label.d = labels
            t_model.image.data.cast(np.uint8, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            t_e.forward(clear_buffer=True)
            l, e = accumulate_error(l, e, t_model, t_e)

        solver.weight_decay(args.weight_decay)
        solver.update()

        monitor_loss.add(i, l / args.accum_grad)
        monitor_err.add(i, e / args.accum_grad)
        monitor_time.add(i)

        # Learning rate decay at scheduled iter
        if i in args.learning_rate_decay_at:
            solver.set_learning_rate(solver.learning_rate() * 0.1)
    nn.save_parameters(
        os.path.join(args.model_save_path, 'param_%06d.h5' % args.max_iter))

    # Save_nnp
    contents = save_nnp({'x': v_model.image}, {'y': v_model.pred},
                        args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Imagenet_result.nnp'),
              contents)
Exemplo n.º 20
0
def save_nnp(nnp_file_name, nn_name, input, output, batchsize):
    content = _create_nnp_content(nn_name, input, output, batchsize)
    save.save(nnp_file_name, content)
    return content
Exemplo n.º 21
0
def main():
    """
    Main script.
    Steps:
    * Setup calculation environment
    * Initialize data iterator.
    * Create Networks
    * Create Solver.
    * Training Loop.
    *   Training
    *   Test
    * Save
    """

    # Set args
    args = get_args(monitor_path='tmp.monitor.vae',
                    max_iter=60000,
                    model_save_path=None,
                    learning_rate=3e-4,
                    batch_size=100,
                    weight_decay=0)

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Initialize data provider
    di_l = data_iterator_mnist(args.batch_size, True)
    di_t = data_iterator_mnist(args.batch_size, False)

    # Network
    shape_x = (1, 28, 28)
    shape_z = (50, )
    x = nn.Variable((args.batch_size, ) + shape_x)
    loss_l = vae(x, shape_z, test=False)
    loss_t = vae(x, shape_z, test=True)

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitors for training and validation
    monitor = M.Monitor(args.model_save_path)
    monitor_training_loss = M.MonitorSeries("Training loss",
                                            monitor,
                                            interval=600)
    monitor_test_loss = M.MonitorSeries("Test loss", monitor, interval=600)
    monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=600)

    # Save_nnp_at_epoch0
    contents = save_nnp({'x': x}, {'y': loss_t}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path,
                     '{}_resultEpoch0.nnp'.format(args.net)), contents)

    # Training Loop.
    for i in range(args.max_iter):

        # Initialize gradients
        solver.zero_grad()

        # Forward, backward and update
        x.d, _ = di_l.next()
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Forward for test
        x.d, _ = di_t.next()
        loss_t.forward(clear_no_need_grad=True)

        # Monitor for logging
        monitor_training_loss.add(i, loss_l.d.copy())
        monitor_test_loss.add(i, loss_t.d.copy())
        monitor_time.add(i)

    # Save the model
    nn.save_parameters(
        os.path.join(args.model_save_path, 'params_%06d.h5' % args.max_iter))

    # save_nnp_lastepoch
    contents = save_nnp({'x': x}, {'y': loss_t}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path, '{}_result.nnp'.format(args.net)),
        contents)
Exemplo n.º 22
0
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop on the training graph.
      * Compute training error
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
    """
    args = get_args()

    from numpy.random import seed
    seed(0)

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == 'lenet':
        mnist_cnn_prediction = mnist_lenet_prediction
    elif args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction
    else:
        raise ValueError("Unknown network type {}".format(args.net))

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image, test=False, aug=args.augment_train)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage, test=True, aug=args.augment_test)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    from numpy.random import RandomState
    data = data_iterator_mnist(args.batch_size, True, rng=RandomState(1223))
    vdata = data_iterator_mnist(args.batch_size, False)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                vpred.data.cast(np.float32, ctx)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        loss.data.cast(np.float32, ctx)
        pred.data.cast(np.float32, ctx)
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    parameter_file = os.path.join(
        args.model_save_path,
        '{}_params_{:06}.h5'.format(args.net, args.max_iter))
    nn.save_parameters(parameter_file)

    # append F.Softmax to the prediction graph so users see intuitive outputs
    runtime_contents = {
        'networks': [{
            'name': 'Validation',
            'batch_size': args.batch_size,
            'outputs': {
                'y': F.softmax(vpred)
            },
            'names': {
                'x': vimage
            }
        }],
        'executors': [{
            'name': 'Runtime',
            'network': 'Validation',
            'data': ['x'],
            'output': ['y']
        }]
    }
    save.save(
        os.path.join(args.model_save_path, '{}_result.nnp'.format(args.net)),
        runtime_contents)
Exemplo n.º 23
0
def meta_train(args, shape_x, train_data, valid_data, test_data):

    # Build episode generators
    train_episode_generator = EpisodeGenerator(
        train_data[0], train_data[1], args.n_class_tr, args.n_shot_tr, args.n_query_tr)
    valid_episode_generator = EpisodeGenerator(
        valid_data[0], valid_data[1], args.n_class, args.n_shot, args.n_query)
    test_episode_generator = EpisodeGenerator(
        test_data[0], test_data[1], args.n_class, args.n_shot, args.n_query)

    # Build training model
    xs_t = nn.Variable((args.n_class_tr * args.n_shot_tr, ) + shape_x)
    xq_t = nn.Variable((args.n_class_tr * args.n_query_tr, ) + shape_x)
    hq_t = net(args.n_class_tr, xs_t, xq_t, args.embedding,
               args.net_type, args.metric, False)
    yq_t = nn.Variable((args.n_class_tr * args.n_query_tr, 1))
    loss_t = F.mean(F.softmax_cross_entropy(hq_t, yq_t))

    # Build evaluation model
    xs_v = nn.Variable((args.n_class * args.n_shot, ) + shape_x)
    xq_v = nn.Variable((args.n_class * args.n_query, ) + shape_x)
    hq_v = net(args.n_class, xs_v, xq_v, args.embedding,
               args.net_type, args.metric, True)
    yq_v = nn.Variable((args.n_class * args.n_query, 1))
    err_v = F.mean(F.top_n_error(hq_v, yq_v, n=1))

    # Setup solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor outputs
    monitor = Monitor(args.work_dir)
    monitor_loss = MonitorSeries(
        "Training loss", monitor, interval=args.iter_per_epoch)
    monitor_valid_err = MonitorSeries(
        "Validation error", monitor, interval=args.iter_per_valid)
    monitor_test_err = MonitorSeries("Test error", monitor)
    monitor_test_conf = MonitorSeries("Test error confidence", monitor)

    # Output files
    param_file = args.work_dir + "/params.h5"
    tsne_file = args.work_dir + "/tsne.png"

    # Save NNP
    batch_size = 1
    contents = save_nnp({'x0': xs_v, 'x1': xq_v}, {
                          'y': hq_v}, batch_size)
    save.save(os.path.join(args.work_dir,
                           'MetricMetaLearning_epoch0.nnp'), contents, variable_batch_size=False)

    # Training loop
    train_losses = []
    best_err = 1.0
    for i in range(args.max_iteration):

        # Decay learning rate
        if (i + 1) % args.lr_decay_interval == 0:
            solver.set_learning_rate(solver.learning_rate() * args.lr_decay)

        # Create an episode
        xs_t.d, xq_t.d, yq_t.d = train_episode_generator.next()

        # Training by the episode
        solver.zero_grad()
        loss_t.forward(clear_no_need_grad=True)
        loss_t.backward(clear_buffer=True)
        solver.update()
        train_losses.append(loss_t.d.copy())

        # Evaluation
        if (i + 1) % args.iter_per_valid == 0:
            train_loss = np.mean(train_losses)
            train_losses = []
            valid_errs = []
            for k in range(args.n_episode_for_valid):
                xs_v.d, xq_v.d, yq_v.d = valid_episode_generator.next()
                err_v.forward(clear_no_need_grad=True, clear_buffer=True)
                valid_errs.append(np.float(err_v.d.copy()))
            valid_err = np.mean(valid_errs)

            monitor_loss.add(i + 1, loss_t.d.copy())
            monitor_valid_err.add(i + 1, valid_err * 100)
            if valid_err < best_err:
                best_err = valid_err
                nn.save_parameters(param_file)

    # Final evaluation
    nn.load_parameters(param_file)
    v_errs = []
    for k in range(args.n_episode_for_test):
        xs_v.d, xq_v.d, yq_v.d = test_episode_generator.next()
        err_v.forward(clear_no_need_grad=True, clear_buffer=True)
        v_errs.append(np.float(err_v.d.copy()))
    v_err_mean = np.mean(v_errs)
    v_err_std = np.std(v_errs)
    v_err_conf = 1.96 * v_err_std / np.sqrt(args.n_episode_for_test)
    monitor_test_err.add(0, v_err_mean * 100)
    monitor_test_conf.add(0, v_err_conf * 100)

    # Visualization
    n_class = 50
    n_sample = 20
    visualize_episode_generator = EpisodeGenerator(
        train_data[0], train_data[1], n_class, 0, n_sample)
    _, samples, labels = visualize_episode_generator.next()
    u = get_embeddings(samples, conv4)
    v = get_tsne(u)
    plot_tsne(v[:, 0], v[:, 1], labels[:, 0], tsne_file)

    # Save NNP
    contents = save_nnp({'x0': xs_v, 'x1': xq_v}, {
                          'y': hq_v}, batch_size)
    save.save(os.path.join(args.work_dir,
                           'MetricMetaLearning.nnp'), contents, variable_batch_size=False)
Exemplo n.º 24
0
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify contexts for computation.
    * Initialize DataIterator.
    * Construct a computation graph for training and one for validation.
    * Initialize solver and set parameter variables to that.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    # Parse args
    args = get_args()
    n_train_samples = 50000
    bs_valid = args.batch_size
    extension_module = args.context
    ctx = get_extension_context(
        extension_module, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)
    if args.net == "cifar10_resnet23":
        prediction = functools.partial(
            resnet23_prediction, ncls=10, nmaps=64, act=F.relu)
        data_iterator = data_iterator_cifar10
    if args.net == "cifar100_resnet23":
        prediction = functools.partial(
            resnet23_prediction, ncls=100, nmaps=384, act=F.elu)
        data_iterator = data_iterator_cifar100

    # Create training graphs
    test = False
    image_train = nn.Variable((args.batch_size, 3, 32, 32))
    label_train = nn.Variable((args.batch_size, 1))
    pred_train = prediction(image_train, test)
    loss_train = loss_function(pred_train, label_train)
    input_image_train = {"image": image_train, "label": label_train}

    # Create validation graph
    test = True
    image_valid = nn.Variable((bs_valid, 3, 32, 32))
    pred_valid = prediction(image_valid, test)
    input_image_valid = {"image": image_valid}

    # Solvers
    solver = S.Adam()
    solver.set_parameters(nn.get_parameters())
    start_point = 0

    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitor
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)

    # Data Iterator
    tdata = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)

    # save_nnp
    contents = save_nnp({'x': image_valid}, {'y': pred_valid}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           '{}_epoch0_result.nnp'.format(args.net)), contents)

    # Training-loop
    for i in range(start_point, args.max_iter):
        # Validation
        if i % int(n_train_samples / args.batch_size) == 0:
            ve = 0.
            for j in range(args.val_iter):
                image, label = vdata.next()
                input_image_valid["image"].d = image
                pred_valid.forward()
                ve += categorical_error(pred_valid.d, label)
            ve /= args.val_iter
            monitor_verr.add(i, ve)
        if int(i % args.model_save_interval) == 0:
            # save checkpoint file
            save_checkpoint(args.model_save_path, i, solver)

        # Forward/Zerograd/Backward
        image, label = tdata.next()
        input_image_train["image"].d = image
        input_image_train["label"].d = label
        loss_train.forward()
        solver.zero_grad()
        loss_train.backward()

        # Solvers update
        solver.update()

        e = categorical_error(
            pred_train.d, input_image_train["label"].d)
        monitor_loss.add(i, loss_train.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    nn.save_parameters(os.path.join(args.model_save_path,
                                    'params_%06d.h5' % (args.max_iter)))

    # save_nnp_lastepoch
    contents = save_nnp({'x': image_valid}, {'y': pred_valid}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           '{}_result.nnp'.format(args.net)), contents)
Exemplo n.º 25
0
def train():
    """
    Main script.
    """

    args = get_args()

    _ = nn.load_parameters(args.pretrained_model_path)
    if args.fine_tune:
        nnabla.parameter.pop_parameter('decoder/logits/affine/conv/W')
        nnabla.parameter.pop_parameter('decoder/logits/affine/conv/b')

    n_train_samples = args.train_samples
    n_val_samples = args.val_samples
    distributed = args.distributed
    compute_acc = args.compute_acc

    if distributed:
        # Communicator and Context
        from nnabla.ext_utils import get_extension_context
        extension_module = "cudnn"
        ctx = get_extension_context(
            extension_module, type_config=args.type_config)
        comm = C.MultiProcessDataParalellCommunicator(ctx)
        comm.init()
        n_devices = comm.size
        mpi_rank = comm.rank
        device_id = mpi_rank
        ctx.device_id = str(device_id)
        nn.set_default_context(ctx)
    else:
        # Get context.
        from nnabla.ext_utils import get_extension_context
        extension_module = args.context
        if args.context is None:
            extension_module = 'cpu'
        logger.info("Running in %s" % extension_module)
        ctx = get_extension_context(
            extension_module, device_id=args.device_id, type_config=args.type_config)
        nn.set_default_context(ctx)
        n_devices = 1
        device_id = 0

    # training data
    data = data_iterator_segmentation(
            args.train_samples, args.batch_size, args.train_dir, args.train_label_dir, target_width=args.image_width, target_height=args.image_height)
    # validation data
    vdata = data_iterator_segmentation(args.val_samples, args.batch_size, args.val_dir,
                                       args.val_label_dir, target_width=args.image_width, target_height=args.image_height)

    if distributed:
        data = data.slice(
            rng=None, num_of_slices=n_devices, slice_pos=device_id)
        vdata = vdata.slice(
            rng=None, num_of_slices=n_devices, slice_pos=device_id)
    num_classes = args.num_class

    # Workaround to start with the same initialized weights for all workers.
    np.random.seed(313)
    t_model = get_model(
        args, test=False)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward
    t_pred2 = t_model.pred.unlinked()
    t_e = F.sum(F.top_n_error(t_pred2, t_model.label, axis=1)
                * t_model.mask) / F.sum(t_model.mask)

    v_model = get_model(
        args, test=True)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward
    v_pred2 = v_model.pred.unlinked()
    v_e = F.sum(F.top_n_error(v_pred2, v_model.label, axis=1)
                * v_model.mask) / F.sum(t_model.mask)

    # Create Solver
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    # Load checkpoint
    start_point = 0
    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Setting warmup.
    base_lr = args.learning_rate / n_devices
    warmup_iter = int(1. * n_train_samples /
                      args.batch_size / args.accum_grad / n_devices) * args.warmup_epoch
    warmup_slope = base_lr * (n_devices - 1) / warmup_iter
    solver.set_learning_rate(base_lr)

    # Create monitor
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=1)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=1)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_miou = M.MonitorSeries("mean IOU", monitor, interval=10)
    monitor_vtime = M.MonitorTimeElapsed(
        "Validation time", monitor, interval=1)

    # save_nnp
    contents = save_nnp({'x': v_model.image}, {
                        'y': v_model.pred}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           'Deeplabv3plus_result_epoch0.nnp'), contents, variable_batch_size=False)

    # Training loop
    for i in range(start_point, int(args.max_iter / n_devices)):
        # Save parameters
        if i % (args.model_save_interval // n_devices) == 0 and device_id == 0:
            save_checkpoint(args.model_save_path, i, solver)
        # Validation
        if i % (args.val_interval // n_devices) == 0 and i != 0:
            vmiou_local = 0.
            val_iter_local = n_val_samples // args.batch_size
            vl_local = nn.NdArray()
            vl_local.zero()
            ve_local = nn.NdArray()
            ve_local.zero()
            for j in range(val_iter_local):
                images, labels, masks = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                v_model.mask.d = masks
                v_model.image.data.cast(np.float32, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                v_e.forward(clear_buffer=True)
                vl_local += v_model.loss.data
                ve_local += v_e.data
                # Mean IOU computation
                if compute_acc:
                    vmiou_local += compute_miou(num_classes, labels,
                                                np.argmax(v_model.pred.d, axis=1), masks)

            vl_local /= val_iter_local
            ve_local /= val_iter_local
            if compute_acc:
                vmiou_local /= val_iter_local
                vmiou_ndarray = nn.NdArray.from_numpy_array(
                    np.array(vmiou_local))
            if distributed:
                comm.all_reduce(vl_local, division=True, inplace=True)
                comm.all_reduce(ve_local, division=True, inplace=True)
                if compute_acc:
                    comm.all_reduce(vmiou_ndarray, division=True, inplace=True)

            if device_id == 0:
                monitor_vloss.add(i * n_devices, vl_local.data.copy())
                monitor_verr.add(i * n_devices, ve_local.data.copy())
                if compute_acc:
                    monitor_miou.add(i * n_devices, vmiou_local)
                monitor_vtime.add(i * n_devices)

        # Training
        l = 0.0
        e = 0.0
        solver.zero_grad()

        e_acc = nn.NdArray(t_e.shape)
        e_acc.zero()
        l_acc = nn.NdArray(t_model.loss.shape)
        l_acc.zero()
        # Gradient accumulation loop
        for j in range(args.accum_grad):
            images, labels, masks = data.next()
            t_model.image.d = images
            t_model.label.d = labels
            t_model.mask.d = masks
            t_model.image.data.cast(np.float32, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            t_e.forward(clear_buffer=True)
            e_acc += t_e.data
            l_acc += t_model.loss.data

        # AllReduce
        if distributed:
            params = [x.grad for x in nn.get_parameters().values()]
            comm.all_reduce(params, division=False, inplace=False)
            comm.all_reduce(l_acc, division=True, inplace=True)
            comm.all_reduce(e_acc, division=True, inplace=True)
        solver.scale_grad(1./args.accum_grad)
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Linear Warmup
        if i <= warmup_iter:
            lr = base_lr + warmup_slope * i
            solver.set_learning_rate(lr)

        if distributed:
            # Synchronize by averaging the weights over devices using allreduce
            if (i+1) % args.sync_weight_every_itr == 0:
                weights = [x.data for x in nn.get_parameters().values()]
                comm.all_reduce(weights, division=True, inplace=True)

        if device_id == 0:
            monitor_loss.add(
                i * n_devices, (l_acc / args.accum_grad).data.copy())
            monitor_err.add(
                i * n_devices, (e_acc / args.accum_grad).data.copy())
            monitor_time.add(i * n_devices)

        # Learning rate decay at scheduled iter --> changed to poly learning rate decay policy
        # if i in args.learning_rate_decay_at:
        solver.set_learning_rate(base_lr * ((1 - i / args.max_iter)**0.1))

    if device_id == 0:
        nn.save_parameters(os.path.join(args.model_save_path,
                                        'param_%06d.h5' % args.max_iter))

    contents = save_nnp({'x': v_model.image}, {
                        'y': v_model.pred}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           'Deeplabv3plus_result.nnp'), contents, variable_batch_size=False)
Exemplo n.º 26
0
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss",
                                       monitor,
                                       interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile("Fake images",
                                      monitor,
                                      normalize_method=lambda x: x + 1 / 2.)

    data = data_iterator_mnist(args.batch_size, True)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-1, 1]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    nnp = os.path.join(args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [{
            'name': 'Generator',
            'batch_size': args.batch_size,
            'outputs': {
                'G': fake
            },
            'names': {
                'z': z
            }
        }, {
            'name': 'Discriminator',
            'batch_size': args.batch_size,
            'outputs': {
                'D': pred_real
            },
            'names': {
                'x': x
            }
        }],
        'executors': [{
            'name': 'Generator',
            'network': 'Generator',
            'data': ['z'],
            'output': ['G']
        }, {
            'name': 'Discriminator',
            'network': 'Discriminator',
            'data': ['x'],
            'output': ['D']
        }]
    }

    save.save(nnp, runtime_contents)
    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp, "Generator")
Exemplo n.º 27
0
def main():

    # Get arguments
    args = get_args()
    data_file = "https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.train.txt"
    model_file = args.work_dir + "model.h5"

    # Load Dataset
    itow, wtoi, dataset = load_ptbset(data_file)

    # Computation environment settings
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create data provider
    n_word = len(wtoi)
    n_dim = args.embed_dim
    batchsize = args.batchsize
    half_window = args.half_window_length
    n_negative = args.n_negative_sample

    di = DataIteratorForEmbeddingLearning(
        batchsize=batchsize,
        half_window=half_window,
        n_negative=n_negative,
        dataset=dataset)

    # Create model
    # - Real batch size including context samples and negative samples
    size = batchsize * (1 + n_negative) * (2 * (half_window - 1))

    # Model for learning
    # - input variables
    xl = nn.Variable((size,))  # variable for word
    yl = nn.Variable((size,))  # variable for context

    # Embed layers for word embedding function
    # - f_embed : word index x to get y, the n_dim vector
    # --  for each sample in a minibatch
    hx = PF.embed(xl, n_word, n_dim, name="e1")  # feature vector for word
    hy = PF.embed(yl, n_word, n_dim, name="e1")  # feature vector for context
    hl = F.sum(hx * hy, axis=1)

    # -- Approximated likelihood of context prediction
    # pos: word context, neg negative samples
    tl = nn.Variable([size, ], need_grad=False)
    loss = F.sigmoid_cross_entropy(hl, tl)
    loss = F.mean(loss)

    # Model for test of searching similar words
    xr = nn.Variable((size,), need_grad=False)
    hr = PF.embed(xr, n_word, n_dim, name="e1")  # feature vector for test

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    monitor = M.Monitor(args.work_dir)
    monitor_loss = M.MonitorSeries(
        "Training loss", monitor, interval=args.monitor_interval)
    monitor_time = M.MonitorTimeElapsed(
        "Training time", monitor, interval=args.monitor_interval)

    # Do training
    max_epoch = args.max_epoch
    for epoch in range(max_epoch):

        # iteration per epoch
        for i in range(di.n_batch):

            # get minibatch
            xi, yi, ti = di.next()

            # learn
            solver.zero_grad()
            xl.d, yl.d, tl.d = xi, yi, ti
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            solver.update()

            # monitor
            itr = epoch * di.n_batch + i
            monitor_loss.add(itr, loss.d)
            monitor_time.add(itr)

    # Save model
    nn.save_parameters(model_file)
    nnp_file = os.path.join(
        args.work_dir, 'wtov_%06d.nnp' % (args.max_epoch))
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': size,
             'outputs': {'e': hr},
             'names': {'w': xr}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['w'],
             'output': ['e']}]}
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.work_dir, [xi], [xr], hr, nnp_file)
    exit()

    # Evaluate by similarity
    max_check_words = args.max_check_words
    for i in range(max_check_words):

        # prediction
        xr.d = i
        hr.forward(clear_buffer=True)
        h = hr.d

        # similarity calculation
        w = nn.get_parameters()['e1/embed/W'].d
        s = np.sqrt((w * w).sum(1))
        w /= s.reshape((s.shape[0], 1))
        similarity = w.dot(h[0]) / s[i]

        # for understanding
        output_similar_words(itow, i, similarity)
Exemplo n.º 28
0
def train():
    args = get_args()

    # Set context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in {}:{}".format(args.context, args.type_config))
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    data_iterator = data_iterator_librispeech(args.batch_size, args.data_dir)
    _data_source = data_iterator._data_source  # dirty hack...

    # model
    x = nn.Variable(shape=(args.batch_size, data_config.duration,
                           1))  # (B, T, 1)
    onehot = F.one_hot(x, shape=(data_config.q_bit_len, ))  # (B, T, C)
    wavenet_input = F.transpose(onehot, (0, 2, 1))  # (B, C, T)

    # speaker embedding
    if args.use_speaker_id:
        s_id = nn.Variable(shape=(args.batch_size, 1))
        with nn.parameter_scope("speaker_embedding"):
            s_emb = PF.embed(s_id,
                             n_inputs=_data_source.n_speaker,
                             n_features=WavenetConfig.speaker_dims)
            s_emb = F.transpose(s_emb, (0, 2, 1))
    else:
        s_emb = None

    net = WaveNet()
    wavenet_output = net(wavenet_input, s_emb)

    pred = F.transpose(wavenet_output, (0, 2, 1))

    # (B, T, 1)
    t = nn.Variable(shape=(args.batch_size, data_config.duration, 1))

    loss = F.mean(F.softmax_cross_entropy(pred, t))

    # for generation
    prob = F.softmax(pred)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # load checkpoint
    start_point = 0
    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitor.
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)

    # setup save env.
    audio_save_path = os.path.join(os.path.abspath(args.model_save_path),
                                   "audio_results")
    if audio_save_path and not os.path.exists(audio_save_path):
        os.makedirs(audio_save_path)

    # save_nnp
    contents = save_nnp({'x': x}, {'y': wavenet_output}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path,
                     'Speechsynthesis_result_epoch0.nnp'), contents)
    # Training loop.
    for i in range(start_point, args.max_iter):
        # todo: validation

        x.d, _speaker, t.d = data_iterator.next()
        if args.use_speaker_id:
            s_id.d = _speaker.reshape(-1, 1)

        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.update()

        loss.data.cast(np.float32, ctx)
        monitor_loss.add(i, loss.d.copy())

        if i % args.model_save_interval == 0:
            prob.forward()
            audios = mu_law_decode(np.argmax(prob.d, axis=-1),
                                   quantize=data_config.q_bit_len)  # (B, T)
            save_audio(audios, i, audio_save_path)
            # save checkpoint file
            save_checkpoint(audio_save_path, i, solver)

    # save_nnp
    contents = save_nnp({'x': x}, {'y': wavenet_output}, args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Speechsynthesis_result.nnp'),
              contents)
Exemplo n.º 29
0
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    margin = 1.0  # Margin for contrastive loss.

    # TRAIN
    # Create input variables.
    image0 = nn.Variable([args.batch_size, 1, 28, 28])
    image1 = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size])
    # Create predition graph.
    pred = mnist_lenet_siamese(image0, image1, test=False)
    # Create loss function.
    loss = F.mean(contrastive_loss(pred, label, margin))

    # TEST
    # Create input variables.
    vimage0 = nn.Variable([args.batch_size, 1, 28, 28])
    vimage1 = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size])
    # Create predition graph.
    vpred = mnist_lenet_siamese(vimage0, vimage1, test=True)
    vloss = F.mean(contrastive_loss(vpred, vlabel, margin))

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_vloss = M.MonitorSeries("Test loss", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    rng = np.random.RandomState(313)
    data = siamese_data_iterator(args.batch_size, True, rng)
    vdata = siamese_data_iterator(args.batch_size, False, rng)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage0.d, vimage1.d, vlabel.d = vdata.next()
                vloss.forward(clear_buffer=True)
                ve += vloss.d
            monitor_vloss.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
        image0.d, image1.d, label.d = data.next()
        solver.zero_grad()
        # Training forward, backward and update
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, loss.d.copy())
        monitor_time.add(i)

    parameter_file = os.path.join(
        args.model_save_path, 'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)

    nnp_file = os.path.join(
        args.model_save_path, 'siamese_%06d.nnp' % (args.max_iter))
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': args.batch_size,
             'outputs': {'y': vpred},
             'names': {'x0': vimage0, 'x1': vimage1}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x0', 'x1'],
             'output': ['y']}]}
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [vimage0.d, vimage1.d], [
                      vimage0, vimage1], vpred, nnp_file)
Exemplo n.º 30
0
    def save_net_nnp(self,
                     path,
                     inp,
                     out,
                     calc_latency=False,
                     func_real_latency=None,
                     func_accum_latency=None):
        """
            Saves whole net as one nnp
            Calc whole net (real) latency (using e.g.Nnabla's [Profiler])
            Calculate also layer-based latency
            The modules are discovered using the nnabla graph of the whole net
            The latency is then calculated based on each individual module's
            nnabla graph (e.g. [LatencyGraphEstimator])

            Args:
                path
                inp: input of the created network
                out: output of the created network
                calc_latency: flag for calc latency
                func_real_latency: function to use to calc actual latency
                func_accum_latency: function to use to calc accum. latency,
                        this is, dissecting the network layer by layer
                        using the graph of the network, calculate the
                        latency for each layer and add up all these results.
        """
        batch_size = inp.shape[0]

        name = self.name

        filename = path + name + '.nnp'
        pathname = os.path.dirname(filename)
        upper_pathname = os.path.dirname(pathname)
        if not os.path.exists(upper_pathname):
            os.mkdir(upper_pathname)
        if not os.path.exists(pathname):
            os.mkdir(pathname)

        dict = {'0': inp}
        keys = ['0']

        name_for_nnp = name if (name != '') else 'empty'
        contents = {
            'networks': [{
                'name': name_for_nnp,
                'batch_size': batch_size,
                'outputs': {
                    'out': out
                },
                'names': dict
            }],
            'executors': [{
                'name': 'runtime',
                'network': name_for_nnp,
                'data': keys,
                'output': ['out']
            }]
        }

        save(filename, contents, variable_batch_size=False)

        if calc_latency:
            acc_latency = func_accum_latency.get_estimation(out)
            filename = path + name + '.acclat'
            with open(filename, 'w') as f:
                print(acc_latency.__str__(), file=f)

            func_real_latency.run()
            real_latency = float(func_real_latency.result['forward_all'])
            filename = path + name + '.realat'
            with open(filename, 'w') as f:
                print(real_latency.__str__(), file=f)
            return real_latency, acc_latency
        else:
            return 0.0, 0.0
Exemplo n.º 31
0
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(F.sigmoid_cross_entropy(
        pred_fake, F.constant(1, pred_fake.shape)))
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(F.sigmoid_cross_entropy(
        pred_fake_dis, F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(F.sigmoid_cross_entropy(pred_real,
                                               F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries(
        "Discriminator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile(
        "Fake images", monitor, normalize_method=lambda x: x + 1 / 2.)

    data = data_iterator_mnist(args.batch_size, True)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-1, 1]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    nnp = os.path.join(
        args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Generator',
             'batch_size': args.batch_size,
             'outputs': {'G': fake},
             'names': {'z': z}},
            {'name': 'Discriminator',
             'batch_size': args.batch_size,
             'outputs': {'D': pred_real},
             'names': {'x': x}}],
        'executors': [
            {'name': 'Generator',
             'network': 'Generator',
             'data': ['z'],
             'output': ['G']},
            {'name': 'Discriminator',
             'network': 'Discriminator',
             'data': ['x'],
             'output': ['D']}]}

    save.save(nnp, runtime_contents)
    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp, "Generator")
Exemplo n.º 32
0
    def save_modules_nnp_by_mod(
        self,
        path,
        active_only=False,
        calc_latency=False,
        func_latency=None,
    ):
        """
            *** Note: This function is deprecated. Use save_modules_nnp() ***
            Saves all modules of the network as individual nnp files,
            using folder structure given by name convention.
            The modules are extracted going over the module list, not
            over the graph structure.
            The latency is then calculated using the module themselves
            (e.g. [LatencyEstimator])
            Args:
                path
                active_only: if True, only active modules are saved
                calc_latency: flag for calc latency
                func_latency: function to use to calc latency of
                              each of the extracted modules
                              This function needs to work based on the modules
        """
        accum_lat = 0.0
        mods = self.get_net_modules(active_only=active_only)
        for mi in mods:
            if type(mi) in self.modules_to_profile:
                if len(mi.input_shapes) == 0:
                    continue
                pass

                inp = [nn.Variable((1, ) + si[1:]) for si in mi.input_shapes]
                out = mi.call(*inp)

                filename = path + mi.name + '.nnp'
                pathname = os.path.dirname(filename)
                upper_pathname = os.path.dirname(pathname)
                if not os.path.exists(upper_pathname):
                    os.mkdir(upper_pathname)
                if not os.path.exists(pathname):
                    os.mkdir(pathname)

                d_dict = {str(i): inpi for i, inpi in enumerate(inp)}
                d_keys = [str(i) for i, inpi in enumerate(inp)]

                name_for_nnp = mi.name if (mi.name != '') else 'empty'
                contents = {
                    'networks': [{
                        'name': name_for_nnp,
                        'batch_size': 1,
                        'outputs': {
                            'out': out
                        },
                        'names': d_dict
                    }],
                    'executors': [{
                        'name': 'runtime',
                        'network': name_for_nnp,
                        'data': d_keys,
                        'output': ['out']
                    }]
                }

                if hasattr(mi, '_scope_name'):
                    with nn.parameter_scope(mi._scope_name):
                        save(filename, contents, variable_batch_size=False)
                else:
                    save(filename, contents, variable_batch_size=False)

                if calc_latency:
                    latency = func_latency.get_estimation(mi)
                    filename = path + mi.name + '.acclat'
                    with open(filename, 'w') as f:
                        print(latency.__str__(), file=f)
                    accum_lat += latency
        return accum_lat
Exemplo n.º 33
0
def main():
    """
    Main script.

    Steps:
    * Get and set context.
    * Load Dataset
    * Initialize DataIterator.
    * Create Networks
    *   Net for Labeled Data
    *   Net for Unlabeled Data
    *   Net for Test Data
    * Create Solver.
    * Training Loop.
    *   Test
    *   Training
    *     by Labeled Data
    *       Calculate Cross Entropy Loss 
    *     by Unlabeled Data
    *       Estimate Adversarial Direction
    *       Calculate LDS Loss
    """

    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    shape_x = (1, 28, 28)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNist Dataset
    from mnist_data import MnistDataSource
    with MnistDataSource(train=True) as d:
        x_t = d.images
        t_t = d.labels
    with MnistDataSource(train=False) as d:
        x_v = d.images
        t_v = d.labels
    x_t = np.array(x_t / 256.0).astype(np.float32)
    x_t, t_t = x_t[:args.n_train], t_t[:args.n_train]
    x_v, t_v = x_v[:args.n_valid], t_v[:args.n_valid]

    # Create Semi-supervised Datasets
    x_l, t_l, x_u, _ = split_dataset(x_t, t_t, args.n_labeled, args.n_class)
    x_u = np.r_[x_l, x_u]
    x_v = np.array(x_v / 256.0).astype(np.float32)

    # Create DataIterators for datasets of labeled, unlabeled and validation
    di_l = DataIterator(args.batchsize_l, [x_l, t_l])
    di_u = DataIterator(args.batchsize_u, [x_u])
    di_v = DataIterator(args.batchsize_v, [x_v, t_v])

    # Create networks
    # feed-forward-net building function
    def forward(x, test=False):
        return mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data
    xl = nn.Variable((args.batchsize_l,) + shape_x, need_grad=False)
    hl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(hl, tl))

    # Net for learning unlabeled data
    xu = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    r = nn.Variable((args.batchsize_u,) + shape_x, need_grad=True)
    eps = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    loss_u, yu = vat(xu, r, eps, forward, distance)

    # Net for evaluating valiation data
    xv = nn.Variable((args.batchsize_v,) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor trainig and validation stats.
    import nnabla.monitor as M
    monitor = M.Monitor(args.model_save_path)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240)

    # Training Loop.
    t0 = time.time()

    for i in range(args.max_iter):

        # Validation Test
        if i % args.val_interval == 0:
            n_error = calc_validation_error(
                di_v, xv, tv, hv, args.val_iter)
            monitor_verr.add(i, n_error)

        #################################
        ## Training by Labeled Data #####
        #################################

        # input minibatch of labeled data into variables
        xl.d, tl.d = di_l.next()

        # initialize gradients
        solver.zero_grad()

        # forward, backward and update
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        #################################
        ## Training by Unlabeled Data ###
        #################################

        # input minibatch of unlabeled data into variables
        xu.d, = di_u.next()

        ##### Calculate Adversarial Noise #####

        # Sample random noise
        n = np.random.normal(size=xu.shape).astype(np.float32)

        # Normalize noise vector and input to variable
        r.d = get_direction(n)

        # Set xi, the power-method scaling parameter.
        eps.data.fill(args.xi_for_vat)

        # Calculate y without noise, only once.
        yu.forward(clear_buffer=True)

        # Do power method iteration
        for k in range(args.n_iter_for_power_method):
            # Initialize gradient to receive value
            r.grad.zero()

            # forward, backward, without update
            loss_u.forward(clear_no_need_grad=True)
            loss_u.backward(clear_buffer=True)

            # Normalize gradinet vector and input to variable
            r.d = get_direction(r.g)

        ##### Calculate loss for unlabeled data #####

        # Clear remained gradients
        solver.zero_grad()

        # Set epsilon, the adversarial noise scaling parameter.
        eps.data.fill(args.eps_for_vat)

        # forward, backward and update
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        ##### Learning rate update #####
        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(
                solver.learning_rate() * args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = calc_validation_error(di_v, xv, tv, hv, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)

    # Save the model.
    nnp_file = os.path.join(
        args.model_save_path, 'vat_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': args.batchsize_v,
             'outputs': {'y': hv},
             'names': {'x': xv}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x'],
             'output': ['y']}]}
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [xv.d], [xv], hv, nnp_file)
Exemplo n.º 34
0
                def __call__(self, config):
                    if config.iter != half_iter:
                        return

                    info = self.info
                    datasets = []
                    with ExitStack() as stack:
                        for d_name, d in info.datasets.items():
                            ds = {}
                            ds['name'] = d_name
                            ds['uri'] = d.uri
                            ds['cache_dir'] = d.cache_dir
                            di_instance = stack.enter_context(
                                d.data_iterator())
                            ds['variables'] = [
                                var_name for var_name in di_instance.variables
                            ]
                            ds['batch_size'] = di_instance.batch_size
                            ds['no_image_normalization'] = not d.normalize
                            ds['shuffle'] = di_instance._shuffle
                            datasets.append(ds)

                    dataset_assign = set()
                    for obj in itertools.chain(info.monitors.values(),
                                               info.executors.values(),
                                               info.optimizers.values()):
                        for pv in obj.dataset_assign.keys():
                            dataset_assign.add(pv.name)

                    contents = {
                        'global_config': {
                            'default_context':
                            info.global_config.default_context
                        },
                        'training_config': {
                            'max_epoch': info.training_config.max_epoch,
                            'iter_per_epoch':
                            info.training_config.iter_per_epoch,
                            'save_best': info.training_config.save_best
                        },
                        'networks': [{
                            'name': n_name,
                            'batch_size': n.batch_size,
                            'outputs': {
                                out: n.variables[out].variable_instance
                                for out in n.outputs
                            },
                            'names': {
                                inp: n.variables[inp].variable_instance
                                for inp in itertools.chain(
                                    n.inputs, n.outputs)
                            }
                        } for n_name, n in info.networks.items()],
                        'executors': [{
                            'name':
                            e_name,
                            'network':
                            e.network.name,
                            'data':
                            [pv.name for pv in e.dataset_assign.keys()],
                            'generator_variables':
                            [pv.name for pv in e.generator_assign.keys()],
                            'output':
                            [pv.name for pv in e.output_assign.keys()]
                        } for e_name, e in info.executors.items()],
                        'optimizers': [{
                            'name':
                            o_name,
                            'solver':
                            o.solver,
                            'network':
                            o.network.name,
                            'data_variables':
                            {pv.name: d
                             for pv, d in o.dataset_assign.items()},
                            'generator_variables':
                            [pv.name for pv in o.generator_assign.keys()],
                            'loss_variables':
                            [pv.name for pv in o.loss_variables],
                            'dataset':
                            [ds_name for ds_name in o.data_iterators.keys()],
                            'weight_decay':
                            o.weight_decay,
                            'lr_decay':
                            o.lr_decay,
                            'lr_decay_interval':
                            o.lr_decay_interval,
                            'update_interval':
                            o.update_interval
                        } for o_name, o in info.optimizers.items()],
                        'datasets':
                        datasets,
                        'monitors': [{
                            'name':
                            m_name,
                            'network':
                            m.network.name,
                            'data_variables':
                            {pv.name: d
                             for pv, d in m.dataset_assign.items()},
                            'generator_variables':
                            [pv.name for pv in m.generator_assign.keys()],
                            'monitor_variables':
                            [pv.name for pv in m.monitor_variables],
                            'dataset':
                            [ds_name for ds_name in m.data_iterators.keys()]
                        } for m_name, m in info.monitors.items()],
                    }

                    save.save(saved_nnp_file, contents, include_params,
                              variable_batch_size)