Example #1
def create_loss_and_learner(model,
                            labels,
                            learning_rate,
                            momentum_coef=0.0,
                            wdecay=0.0,
                            nesterov=False,
                            gradient_clip_norm=None,
                            gradient_clip_value=None):
    """
    Auxiliary function to create loss function (cross entropy and softmax)
    and trainer using stochastic gradient descent with momentum.

    Arguments:
        model - imported model
        labels - placeholder for one-hot labels array
        learning_rate - learning rate for trainer
        momentum_coef - coefficient of momentum (default 0.0)
        wdecay - amount of weight decay (default 0.0)
        nesterov - use Nesterov accelerated gradient (default False)
        gradient_clip_norm - target gradient norm (default None)
        gradient_clip_value - value to element-wise clip gradients (default None)

    Returns:
        Loss function (mean for batch)
    """
    if model.axes.lengths != labels.axes.lengths:
        labels = ng.Transpose(labels)
    assert model.axes.lengths == labels.axes.lengths
    model = ng.cast_axes(model, axes=labels.axes)

    loss = ng.cross_entropy_multi(ng.softmax(model), labels)
    optimizer = GradientDescentMomentum(learning_rate, momentum_coef, wdecay,
                                        gradient_clip_norm,
                                        gradient_clip_value, nesterov)
    return ng.sequential([optimizer(loss), ng.mean(loss, out_axes=())])
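A minimal usage sketch for the helper above; ngraph is assumed to be importable as ng, and the axis names, lengths, and placeholders below are illustrative assumptions rather than part of the original example:

import ngraph as ng

# Hypothetical axes and placeholders (names and lengths are assumptions)
Y = ng.make_axis(10, name='Y')      # class axis
N = ng.make_axis(128, name='N')     # batch axis

model_out = ng.placeholder([Y, N])  # stand-in for an imported model's output
labels = ng.placeholder([Y, N])     # one-hot labels

mean_loss = create_loss_and_learner(model_out, labels,
                                    learning_rate=0.01,
                                    momentum_coef=0.9,
                                    wdecay=0.0005)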
Example #2
def make_optimizer(name=None):
    learning_rate = 0.005 if minibatch_discrimination else 0.03
    optimizer = GradientDescentMomentum(learning_rate,
                                        momentum_coef=0.0,
                                        wdecay=0.0,
                                        gradient_clip_norm=None,
                                        gradient_clip_value=None,
                                        name=name)
    return optimizer
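Note that make_optimizer reads minibatch_discrimination from its enclosing scope; a hypothetical call, assuming that flag is defined at module level:

minibatch_discrimination = False                          # assumed module-level flag
optimizer = make_optimizer(name='generator_optimizer')    # learning rate will be 0.03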
Example #3
def run_mini_ds2_benchmark(args, **kwargs):
    device_id = kwargs.get('device_id')

    inputs, train_set, eval_set = generate_ds2_data(args.max_length,
                                                    args.str_w, args.nout,
                                                    args.nbands,
                                                    args.batch_size,
                                                    args.num_iterations)

    model_out = get_mini_ds2(inputs, args.nfilters, args.filter_width,
                             args.str_w, args.nbands, args.depth,
                             args.hidden_size, args.batch_norm,
                             args.hetr_device, device_id)

    if args.bprop:
        with ng.metadata(device=args.hetr_device,
                         device_id=device_id,
                         parallel=ax.N):
            loss = ng.ctc(model_out, ng.flatten(inputs["char_map"]),
                          inputs["audio_length"], inputs["trans_length"])

            optimizer = GradientDescentMomentum(learning_rate=2e-5,
                                                momentum_coef=0.99,
                                                gradient_clip_norm=400,
                                                nesterov=args.nesterov)

            updates = optimizer(loss)
            mean_cost = ng.sequential([updates, ng.mean(loss, out_axes=())])
            bprop_computation_op = ng.computation(mean_cost, "all")

        benchmark = Benchmark(bprop_computation_op, train_set, inputs,
                              args.backend, args.hetr_device)
        Benchmark.print_benchmark_results(
            benchmark.time(args.num_iterations,
                           args.skip_iter,
                           'ds2_bprop',
                           args.visualize,
                           preprocess=True))
    else:
        fprop_computation_op = ng.computation(model_out, "all")

        benchmark_fprop = Benchmark(fprop_computation_op, train_set, inputs,
                                    args.backend, args.hetr_device)
        Benchmark.print_benchmark_results(
            benchmark_fprop.time(args.num_iterations,
                                 args.skip_iter,
                                 'ds2_fprop',
                                 args.visualize,
                                 preprocess=True))
Example #4
def test_gdm(random_learning_rate, random_momentum_coef, wdecay, nesterov):

    # Setup the baseline and reference optimizers to be tested
    gdm_args = {
        'learning_rate': random_learning_rate,
        'momentum_coef': random_momentum_coef,
        'wdecay': wdecay,
        'nesterov': nesterov
    }

    gdm_ref = GDMReference(**gdm_args)
    gdm = GradientDescentMomentum(**gdm_args)

    # test baseline against reference
    compare_optimizer(gdm, gdm_ref)
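The GDMReference class compared against here is not shown on this page. For orientation, a plain NumPy-style sketch of the classic momentum update (with weight decay and optional Nesterov) that such a reference typically computes; this is an illustration of the algorithm, not the library's actual code:

def gdm_reference_step(w, grad, velocity, learning_rate,
                       momentum_coef=0.0, wdecay=0.0, nesterov=False):
    # Classic SGD-with-momentum step; w, grad and velocity are NumPy arrays
    grad = grad + wdecay * w                     # fold weight decay into the gradient
    velocity = momentum_coef * velocity - learning_rate * grad
    if nesterov:
        w = w + momentum_coef * velocity - learning_rate * grad
    else:
        w = w + velocity
    return w, velocity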
Example #5
def test_gdm(random_learning_rate, random_momentum_coef, wdecay, nesterov,
             transformer_factory):

    # Setup the baseline and reference optimizers to be tested
    gdm_args = {
        'learning_rate': random_learning_rate,
        'momentum_coef': random_momentum_coef,
        'wdecay': wdecay,
        'nesterov': nesterov
    }

    gdm_reference = GDMReference(**gdm_args)
    gdm = GradientDescentMomentum(**gdm_args)

    # Set up data placeholders
    C = ng.make_axis(20)
    N = ng.make_axis(32, name='N')

    data = ng.placeholder([C, N])
    target = ng.placeholder([N])

    # params to be updated using GDM
    np_W = np.random.rand(C.length)
    W = ng.variable([C], initial_value=np_W)

    # Set up op graph
    cost = ng.sum(target - ng.dot(W, data), out_axis=())
    updated_weights = ng.sequential([gdm(cost), W])

    def data_generator(iteration_count):
        for i in range(iteration_count):
            yield (np.random.rand(C.length, N.length).astype('float32'),
                   np.random.rand(N.length).astype('float32'))

    # Set up the computation and run the "train" loop
    with ExecutorFactory() as ex:
        gdm_baseline = ex.transformer.computation(updated_weights, data,
                                                  target)
        mock_dataset = data_generator(20)

        for x, y in mock_dataset:
            ng_W = gdm_baseline(x, y)  # updated weights for ngraph optimizer
            np_W = gdm_reference(
                x, np_W)  # updated weights for reference optimizer

            ng.testing.assert_allclose(np_W, ng_W, rtol=1e-3)
Example #6
def run_resnet_benchmark(dataset,
                         num_iterations,
                         n_skip,
                         batch_size,
                         device_id,
                         transformer_type,
                         device,
                         bprop=True,
                         batch_norm=False,
                         visualize=False,
                         stage_depth=1):
    inputs, data, train_set = get_fake_data(dataset, batch_size,
                                            num_iterations)

    # Running forward propagation
    model_out = get_mini_resnet(inputs,
                                dataset,
                                device,
                                device_id,
                                batch_norm=batch_norm,
                                stage_depth=stage_depth)

    # Running back propagation
    if bprop:
        with ng.metadata(device=device, device_id=device_id, parallel=ax.N):
            optimizer = GradientDescentMomentum(0.01, 0.9)
            train_loss = ng.cross_entropy_multi(
                model_out, ng.one_hot(inputs['label'], axis=ax.Y))

            batch_cost = ng.sequential(
                [optimizer(train_loss),
                 ng.mean(train_loss, out_axes=())])
            batch_cost_computation_op = ng.computation(batch_cost, "all")
        benchmark = Benchmark(batch_cost_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(num_iterations, n_skip, dataset + '_msra_bprop',
                           visualize, 'device_id'))
    else:
        fprop_computation_op = ng.computation(model_out, 'all')
        benchmark = Benchmark(fprop_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(num_iterations, n_skip, dataset + '_msra_fprop',
                           visualize))
Example #7
def run_cifar_benchmark(n_iter=10,
                        n_skip=5,
                        batch_size=4,
                        transformer_type='cpu'):
    inputs, data, train_set = get_fake_cifar(batch_size, n_iter)
    model = get_mini_resnet(inputs)
    optimizer = GradientDescentMomentum(0.01, 0.9)

    train_loss = ng.cross_entropy_multi(model(inputs['image']),
                                        ng.one_hot(inputs['label'], axis=ax.Y))

    batch_cost = ng.sequential(
        [optimizer(train_loss),
         ng.mean(train_loss, out_axes=())])
    batch_cost_computation_op = ng.computation(batch_cost, "all")

    feed_dict = fill_feed_dict(train_set, inputs)
    benchmarks = dict()
    benchmarks['cifar_msra_fprop'] = run_benchmark(batch_cost_computation_op,
                                                   transformer_type, feed_dict,
                                                   n_skip, n_iter)
    print_benchmark_results(benchmarks)
Example #8
def run_resnet_benchmark(dataset, n_iter, n_skip, batch_size, device_id,
                         transformer_type, device, bprop=False, visualize=False):
    inputs, data, train_set = get_fake_data(dataset, batch_size, n_iter)
    model_out = get_mini_resnet(inputs, dataset, device_id)

    # Running forward propagation
    fprop_computation_op = ng.computation(model_out, 'all')
    benchmark_fprop = Benchmark(fprop_computation_op, train_set, inputs, transformer_type, device)
    Benchmark.print_benchmark_results(benchmark_fprop.time(n_iter, n_skip,
                                                           dataset + '_msra_fprop', visualize))

    # Running back propagation
    if bprop:
        optimizer = GradientDescentMomentum(0.01, 0.9)
        train_loss = ng.cross_entropy_multi(model_out,
                                            ng.one_hot(inputs['label'], axis=ax.Y))

        batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
        batch_cost_computation_op = ng.computation(batch_cost, "all")

        benchmark = Benchmark(batch_cost_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(benchmark.time(n_iter, n_skip,
                                          dataset + '_msra_bprop', visualize))
Example #9
    # we need to ask the dataset to create an iteration
    # placeholder for our learning rate schedule
    inputs = train_set.make_placeholders(include_iteration=True)
    ax.Y.length = 10

    resnet = residual_network(args.stage_depth)

    learning_rate_policy = {
        'name': 'schedule',
        'schedule': [32000, 48000],
        'gamma': 0.1,
        'base_lr': 0.1
    }

    optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                        momentum_coef=0.9,
                                        wdecay=0.0001,
                                        iteration=inputs['iteration'])
    label_indices = inputs['label']
    train_loss = ng.cross_entropy_multi(resnet(inputs['image']),
                                        ng.one_hot(label_indices, axis=ax.Y))
    batch_cost = ng.sequential(
        [optimizer(train_loss),
         ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    with Layer.inference_mode_on():
        inference_prob = resnet(inputs['image'])
        errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                              label_indices)
        eval_loss = ng.cross_entropy_multi(
            inference_prob, ng.one_hot(label_indices, axis=ax.Y))
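The iteration placeholder requested at the top of this example is what the 'schedule' policy reads at run time. A rough sketch of a training loop that feeds it; train_function and the feed-dict convention are assumptions, not part of the snippet above:

# Hypothetical loop; train_function is assumed to be a bound computation
# that accepts the same keys as `inputs`.
for iteration, data in enumerate(train_set):
    feed = dict(data)
    feed['iteration'] = iteration   # drives the schedule-based learning rate
    cost = train_function(feed)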
Example #10
def train_mnist_mlp(transformer_name,
                    data_dir=None,
                    rng_seed=12,
                    batch_size=128,
                    train_iter=10,
                    eval_iter=10):
    assert transformer_name in ['cpu', 'hetr']
    assert isinstance(rng_seed, int)

    # Apply this metadata to the graph regardless of the transformer;
    # it is ignored in the non-HeTr case
    hetr_device_ids = (0, 1)

    # use consistent rng seed between runs
    np.random.seed(rng_seed)

    # Data
    train_data, valid_data = MNIST(path=data_dir).load_data()
    train_set = ArrayIterator(train_data,
                              batch_size,
                              total_iterations=train_iter)
    valid_set = ArrayIterator(valid_data, batch_size)
    inputs = train_set.make_placeholders()
    ax.Y.length = 10

    # Model
    with ng.metadata(device_id=hetr_device_ids, parallel=ax.N):
        seq1 = Sequential([
            Preprocess(functor=lambda x: x / 255.),
            Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
            Affine(axes=ax.Y,
                   weight_init=GaussianInit(),
                   activation=Logistic())
        ])

        train_prob = seq1(inputs['image'])
        train_loss = ng.cross_entropy_binary(
            train_prob, ng.one_hot(inputs['label'], axis=ax.Y))

        optimizer = GradientDescentMomentum(0.1, 0.9)
        batch_cost = ng.sequential(
            [optimizer(train_loss),
             ng.mean(train_loss, out_axes=())])
        train_outputs = dict(batch_cost=batch_cost)

        with Layer.inference_mode_on():
            inference_prob = seq1(inputs['image'])
        errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                              inputs['label'])
        eval_loss = ng.cross_entropy_binary(
            inference_prob, ng.one_hot(inputs['label'], axis=ax.Y))
        eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

    # Runtime
    with closing(
            ngt.make_transformer_factory(transformer_name)()) as transformer:
        train_computation = make_bound_computation(transformer, train_outputs,
                                                   inputs)
        loss_computation = make_bound_computation(transformer, eval_outputs,
                                                  inputs)

        train_costs = list()
        for step in range(train_iter):
            out = train_computation(next(train_set))
            train_costs.append(float(out['batch_cost']))

        ce_loss = list()
        for step in range(eval_iter):
            out = loss_computation(next(valid_set))
            ce_loss.append(np.mean(out['cross_ent_loss']))

        return train_costs, ce_loss
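A hypothetical invocation of the function above, assuming the MNIST dataset helper handles the default data_dir:

# Hypothetical call; runs ten training and ten evaluation iterations on CPU.
train_costs, eval_losses = train_mnist_mlp('cpu', batch_size=128,
                                           train_iter=10, eval_iter=10)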
Example #11
def test_gdm(args, transformer_factory):
    """
    Test the ngraph GradientDescentMomentum against the neon version across 20 update steps.
    """
    # set up parameters
    C = ng.make_axis(20, name="C")
    N = ng.make_axis(32, name="N", batch=True)

    be = gen_backend(backend='cpu', batch_size=N.length)

    # restrict to numpy transformer for now
    factory = ngt.make_transformer_factory('numpy')
    ngt.set_transformer_factory(factory)
    ngt.make_transformer()

    # generate dummy data (to initialize values)
    w_init = np.random.rand(C.length).astype('float32')

    # set up nervana graph
    X = ng.placeholder([C, N]).named('X')
    Y = ng.placeholder([N]).named('Y')
    W = ng.variable([C - 1], initial_value=w_init).named('W')

    ex = ExecutorFactory()
    transformer = ex.transformer

    lrate, mom, wdecay = args
    gdm = GradientDescentMomentum(learning_rate=lrate, momentum_coef=mom, wdecay=wdecay)
    cost = ng.sum(Y - ng.dot(W, X), out_axis=())

    # to call ngraph gdm, use (ngraph_W, _) = ngraph_optimize(x, y)
    # where (x, y) are nparrays that fill the placeholders X and Y
    updates = gdm(cost)
    ngraph_optimize = transformer.computation([W, updates], X, Y)
    transformer.initialize()

    # set up the neon gdm
    neon_gdm = NeonGradientDescentMomentum(learning_rate=lrate, momentum_coef=mom, wdecay=wdecay)
    # dev_v0 = be.zeros((C.length, 1))  # velocities are zero at the beginning
    dev_dw = be.zeros((C.length, 1))  # we fill in the gradient info below
    dev_w_init = be.array(w_init)  # copy w_init to device
    param_list = [((dev_w_init, dev_dw), [])]

    # store the weights with each minibatch for debugging
    ng_Ws = []
    be_Ws = []

    # run for 20 minibatches
    for i, (x, y) in enumerate([generate_data(C.length, N.length) for _ in range(20)]):
        # obtain ngraph results
        (ng_W, _) = ngraph_optimize(x, y)
        ng_Ws.append(copy.deepcopy(ng_W))

        # obtain neon results
        dw = -1 * x.sum(axis=1)   # the gradients we compute analytically
        param_list[0][0][1].set(dw)  # fill the gradient

        neon_gdm.optimize([DummyLayer(param_list)], epoch=0)
        (param, grad), states = param_list[0]
        be_W = param.get()[:, 0]
        be_Ws.append(be_W)

        np.testing.assert_allclose(be_W, ng_W, rtol=1e-3)
Example #12
    Affine(nout=500, weight_init=init_uni, activation=Rectlin()),
    Affine(axes=ax.Y, weight_init=init_uni, activation=Softmax())
])

######################
# Input specification
ax.C.length, ax.H.length, ax.W.length = train_set.shapes['image']
ax.D.length = 1
ax.N.length = args.batch_size
ax.Y.length = 10

# placeholders with descriptive names
inputs = dict(image=ng.placeholder([ax.C, ax.H, ax.W, ax.N]),
              label=ng.placeholder([ax.N]))

optimizer = GradientDescentMomentum(0.01, 0.9)
output_prob = seq1.train_outputs(inputs['image'])
errors = ng.not_equal(ng.argmax(output_prob, out_axes=[ax.N]), inputs['label'])
loss = ng.cross_entropy_multi(output_prob,
                              ng.one_hot(inputs['label'], axis=ax.Y))
mean_cost = ng.mean(loss, out_axes=())
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss, misclass_pct=errors)

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
train_computation = make_bound_computation(transformer, train_outputs, inputs)
loss_computation = make_bound_computation(transformer, loss_outputs, inputs)
Example #13
           activation=Rectlin()),
    Affine(axes=ax.Y,
           weight_init=GaussianInit(var=0.01),
           bias_init=init,
           activation=Softmax())
])

# Learning rate change based on schedule from learning_rate_policies.py
lr_schedule = {
    'name': 'schedule',
    'base_lr': 0.01,
    'gamma': (1 / 250.)**(1 / 3.),
    'schedule': [22, 44, 65]
}
optimizer = GradientDescentMomentum(lr_schedule,
                                    0.0,
                                    wdecay=0.0005,
                                    iteration=inputs['iteration'])
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)

    if args.no_progress_bar:
        ncols = 0
    else:
Example #14
        train = seq1(input_ops_train['image'])
        tb = TensorBoard("/tmp/")
        tb.add_graph(train)
        exit()

    # Learning Rate Placeholder
    lr_ph = ng.placeholder(axes=(), initial_value=base_lr)

    # Optimizer
    # The 'provided' learning rate policy takes the learning rate as a graph input
    # through a placeholder, which lets you control it from Python based on
    # whatever factors you like during training.
    learning_rate_policy = {'name': 'provided', 'lr_placeholder': lr_ph}

    optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                        momentum_coef=momentum_coef,
                                        wdecay=wdecay,
                                        nesterov=False,
                                        iteration=input_ops_train['iteration'])
    # Make a prediction
    prediction = resnet(input_ops_train['image'])
    # Calculate loss
    train_loss = ng.cross_entropy_multi(
        prediction, ng.one_hot(input_ops_train['label'], axis=ax.Y))
    # Average loss over the batch
    batch_cost = ng.sequential(
        [optimizer(train_loss),
         ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

# Instantiate the Saver object to save weights
weight_saver = Saver()
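Because the 'provided' policy delegates scheduling entirely to user code, the value fed through lr_ph can be computed however you like at each step. A hedged sketch of one such rule (the function name and constants are assumptions; how the value is actually fed depends on how the computation is bound and is not shown above):

def provided_lr(step, base_lr=0.1, warmup_steps=500, decay=0.95, decay_every=1000):
    # Hypothetical schedule: linear warm-up, then stepwise exponential decay
    if step < warmup_steps:
        return base_lr * (step + 1) / warmup_steps
    return base_lr * decay ** ((step - warmup_steps) // decay_every)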
Example #15
                     args.filter_width,
                     args.str_w,
                     nbands,
                     args.depth,
                     args.hidden_size,
                     batch_norm=args.batch_norm)
    output = ds2(inputs["audio"], spatial_axes={"H": "frequency", "W": "time"})

    # set up ctc loss
    loss = ng.ctc(output, ng.flatten(inputs["char_map"]),
                  ng.flatten(inputs["audio_length"]),
                  ng.flatten(inputs["char_map_length"]))

    optimizer = GradientDescentMomentum(
        args.lr,
        momentum_coef=args.momentum,
        gradient_clip_norm=args.gradient_clip_norm,
        nesterov=args.nesterov)

    start = time.time()
    updates = optimizer(loss)
    stop = time.time()
    logger.debug("Optimizer graph creation took {} seconds".format(stop -
                                                                   start))
    mean_cost = ng.sequential([updates, ng.mean(loss, out_axes=())])

    # Create computation and initialize the transformer to allocate weights
    train_computation = ng.computation([mean_cost, output], "all")
    if inference is True:
        with Layer.inference_mode_on():
            eval_output = ds2(inputs["audio"],
Example #16
            'W': 'REC'
        })] + [affine_layer])
elif args.modeltype == "LSTM":
    model = Sequential(
        recurrent_model.define_model(out_axis,
                                     celltype=args.modeltype,
                                     recurrent_units=hidden_sizes,
                                     return_sequence=True).layers +
        [Logistic()])

# Optimizer
if args.modeltype == "TCN":
    optimizer = Adam(learning_rate=args.lr,
                     gradient_clip_value=args.grad_clip_value)
else:
    optimizer = GradientDescentMomentum(
        learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)

# Define the loss function (binary cross entropy, since each musical key on the piano is encoded as a binary value)
fwd_prop = model(inputs['X'])
fwd_prop = ng.axes_with_order(fwd_prop, out_axes)
train_loss = ng.cross_entropy_binary(fwd_prop, inputs['y'])

with Layer.inference_mode_on():
    preds = model(inputs['X'])
    preds = ng.axes_with_order(preds, out_axes)
eval_loss = ng.mean(ng.cross_entropy_binary(preds, inputs['y']), out_axes=())
eval_computation = ng.computation([eval_loss], "all")
predict_computation = ng.computation([preds], "all")

# Cost calculation
batch_cost = ng.sequential(
Example #17
# Optimizer
# Initial learning rate is 0.01 (base_lr)
# At iteration (num_iterations // 75), lr is multiplied by gamma (new lr = .95 * .01)
# At iteration (num_iterations * 2 // 75), it is reduced by gamma again
# And so on.
no_steps = 75
step = num_iterations // no_steps
schedule = list(np.arange(step, num_iterations, step))
learning_rate_policy = {
    'name': 'schedule',
    'schedule': schedule,
    'gamma': 0.95,
    'base_lr': 0.01
}
optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    iteration=inputs['iteration'])
# Define the loss function (Cross entropy loss)
# Note that we convert the integer values of input['y'] to one hot here
fwd_prop = seq1(inputs['X'])
train_loss = ng.cross_entropy_multi(fwd_prop,
                                    ng.one_hot(inputs['y'], axis=out_axis),
                                    usebits=True)

# Train cost computation
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_computation = ng.computation([batch_cost, fwd_prop], "all")
train_outputs = dict(batch_cost=batch_cost)

# Forward prop of evaluation set
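As the comments at the top of this example describe, the 'schedule' policy multiplies base_lr by gamma once for every schedule boundary the iteration counter has already passed. A small restatement of that arithmetic in plain Python (an illustration, not the library's code):

def scheduled_lr(iteration, schedule, base_lr=0.01, gamma=0.95):
    # lr = base_lr * gamma ** (number of schedule boundaries already passed)
    boundaries_passed = sum(1 for b in schedule if iteration >= b)
    return base_lr * gamma ** boundaries_passed

# e.g. just after the first boundary: 0.01 * 0.95 == 0.0095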
Example #18
def train_network(model, train_set, valid_set, batch_size, epochs, log_file):
    '''
    Trains the predefined network and saves its progress to the log file
    given in the arguments.

    model(object): Defines the model in Neon
    train_set(object): Defines the training set
    valid_set(object): Defines the validation set
    batch_size(int): Minibatch size
    epochs(int): Number of training epochs
    log_file(string): File name to store training logs for plotting

    '''

    # Form placeholders for inputs to the network
    # Iterations needed for learning rate schedule
    inputs = train_set.make_placeholders(include_iteration=True)

    # Convert labels into one-hot vectors
    one_hot_label = ng.one_hot(inputs['label'], axis=ax.Y)

    learning_rate_policy = {
        'name': 'schedule',
        'schedule': list(np.arange(2, epochs, 2)),
        'gamma': 0.6,
        'base_lr': 0.001
    }

    optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                        momentum_coef=0.9,
                                        wdecay=0.005,
                                        iteration=inputs['iteration'])

    # Define graph for training
    train_prob = model(inputs['video'])
    train_loss = ng.cross_entropy_multi(train_prob, one_hot_label)
    batch_cost = ng.sequential(
        [optimizer(train_loss),
         ng.mean(train_loss, out_axes=())])

    with closing(ngt.make_transformer()) as transformer:

        # Define graph for calculating validation set error and misclassification rate
        # Use inference mode for validation to avoid dropout in forward pass
        with Layer.inference_mode_on():
            inference_prob = model(inputs['video'])
            errors = ng.not_equal(ng.argmax(inference_prob), inputs['label'])
            eval_loss = ng.cross_entropy_multi(inference_prob, one_hot_label)
            eval_outputs = {'cross_ent_loss': eval_loss, 'misclass': errors}

            eval_computation = make_bound_computation(transformer,
                                                      eval_outputs, inputs)

        train_outputs = {'batch_cost': batch_cost}
        train_computation = make_bound_computation(transformer, train_outputs,
                                                   inputs)

        interval_cost = 0.0

        # Train in epochs
        logs = {'train': [], 'validation': [], 'misclass': []}
        for epoch in trange(epochs, desc='Epochs'):

            # Setup the training bar
            numBatches = train_set.ndata // batch_size
            tpbar = tqdm(unit='batches',
                         ncols=100,
                         total=numBatches,
                         leave=False)

            train_set.reset()
            valid_set.reset()

            train_log = []
            for step, data in enumerate(train_set):
                data = dict(data)
                data['iteration'] = epoch  # learning schedule based on epochs
                output = train_computation(data)
                train_log.append(float(output['batch_cost']))

                tpbar.update(1)
                tpbar.set_description("Training {:0.4f}".format(
                    float(output['batch_cost'])))
                interval_cost += float(output['batch_cost'])
            tqdm.write("Epoch {epch}  complete. "
                       "Avg Train Cost {cost:0.4f}".format(epch=epoch,
                                                           cost=interval_cost /
                                                           step))
            interval_cost = 0.0
            tpbar.close()
            validation_loss = run_validation(valid_set, eval_computation)
            tqdm.write("Avg losses: {}".format(validation_loss))
            logs['train'].append(train_log)
            logs['validation'].append(validation_loss['cross_ent_loss'])
            logs['misclass'].append(validation_loss['misclass'])

            # Save log data and plot at the end of each epoch
            with open(log_file, 'wb') as f:
                pickle.dump(logs, f)
            plot_logs(logs=logs)
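For completeness, a hypothetical invocation of train_network; the model object and the two data-set iterators are assumptions, not defined in the snippet above:

# Hypothetical call; video_model, train_set and valid_set are assumed to exist.
train_network(video_model, train_set, valid_set,
              batch_size=32, epochs=10, log_file='training_log.pkl')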
Example #19
inputs = train_set.make_placeholders(include_iteration=args.use_lr_decay)
ax.Y.length = args.num_classes

layers = make_layers(args.use_large, dbpedia_dataset.vocab_size)
seq = Sequential(layers)

if args.use_lr_decay:
    lr_schedule = [(i + 1) * 3 * train_set.nbatches for i in range(10)]
    lr_policy = {
        'name': 'schedule',
        'base_lr': args.lr,
        'schedule': lr_schedule,
        'gamma': 0.5
    }
    optimizer = GradientDescentMomentum(lr_policy,
                                        momentum_coef=args.momentum,
                                        iteration=inputs['iteration'],
                                        wdecay=args.weight_decay)
else:
    optimizer = GradientDescentMomentum(args.lr,
                                        momentum_coef=args.momentum,
                                        wdecay=args.weight_decay)

train_prob = seq(inputs['text'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():