Example No. 1
def build_seq2seq_computations():
    # Training loss, optimizer
    train_decoded = recurrent_model.encode_and_decode(encoder, decoder,
                                                      inputs['X'], previous)
    train_loss = ng.squared_L2(target - train_decoded)
    batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    # Evaluation loss
    with Layer.inference_mode_on():
        eval_decoded = recurrent_model.encode_and_generate(encoder, decoder, inputs['X'], in_axes)
        eval_loss = ng.mean(ng.squared_L2(target - eval_decoded), out_axes=())
    loss_computation = ng.computation([eval_loss], "all")
    return train_computation, loss_computation
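The two returned computations are unbound graph objects. A minimal driver sketch follows; it is an assumption, not part of the original source, and presumes `train_set` yields dicts keyed like `inputs`, mirroring the feed_dict loops shown in the later examples.

# Hypothetical driver for the computations above; `train_set` and `inputs`
# come from the enclosing script, and the feed_dict pattern mirrors the
# training loops later in this listing.
from contextlib import closing
import ngraph.transformers as ngt

train_computation, loss_computation = build_seq2seq_computations()
with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)
    loss_function = transformer.add_computation(loss_computation)
    for step, data in enumerate(train_set):
        feed_dict = {inputs[k]: data[k] for k in inputs.keys()}
        cost = train_function(feed_dict=feed_dict)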
Example No. 2
def build_regressor_computations():
    train_preds = predictions(encoder, affine_layer, inputs['X'])
    train_loss = ng.squared_L2(train_preds - inputs['y'])

    # Cost calculation
    batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    with Layer.inference_mode_on():
        eval_preds = predictions(encoder, affine_layer, inputs['X'])
        eval_loss = ng.mean(ng.squared_L2(eval_preds - inputs['y']), out_axes=())
    loss_computation = ng.computation([eval_loss], "all")

    return train_computation, loss_computation
Example No. 3
def run_mini_ds2_benchmark(args, **kwargs):
    device_id = kwargs.get('device_id')

    inputs, train_set, eval_set = generate_ds2_data(args.max_length,
                                                    args.str_w, args.nout,
                                                    args.nbands,
                                                    args.batch_size,
                                                    args.num_iterations)

    model_out = get_mini_ds2(inputs, args.nfilters, args.filter_width,
                             args.str_w, args.nbands, args.depth,
                             args.hidden_size, args.batch_norm,
                             args.hetr_device, device_id)

    if args.bprop:
        with ng.metadata(device=args.hetr_device,
                         device_id=device_id,
                         parallel=ax.N):
            loss = ng.ctc(model_out, ng.flatten(inputs["char_map"]),
                          inputs["audio_length"], inputs["trans_length"])

            optimizer = GradientDescentMomentum(learning_rate=2e-5,
                                                momentum_coef=0.99,
                                                gradient_clip_norm=400,
                                                nesterov=args.nesterov)

            updates = optimizer(loss)
            mean_cost = ng.sequential([updates, ng.mean(loss, out_axes=())])
            bprop_computation_op = ng.computation(mean_cost, "all")

        benchmark = Benchmark(bprop_computation_op, train_set, inputs,
                              args.backend, args.hetr_device)
        Benchmark.print_benchmark_results(
            benchmark.time(args.num_iterations,
                           args.skip_iter,
                           'ds2_bprop',
                           args.visualize,
                           preprocess=True))
    else:
        fprop_computation_op = ng.computation(model_out, "all")

        benchmark_fprop = Benchmark(fprop_computation_op, train_set, inputs,
                                    args.backend, args.hetr_device)
        Benchmark.print_benchmark_results(
            benchmark_fprop.time(args.num_iterations,
                                 args.skip_iter,
                                 'ds2_fprop',
                                 args.visualize,
                                 preprocess=True))
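A hedged invocation sketch for the benchmark above; the namespace stands in for parsed CLI flags and is wired to exactly the attributes the function reads. Every value below is an illustrative assumption, not a default from the original script.

# Illustrative invocation; none of these values come from the original script.
from types import SimpleNamespace

args = SimpleNamespace(max_length=30, str_w=2, nout=29, nbands=13,
                       batch_size=32, num_iterations=100, nfilters=256,
                       filter_width=11, depth=3, hidden_size=256,
                       batch_norm=True, hetr_device='cpu', bprop=True,
                       nesterov=False, backend='cpu', skip_iter=1,
                       visualize=False)
run_mini_ds2_benchmark(args, device_id=0)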
Example No. 4
def test_persistent_tensor():
    input_axes = ng.make_axes([
        ng.make_axis(10),
        ng.make_axis(3)
    ])
    bgr = ng.persistent_tensor(
        axes=input_axes,
        initial_value=np.array([113.9, 123.0, 125.3]))
    bgr_comp = ng.computation(bgr, "all")

    results = dict()
    weight_saver = Saver()
    # Save the tensor's value under one transformer...
    with closing(ngt.make_transformer()) as transformer:
        bgr_func = transformer.add_computation(bgr_comp)
        weight_saver.setup_save(transformer=transformer, computation=bgr_comp)
        results['saved'] = bgr_func().copy()
        weight_saver.save(filename="test_persistent_tensor")
    # ...then restore it with a fresh transformer and compare the values.
    with closing(ngt.make_transformer()) as restore_transformer:
        bgr_refunc = restore_transformer.add_computation(bgr_comp)
        weight_saver.setup_restore(transformer=restore_transformer, computation=bgr_comp,
                                   filename="test_persistent_tensor")
        weight_saver.restore()
        results['restored'] = bgr_refunc().copy()
    os.remove("test_persistent_tensor.npz")
    assert np.allclose(results['saved'], results['restored'], atol=0)
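The same Saver round trip applies to trained weights: point setup_save at the training computation so the Saver can locate the variables it touches. A minimal sketch, assuming a `train_computation` like those built elsewhere in this listing:

# Sketch only: `train_computation` is assumed to exist; the Saver calls are
# the same ones exercised by the test above.
weight_saver = Saver()
with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)
    weight_saver.setup_save(transformer=transformer,
                            computation=train_computation)
    # ... run training steps here ...
    weight_saver.save(filename="model_weights")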
Example No. 5
def run_mini_ds2_benchmark(max_length,
                           nbands,
                           str_w,
                           batch_size,
                           max_iter,
                           skip_iter,
                           nfilters,
                           filter_width,
                           depth,
                           hidden_size,
                           batch_norm,
                           device_id,
                           device,
                           transformer,
                           visualize=False):
    inputs, train_set, eval_set = generate_ds2_data(max_length, str_w, nbands,
                                                    batch_size, max_iter)
    model_out = get_mini_ds2(inputs, nfilters, filter_width, str_w, nbands,
                             depth, hidden_size, batch_norm, device_id)

    fprop_computation_op = ng.computation(model_out, "all")

    benchmark_fprop = Benchmark(fprop_computation_op, train_set, inputs,
                                transformer, device)
    Benchmark.print_benchmark_results(
        benchmark_fprop.time(max_iter, skip_iter, 'ds2_fprop', visualize))
Example No. 6
def run_resnet_benchmark(dataset,
                         num_iterations,
                         n_skip,
                         batch_size,
                         device_id,
                         transformer_type,
                         device,
                         bprop=True,
                         batch_norm=False,
                         visualize=False,
                         stage_depth=1):
    inputs, data, train_set = get_fake_data(dataset, batch_size,
                                            num_iterations)

    # Build the model's forward-pass output
    model_out = get_mini_resnet(inputs,
                                dataset,
                                device,
                                device_id,
                                batch_norm=batch_norm,
                                stage_depth=stage_depth)

    # Build and benchmark the training computation (fprop + bprop)
    if bprop:
        with ng.metadata(device=device, device_id=device_id, parallel=ax.N):
            optimizer = GradientDescentMomentum(0.01, 0.9)
            train_loss = ng.cross_entropy_multi(
                model_out, ng.one_hot(inputs['label'], axis=ax.Y))

            batch_cost = ng.sequential(
                [optimizer(train_loss),
                 ng.mean(train_loss, out_axes=())])
            batch_cost_computation_op = ng.computation(batch_cost, "all")
        benchmark = Benchmark(batch_cost_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(num_iterations, n_skip, dataset + '_msra_bprop',
                           visualize, 'device_id'))
    else:
        fprop_computation_op = ng.computation(model_out, 'all')
        benchmark = Benchmark(fprop_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(num_iterations, n_skip, dataset + '_msra_fprop',
                           visualize))
Example No. 7
    def __init__(self,
                 tuning_parameters,
                 name="",
                 global_network=None,
                 network_is_local=True):
        Architecture.__init__(self, tuning_parameters, name)
        assert tuning_parameters.agent.neon_support, 'Neon is not supported for this agent'
        self.clip_error = tuning_parameters.clip_gradients
        self.total_loss = None
        self.epoch = 0
        self.inputs = []
        self.outputs = []
        self.targets = []
        self.losses = []

        self.transformer = tuning_parameters.sess
        self.network = self.get_model(tuning_parameters)
        self.accumulated_gradients = []

        # training and inference ops
        train_output = ng.sequential(
            [self.optimizer(self.total_loss), self.total_loss])
        placeholders = self.inputs + self.targets
        self.train_op = self.transformer.add_computation(
            ng.computation(train_output, *placeholders))
        self.predict_op = self.transformer.add_computation(
            ng.computation(self.outputs, self.inputs[0]))

        # update weights from array op
        self.weights = [
            ng.placeholder(w.axes) for w in self.total_loss.variables()
        ]
        self.set_weights_ops = []
        for target_variable, variable in zip(self.total_loss.variables(),
                                             self.weights):
            self.set_weights_ops.append(
                self.transformer.add_computation(
                    ng.computation(ng.assign(target_variable, variable),
                                   variable)))

        # get weights op
        self.get_variables = self.transformer.add_computation(
            ng.computation(self.total_loss.variables()))
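A hedged sketch of a weight round trip using the ops built above: `get_variables` returns the current values in variable order, and each entry of `set_weights_ops` writes one array back through its placeholder. `network` is an illustrative instance, not from the original source.

# Illustrative only; relies solely on attributes defined in __init__ above.
values = network.get_variables()             # current variable values
for set_op, value in zip(network.set_weights_ops, values):
    set_op(value)                            # assign one variable's value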
Example No. 8
def test_conv1d(transformer_factory, filter_width, num_filters, strides,
                padding, time_steps, feature_dimension, batch_size):

    dilation = 1  # reference conv does not support dilation

    F = ng.make_axis(name='F', length=feature_dimension)
    REC = ng.make_axis(name='REC', length=time_steps)
    N = ng.make_axis(name='N', length=batch_size)
    in_axes = ng.make_axes([F, REC, N])

    inputs = ng.placeholder(axes=in_axes)
    input_vals = np.random.randn(*in_axes.lengths)

    filter_init = GaussianInit()

    conv1d = Convolution((filter_width, num_filters),
                         filter_init,
                         strides=strides,
                         padding=padding,
                         dilation=dilation,
                         bias_init=None,
                         activation=Rectlin(),
                         batch_norm=None)

    result_op = conv1d(inputs, channel_axes='F', spatial_axes={'W': 'REC'})

    with closing(ngt.make_transformer()) as transformer:
        result_comp = transformer.add_computation(
            ng.computation(result_op, inputs))
        filter_vals = transformer.add_computation(ng.computation(
            conv1d.conv.W))()

        result_ng = result_comp(input_vals)
        result_np = np.squeeze(
            reference_conv1d(input_vals, filter_vals,
                             lambda x: np.maximum(0, x)))
        ng.testing.assert_allclose(result_ng, result_np)
Example No. 9
def run_resnet_benchmark(dataset, n_iter, n_skip, batch_size, device_id,
                         transformer_type, device, bprop=False, visualize=False):
    inputs, data, train_set = get_fake_data(dataset, batch_size, n_iter)
    model_out = get_mini_resnet(inputs, dataset, device_id)

    # Running forward propagation
    fprop_computation_op = ng.computation(model_out, 'all')
    benchmark_fprop = Benchmark(fprop_computation_op, train_set, inputs, transformer_type, device)
    Benchmark.print_benchmark_results(benchmark_fprop.time(n_iter, n_skip,
                                                           dataset + '_msra_fprop', visualize))

    # Running back propagation
    if bprop:
        optimizer = GradientDescentMomentum(0.01, 0.9)
        train_loss = ng.cross_entropy_multi(model_out,
                                            ng.one_hot(inputs['label'], axis=ax.Y))

        batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
        batch_cost_computation_op = ng.computation(batch_cost, "all")

        benchmark = Benchmark(batch_cost_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(benchmark.time(n_iter, n_skip,
                                          dataset + '_msra_bprop', visualize))
Example No. 10
def test_variable():
    input_axes = ng.make_axes([
        ng.make_axis(10),
        ng.make_axis(3)
    ])
    var = ng.variable(axes=input_axes)
    assign_val = np.random.rand(10, 3)
    var_assign = ng.AssignOp(tensor=var, val=assign_val)
    var_seq = ng.sequential([var_assign, var])
    var_comp = ng.computation(var_seq, "all")
    results = dict()
    weight_saver = Saver()
    with closing(ngt.make_transformer()) as transformer:
        var_func = transformer.add_computation(var_comp)
        weight_saver.setup_save(transformer=transformer, computation=var_comp)
        results['saved'] = var_func().copy()
        weight_saver.save(filename="test_variable")

    # Assign a new value, then restore the saved one to verify the round trip.
    reassign_val = np.random.rand(10, 3)
    var_reassign = ng.AssignOp(tensor=var, val=reassign_val)

    var_recomp = ng.computation(var_reassign, "all")
    var_read = ng.computation(var, "all")
    with closing(ngt.make_transformer()) as restore_transformer:
        var_recompfunc = restore_transformer.add_computation(var_recomp)
        weight_saver.setup_restore(transformer=restore_transformer, computation=var_recomp,
                                   filename="test_variable")
        var_readfunc = restore_transformer.add_computation(var_read)
        var_recompfunc()
        results['reassigned'] = var_readfunc().copy()
        weight_saver.restore()
        results['restored'] = var_readfunc().copy()
    os.remove("test_variable.npz")
    assert np.allclose(results['saved'], assign_val, atol=0)
    assert np.allclose(results['reassigned'], reassign_val, atol=0)
    assert np.allclose(results['saved'], results['restored'], atol=0)
Example No. 11
def test_deconv():
    """
    Basic test of deconv fprop.
    ngraph/tests/test_conv.py tests ng.deconvolution bprop.
    """

    # filter params
    R, S = 5, 5
    fshape = (R, S, 1)
    strides = 2
    filter_val_nz = np.arange(1, R * S + 1).reshape(R, S)
    filter_val = np.zeros(fshape)
    filter_val[:, :, 0] = filter_val_nz

    deconv = Deconvolution(fshape,
                           filter_init=ConstantInit(filter_val),
                           strides=strides,
                           padding=0,
                           dilation=1)

    N = ng.make_axis(name='N', length=1)  # batch
    image_shape = (1, 8, 8)  # CHW
    image_axes = ng.make_axes(
        [ng.make_axis(name=nm, length=l) for nm, l in zip('CHW', image_shape)])
    image_axes |= N
    image = ng.placeholder(axes=image_axes)

    output = deconv(image)

    with closing(ngt.make_transformer()) as transformer:
        comp = transformer.add_computation(ng.computation(output, image))
        input_val = np.zeros(image_shape + (N.length, ), dtype=float)
        input_val[0, 0, 0] = 1
        input_val[0, 5, 5] = 1
        input_val[0, 7, 7] = 1
        result = comp(input_val)
        feature_map = np.squeeze(result)

        assert (feature_map[:5, :5] == filter_val_nz).all()

        result2 = filter_val_nz.copy()
        result2[-1, -1] = 26
        assert (feature_map[10:15, 10:15] == result2).all()

        result3 = filter_val_nz.copy()
        result3[0, 0] = 26
        assert (feature_map[-5:, -5:] == result3).all()
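For reference, the geometry the asserts rely on: with padding 0, dilation 1, and stride 2, the transposed convolution maps the 8x8 input to (8 - 1) * 2 + 5 = 19 pixels per spatial dimension. Each input impulse stamps a 5x5 copy of the filter at twice its input offset, so the impulses at (5, 5) and (7, 7) land at output offsets 10 and 14, and their stamps overlap in a single corner where contributions add: 25 + 1 = 26, which is what the last two asserts check.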
Example No. 12
def run_cifar_benchmark(n_iter=10,
                        n_skip=5,
                        batch_size=4,
                        transformer_type='cpu'):
    inputs, data, train_set = get_fake_cifar(batch_size, n_iter)
    model = get_mini_resnet(inputs)
    optimizer = GradientDescentMomentum(0.01, 0.9)

    train_loss = ng.cross_entropy_multi(model(inputs['image']),
                                        ng.one_hot(inputs['label'], axis=ax.Y))

    batch_cost = ng.sequential(
        [optimizer(train_loss),
         ng.mean(train_loss, out_axes=())])
    batch_cost_computation_op = ng.computation(batch_cost, "all")

    feed_dict = fill_feed_dict(train_set, inputs)
    benchmarks = dict()
    # Note: batch_cost_computation_op includes the optimizer update, so this
    # benchmark times fprop + bprop despite the 'fprop' key.
    benchmarks['cifar_msra_fprop'] = run_benchmark(batch_cost_computation_op,
                                                   transformer_type, feed_dict,
                                                   n_skip, n_iter)
    print_benchmark_results(benchmarks)
Example No. 13
    learning_rate_policy = {
        'name': 'schedule',
        'schedule': [32000, 48000],
        'gamma': 0.1,
        'base_lr': 0.1
    }

    optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                        momentum_coef=0.9,
                                        wdecay=0.0001,
                                        iteration=inputs['iteration'])
    label_indices = inputs['label']
    train_loss = ng.cross_entropy_multi(resnet(inputs['image']),
                                        ng.one_hot(label_indices, axis=ax.Y))
    batch_cost = ng.sequential(
        [optimizer(train_loss),
         ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    with Layer.inference_mode_on():
        inference_prob = resnet(inputs['image'])
        errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                              label_indices)
        eval_loss = ng.cross_entropy_multi(
            inference_prob, ng.one_hot(label_indices, axis=ax.Y))
        eval_loss_names = ['cross_ent_loss', 'misclass']
        eval_computation = ng.computation([eval_loss, errors], "all")

    # Now bind the computations we are interested in
    transformer = ngt.make_transformer()
    train_function = transformer.add_computation(train_computation)
    eval_function = transformer.add_computation(eval_computation)
Example No. 14
def build_generator_computation():
    with Layer.inference_mode_on():
        generated = recurrent_model.encode_and_generate(encoder, decoder, inputs['X'], in_axes)
    return ng.computation([generated], "all")
Example No. 15
def build_regressor_prediction():
    with Layer.inference_mode_on():
        eval_preds = predictions(encoder, affine_layer, inputs['X'])
    return ng.computation([eval_preds], "all")
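Both of these helpers return unbound computations; as elsewhere in this listing, they become callable only after being added to a transformer. A minimal sketch, where `x_batch` is an illustrative NumPy array shaped like `inputs['X']`:

# Sketch: bind the prediction computation, then evaluate one batch.
with closing(ngt.make_transformer()) as transformer:
    predict_function = transformer.add_computation(build_regressor_prediction())
    preds = predict_function(feed_dict={inputs['X']: x_batch})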
Example No. 16
lr_schedule = {
    'name': 'schedule',
    'base_lr': 0.01,
    'gamma': (1 / 250.)**(1 / 3.),
    'schedule': [22, 44, 65]
}
optimizer = GradientDescentMomentum(lr_schedule,
                                    0.0,
                                    wdecay=0.0005,
                                    iteration=inputs['iteration'])
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)

    if args.no_progress_bar:
        ncols = 0
    else:
        ncols = 100

    tpbar = tqdm(unit="batches", ncols=ncols, total=args.num_iterations)
    interval_cost = 0.0

    for step, data in enumerate(train_set):
        data['iteration'] = step
        feed_dict = {inputs[k]: data[k] for k in inputs.keys()}
Example No. 17
train_loss_main = ng.cross_entropy_multi(train_prob_main,
                                         y_onehot,
                                         enable_softmax_opt=False)

train_prob_aux = inception.seq_aux(inception.seq1(inputs['image']))
train_prob_aux = ng.map_roles(train_prob_aux, {"C": ax.Y.name})
train_loss_aux = ng.cross_entropy_multi(train_prob_aux,
                                        y_onehot,
                                        enable_softmax_opt=False)

batch_cost = ng.sequential([
    optimizer(train_loss_main + 0.4 * train_loss_aux),
    ng.mean(train_loss_main, out_axes=())
])

train_computation = ng.computation([batch_cost], 'all')

# Build the computations for inference (evaluation)
with Layer.inference_mode_on():
    inference_prob = inception.seq2(inception.seq1(inputs['image']))
    slices = [
        0 if cx.name in ("H", "W") else slice(None)
        for cx in inference_prob.axes
    ]
    inference_prob = ng.tensor_slice(inference_prob, slices)
    inference_prob = ng.map_roles(inference_prob, {"C": "Y"})
    errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                          inputs['label'])
    eval_loss = ng.cross_entropy_multi(inference_prob,
                                       y_onehot,
                                       enable_softmax_opt=False)
Example No. 18
with ng.metadata(device=device_hetr, device_id=device_id, parallel=ax.N):
    optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                        momentum_coef=momentum_coef,
                                        wdecay=wdecay,
                                        nesterov=False,
                                        iteration=input_ops_train['iteration'])
    # Make a prediction
    prediction = resnet(input_ops_train['image'])
    # Calculate loss
    train_loss = ng.cross_entropy_multi(
        prediction, ng.one_hot(input_ops_train['label'], axis=ax.Y))
    # Average loss over the batch
    batch_cost = ng.sequential(
        [optimizer(train_loss),
         ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

# Instantiate the Saver object to save weights
weight_saver = Saver()

with ng.metadata(device=device_hetr, device_id=device_id, parallel=ax.N):
    # Inference
    with Layer.inference_mode_on():
        # Doing inference
        inference_prob = resnet(input_ops_valid['image'])
        eval_loss = ng.cross_entropy_multi(
            inference_prob, ng.one_hot(input_ops_valid['label'], axis=ax.Y))
        # Computation for inference
        eval_computation = ng.computation(
            [inference_prob, eval_loss, input_ops_valid['label']], "all")
Example No. 19
    optimizer = GradientDescentMomentum(
        args.lr,
        momentum_coef=args.momentum,
        gradient_clip_norm=args.gradient_clip_norm,
        nesterov=args.nesterov)

    start = time.time()
    updates = optimizer(loss)
    stop = time.time()
    logger.debug("Optimizer graph creation took {} seconds".format(stop - start))
    mean_cost = ng.sequential([updates, ng.mean(loss, out_axes=())])

    # Create computation and initialize the transformer to allocate weights
    train_computation = ng.computation([mean_cost, output], "all")
    if inference is True:
        with Layer.inference_mode_on():
            eval_output = ds2(inputs["audio"],
                              spatial_axes={
                                  "H": "frequency",
                                  "W": "time"
                              })
        eval_computation = ng.computation(eval_output, "all")

    # Now bind the computations we are interested in
    with closing(ngt.make_transformer()) as transformer:
        train_function = transformer.add_computation(train_computation)
        if inference is True:
            eval_function = transformer.add_computation(eval_computation)
Example No. 20
# inference graph
with Layer.inference_mode_on():
    enc_out_inference = enc(one_hot_enc_out)

    # Create decoder placeholders
    axes = one_hot_dec_out.axes
    axes = axes - axes.recurrent_axis() + ng.make_axis(length=1, name="REC")
    decoder_input_inference = ng.placeholder(axes, name="input")
    decoder_state_inference = ng.placeholder(enc_out_inference.axes,
                                             name="state")
    dec_out_inference = dec(decoder_input_inference,
                            init_state=decoder_state_inference)
    inference_out = linear(dec_out_inference)

encoder_computation = ng.computation(enc_out_inference, inputs["inp_txt"])
decoder_computation = ng.computation([inference_out, dec_out_inference],
                                     decoder_input_inference,
                                     decoder_state_inference)

######################
# Train Loop

# Now bind the computations we are interested in
with closing(ngt.make_transformer()) as transformer:
    # training computations
    train_computation = make_bound_computation(transformer, train_outputs,
                                               inputs)
    loss_computation = make_bound_computation(transformer, loss_outputs,
                                              inputs)
Example No. 21
# (This excerpt opens inside a conditional that selects the optimizer.)
    optimizer = Adam(learning_rate=args.lr,
                     gradient_clip_value=args.grad_clip_value)
else:
    optimizer = GradientDescentMomentum(
        learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)

# Define the loss function (binary cross entropy, since each piano key
# is encoded as a binary value)
fwd_prop = model(inputs['X'])
fwd_prop = ng.axes_with_order(fwd_prop, out_axes)
train_loss = ng.cross_entropy_binary(fwd_prop, inputs['y'])

with Layer.inference_mode_on():
    preds = model(inputs['X'])
    preds = ng.axes_with_order(preds, out_axes)
eval_loss = ng.mean(ng.cross_entropy_binary(preds, inputs['y']), out_axes=())
eval_computation = ng.computation([eval_loss], "all")
predict_computation = ng.computation([preds], "all")

# Cost calculation
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

trainer = TimeseriesTrainer(optimizer,
                            train_computation,
                            eval_computation,
                            predict_computation,
                            inputs,
                            model_graph=[model],
                            tensorboard_dir="./tfboard")
Example No. 22
lr_schedule = {
    'name': 'schedule',
    'base_lr': 0.01,
    'gamma': (1 / 250.)**(1 / 3.),
    'schedule': [22, 44, 65]
}

optimizer = GradientDescentMomentum(lr_schedule,
                                    0.0,
                                    wdecay=0.0005,
                                    iteration=inputs['iteration'])
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, 'all')

with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)

    if args.no_progress_bar:
        ncols = 0
    else:
        ncols = 100

    tpbar = tqdm(unit="batches", ncols=ncols, total=args.num_iterations)
    interval_cost = 0.0

    for step, data in enumerate(train_set):
        data['iteration'] = step
        feed_dict = {inputs[k]: data[k] for k in inputs.keys()}
Example No. 23
    def __init__(self,
                 state_axes,
                 action_size,
                 batch_size,
                 model,
                 learning_rate=0.0001):
        """
        For now, `model` must be a function that takes action_axes and
        returns a neon container.
        """
        super(ModelWrapper, self).__init__()

        self.axes = Namespace()
        self.axes.state = make_axes(state_axes, name='state')
        self.axes.action = ng.make_axis(name='action', length=action_size)
        self.axes.n = ng.make_axis(name='N', length=batch_size)
        self.axes.n1 = ng.make_axis(name='N', length=1)

        # placeholders
        self.state = ng.placeholder(self.axes.state + [self.axes.n])
        self.state_single = ng.placeholder(self.axes.state + [self.axes.n1])
        self.target = ng.placeholder([self.axes.action, self.axes.n])

        # these q functions have the same structure but different variables
        self.q_function = model(self.axes.action)
        self.q_function_target = model(self.axes.action)

        # construct inference computation
        with neon.Layer.inference_mode_on():
            inference = self.q_function(self.state)
        inference_computation = ng.computation(inference, self.state)

        # construct inference target computation
        with neon.Layer.inference_mode_on():
            inference_target = self.q_function_target(self.state)
        inference_target_computation = ng.computation(inference_target,
                                                      self.state)

        # construct inference computation for evaluating a single observation
        with neon.Layer.inference_mode_on():
            inference_single = self.q_function(self.state_single)
        inference_computation_single = ng.computation(inference_single,
                                                      self.state_single)

        # update q function target weights with values from q function
        # assumes that the variables in each are in the same order
        update_computation = ng.computation(
            ng.doall([
                ng.assign(target_variable,
                          ng.cast_axes(variable, target_variable.axes))
                for target_variable, variable in zip(
                    self.q_function_target.variables.values(),
                    self.q_function.variables.values())
            ]))

        # construct training computation
        loss = ng.squared_L2(self.q_function(self.state) - self.target)

        optimizer = neon.RMSProp(
            learning_rate=learning_rate,
            gradient_clip_value=1,
        )

        train_output = ng.sequential([
            optimizer(loss),
            loss,
        ])

        train_computation = ng.computation(train_output, self.state,
                                           self.target)

        # now bind computations we are interested in
        self.transformer = ng.transformers.make_transformer()
        self.inference_function = self.transformer.add_computation(
            inference_computation)
        self.inference_target_function = self.transformer.add_computation(
            inference_target_computation)
        self.inference_function_single = self.transformer.add_computation(
            inference_computation_single)
        self.train_function = self.transformer.add_computation(
            train_computation)
        self.update_function = self.transformer.add_computation(
            update_computation)

        # run a single update to ensure that both q functions have the same
        # initial weights
        self.update()
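A hedged sketch of the DQN-style cycle these bound functions support; `wrapper`, `state_batch`, and `target_batch` are illustrative stand-ins, not from the original source.

# Illustrative only; uses just the functions bound in __init__ above.
q_values = wrapper.inference_function(state_batch)               # batched fprop
loss_value = wrapper.train_function(state_batch, target_batch)   # one RMSProp step
wrapper.update_function()  # copy online-network weights into the target network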
Example No. 24
# Cost calculation
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

# Forward prop of test set
# Required for correct functioning of batch norm and dropout layers during inference mode
with Layer.inference_mode_on():
    inference_prob = seq1(inputs['X'])
eval_loss = ng.squared_L2(inference_prob - inputs['y'])
eval_outputs = dict(l2_loss=eval_loss)

# Define computations
print('Start training')
eval_computation = ng.computation(inference_prob, "all")
with closing(ngt.make_transformer()) as transformer:
    train_computation = make_bound_computation(transformer, train_outputs,
                                               inputs)
    loss_computation = make_bound_computation(transformer, eval_outputs,
                                              inputs)
    eval_function = transformer.add_computation(eval_computation)

    # Printout interval of the validation set loss during training
    iter_interval = num_iterations // 10

    cbs = make_default_callbacks(transformer=transformer,
                                 output_file=args.output_file,
                                 frequency=iter_interval,
                                 train_computation=train_computation,
Example No. 25
learning_rate_policy = {
    'base_lr': 0.01
}
optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    iteration=inputs['iteration'])
# Define the loss function (Cross entropy loss)
# Note that we convert the integer values of input['y'] to one hot here
fwd_prop = seq1(inputs['X'])
train_loss = ng.cross_entropy_multi(fwd_prop,
                                    ng.one_hot(inputs['y'], axis=out_axis),
                                    usebits=True)

# Train cost computation
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_computation = ng.computation([batch_cost, fwd_prop], "all")
train_outputs = dict(batch_cost=batch_cost)

# Forward prop of evaluation set
# Required for correct functioning of batch norm and dropout layers during inference mode
with Layer.inference_mode_on():
    inference_prop = seq1(inputs['X'])
eval_loss = ng.cross_entropy_multi(inference_prop,
                                   ng.one_hot(inputs['y'], axis=out_axis),
                                   usebits=True)
eval_computation = ng.computation(
    [ng.mean(eval_loss, out_axes=()), inference_prop], "all")
eval_outputs = dict(x_ent_loss=eval_loss)

# Computation for text generation - this is pure inference (fwd prop)
gen_computation = ng.computation(inference_prop, "all")
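As with the other examples, these three computations would be bound to a transformer before use; a minimal sketch following the binding pattern shown earlier in this listing:

# Sketch: bind the training, evaluation, and generation computations.
with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)
    eval_function = transformer.add_computation(eval_computation)
    generate_function = transformer.add_computation(gen_computation)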