Example #1
def test_logreg(transformer_factory):
    # xs: (C, N), y: (N,)
    xs = np.array([[0.52, 0.88, 0.52, 0.74], [1.12, -1.08, 0.06, -2.49],
                   [0.77, 0.15, -1.3, 1.39]])
    ys = np.array([1, 1, 0, 1])
    max_iter = 10
    alpha = 0.1
    thetas = np.array([0., 0., 0.])

    np_logreg = NumpyLogreg(xs, ys, thetas)

    C, N = ng.make_axis(length=3), ng.make_axis(length=4)

    # input tensors
    xs_v = ng.placeholder((C, N))
    ys_v = ng.placeholder([N])
    alpha_v = ng.placeholder(())
    thetas_var = ng.variable([C], initial_value=thetas)

    # define ops
    ys_pred = ng.sigmoid(ng.dot(thetas_var, xs_v))
    log_likelihoods = ng.log(ys_pred) * ys_v + ng.log(1 - ys_pred) * (1 - ys_v)
    loss = -ng.sum(log_likelihoods, reduction_axes=[N])
    grad_comp = ng.deriv(loss, thetas_var)
    weight_update = ng.sequential(
        [ng.assign(thetas_var, thetas_var - alpha_v * grad_comp), thetas_var])

    # transformer
    with ExecutorFactory() as ex:
        train_eval_func = ex.executor([grad_comp, loss, weight_update], xs_v,
                                      ys_v, alpha_v)

        # evaluate
        for i in range(max_iter):
            grad_np, loss_np, thetas_np = np_logreg.optimize(alpha)
            grad_ng, loss_ng, thetas_ng = train_eval_func(xs, ys, alpha)
            ng.testing.assert_allclose(loss_np, loss_ng)
            ng.testing.assert_allclose(grad_np, grad_ng)
            ng.testing.assert_allclose(thetas_np, thetas_ng)
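
The test above compares the ngraph graph against a NumPy reference, NumpyLogreg, whose definition is not shown here. A minimal sketch of what that reference could look like, assuming optimize performs one gradient-descent step on the negative log-likelihood (the class body below is an assumption, not the original):

import numpy as np

class NumpyLogreg(object):
    # Hypothetical reference implementation; only the interface the test
    # uses (constructor plus optimize) is reproduced here.
    def __init__(self, xs, ys, thetas):
        self.xs, self.ys, self.thetas = xs, ys, thetas

    def optimize(self, alpha):
        # forward pass: sigmoid(thetas . xs), shapes (C,) x (C, N) -> (N,)
        preds = 1.0 / (1.0 + np.exp(-np.dot(self.thetas, self.xs)))
        # negative log-likelihood, matching the `loss` op above
        loss = -np.sum(self.ys * np.log(preds) +
                       (1.0 - self.ys) * np.log(1.0 - preds))
        # d(loss)/d(thetas) = -xs . (ys - preds)
        grad = -np.dot(self.xs, self.ys - preds)
        self.thetas = self.thetas - alpha * grad
        return grad, loss, self.thetas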
Example #2
    def __call__(self, cost_func, variable_scope=None):
        self._pre_call_hook()
        all_updates = []
        batch_cost = ng.sum(cost_func, out_axes=())
        batch_size = cost_func.axes.batch_axis().length

        selected_variables = batch_cost.variables()
        if variable_scope is not None:
            selected_variables = [
                op for op in selected_variables if op.scope == variable_scope
            ]
        grads = [
            ng.deriv(batch_cost, v) / batch_size for v in selected_variables
        ]
        scale_factor = clip_gradient_norm(grads, self.gradient_clip_norm)

        for variable, grad in zip(selected_variables, grads):
            updates = self.variable_update(variable, grad, scale_factor)
            all_updates.append(updates)
        updates = ng.doall(all_updates)
        grads = ng.doall(grads)
        return ng.sequential([grads, updates, 0])
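
The scale_factor returned by clip_gradient_norm rescales every gradient when the global gradient norm exceeds self.gradient_clip_norm. A NumPy sketch of the assumed semantics (the real helper builds ngraph ops rather than operating on arrays):

import numpy as np

def clip_gradient_norm_reference(grads, clip_norm=None):
    # Assumed behavior: return 1 when clipping is disabled; otherwise
    # return a factor that leaves gradients unchanged while their joint
    # L2 norm is below clip_norm and shrinks them to that norm above it.
    if clip_norm is None:
        return 1.0
    total_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    return clip_norm / max(total_norm, clip_norm)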
Example #3
def run_cifar_benchmark(n_iter=10,
                        n_skip=5,
                        batch_size=4,
                        transformer_type='cpu'):
    inputs, data, train_set = get_fake_cifar(batch_size, n_iter)
    model = get_mini_resnet(inputs)
    optimizer = GradientDescentMomentum(0.01, 0.9)

    train_loss = ng.cross_entropy_multi(model(inputs['image']),
                                        ng.one_hot(inputs['label'], axis=ax.Y))

    batch_cost = ng.sequential(
        [optimizer(train_loss),
         ng.mean(train_loss, out_axes=())])
    batch_cost_computation_op = ng.computation(batch_cost, "all")

    feed_dict = fill_feed_dict(train_set, inputs)
    benchmarks = dict()
    benchmarks['cifar_msra_fprop'] = run_benchmark(batch_cost_computation_op,
                                                   transformer_type, feed_dict,
                                                   n_skip, n_iter)
    print_benchmark_results(benchmarks)
Example #4
def run_resnet_benchmark(dataset,
                         num_iterations,
                         n_skip,
                         batch_size,
                         device_id,
                         transformer_type,
                         device,
                         bprop=True,
                         visualize=False):
    inputs, data, train_set = get_fake_data(dataset, batch_size,
                                            num_iterations)

    # Running forward propagation
    model_out = get_mini_resnet(inputs, dataset, device_id)

    # Running back propagation
    if bprop:
        with ng.metadata(device_id=device_id, parallel=ax.N):
            optimizer = GradientDescentMomentum(0.01, 0.9)
            train_loss = ng.cross_entropy_multi(
                model_out, ng.one_hot(inputs['label'], axis=ax.Y))

            batch_cost = ng.sequential(
                [optimizer(train_loss),
                 ng.mean(train_loss, out_axes=())])
            batch_cost_computation_op = ng.computation(batch_cost, "all")
        benchmark = Benchmark(batch_cost_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(num_iterations, n_skip, dataset + '_msra_bprop',
                           visualize, 'device_id'))
    else:
        fprop_computation_op = ng.computation(model_out, 'all')
        benchmark = Benchmark(fprop_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(num_iterations, n_skip, dataset + '_msra_fprop',
                           visualize))
Example #5
def test_weight_clipping(w_clip, optimizer):
    opt_ng = optimizer(0.1, weight_clip_value=w_clip)
    if isinstance(opt_ng, Adam):
        pytest.config.argon_skip_now("Argon Transformer error")  # TODO triage

    # Set up data placeholders
    C = ng.make_axis(20)
    N = ng.make_axis(32, name='N')

    data = ng.placeholder([C, N])
    target = ng.placeholder([N])

    # params to be updated using optimizer to be tested
    # make sure initial values are higher than clip values
    np_W = 10 * w_clip * (2 * np.random.rand(C.length) - 1)
    W = ng.variable([C], initial_value=np_W)

    # double check generated initial W value
    assert np.max(np_W) > w_clip
    assert np.min(np_W) < -w_clip

    # Set up op graph
    cost = ng.sum(target - ng.dot(W, data), out_axes=())

    updated_weights = ng.sequential([opt_ng(cost), W])

    epsilon = w_clip * 1e-3
    # Set up the computation and run the "train" loop
    with ExecutorFactory() as ex:
        opt_ng_comp = ex.transformer.computation(updated_weights, data, target)
        mock_dataset = data_generator(20, C.length, N.length)

        for x, y in mock_dataset:
            ng_W = opt_ng_comp(x, y)  # updated weights for ngraph optimizer

            assert np.max(ng_W) < w_clip + epsilon
            assert np.min(ng_W) > -w_clip - epsilon
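
The assertions above assume weight_clip_value clips each weight elementwise into [-w_clip, w_clip] after every update. In NumPy terms that would amount to (a sketch of the assumed semantics, not neon's implementation):

import numpy as np

def clip_weights(weights, w_clip):
    # elementwise clip into [-w_clip, w_clip]; this is what the max/min
    # assertions in the training loop check for, up to epsilon
    return np.clip(weights, -w_clip, w_clip)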
Example #6
def test_setting(M):
    with ExecutorFactory() as ex:
        axes = ng.make_axes([M])

        np_x = np.array([1, 2, 3], dtype=np.float32)
        np_y = np.array([1, 3, 5], dtype=np.float32)

        y = ng.constant(np_y, axes)

        v = ng.variable(axes, initial_value=np_x)

        f_v = ex.executor(v)

        vset = ng.sequential([ng.assign(v, v + y), v])
        f_v1 = ex.executor(vset)

        f_v2 = ex.executor(v)

        e_v = f_v().copy()
        assert ng.testing.allclose(e_v, np_x)
        e_v1 = f_v1().copy()
        assert ng.testing.allclose(e_v1, np_x + np_y)
        e_v2 = f_v2().copy()
        assert ng.testing.allclose(e_v2, np_x + np_y)
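
The idiom this test exercises is that ng.sequential executes its ops in order and evaluates to the last one, so [ng.assign(v, v + y), v] yields v after the in-place update, and the update persists across separately built computations. The same idiom in isolation (the axis name and values are illustrative):

import numpy as np
import ngraph as ng

M = ng.make_axis(length=3, name='M')
v = ng.variable([M], initial_value=np.zeros(3, dtype=np.float32))
# a single executable op: increment v, then return its updated value
increment = ng.sequential([ng.assign(v, v + 1), v])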
Example #7
def test_variable():
    input_axes = ng.make_axes([
        ng.make_axis(10),
        ng.make_axis(3)
    ])
    var = ng.variable(axes=input_axes)
    assign_val = np.random.rand(10, 3)
    var_assign = ng.AssignOp(tensor=var, val=assign_val)
    var_seq = ng.sequential([var_assign, var])
    var_comp = ng.computation(var_seq, "all")
    results = dict()
    weight_saver = Saver()
    with closing(ngt.make_transformer()) as transformer:
        var_func = transformer.add_computation(var_comp)
        weight_saver.setup_save(transformer=transformer, computation=var_comp)
        results['saved'] = var_func().copy()
        weight_saver.save(filename="test_variable")

    reassign_val = np.random.rand(10, 3)
    var_reassign = ng.AssignOp(tensor=var, val=reassign_val)

    var_recomp = ng.computation(var_reassign, "all")
    var_read = ng.computation(var, "all")
    with closing(ngt.make_transformer()) as restore_transformer:
        var_recompfunc = restore_transformer.add_computation(var_recomp)
        weight_saver.setup_restore(transformer=restore_transformer, computation=var_recomp,
                                   filename="test_variable")
        var_readfunc = restore_transformer.add_computation(var_read)
        var_recompfunc()
        results['reassigned'] = var_readfunc().copy()
        weight_saver.restore()
        results['restored'] = var_readfunc().copy()
    os.remove("test_variable.npz")
    assert np.allclose(results['saved'], assign_val, atol=0)
    assert np.allclose(results['reassigned'], reassign_val, atol=0)
    assert np.allclose(results['saved'], results['restored'], atol=0)
Example #8
File: train.py Project: rsumner31/ngraph
inputs = make_placeholders(args.batch_size, cs_loader)

model = WideDeepClassifier(cs_loader.parameters['dimensions_embeddings'],
                           cs_loader.parameters['tokens_in_embeddings'],
                           fc_layers_deep,
                           deep_activation_fn=Rectlin())

wide_deep = model(args.batch_size, inputs)

loss = ng.cross_entropy_binary(wide_deep, inputs['Y'])

optimizer = Adagrad(args.learning_rate)

# recall that optimizer does not generate output

batch_cost = ng.sequential([optimizer(loss), ng.sum(loss, out_axes=())])


def compute_accuracy(data):
    accuracy = 0.0
    total = 0.0

    for value in data.values():

        x_d = value[0]
        x_w = value[1]
        x_e = value[2]
        y = value[3]

        wide_features = x_w
        deep_features = x_d
Example #9
    def __call__(self, in_obj, init_state=None):
        """
        Sets shape-based parameters of this layer given an input tuple or int
        or input layer.

        Arguments:
            in_obj (int, tuple, Layer or Tensor): object that provides shape
                                                 information for layer
            init_state (tuple of Tensor): object that provides initial state, and in LSTM,
                                          it includes hidden state, and cell states

        Returns:
            rnn_out (Tensor): output

        """
        # try to understand the axes from the input
        if init_state is not None:
            assert len(init_state) == 2 and init_state[0].axes == init_state[1].axes
            self.interpret_axes(in_obj, init_state[0])
        else:
            self.interpret_axes(in_obj, init_state)

        # initialize the hidden states
        if init_state is not None:
            self.h_init = init_state[0]
            self.c_init = init_state[1]
        else:
            if self.reset_cells:
                self.h_init = ng.temporary(initial_value=0,
                                           axes=self.out_axes).named('h_init')
                self.c_init = ng.temporary(initial_value=0,
                                           axes=self.out_axes).named('c_init')
            else:
                self.h_init = ng.variable(initial_value=0,
                                          axes=self.out_axes).named('h_init')
                self.c_init = ng.variable(initial_value=0,
                                          axes=self.out_axes).named('c_init')

        # params are dictionary for i, f, o, g
        self.W_input = {k: ng.variable(axes=self.w_in_axes,
                                       initial_value=self.init,
                                       scope=self.scope).
                        named("W_in_{}".format(k)) for k in self.metadata['gates']}

        self.W_recur = {k: ng.variable(axes=self.w_re_axes,
                                       initial_value=self.init_inner,
                                       scope=self.scope).
                        named("W_re_{}".format(k)) for k in self.metadata['gates']}

        self.b = {k: ng.variable(axes=self.out_feature_axes,
                                 initial_value=0,
                                 scope=self.scope).
                  named("bias_{}".format(k)) for k in self.metadata['gates']}

        h = self.h_init
        c = self.c_init

        h_list = []
        c_list = []

        # Compute feed forward weighted inputs
        # Batch norm is computed only on the weighted inputs
        # as in https://arxiv.org/abs/1510.01378
        h_ff = dict()
        for k in self.metadata["gates"]:
            h_ff[k] = ng.dot(self.W_input[k], in_obj)
            if self.batch_norm is not None:
                h_ff[k] = self.batch_norm[k](h_ff[k])

        # slice the weighted inputs into time slices
        h_ff = get_steps(h_ff, self.recurrent_axis, self.backward)

        # recurrent computation
        for i in range(self.recurrent_axis.length):
            with ng.metadata(recurrent_step=str(i)):
                [h, c] = self._step(h_ff[i], [h, c])
                h_list.append(h)
                c_list.append(c)

        if self.return_sequence is True:
            if self.backward:
                h_list = h_list[::-1]
                c_list = c_list[::-1]
            lstm_out = ng.stack(h_list, self.recurrent_axis, pos=self.recurrent_axis_idx)
        else:
            lstm_out = h_list[-1]

        if self.reset_cells is True:
            return lstm_out
        else:
            return ng.sequential([
                ng.doall([
                    ng.assign(self.h_init, h_list[-1]),
                    ng.assign(self.c_init, c_list[-1])
                ]),
                lstm_out
            ])
Example #10
def unroll_with_attention(cell,
                          num_steps,
                          H_pr,
                          H_hy,
                          init_states=None,
                          reset_cells=True,
                          return_sequence=True,
                          reverse_mode=False,
                          input_data=None):
    """
    Unroll the cell with attention for num_steps steps.

    Arguments:
    ----------
    cell : the cell to be unrolled (e.g. MatchLSTMCell_withAttention)
    num_steps: the number of steps needed to unroll
    H_pr : the encoding for the question
    H_hy : the encoding for the passage
    init_states: Either None or a dictionary containing states
    reset_cells: determines whether the cell state has to be reset or not
    reverse_mode: Set to True if unrolling in the opposite direction is desired
    input_data: the ArrayIterator object for training data
                (contains information on the length of each sentence)

    """
    recurrent_axis = H_hy.axes.recurrent_axis()

    if init_states is not None:
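        # NOTE: `out_axes` is not defined anywhere in this snippet; it is
        # assumed to come from the enclosing scope of the original source.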
        states = {
            k: ng.cast_role(v, out_axes)
            for (k, v) in init_states.items()
        }
    else:
        states = init_states

    stepped_inputs = get_steps(H_hy, recurrent_axis, backward=reverse_mode)
    stepped_outputs = []

    for t in range(num_steps):
        with ng.metadata(step=str(t)):
            if t == 0:
                output, states = cell(H_pr,
                                      stepped_inputs[t],
                                      states,
                                      output=None,
                                      input_data=input_data)
            else:
                output, states = cell(H_pr,
                                      stepped_inputs[t],
                                      states,
                                      output=output,
                                      input_data=input_data)

            stepped_outputs.append(output)

    if reverse_mode:
        if return_sequence:
            stepped_outputs.reverse()

    if return_sequence:
        outputs = ng.stack(stepped_outputs, recurrent_axis, pos=1)
    else:
        outputs = stepped_outputs[-1]

    if not reset_cells:
        update_inits = ng.doall([
            ng.assign(initial, states[name])
            for (name, initial) in states.items()
        ])
        outputs = ng.sequential([update_inits, outputs])

    return outputs
Example #11
# Build the main and auxiliary loss functions
y_onehot = ng.one_hot(inputs['label'], axis=ax.Y)
train_prob_main = inception.seq2(inception.seq1(inputs['image']))
train_prob_main = ng.map_roles(train_prob_main, {"C": ax.Y.name})
train_loss_main = ng.cross_entropy_multi(train_prob_main,
                                         y_onehot,
                                         enable_softmax_opt=False)

train_prob_aux = inception.seq_aux(inception.seq1(inputs['image']))
train_prob_aux = ng.map_roles(train_prob_aux, {"C": ax.Y.name})
train_loss_aux = ng.cross_entropy_multi(train_prob_aux,
                                        y_onehot,
                                        enable_softmax_opt=False)

batch_cost = ng.sequential([
    optimizer(train_loss_main + 0.4 * train_loss_aux),
    ng.mean(train_loss_main, out_axes=())
])

train_computation = ng.computation([batch_cost], 'all')

# Build the computations for inference (evaluation)
with Layer.inference_mode_on():
    inference_prob = inception.seq2(inception.seq1(inputs['image']))
    slices = [
        0 if cx.name in ("H", "W") else slice(None)
        for cx in inference_prob.axes
    ]
    inference_prob = ng.tensor_slice(inference_prob, slices)
    inference_prob = ng.map_roles(inference_prob, {"C": "Y"})
    errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                          inputs['label'])
Example #12
def train_network(model, train_set, valid_set, batch_size, epochs, log_file):
    '''
    Trains the predefined network and saves the progress in the log file
    that is given in the arguments.

    model(object): Defines the model in Neon
    train_set(object): Defines the training set
    valid_set(object): Defines the validation set
    batch_size(int): Minibatch size
    epochs(int): Number of training epochs
    log_file(string): File name to store training logs for plotting

    '''

    # Form placeholders for inputs to the network
    # Iterations needed for learning rate schedule
    inputs = train_set.make_placeholders(include_iteration=True)

    # Convert labels into one-hot vectors
    one_hot_label = ng.one_hot(inputs['label'], axis=ax.Y)

    learning_rate_policy = {
        'name': 'schedule',
        'schedule': list(np.arange(2, epochs, 2)),
        'gamma': 0.6,
        'base_lr': 0.001
    }

    optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                        momentum_coef=0.9,
                                        wdecay=0.005,
                                        iteration=inputs['iteration'])

    # Define graph for training
    train_prob = model(inputs['video'])
    train_loss = ng.cross_entropy_multi(train_prob, one_hot_label)
    batch_cost = ng.sequential(
        [optimizer(train_loss),
         ng.mean(train_loss, out_axes=())])

    with closing(ngt.make_transformer()) as transformer:

        # Define graph for calculating validation set error and misclassification rate
        # Use inference mode for validation to avoid dropout in forward pass
        with Layer.inference_mode_on():
            inference_prob = model(inputs['video'])
            errors = ng.not_equal(ng.argmax(inference_prob), inputs['label'])
            eval_loss = ng.cross_entropy_multi(inference_prob, one_hot_label)
            eval_outputs = {'cross_ent_loss': eval_loss, 'misclass': errors}

            eval_computation = make_bound_computation(transformer,
                                                      eval_outputs, inputs)

        train_outputs = {'batch_cost': batch_cost}
        train_computation = make_bound_computation(transformer, train_outputs,
                                                   inputs)

        interval_cost = 0.0

        # Train in epochs
        logs = {'train': [], 'validation': [], 'misclass': []}
        for epoch in trange(epochs, desc='Epochs'):

            # Setup the training bar
            numBatches = train_set.ndata // batch_size
            tpbar = tqdm(unit='batches',
                         ncols=100,
                         total=numBatches,
                         leave=False)

            train_set.reset()
            valid_set.reset()

            train_log = []
            for step, data in enumerate(train_set):
                data = dict(data)
                data['iteration'] = epoch  # learning schedule based on epochs
                output = train_computation(data)
                train_log.append(float(output['batch_cost']))

                tpbar.update(1)
                tpbar.set_description("Training {:0.4f}".format(
                    float(output['batch_cost'])))
                interval_cost += float(output['batch_cost'])
            tqdm.write("Epoch {epch}  complete. "
                       "Avg Train Cost {cost:0.4f}".format(epch=epoch,
                                                           cost=interval_cost /
                                                           step))
            interval_cost = 0.0
            tpbar.close()
            validation_loss = run_validation(valid_set, eval_computation)
            tqdm.write("Avg losses: {}".format(validation_loss))
            logs['train'].append(train_log)
            logs['validation'].append(validation_loss['cross_ent_loss'])
            logs['misclass'].append(validation_loss['misclass'])

            # Save log data and plot at the end of each epoch
            with open(log_file, 'wb') as f:
                pickle.dump(logs, f)
            plot_logs(logs=logs)
Example #13
    learning_rate_policy = {
        'name': 'schedule',
        'schedule': [32000, 48000],
        'gamma': 0.1,
        'base_lr': 0.1
    }

    optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                        momentum_coef=0.9,
                                        wdecay=0.0001,
                                        iteration=inputs['iteration'])
    label_indices = inputs['label']
    train_loss = ng.cross_entropy_multi(resnet(inputs['image']),
                                        ng.one_hot(label_indices, axis=ax.Y))
    batch_cost = ng.sequential(
        [optimizer(train_loss),
         ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    with Layer.inference_mode_on():
        inference_prob = resnet(inputs['image'])
        errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                              label_indices)
        eval_loss = ng.cross_entropy_multi(
            inference_prob, ng.one_hot(label_indices, axis=ax.Y))
        eval_loss_names = ['cross_ent_loss', 'misclass']
        eval_computation = ng.computation([eval_loss, errors], "all")

    # Now bind the computations we are interested in
    transformer = ngt.make_transformer()
    train_function = transformer.add_computation(train_computation)
Example #14
                  wikimovies.story_length, wikimovies.memory_size,
                  wikimovies.vocab_size, vocab_axis, args.use_v_luts)
# Compute answer predictions
a_pred, _ = memn2n(inputs)

loss = ng.cross_entropy_multi(a_pred,
                              ng.one_hot(inputs['answer'], axis=vocab_axis),
                              usebits=True)

mean_cost = ng.sum(loss, out_axes=[])

optimizer = Adam(learning_rate=args.lr)

updates = optimizer(loss)

batch_cost = ng.sequential([updates, mean_cost])

# provide outputs for bound computation
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)

with Layer.inference_mode_on():
    a_pred_inference, _ = memn2n(inputs)
    eval_loss = ng.cross_entropy_multi(a_pred_inference,
                                       ng.one_hot(inputs['answer'],
                                                  axis=vocab_axis),
                                       usebits=True)

eval_outputs = dict(test_cross_ent_loss=eval_loss, test_preds=a_pred_inference)

if args.interactive:
    interactive_outputs = dict(test_preds=a_pred_inference)
Example #15
    eps=args.eps,
    init=GaussianInit(
        mean=0.0,
        std=0.1))

# Compute answer predictions
a_pred, attention = memn2n(inputs)

# specify loss function, calculate loss and update weights
loss = ng.cross_entropy_multi(a_pred, inputs['answer'], usebits=True)

mean_cost = ng.sum(loss, out_axes=[])
optimizer = Adam(learning_rate=args.lr)
updates = optimizer(loss)

batch_cost = ng.sequential([updates, mean_cost])

# provide outputs for bound computation
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)

with Layer.inference_mode_on():
    a_pred_inference, attention_inference = memn2n(inputs)
    eval_loss = ng.cross_entropy_multi(
        a_pred_inference, inputs['answer'], usebits=True)

interactive_outputs = dict(
    test_preds=a_pred_inference,
    attention=attention_inference)
eval_outputs = dict(test_cross_ent_loss=eval_loss, test_preds=a_pred_inference)

# Train Loop
Example #16
    def __init__(self,
                 state_axes,
                 action_size,
                 batch_size,
                 model,
                 learning_rate=0.0001):
        """
        for now, model must be a function which takes action_axes, and
        returns a neon container
        """
        super(ModelWrapper, self).__init__()

        self.axes = Namespace()
        self.axes.state = make_axes(state_axes, name='state')
        self.axes.action = ng.make_axis(name='action', length=action_size)
        self.axes.n = ng.make_axis(name='N', length=batch_size)
        self.axes.n1 = ng.make_axis(name='N', length=1)

        # placeholders
        self.state = ng.placeholder(self.axes.state + [self.axes.n])
        self.state_single = ng.placeholder(self.axes.state + [self.axes.n1])
        self.target = ng.placeholder([self.axes.action, self.axes.n])

        # these q functions have the same structure but different variables
        self.q_function = model(self.axes.action)
        self.q_function_target = model(self.axes.action)

        # construct inference computation
        with neon.Layer.inference_mode_on():
            inference = self.q_function(self.state)
        inference_computation = ng.computation(inference, self.state)

        # construct inference target computation
        with neon.Layer.inference_mode_on():
            inference_target = self.q_function_target(self.state)
        inference_target_computation = ng.computation(inference_target,
                                                      self.state)

        # construct inference computation for evaluating a single observation
        with neon.Layer.inference_mode_on():
            inference_single = self.q_function(self.state_single)
        inference_computation_single = ng.computation(inference_single,
                                                      self.state_single)

        # update q function target weights with values from q function
        # assumes that the variables in each are in the same order
        update_computation = ng.computation(
            ng.doall([
                ng.assign(target_variable,
                          ng.cast_axes(variable, target_variable.axes))
                for target_variable, variable in zip(
                    self.q_function_target.variables.values(),
                    self.q_function.variables.values())
            ]))

        # construct training computation
        loss = ng.squared_L2(self.q_function(self.state) - self.target)

        optimizer = neon.RMSProp(
            learning_rate=learning_rate,
            gradient_clip_value=1,
        )

        train_output = ng.sequential([
            optimizer(loss),
            loss,
        ])

        train_computation = ng.computation(train_output, self.state,
                                           self.target)

        # now bind computations we are interested in
        self.transformer = ng.transformers.make_transformer()
        self.inference_function = self.transformer.add_computation(
            inference_computation)
        self.inference_target_function = self.transformer.add_computation(
            inference_target_computation)
        self.inference_function_single = self.transformer.add_computation(
            inference_computation_single)
        self.train_function = self.transformer.add_computation(
            train_computation)
        self.update_function = self.transformer.add_computation(
            update_computation)

        # run a single update to ensure that both q functions have the same
        # initial weights
        self.update()
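
A hedged sketch of a model factory satisfying the contract described in the docstring above (takes the action axes, returns a neon container); the layer choices, sizes, and import path are assumptions:

from ngraph.frontends import neon

def small_q_model(action_axes):
    # a two-layer MLP whose output lies on the action axes, as required
    # by self.q_function(self.state) above
    return neon.Sequential([
        neon.Affine(nout=64, weight_init=neon.GaussianInit(),
                    activation=neon.Rectlin()),
        neon.Affine(axes=action_axes, weight_init=neon.GaussianInit()),
    ])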
Example #17
File: layer.py Project: kkasravi/ngraph
    def train_outputs(self, in_obj, init_state=None):
        """
        Sets shape-based parameters of this layer given an input tuple or int
        or input layer.

        Arguments:
            in_obj (int, tuple, Layer or Tensor): object that provides shape
                                                 information for layer
            init_state (tuple of Tensor): object that provides initial state, and in LSTM,
                                          it includes hidden state, and cell states

        Returns:
            rnn_out (Tensor): output

        """
        # try to understand the axes from the input
        if init_state is not None:
            assert len(
                init_state) == 2 and init_state[0].axes == init_state[1].axes
            self.interpret_axes(in_obj, init_state[0])
        else:
            self.interpret_axes(in_obj, init_state)

        # initialize the hidden states
        if init_state is not None:
            self.h_init = init_state[0]
            self.c_init = init_state[1]
        else:
            if self.reset_cells:
                self.h_init = ng.temporary(
                    initial_value=0,
                    axes=self.hidden_state_axes).named('h_init')
                self.c_init = ng.temporary(
                    initial_value=0,
                    axes=self.hidden_state_axes).named('c_init')
            else:
                self.h_init = ng.variable(
                    initial_value=0,
                    axes=self.hidden_state_axes).named('h_init')
                self.c_init = ng.variable(
                    initial_value=0,
                    axes=self.hidden_state_axes).named('c_init')

        # params are dictionary for i, f, o, g
        self.W_input = {
            k: ng.variable(axes=self.w_in_axes,
                           initial_value=self.init).named("W_in_{}".format(k))
            for k in self.metadata['gates']
        }

        self.W_recur = {
            k: ng.variable(axes=self.w_re_axes,
                           initial_value=self.init_inner).named(
                               "W_re_{}".format(k))
            for k in self.metadata['gates']
        }

        self.b = {
            k: ng.variable(axes=self.hidden_axes,
                           initial_value=0).named("bias_{}".format(k))
            for k in self.metadata['gates']
        }

        h = self.h_init
        c = self.c_init

        h_list = []
        c_list = []

        # feedforward computation
        in_s = get_steps(in_obj, self.recurrent_axis, self.backward)

        # recurrent computation
        for i in range(self.recurrent_axis.length):
            with ng.metadata(recurrent_step=str(i)):
                [h, c] = self._step(in_s[i], [h, c])
                h_list.append(h)
                c_list.append(c)

        if self.return_sequence is True:
            if self.backward:
                h_list = h_list[::-1]
                c_list = c_list[::-1]
            lstm_out = ng.stack(h_list,
                                self.recurrent_axis,
                                pos=self.recurrent_axis_idx)
        else:
            lstm_out = h_list[-1]

        if self.reset_cells is True:
            return lstm_out
        else:
            return ng.sequential([
                ng.doall([
                    ng.assign(self.h_init, h_list[-1]),
                    ng.assign(self.c_init, c_list[-1])
                ]), lstm_out
            ])
Example #18
def assign_ops(ops, values):
    # build one AssignOp per (op, value) pair and run them in sequence
    update_ops = [ng.AssignOp(op, value) for op, value in zip(ops, values)]
    return ng.sequential(update_ops)
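
A hedged usage sketch for this helper; the axis, variables, and values are made up for illustration:

import numpy as np
import ngraph as ng

M = ng.make_axis(length=3, name='M')
v1 = ng.variable([M], initial_value=np.zeros(3))
v2 = ng.variable([M], initial_value=np.zeros(3))
# one op that, when executed, writes both variables in order
set_both = assign_ops([v1, v2], [np.ones(3), np.full(3, 2.0)])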
Example #19
loss1 = ng.cross_entropy_multi(logits1,
                               ng.one_hot(label1, axis=ax.Y), usebits=False)

loss2 = ng.cross_entropy_multi(logits2,
                               ng.one_hot(label2, axis=ax.Y), usebits=False)

# Total Loss
train_loss = loss1 + loss2

# Set optimizer (no learning rate scheduler used)
optimizer = Adam(learning_rate=2e-3)


print('compiling the graph')
# Cost set up
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])

# Predicted class is the one with the max probability
# Required outputs: batch cost, train probability, train misclassification
train_outputs = dict(batch_cost=batch_cost, inps=inputs['answer'],
                     logits=ng.stack(logits_concat, span, 1),
                     labels=inputs['answer'], drop=dropout_val)

# Inference Mode for validation dataset:
with Layer.inference_mode_on():
    eval_outputs = dict(logits=ng.stack(logits_concat, span, 1),
                        labels=inputs['answer'], drop=drop_pointer)


# Now bind the computations we are interested in
print('generating transformer')
Example #20
    def _pre_call_hook(self):
        self.t = ng.sequential([ng.assign(self.t, self.t + 1), self.t])
        self.ell = self.lrate * ng.sqrt(1 - self.beta_2 ** self.t) / (
            1 - self.beta_1 ** self.t)
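
The hook advances the step counter t and recomputes Adam's bias-corrected step size. The same quantity in plain NumPy, following Kingma & Ba (2014); the function name is an assumption:

import numpy as np

def adam_step_size(lrate, beta_1, beta_2, t):
    # bias-corrected step size: lrate * sqrt(1 - beta_2^t) / (1 - beta_1^t)
    return lrate * np.sqrt(1.0 - beta_2 ** t) / (1.0 - beta_1 ** t)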
Example #21
def mnist_mlp(args):
    # write tensorflow models
    x = tf.placeholder(tf.float32, [args.batch_size, 784])
    t = tf.placeholder(tf.float32, [args.batch_size, 10])
    w = tf.Variable(tf.zeros([784, 10]))
    b = tf.Variable(tf.zeros([10]))
    y = tf.matmul(x, w) + b
    cost = tf.reduce_mean(
        -tf.reduce_sum(t * tf.log(tf.nn.softmax(y)), reduction_indices=[1]))
    init = tf.global_variables_initializer()

    # import graph_def
    importer = TFImporter()
    importer.import_graph_def(tf.get_default_graph().as_graph_def())

    # get handle of ngraph ops
    x_ng, t_ng, cost_ng, init_op_ng = importer.get_op_handle(
        [x, t, cost, init])

    # transformer and computations
    transformer = ngt.make_transformer()
    updates = CommonSGDOptimizer(args.lrate).minimize(cost_ng,
                                                      cost_ng.variables())
    train_comp = transformer.computation(ng.sequential([updates, cost_ng]),
                                         x_ng, t_ng)
    init_comp = transformer.computation(init_op_ng)
    transformer.initialize()

    # train
    if args.random_data is not None:
        mnist = args.random_data
        mnist.reset(0)
    else:
        mnist = input_data.read_data_sets(args.data_dir, one_hot=True)

    init_comp()
    ng_cost_vals = []
    for idx in range(args.max_iter):
        batch_xs, batch_ys = mnist.train.next_batch(args.batch_size)
        cost_val = train_comp(batch_xs, batch_ys)
        ng_cost_vals.append(float(cost_val))
        print("[Iter %s] Cost = %s" % (idx, cost_val))

    transformer.close()

    # train in tensorflow as comparison
    with tf.Session() as sess:
        # train in tensorflow
        train_step = tf.train.GradientDescentOptimizer(
            args.lrate).minimize(cost)
        sess.run(init)
        if args.random_data is not None:
            mnist = args.random_data
            mnist.reset(0)
        else:
            mnist = input_data.read_data_sets(args.data_dir, one_hot=True)
        tf_cost_vals = []
        for idx in range(args.max_iter):
            batch_xs, batch_ys = mnist.train.next_batch(args.batch_size)
            cost_val, _ = sess.run([cost, train_step],
                                   feed_dict={
                                       x: batch_xs,
                                       t: batch_ys
                                   })
            tf_cost_vals.append(float(cost_val))
            print("[Iter %s] Cost = %s" % (idx, cost_val))

    return ng_cost_vals, tf_cost_vals
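
A hedged usage sketch: the two returned cost curves are meant to be compared against each other, typically within a tolerance. The Namespace fields mirror the attributes the function reads; the values and the tolerance are assumptions:

import numpy as np
from argparse import Namespace

args = Namespace(batch_size=128, lrate=0.1, max_iter=10,
                 random_data=None, data_dir='/tmp/mnist')
ng_costs, tf_costs = mnist_mlp(args)
assert np.allclose(ng_costs, tf_costs, rtol=1e-2)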
Example #22
    loss = ng.ctc(output, ng.flatten(inputs["char_map"]),
                  ng.flatten(inputs["audio_length"]),
                  ng.flatten(inputs["char_map_length"]))

    optimizer = GradientDescentMomentum(
        args.lr,
        momentum_coef=args.momentum,
        gradient_clip_norm=args.gradient_clip_norm,
        nesterov=args.nesterov)

    start = time.time()
    updates = optimizer(loss)
    stop = time.time()
    logger.debug("Optimizer graph creation took {} seconds".format(stop -
                                                                   start))
    mean_cost = ng.sequential([updates, ng.mean(loss, out_axes=())])

    # Create computation and initialize the transformer to allocate weights
    train_computation = ng.computation([mean_cost, output], "all")
    if inference is True:
        with Layer.inference_mode_on():
            eval_output = ds2(inputs["audio"],
                              spatial_axes={
                                  "H": "frequency",
                                  "W": "time"
                              })
        eval_computation = ng.computation(eval_output, "all")

    # Now bind the computations we are interested in
    with closing(ngt.make_transformer()) as transformer:
        train_function = transformer.add_computation(train_computation)
Example #23
def train_mnist_mlp(transformer_name,
                    data_dir=None,
                    rng_seed=12,
                    batch_size=128,
                    train_iter=10,
                    eval_iter=10):
    assert transformer_name in ['cpu', 'hetr']
    assert isinstance(rng_seed, int)

    # Apply this metadata to the graph regardless of transformer;
    # it is ignored in the non-HeTr case
    hetr_device_ids = (0, 1)

    # use consistent rng seed between runs
    np.random.seed(rng_seed)

    # Data
    train_data, valid_data = MNIST(path=data_dir).load_data()
    train_set = ArrayIterator(train_data,
                              batch_size,
                              total_iterations=train_iter)
    valid_set = ArrayIterator(valid_data, batch_size)
    inputs = train_set.make_placeholders()
    ax.Y.length = 10

    # Model
    with ng.metadata(device_id=hetr_device_ids, parallel=ax.N):
        seq1 = Sequential([
            Preprocess(functor=lambda x: x / 255.),
            Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
            Affine(axes=ax.Y,
                   weight_init=GaussianInit(),
                   activation=Logistic())
        ])

        train_prob = seq1(inputs['image'])
        train_loss = ng.cross_entropy_binary(
            train_prob, ng.one_hot(inputs['label'], axis=ax.Y))

        optimizer = GradientDescentMomentum(0.1, 0.9)
        batch_cost = ng.sequential(
            [optimizer(train_loss),
             ng.mean(train_loss, out_axes=())])
        train_outputs = dict(batch_cost=batch_cost)

        with Layer.inference_mode_on():
            inference_prob = seq1(inputs['image'])
        errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                              inputs['label'])
        eval_loss = ng.cross_entropy_binary(
            inference_prob, ng.one_hot(inputs['label'], axis=ax.Y))
        eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

    # Runtime
    with closing(
            ngt.make_transformer_factory(transformer_name)()) as transformer:
        train_computation = make_bound_computation(transformer, train_outputs,
                                                   inputs)
        loss_computation = make_bound_computation(transformer, eval_outputs,
                                                  inputs)

        train_costs = list()
        for step in range(train_iter):
            out = train_computation(next(train_set))
            train_costs.append(float(out['batch_cost']))

        ce_loss = list()
        for step in range(eval_iter):
            out = loss_computation(next(valid_set))
            ce_loss.append(np.mean(out['cross_ent_loss']))

        return train_costs, ce_loss