Example #1
def make_network(scope1=None, scope2=None):
    # 2-layer network, each layer under its own variable scope.
    # axes, N, nout1, and the weight constants (Wlin1, Wbias1, Wlin2, Wbias2)
    # are defined in the enclosing test's scope.
    x = ng.placeholder(axes)  # inputs
    t = ng.placeholder(ng.make_axes([ng.make_axis(length=1),
                                     N]))  # targets
    with Layer.variable_scope(scope1):
        layer1 = Affine(ConstantInit(val=Wlin1),
                        nout=nout1,
                        bias_init=ConstantInit(val=Wbias1),
                        activation=Rectlin(),
                        batch_norm=False)
    with Layer.variable_scope(scope2):
        layer2 = Affine(ConstantInit(val=Wlin2),
                        nout=1,
                        bias_init=ConstantInit(val=Wbias2),
                        activation=Logistic(),
                        batch_norm=False)
    seq = Sequential([layer1, layer2])
    p_t = seq(x)
    t_cast = ng.cast_axes(t, p_t.axes)  # TODO: how can this be avoided?
    loss = ng.cross_entropy_binary(p_t, t_cast)
    return seq, x, t, loss
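For context, a minimal sketch of how this helper might be driven with ngraph's transformer API; the driver below, and the NumPy batches x_np and t_np, are hypothetical and not part of the original test:

from contextlib import closing
import ngraph.transformers as ngt

seq, x, t, loss = make_network()
with closing(ngt.make_transformer()) as transformer:
    # Bind a computation that evaluates the loss for a batch of inputs/targets.
    loss_fn = transformer.computation(loss, x, t)
    loss_val = loss_fn(x_np, t_np)  # x_np, t_np: hypothetical NumPy arrays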
Example #2
def test_pooling():
    """
    test pooling forward and backward paths
    """
    N = 128
    C = 3
    D = 1
    H = W = 32

    J = T = 1
    R = S = 2
    ngt.make_transformer()

    padding = dict(pad_d=0, pad_h=0, pad_w=0, pad_c=0)
    strides = dict(str_d=1, str_h=1, str_w=1, str_c=1)
    fshape = dict(J=J, T=T, R=R, S=S)

    pool_params = dict(op='max')
    pool_params.update(padding)
    pool_params.update(strides)
    pool_params.update(fshape)

    ax_i = ng.make_axes([ax.C, ax.D, ax.H, ax.W, ax.N])
    ax_i.set_shape((C, D, H, W, N))
    inputs = ng.placeholder(axes=ax_i)

    ax_o = ng.make_axes([
        ng.make_axis(roles=[ar.features_input]).named('C'),
        ng.make_axis(roles=[ar.features_0]).named('D'),
        ng.make_axis(roles=[ar.features_1]).named('H'),
        ng.make_axis(roles=[ar.features_2]).named('W'), ax.N
    ])

    ax_o[:-1].set_shape((output_dim(C, J, padding['pad_c'], strides['str_c']),
                         output_dim(D, T, padding['pad_d'], strides['str_d']),
                         output_dim(H, R, padding['pad_h'], strides['str_h']),
                         output_dim(W, S, padding['pad_w'], strides['str_w'])))
    # randomly initialize
    input_value = rng.uniform(-1, 1, ax_i)

    assert input_value.shape == ax_i.lengths

    # compute pooling with the graph
    output = ng.pooling(pool_params, inputs, axes=ax_o)
    targets = ng.placeholder(axes=ax_o)

    costs = ng.cross_entropy_binary(ng.sigmoid(output), targets)
    error = ng.sum(costs, out_axes=()) / ng.batch_size(costs)
    d_inputs = ng.deriv(error, inputs)

    targets_value = rng.uniform(.1, 0.9, output.axes)

    with executor([output, error, d_inputs], inputs, targets) as conv_executor:
        result_ng, err_ng, gradI_ng = conv_executor(input_value, targets_value)

    # Now compute reference values via NEON
    NervanaObject.be.bsz = N
    neon_layer = Pooling(fshape=fshape,
                         padding=padding,
                         strides=strides,
                         op="max")

    inp = neon_layer.be.array(input_value.reshape(C * H * W * D, N))
    neon_layer.configure((C, H, W))
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas(DummyDeltaBuffers())

    result_ne = neon_layer.fprop(inp).get().reshape(output.axes.lengths)

    act_result_ne = 1. / (1.0 + np.exp(-result_ne))
    err = neon_layer.be.array(
        (act_result_ne - targets_value).reshape(-1, N) / float(N))
    gradI_ne = neon_layer.bprop(err).get().reshape(ax_i.lengths)

    # Compare fprop
    ng.testing.assert_allclose(result_ng, result_ne, rtol=0, atol=1e-6)

    # Compare bprop
    ng.testing.assert_allclose(gradI_ng, gradI_ne, rtol=0, atol=1e-6)
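The output lengths set on ax_o follow the usual windowed-op arithmetic. A plain-Python sketch of what output_dim is assumed to compute, matching neon's floor-division convention (output_dim_sketch is an illustrative name, not a library function):

def output_dim_sketch(X, S, padding, stride):
    # Output extent for input extent X, window S, symmetric padding,
    # and stride, rounded down.
    return (X - S + 2 * padding) // stride + 1

# With H = W = 32, R = S = 2, no padding, and unit strides:
assert output_dim_sketch(32, 2, 0, 1) == 31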
Example #3
def test_convolution(transformer_factory):
    """
    test convolution forward, backward, and weight-update paths
    """
    N = 128
    C, K = 3, 8
    D, T = 1, 1
    H = W = 32
    R = S = 2

    padding = dict(pad_d=0, pad_h=0, pad_w=0)
    strides = dict(str_d=1, str_h=1, str_w=1)
    conv_params = padding.copy()
    conv_params.update(strides)

    ax_i = ng.make_axes([ax.C, ax.D, ax.H, ax.W, ax.N])
    ax_f = ng.make_axes([ax.C, ax.T, ax.R, ax.S, ax.K])
    ax_i.set_shape((C, D, H, W, N))
    ax_f.set_shape((C, T, R, S, K))
    ax_o = ng.make_axes([
        ng.make_axis(ax_f.role_axes(ar.Channelout)[0].length,
                     name='C',
                     roles=[ar.Channel]),
        spatial_axis(ax_i,
                     ax_f,
                     padding['pad_d'],
                     strides['str_d'],
                     role=ar.Depth),
        spatial_axis(ax_i,
                     ax_f,
                     padding['pad_h'],
                     strides['str_h'],
                     role=ar.Height),
        spatial_axis(ax_i,
                     ax_f,
                     padding['pad_w'],
                     strides['str_w'],
                     role=ar.Width), ax.N
    ])

    inputs = ng.placeholder(axes=ax_i)
    filters = ng.placeholder(axes=ax_f)

    # randomly initialize
    input_value = rng.uniform(-1, 1, ax_i)
    filter_value = rng.uniform(-1, 1, ax_f)

    assert input_value.shape == ax_i.lengths
    assert filter_value.shape == ax_f.lengths

    output = ng.convolution(conv_params, inputs, filters, axes=ax_o)
    targets = ng.placeholder(axes=output.axes)

    costs = ng.cross_entropy_binary(ng.sigmoid(output), targets)
    error = ng.sum(costs, out_axes=()) / ng.batch_size(costs)
    d_inputs = ng.deriv(error, inputs)
    d_filters = ng.deriv(error, filters)

    targets_value = rng.uniform(.1, 0.9, output.axes)

    conv_executor = executor([output, error, d_inputs, d_filters], inputs,
                             filters, targets)
    result_ng, err_ng, gradI_ng, gradF_ng = conv_executor(
        input_value, filter_value, targets_value)

    # Now compute reference values via NEON
    NervanaObject.be.bsz = N
    neon_layer = Convolution(fshape=(R, S, K),
                             padding=padding,
                             strides=strides)

    inp = neon_layer.be.array(input_value.reshape(C * H * W * D, N))
    neon_layer.W = neon_layer.be.array(filter_value.reshape(C * R * S * T, K))
    neon_layer.dW = neon_layer.be.empty_like(neon_layer.W)
    neon_layer.configure((C, H, W))
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas(DummyDeltaBuffers())

    result_ne = neon_layer.fprop(inp).get().reshape(output.axes.lengths)

    act_result_ne = 1. / (1.0 + np.exp(-result_ne))
    err = neon_layer.be.array(
        (act_result_ne - targets_value).reshape(-1, N) / float(N))
    gradI_ne = neon_layer.bprop(err).get().reshape(ax_i.lengths)
    gradF_ne = neon_layer.dW.get().reshape(ax_f.lengths)

    # Compare fprop
    np.testing.assert_allclose(result_ng, result_ne, rtol=0, atol=1e-6)

    # Compare bprop
    np.testing.assert_allclose(gradI_ng, gradI_ne, rtol=0, atol=1e-6)

    # Compare update
    np.testing.assert_allclose(gradF_ng, gradF_ne, rtol=0, atol=1e-4)
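The reference error tensor built above relies on a standard identity: for binary cross entropy composed with a sigmoid, the gradient with respect to the pre-activation reduces to sigmoid(z) - t, which is why the code feeds (act_result_ne - targets_value) / N into bprop. A small NumPy check of that identity, independent of either framework:

import numpy as np

z = np.random.uniform(-2, 2, size=(5,))
t = np.random.uniform(0.1, 0.9, size=(5,))
sig = 1.0 / (1.0 + np.exp(-z))

def ce(z_):
    # Elementwise binary cross entropy of sigmoid(z_) against targets t.
    p = 1.0 / (1.0 + np.exp(-z_))
    return -(t * np.log(p) + (1 - t) * np.log(1 - p))

# Central-difference numerical gradient matches sigmoid(z) - t.
eps = 1e-6
num_grad = (ce(z + eps) - ce(z - eps)) / (2 * eps)
np.testing.assert_allclose(num_grad, sig - t, rtol=1e-4)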
Example #4
def __init__(self):
    # Store the ngraph binary cross-entropy op as a callable cost functor.
    self.ng_computation = lambda Y, T: ng.cross_entropy_binary(Y, T)
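Since the lambda only forwards its arguments, a direct reference is equivalent (a minor simplification, assuming the attribute is only ever called):

def __init__(self):
    self.ng_computation = ng.cross_entropy_binary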
Example #5
valid_set = ArrayIterator(valid_data, args.batch_size)

inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification
seq1 = Sequential([Preprocess(functor=lambda x: x / 255.),
                   Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=GaussianInit(), activation=Logistic())])

optimizer = GradientDescentMomentum(0.1, 0.9)

output_prob = seq1.train_outputs(inputs['image'])
errors = ng.not_equal(ng.argmax(output_prob, out_axes=[ax.N]), inputs['label'])
loss = ng.cross_entropy_binary(output_prob, ng.one_hot(inputs['label'], axis=ax.Y))
mean_cost = ng.mean(loss, out_axes=())
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss, misclass_pct=errors)

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
train_computation = make_bound_computation(transformer, train_outputs, inputs)
loss_computation = make_bound_computation(transformer, loss_outputs, inputs)

cbs = make_default_callbacks(output_file=args.output_file,
                             frequency=args.iter_interval,
                             train_computation=train_computation,
                             total_iterations=args.num_iterations,
Example #6
# ... (this snippet begins mid-expression; its opening lines are missing)
                                     recurrent_units=hidden_sizes,
                                     return_sequence=True).layers +
        [Logistic()])

# Optimizer
if args.modeltype == "TCN":
    optimizer = Adam(learning_rate=args.lr,
                     gradient_clip_value=args.grad_clip_value)
else:
    optimizer = GradientDescentMomentum(
        learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)

# Define the loss function (binary cross entropy, since each piano key
# is encoded as a binary value)
fwd_prop = model(inputs['X'])
fwd_prop = ng.axes_with_order(fwd_prop, out_axes)
train_loss = ng.cross_entropy_binary(fwd_prop, inputs['y'])

with Layer.inference_mode_on():
    preds = model(inputs['X'])
    preds = ng.axes_with_order(preds, out_axes)
eval_loss = ng.mean(ng.cross_entropy_binary(preds, inputs['y']), out_axes=())
eval_computation = ng.computation([eval_loss], "all")
predict_computation = ng.computation([preds], "all")

# Cost calculation
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

trainer = TimeseriesTrainer(optimizer,
Example #7
args = parser.parse_args()

fc_layers_deep = [int(s) for s in args.deep_parameters.split(',')]

cs_loader = data.CensusDataset(args.batch_size)

inputs = make_placeholders(args.batch_size, cs_loader)

model = WideDeepClassifier(cs_loader.parameters['dimensions_embeddings'],
                           cs_loader.parameters['tokens_in_embeddings'],
                           fc_layers_deep,
                           deep_activation_fn=Rectlin())

wide_deep = model(args.batch_size, inputs)

loss = ng.cross_entropy_binary(wide_deep, inputs['Y'])

optimizer = Adagrad(args.learning_rate)

# Recall that the optimizer op produces no output of its own; ng.sequential
# runs its updates first and then returns the summed loss.

batch_cost = ng.sequential([optimizer(loss), ng.sum(loss, out_axes=())])


def compute_accuracy(data):
    accuracy = 0.0
    total = 0.0

    for value in data.values():

        x_d = value[0]
Example #8
valid_set = ArrayIterator(valid_data, args.batch_size)

inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification
seq1 = Sequential([
    Preprocess(functor=lambda x: x / 255.),
    Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
    Affine(axes=ax.Y, weight_init=GaussianInit(), activation=Logistic())
])

optimizer = GradientDescentMomentum(0.1, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_binary(train_prob,
                                     ng.one_hot(inputs['label'], axis=ax.Y))

batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                      inputs['label'])
eval_loss = ng.cross_entropy_binary(inference_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

# Now bind the computations we are interested in
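The snippet cuts off here; following the same pattern as Example #5 (and Example #10 below), the binding presumably continues along these lines, though the exact code is not in the source:

transformer = ngt.make_transformer()
train_computation = make_bound_computation(transformer, train_outputs, inputs)
eval_computation = make_bound_computation(transformer, eval_outputs, inputs)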
Example #9
def cost(y, t):
    return ng.cross_entropy_binary(y, t)
Example #10
def train_mnist_mlp(transformer_name,
                    data_dir=None,
                    rng_seed=12,
                    batch_size=128,
                    train_iter=10,
                    eval_iter=10):
    assert transformer_name in ['cpu', 'hetr']
    assert isinstance(rng_seed, int)

    # This metadata is applied to the graph regardless of transformer,
    # but it is ignored in the non-HeTr case
    hetr_device_ids = (0, 1)

    # use consistent rng seed between runs
    np.random.seed(rng_seed)

    # Data
    train_data, valid_data = MNIST(path=data_dir).load_data()
    train_set = ArrayIterator(train_data,
                              batch_size,
                              total_iterations=train_iter)
    valid_set = ArrayIterator(valid_data, batch_size)
    inputs = train_set.make_placeholders()
    ax.Y.length = 10

    # Model
    with ng.metadata(device_id=hetr_device_ids, parallel=ax.N):
        seq1 = Sequential([
            Preprocess(functor=lambda x: x / 255.),
            Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
            Affine(axes=ax.Y,
                   weight_init=GaussianInit(),
                   activation=Logistic())
        ])

        train_prob = seq1(inputs['image'])
        train_loss = ng.cross_entropy_binary(
            train_prob, ng.one_hot(inputs['label'], axis=ax.Y))

        optimizer = GradientDescentMomentum(0.1, 0.9)
        batch_cost = ng.sequential(
            [optimizer(train_loss),
             ng.mean(train_loss, out_axes=())])
        train_outputs = dict(batch_cost=batch_cost)

        with Layer.inference_mode_on():
            inference_prob = seq1(inputs['image'])
        errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                              inputs['label'])
        eval_loss = ng.cross_entropy_binary(
            inference_prob, ng.one_hot(inputs['label'], axis=ax.Y))
        eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

    # Runtime
    with closing(
            ngt.make_transformer_factory(transformer_name)()) as transformer:
        train_computation = make_bound_computation(transformer, train_outputs,
                                                   inputs)
        loss_computation = make_bound_computation(transformer, eval_outputs,
                                                  inputs)

        train_costs = list()
        for step in range(train_iter):
            out = train_computation(next(train_set))
            train_costs.append(float(out['batch_cost']))

        ce_loss = list()
        for step in range(eval_iter):
            out = loss_computation(next(valid_set))
            ce_loss.append(np.mean(out['cross_ent_loss']))

        return train_costs, ce_loss
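A minimal sketch of how this function might be invoked as a smoke test; the finiteness checks are illustrative assumptions, not part of the original:

# Run a short train/eval loop on the CPU transformer and sanity-check
# that the reported losses are finite.
train_costs, ce_loss = train_mnist_mlp('cpu', train_iter=10, eval_iter=10)
assert all(np.isfinite(c) for c in train_costs)
assert all(np.isfinite(l) for l in ce_loss)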