示例#1
0
def test_rmsprop():
    """Compare ``nn.rmsprop`` with the reference values in ``torch_values``.

    For every entry of the module-level ``scales`` two shared scalars are
    created with value 1.0 and optimised jointly for 10 RMSProp steps
    (learning rate 0.01) on the objective ``f(A, scale) + f(B, scale)``.
    The two scalars must finish at the same value, and the list of final
    values (one per scale) must match ``torch_values['rmsprop']``.
    """
    n_steps = 10
    final_values = []
    for scale in scales:
        param_a = cgt.shared(1.0)
        param_b = cgt.shared(1.0)
        objective = f(param_a, scale) + f(param_b, scale)
        rms_updates = nn.rmsprop(objective, [param_a, param_b], learning_rate=0.01)
        # No inputs and no outputs: calling the function only applies the updates.
        step = cgt.function([], [], updates=rms_updates)
        for _ in range(n_steps):
            step()

        # Same start value and same objective term for both parameters,
        # so they are expected to follow the same trajectory.
        assert np.allclose(param_a.op.get_value(), param_b.op.get_value())
        final_values.append(param_a.op.get_value().copy())

    assert np.allclose(final_values, torch_values['rmsprop'])
示例#2
0
def run_rmsprop():
    """Run the RMSProp check once per entry of the module-level ``scales``.

    Each run builds two shared scalars (both 1.0), applies 10 updates from
    ``nn.rmsprop`` with learning rate 0.01 to ``f(A, scale) + f(B, scale)``
    and asserts that the two scalars agree afterwards.  The collected final
    values are then asserted to be close to ``torch_values['rmsprop']``.
    """
    collected = []
    for scale in scales:
        first, second = cgt.shared(1.0), cgt.shared(1.0)
        apply_step = cgt.function(
            [],
            [],
            updates=nn.rmsprop(
                f(first, scale) + f(second, scale),
                [first, second],
                learning_rate=0.01,
            ),
        )

        steps_left = 10
        while steps_left > 0:
            apply_step()
            steps_left -= 1

        # Both parameters see identical terms, so they should stay in sync.
        assert np.allclose(first.op.get_value(), second.op.get_value())
        collected.append(first.op.get_value().copy())

    assert np.allclose(collected, torch_values['rmsprop'])
示例#3
0
def main():
    """Train the model from ``build_model`` with RMSProp and save its weights.

    The symbolic input is a matrix with 28*28 columns and an int64 label
    vector (the sizes and the ``'mnist'`` save name suggest MNIST -- confirm
    against ``load_data``).  The first 60000 rows returned by ``load_data``
    are shuffled and used for training, rows 60000..69999 for testing.
    After each of the 3 epochs a table row with train/test negative
    log-likelihood, error rate and the epoch's training time is printed.

    All ``print`` calls use the single-argument call form, which produces
    the same output on Python 2 and is valid syntax on Python 3.
    """
    print("Loading data...")
    X = cgt.matrix("X", fixed_shape=(None, 28*28))
    y = cgt.vector("y", dtype='i8')

    model = build_model(X, 0.0)
    loss = -cgt.mean(categorical.loglik(y, model))

    # Training function: no outputs, only applies the RMSProp updates.
    updates = nn.rmsprop(loss, nn.get_parameters(loss), 0.01)
    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)

    # Evaluation graph: predicted class, mean NLL and misclassification rate.
    y_nodrop = cgt.argmax(model, axis=1)
    cost_nodrop = -cgt.mean(categorical.loglik(y, model))
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()
    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost_nodrop])

    batch_size = 128
    Xdata, ydata = load_data()

    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    # Shuffle the training rows once, keeping inputs and labels aligned.
    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    print(fmt_row(10, ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"]))
    for i_epoch in range(3):
        tstart = time.time()
        for start in range(0, Xtrain.shape[0], batch_size):
            end = start + batch_size
            train(Xtrain[start:end], ytrain[start:end])
        # Only the training passes are timed; evaluation happens afterwards.
        elapsed = time.time() - tstart
        # Evaluate on a training subset of the same size as the test set.
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print(fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]))

    nnbuilder.save_weights(model, 'mnist')
示例#4
0
def main(num_epochs=NUM_EPOCHS):
    """Train a bidirectional recurrent regressor on batches from ``gen_data``.

    A forward and a backward ``nnbuilder.recurrentLayer`` read the same
    ``(N_BATCH, MAX_LENGTH, 2)`` input.  The forward layer's last time step
    and the backward layer's first time step are concatenated and fed to a
    one-unit tanh dense layer.  The cost is the mean squared error against
    the target vector and is minimised with ``nn.rmsprop``.

    Args:
        num_epochs: number of epochs; each epoch runs ``EPOCH_SIZE`` training
            batches followed by one evaluation on a fixed validation batch.

    A ``KeyboardInterrupt`` during training is swallowed, so Ctrl-C ends the
    loop without a traceback.
    """
    print("Building network ...")
    # Recurrent layers expect input of shape
    # (batch size, max sequence length, number of features)
    seq_input = cgt.tensor3(name='X', fixed_shape=(N_BATCH, MAX_LENGTH, 2))
    # NOTE: nnbuilder.LSTMLayer / nnbuilder.GRULayer were previously tried
    # here as drop-in alternatives to the plain recurrent layers.
    forward_seq = nnbuilder.recurrentLayer(nn_input=seq_input, num_units=N_HIDDEN)
    backward_seq = nnbuilder.recurrentLayer(nn_input=seq_input, num_units=N_HIDDEN, backwards=True)

    # Last time step of the forward pass, first time step of the backward pass.
    forward_last = forward_seq[:, MAX_LENGTH-1, :]
    backward_first = backward_seq[:, 0, :]
    merged = cgt.concatenate([forward_last, backward_first], axis=1)
    l_out = nnbuilder.denseLayer(merged, num_units=1, activation=cgt.tanh)

    targets = cgt.vector('target_output')
    # The dense layer has a single unit; take that one column as the prediction.
    predictions = l_out[:, 0]
    squared_error = (predictions - targets)**2
    cost = cgt.mean(squared_error)

    # Compute RMSProp updates for training
    print("Computing updates ...")
    updates = nn.rmsprop(cost, nn.get_parameters(l_out), LEARNING_RATE)

    # cgt functions for training and computing cost
    print("Compiling functions ...")
    graph_inputs = [seq_input, targets]
    train = cgt.function(graph_inputs, cost, updates=updates)
    compute_cost = cgt.function(graph_inputs, cost)

    # We'll use this "validation set" to periodically check progress
    # (the mask returned by gen_data is not used).
    val_inputs, val_targets, _val_mask = gen_data()

    print("Training ...")
    epoch_started = time.time()
    try:
        for epoch in range(num_epochs):
            for _ in range(EPOCH_SIZE):
                batch_inputs, batch_targets, _batch_mask = gen_data()
                train(batch_inputs, batch_targets)
            cost_val = compute_cost(val_inputs, val_targets)
            print("Epoch {} validation cost = {}".format(epoch+1, cost_val))
            # The reported time covers training plus the validation pass.
            print ('Epoch took ' + str(time.time() - epoch_started))
            epoch_started = time.time()
    except KeyboardInterrupt:
        pass
示例#5
0
def test_the_test_problem():
    """Fit two temporal dense layers plus a softmax to ``test_data.npy``.

    The network maps a ``(batch, time, features)`` input to a per-time-step
    distribution over ``num_out_classes + 2`` classes.  The objective is the
    negative log-probability of the ground-truth labels (given as a
    ``(batch, time, classes)`` basis tensor, presumably one-hot -- confirm
    against ``normalize_batch_and_labels``), summed over batch and time.
    Each epoch trains on all batches with RMSProp, then re-evaluates the
    loss on the same batches and prints the mean loss and one prediction.

    Fixes relative to the earlier version:
      * ``data_sd`` is now the standard deviation of the data (it used to
        be a second copy of the mean).
      * the mean loss divides by ``num_batches`` instead of the last loop
        index (``num_batches - 1``).
      * predictions take the argmax over the class axis (2), the axis the
        softmax is applied over, rather than over the time axis (1).
      * ``print`` uses the call form so the function parses on Python 3;
        the output on Python 2 is unchanged.
    """
    batch_size = 32  # How many samples do you want to batch.
    feat_t_steps = 20  # How many 10ms sound clips.
    feat_num_features = 10  # The dimension of the 10ms clips.
    max_label_length = feat_t_steps  # The maximal label length of the transcription. includes start character.
    num_out_classes = 27
    num_out_classes_true = num_out_classes + 2
    num_batches = 756
    num_epochs = 30

    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    ground_labels_basis = cgt.tensor3(fixed_shape=(batch_size, max_label_length, num_out_classes_true))

    last_time = time.time()
    print('initializing temporal dense layer')
    d1 = nnbuilder.temporalDenseLayer(feats, num_units=128, activation=cgt.sigmoid)
    d3 = nnbuilder.temporalDenseLayer(d1, num_units=num_out_classes_true, activation=nnbuilder.linear)
    out = nn.three_d_softmax(d3, axis=2)

    # Accumulate, per time step, log(sum_c labels[b, t, c] * probs[b, t, c])
    # summed over the batch; negated below to get a loss to minimise.
    log_probs = None
    for iter_step in range(0, max_label_length):
        this_character_dist_bc = out[:, iter_step, :]
        prev_out_bc = ground_labels_basis[:, iter_step, :]
        log_probs_pre = prev_out_bc * this_character_dist_bc
        log_probs_pre = cgt.log(cgt.sum(log_probs_pre, axis=1))
        if log_probs is None:
            log_probs = cgt.sum(log_probs_pre)
        else:
            log_probs += cgt.sum(log_probs_pre)

    log_probs = -log_probs

    print('that took ' + str(time.time() - last_time) + ' seconds')

    last_time = time.time()
    print('compiling objective function')
    updates = nn.rmsprop(log_probs, nn.get_parameters(log_probs), learning_rate=0.01)
    pred_train = cgt.function([feats, ground_labels_basis], [], updates=updates)
    pred_fun = cgt.function([feats, ground_labels_basis], [log_probs])
    # Axis 2 is the class axis (see the softmax above), so this yields one
    # class index per (sample, time step).
    most_likely_chars = cgt.argmax(out, axis=2)
    actual_predictions = cgt.function([feats, ground_labels_basis], [most_likely_chars])
    print('that took ' + str(time.time() - last_time) + ' seconds')

    test_data = np.load('test_data.npy')
    test_labels = np.load('test_labels.npy')
    # Global statistics handed to normalize_batch_and_labels.
    data_mean = np.mean(test_data)
    data_sd = np.std(test_data)

    print('now training')
    for one_epoch in range(0, num_epochs):
        trained = 0
        last_time = time.time()
        print('starting epoch ' + str(one_epoch))
        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean, data_sd,
                                                             test_labels, num_out_classes_true)
            pred_train(batch, labels_basis)

        # Second pass over the same batches, this time only measuring the loss.
        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean, data_sd,
                                                             test_labels, num_out_classes_true)
            trained += pred_fun(batch, labels_basis)[0]

        # Average over the number of batches that were actually summed.
        trained = trained / num_batches
        print('train loss is ' + str(trained))
        print('that took ' + str(time.time() - last_time) + ' seconds')

        # Show the predictions for the last batch of the evaluation pass.
        act_pred = actual_predictions(batch, labels_basis)[0]
        print('an actual prediction is ')
        print(act_pred)
示例#6
0
def test_seq_2_seq():
    """Train ``nnbuilder.Seq2Seq`` on ``test_data.npy`` and print predictions.

    Builds the training objective and the prediction graph from a
    ``Seq2Seq`` model, optimises the objective with RMSProp (learning rate
    0.0001) for ``num_epochs`` epochs, and after every epoch prints the mean
    objective over all batches, a prediction for the last batch and the
    matching slice of ``test_labels``.  The final prediction for the last
    batch is printed once more after training.

    Fixes relative to the earlier version:
      * the mean loss divides by ``num_batches`` instead of the last loop
        index (``num_batches - 1``).
      * ``num_out_classes_true`` is derived from ``num_out_classes`` rather
        than repeating the literal 27 (same value, 29).
      * ``print`` uses the call form so the function parses on Python 3;
        the output on Python 2 is unchanged.
    """
    batch_size = 32  # How many samples do you want to batch.
    feat_t_steps = 3  # How many 10ms sound clips.
    feat_num_features = 10  # The dimension of the 10ms clips.
    max_label_length = feat_t_steps  # The maximal label length of the transcription.
    num_out_classes = 27  # 26 letters and space.
    num_out_classes_true = num_out_classes + 2  # Start and end tokens are added.
    num_batches = 512  # 1032
    num_epochs = 40

    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    ground_labels_basis = cgt.tensor3(fixed_shape=(batch_size, max_label_length, num_out_classes_true))

    last_time = time.time()
    print('initializing seq2seq')
    seq2seq = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes)
    print('that took ' + str(time.time() - last_time) + ' seconds')

    last_time = time.time()
    print('making train objective')
    train_objective = seq2seq.get_train_objective(max_label_length=max_label_length,
                                                  ground_labels_basis_btc=ground_labels_basis)
    print('that took ' + str(time.time() - last_time) + ' seconds')

    last_time = time.time()
    print('making updates')
    # Other optimisers previously tried here: nn.nesterov_momentum,
    # nn.momentum and nn.adadelta.
    updates = nn.rmsprop(train_objective, nn.get_parameters(train_objective), learning_rate=0.0001)
    print('that took ' + str(time.time() - last_time) + ' seconds')

    last_time = time.time()
    print('compiling train function, test function, and prediction output function')
    train_function = cgt.function([feats, ground_labels_basis], [], updates=updates)
    test_function = cgt.function([feats, ground_labels_basis], [train_objective])
    pred = seq2seq.make_prediction(ground_labels_basis_btc=ground_labels_basis, max_label_length=feat_t_steps)
    pred_fun = cgt.function([feats, ground_labels_basis], [pred])
    print('that took ' + str(time.time() - last_time) + ' seconds')

    test_data = np.load('test_data.npy')
    test_labels = np.load('test_labels.npy')
    # Global statistics handed to normalize_batch_and_labels.
    data_mean = np.mean(test_data)
    data_sd = np.std(test_data)

    print('now training')
    last_time = time.time()
    for one_epoch in range(0, num_epochs):
        tested = 0
        print('starting epoch ' + str(one_epoch))
        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean, data_sd,
                                                             test_labels, num_out_classes_true)
            train_function(batch, labels_basis)

        # Second pass over the same batches, this time only measuring the objective.
        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean, data_sd,
                                                             test_labels, num_out_classes_true)
            tested += test_function(batch, labels_basis)[0]

        # Average over the number of batches that were actually summed.
        tested = tested / num_batches
        # NOTE: batch_iter here is the index of the last batch; the value
        # printed is the mean over all batches of the epoch.
        print('train loss for batch ' + str(batch_iter) + ' is ' + str(tested))

        print('an actual prediction is ')
        print(pred_fun(batch, labels_basis)[0])
        print('the truth is')
        print(test_labels[batch_iter, :, 0:feat_t_steps])

        print('that took ' + str(time.time() - last_time) + ' seconds')
        last_time = time.time()

    # Uses the last batch left over from the final epoch's evaluation pass.
    prediction_final = pred_fun(batch, labels_basis)[0]
    print(prediction_final)