Example No. 1
def wine_test(epochs=200, batch_size=16, lr=0.1):
    print(
        'wine test: params - epochs {0}, batch_size: {1}, learning rate: {2}'.
        format(epochs, batch_size, lr))
    features, target = load_wine(return_X_y=True)

    # Make a train/test split using 90% test size
    RANDOM_STATE = 42
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.9, random_state=RANDOM_STATE)
    # StandardScaler gives every feature zero mean and unit variance, which
    # conditions the error surface so SGD converges much more easily.
    sc = StandardScaler()
    sc.fit(X_train)
    s = SoftmaxClassifier(num_classes=3)
    X_train = sc.transform(X_train)
    X_train = np.c_[np.ones(X_train.shape[0]), X_train]  # adds bias var
    X_test = sc.transform(X_test)
    X_test = np.c_[np.ones(X_test.shape[0]), X_test]  # adds bias var
    s.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, lr=lr)
    print('Softmax Wine Classifier')
    print_score(s, X_train, X_test, y_train, y_test)
    hist = s.history
    fig, ax = plt.subplots()
    ax.plot(np.array(range(1, 1 + len(hist))), hist, 'b-x')
    ax.set_title('Cost as a function of epoch for wine data')
    ax.set_xlabel('epoch')
    ax.set_ylabel('Ein (1/n NLL)')
    export_fig(fig, 'softmax_wine_cost_per_epoch.png')
    plt.show()
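As the comment above notes, StandardScaler only removes the per-feature mean and divides by the per-feature standard deviation learned on the training split. A minimal equivalent sketch in plain NumPy (illustrative only, applied before the bias column is added):

# manual standardization, equivalent to sc.fit(X_train); sc.transform(...)
mu = X_train.mean(axis=0)
sigma = X_train.std(axis=0)
X_train_std = (X_train - mu) / sigma   # zero mean, unit variance per feature
X_test_std = (X_test - mu) / sigma     # test data reuses the training statistics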
Example No. 2
def pick_hyperparams(X_train, y_train, X_val, y_val, learning_rates, regularization_strengths, iterations=4000, batches=400):
    results = {}
    best_val = -1
    best_softmax = None

    ################################################################################
    # TODO:                                                                        #
    # Use the validation set to set the learning rate and regularization strength. #
    # Save the best trained softmax classifer in best_softmax.                     #
    # Hint: about 10 lines of code expected
    ################################################################################
    for lr in learning_rates:
        for reg in regularization_strengths:
            print(lr, reg)
            softmax = SoftmaxClassifier()
            softmax.train(X_train, y_train, lr, reg, iterations, batches)
            predy = softmax.predict(X_val)
            val = np.mean(predy == y_val)
            predtrain = softmax.predict(X_train)
            valtrain = np.mean(predtrain == y_train)
            results[(lr, reg)] = (valtrain, val)
            if val > best_val:
                best_val = val
                best_softmax = softmax

    return best_softmax, results, best_val
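A hypothetical call to pick_hyperparams, with illustrative search ranges (the ranges below are not part of the original code):

best_softmax, results, best_val = pick_hyperparams(
    X_train, y_train, X_val, y_val,
    learning_rates=[1e-7, 5e-7, 1e-6],
    regularization_strengths=[1e3, 1e4, 1e5])
print('best validation accuracy: %f' % best_val)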
Example No. 3
def digits_visualize(epochs=1, batch_size=64, lr=0.01):
    sc = SoftmaxClassifier(num_classes=10)
    X_train, y_train = load_digits_train_data()
    sc.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, lr=lr)
    w = sc.W
    rs = w.reshape(28, 28, 10, order='F')
    rs2 = np.transpose(rs, axes=[1, 0, 2])
    fig, ax = plt.subplots()
    ax.imshow(rs2.reshape(28, -1, order='F'), cmap='bone')
    ax.set_title('digits weight vector visualized')
    export_fig(fig, 'softmax_weight_vector.png')
    plt.show()
Example No. 4
def runSoftmax():
    # Load the dataset
    f = gzip.open('../mnist.pkl.gz', 'rb')
    train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
    f.close()
    if len(sys.argv) > 1:
        exp_name = sys.argv[1]
    else:
        exp_name = 'exp'
    os.makedirs(exp_name, exist_ok=True)
    logger = open("./" + exp_name + "/log", "w")

    data = {
        'X_train': train_set[0],  # training data
        'y_train': train_set[1],  # training labels
        'X_val': valid_set[0],  # validation data
        'y_val': valid_set[1]  # validation labels
    }
    model = SoftmaxClassifier(input_dim=28 * 28, hidden_dim=1000, reg=0.0)
    solver = Solver(model,
                    data,
                    logger,
                    update_rule='sgd',
                    optim_config={
                        'learning_rate': 0.02,
                    },
                    lr_decay=1,
                    num_epochs=5,
                    batch_size=32,
                    print_every=100)
    solver.train()
    test_acc = solver.check_accuracy(test_set[0], test_set[1])
    toprint = "test_acc: " + str(test_acc)
    print(toprint)
Example No. 5
def digits_test(epochs=10, batch_size=32, lr=0.05):
    print(
        'digits test: params - epochs {0}, batch_size: {1}, learning rate: {2}'
        .format(epochs, batch_size, lr))
    sc = SoftmaxClassifier(num_classes=10)
    X_train, y_train = load_digits_train_data()
    X_test, y_test = load_digits_test_data()
    sc.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, lr=lr)
    print_score(sc, X_train, X_test, y_train, y_test)
    fig, ax = plt.subplots()
    hist = sc.history
    ax.plot(np.array(range(1, 1 + len(hist))), hist, 'b-x')
    ax.set_xlabel('epoch')
    ax.set_ylabel('Ein (1/n NLL)')
    ax.set_title('softmax cost on digits as function of epoch')
    export_fig(fig, 'softmax_cost_per_epoch.png')
    plt.show()
Example No. 6
def get_classifier(classifier):
    if classifier == 'svc':
        return LinearSVC()
    elif classifier == 'softmax':
        return SoftmaxClassifier()
    elif classifier == 'xgb':
        return xgb.XGBClassifier()
    else:
        return None
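A hypothetical usage of the dispatcher above (the names 'svc', 'softmax', and 'xgb' come from the function itself):

clf = get_classifier('softmax')   # returns a fresh SoftmaxClassifier instance
if clf is None:
    raise ValueError('unknown classifier name')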
Example No. 7
def main():
    from keras.datasets import mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # pdb.set_trace()
    # data preprocessing for a neural network with fully-connected layers
    data = {
        'X_train': np.array(x_train[:55000], np.float32).reshape(
            (55000, -1)),  # training data
        'y_train': np.array(y_train[:55000], np.int32),  # training labels
        'X_val': np.array(x_train[55000:], np.float32).reshape(
            (5000, -1)),  # validation data
        'y_val': np.array(y_train[55000:], np.int32),  # validation labels
    }
    model = SoftmaxClassifier(hidden_dim=100)

    # data preprocessing for a neural network with convolutional layers
    # data = {
    #    'X_train': np.array(x_train[:5000], np.float32).reshape((5000, 1, 28, 28)),  # training data
    #    'y_train': np.array(y_train[:5000], np.int32),  # training labels
    #    'X_val': np.array(x_train[55000:], np.float32).reshape((5000, 1, 28, 28)),  # validation data
    #    'y_val': np.array(y_train[55000:], np.int32),  # validation labels
    # }
    # model = ConvNet(hidden_dim=100)
    # Running experiments with a convolutional neural network can be time-consuming;
    # you may use a small number of training samples for debugging
    # and then use the full training set to report your experimental results.
    solver = Solver(model,
                    data,
                    update_rule='sgd',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    lr_decay=0.95,
                    num_epochs=10,
                    batch_size=100,
                    print_every=10)
    solver.train()

    # Plot the training losses
    plt.plot(solver.loss_history)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    plt.savefig('loss.png')
    plt.show()
    plt.close()

    test_acc = solver.check_accuracy(X=np.array(x_test, np.float32).reshape(
        (10000, -1)),
                                     y=y_test)
    # test_acc = solver.check_accuracy(X=np.array(x_test, np.float32).reshape((10000, 1, 28, 28)), y=y_test)
    print('Test accuracy', test_acc)
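As the comments above suggest, debugging the (commented-out) ConvNet path on a small subset only requires slicing the arrays before they go into the Solver. A minimal sketch, reusing the data dict built above (the subset size is illustrative):

n_debug = 500  # illustrative subset size for quick debugging runs
debug_data = {
    'X_train': data['X_train'][:n_debug],
    'y_train': data['y_train'][:n_debug],
    'X_val': data['X_val'],
    'y_val': data['y_val'],
}
# Solver(model, debug_data, ...).train()  # same Solver arguments as above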
Example No. 8
def train():
    # load data
    data = load_cifar10()
    train_data = {k: data[k] for k in ['X_train', 'y_train', 'X_val', 'y_val']}

    # initialize model
    model = SoftmaxClassifier(hidden_dim=300)

    # start training

    #######################################################################
    # TODO: Set up model hyperparameters                                  #
    #######################################################################
    model, train_acc_history, val_acc_history = trainNetwork(
        model,
        train_data,
        learning_rate=5e-3,
        lr_decay=0.9,
        num_epochs=40,
        batch_size=128,
        print_every=1000)
    #######################################################################
    #                         END OF YOUR CODE                            #
    #######################################################################

    # report test accuracy
    acc = testNetwork(model, data['X_test'], data['y_test'])
    print("Test accuracy: {}".format(acc))

    #######################################################################
    # Save your model with model.save(filepath) once you finish training  #
    #######################################################################
    model.save('wh')
    import matplotlib.pyplot as plt
    plt.plot(train_acc_history, label='train')
    plt.plot(val_acc_history, label='validation')
    plt.legend(loc='upper left')
    plt.savefig('pltwohidden.jpg')
Example No. 9

# TODO: Use the validation set to tune hyperparameters for softmax classifier
# choose learning rate and regularization strength (use the code from softmax_hw.py)


results = {}
best_val = -1
best_softmax = None
learning_rates = [1e-4, 1e-3, 1e-2, 1e-1]
regularization_strengths = [0.01, 0.05, 0.1, 0.5, 1]
for lr in learning_rates:
    for rs in regularization_strengths:
        print("calculating: lr=%e,reg=%e" % (lr, rs))
        ns = SoftmaxClassifier()
        ns.train(X_train, y_train, lr, rs, batch_size=400, num_iters=2000)
        ta = np.mean(y_train == ns.predict(X_train))
        va = np.mean(y_val == ns.predict(X_val))
        results[lr, rs] = (ta, va)
        if va > best_val:
            best_val = va
            best_softmax = ns



# TODO: Evaluate best softmax classifier on set aside test set (use the code from softmax_hw.py)
# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (
        lr, reg, train_accuracy, val_accuracy))
Example No. 10
   print("Fetching")
   X , y = fetch_openml('mnist_784',version=1,return_X_y=True)
   y = y.astype(np.int32)
   np.save('mnist_data.npy',X)
   np.save('mnist_labels.npy',y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1)

print(X_train.shape)

data = {'X_train': X_train, 'y_train': y_train,
        'X_val': X_val, 'y_val': y_val,
        'X_test': X_test, 'y_test': y_test
        }

model = SoftmaxClassifier()
solver = Solver(model, data,
                 update_rule='sgd',
                 optim_config={
                   'learning_rate': 2e-3,
                 },
                 lr_decay=1,
                 num_epochs=1, batch_size=50,
                 print_every=2)

solver.train()

acc = solver.check_accuracy(X=X_test, y=y_test)
print(acc)
Example No. 11
# We use the Frobenius norm to compare the two versions
# of the gradient.

grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
print('Gradient difference: %f' % grad_difference)

# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of over 0.35 on the validation set and the test set.

results = {}
best_val = -1
best_softmax = SoftmaxClassifier()
learning_rates = [2e-6]  # [1e-5, 2e-6, 1e-7]
regularization_strengths = [0.01]  # [.1, .01, .05, .005]

################################################################################
# TODO:                                                                        #
# Use the validation set to set the learning rate and regularization strength. #
# Save the best trained softmax classifer in best_softmax.                     #
# Hint: about 10 lines of code expected
################################################################################

classifier = SoftmaxClassifier()
accuracy = lambda x,y: np.mean(classifier.predict(x) == y)
for lr in learning_rates:
  for reg in regularization_strengths:
    classifier.train(X_train, y_train, learning_rate=lr, reg=reg)
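A minimal sketch of the full validation-set search the TODO above describes, following the same SoftmaxClassifier.train/.predict pattern used in the other examples (here a fresh classifier is created per setting so best_softmax keeps the winning weights):

for lr in learning_rates:
    for reg in regularization_strengths:
        clf = SoftmaxClassifier()
        clf.train(X_train, y_train, learning_rate=lr, reg=reg)
        train_accuracy = np.mean(clf.predict(X_train) == y_train)
        val_accuracy = np.mean(clf.predict(X_val) == y_val)
        results[(lr, reg)] = (train_accuracy, val_accuracy)
        if val_accuracy > best_val:
            best_val = val_accuracy
            best_softmax = clf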
Example No. 12
import numpy as np
from data import make_spiral_data, plot_2d_data
from softmax import SoftmaxClassifier


if __name__ == '__main__':

    # generate spiral data
    n_classes = 3
    data, y = make_spiral_data(100, n_classes, 2)
    fig = plot_2d_data(data, y)
    fig.show()

    # train model
    model = SoftmaxClassifier(n_classes)
    model.fit(data, y)
    print("Training accuracy is {:0.2f}".format(model.training_accuracy))
    print("Training loss is {:0.3f}".format(model.training_loss))
    boundaries = model.plot_boundaries(data, y)
    boundaries.show()
    loss_vs_epoch = model.plot_training_loss()
    loss_vs_epoch.show()

    # test model against 'new' data
    new_data, new_y = make_spiral_data(50, n_classes, 2)
    predictions = model.predict(new_data)
    print("Test accuracy is {:0.2f}".format(model.accuracy(new_data, new_y)))

Example No. 13
# work with a large number of files (50,000 froze my laptop completely until reboot)
#classify = one_vs_allLogisticRegressor(np.arange(10))
#best_params, best_classifier = utils.getBestRegOVA(classify, X_train, y_train, X_val, y_val, regularization_strengths, pen='l1')

"""
#best LR = [15]
learning_rates = [15] # [11,12,13,14,15,16,17,18,19,20]
#Best RS = [1e-3]
regularization_strengths = [2e-3] #[1e-3,2e-3,3e-3,4e-3,5e-3,6e-3,7e-3]
classify = SoftmaxClassifier()
# takes classifier, X_train, y_train, X_val, y_val, learning_rates, regularization_strengths and optionally print_train and print_val
best_params, best_classifier = utils.getBestRegAndLearnSoftMax(classify, X_train, y_train, X_val, y_val, learning_rates, regularization_strengths)
# end
"""
print "\nTraining the classifier..."
best_classifier = SoftmaxClassifier()
best_classifier.train(X_train, y_train, reg=1e1, learning_rate=15)
print np.mean(best_classifier.predict(X_val) == y_val)
print theta.shape
hogimg = best_classifier.theta[1025:1537,:].reshape(8,8,8,10)

utils.visualizeHOGTheta(hogimg)

print "\nMaking the final prediction..."
sys.stdout.flush()
labels = []
ids = []
batch = 50000
for j in range(3, 6):
	print "\nPart ", j + 1, " of 3"
	sys.stdout.flush()
Example No. 14
np.savetxt("accuracies.txt",accuracies,delimiter=',')


print "best batch size is ", best_batch_size
print "best iteration is ", best_iteration
'''
best_iteration = 3000
best_batch_size = 500


# select lambda using fmin train
best_softmax_fmin = None
best_val_fmin=-1
for reg in regularization_strengths:
    softmax=SoftmaxClassifier()
    softmax.train(X_train, y_train, best_learning_rate, reg=reg, num_iters=best_iteration,
                  batch_size=best_batch_size, verbose=True)
    y_pred_val = softmax.predict(X_val)
    current_val = np.mean(y_pred_val == y_val)
    if current_val > best_val_fmin:
        best_val_fmin = current_val
        best_softmax_fmin = softmax
        best_reg_fmin = reg

print "best reg for fmin is ", best_reg_fmin

if best_softmax_fmin:
    y_test_pred = best_softmax_fmin.predict(X_test)
    test_accuracy = np.mean(y_test == y_test_pred)
    print('softmax_fmin on raw pixels final test set accuracy: %f' % (test_accuracy, ))
Example No. 15
MUSIC_DIR = "../music/"
genres = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]
X, y = music_utils.read_ceps(genres, MUSIC_DIR)
# X,y = music_utils.read_fft(genres,MUSIC_DIR)

# TODO: Split into train, validation and test sets
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2)
X_train, X_val, y_train, y_val = cross_validation.train_test_split(X_train, y_train, test_size=0.1)

# TODO: Use the validation set to tune hyperparameters for softmax classifier
# choose learning rate and regularization strength (use the code from softmax_hw.py)

results = {}
best_val = -1
best_params = None
best_softmax = SoftmaxClassifier()
learning_rates = [0.005]
regularization_strengths = [1]

classifier = SoftmaxClassifier()
accuracy = lambda x, y: np.mean(classifier.predict(x) == y)
for lr in learning_rates:
    for reg in regularization_strengths:
        classifier.train(X_train, y_train, learning_rate=lr, reg=reg, num_iters=10000, batch_size=y_train.size // 4)
        val_accuracy = accuracy(X_val, y_val)
        train_accuracy = accuracy(X_train, y_train)
        results[(lr, reg)] = (train_accuracy, val_accuracy)

        print "lr %e reg %e train accuracy: %f val accuracy: %f" % (lr, reg, train_accuracy, val_accuracy)

        if val_accuracy > best_val:
            best_val = val_accuracy
            best_params = (lr, reg)
            best_softmax = classifier
Example No. 16
# We use the Frobenius norm to compare the two versions
# of the gradient.

grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
print('Gradient difference: %f' % grad_difference)

results = {}
best_val = -1
best_softmax = None
#learning_rates = [1e-7, 5e-7, 1e-6, 5e-6]
#regularization_strengths = [ 5e4, 1e5, 5e5, 1e8]
learning_rates = [5e-6]
regularization_strengths = [1e5]

cl = SoftmaxClassifier()
loss_hist = []
for lr, rs in itertools.product(learning_rates, regularization_strengths):
    _ = cl.train(X_train,
                 y_train,
                 lr,
                 rs,
                 num_iters=4000,
                 batch_size=400,
                 verbose=True)
    loss, _ = cl.loss(X_val, y_val, rs)
    pred_t = cl.predict(X_train)
    pred_v = cl.predict(X_val)
    #embed()
    train_match = np.where(pred_t == y_train)
    train_accuracy = float(len(train_match[0])) / len(y_train)
Example No. 17
    data = {
        'X_train': x_train,
        'y_train': y_train,
        'X_val': x_val,
        'y_val': y_val,
        'X_test': x_test,
        'y_test': y_test
    }

    return data


data = load_data(choice='multi')
print(data['X_train'].shape)
model = SoftmaxClassifier(hidden_dim=50, reg=0)

solver = Solver(model,
                data,
                update_rule='sgd',
                optim_config={
                    'learning_rate': 2e-3,
                },
                lr_decay=1,
                num_epochs=1,
                batch_size=50,
                print_every=2)

solver.train()

acc = solver.check_accuracy(X=data['X_test'], y=data['y_test'])
# We use the Frobenius norm to compare the two versions
# of the gradient.

grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
print('Gradient difference: %f' % grad_difference)

results = {}
best_val = -1
best_softmax = None
#learning_rates = [1e-7, 5e-7, 1e-6, 5e-6]
#regularization_strengths = [ 5e4, 1e5, 5e5, 1e8]
learning_rates = [ 5e-6]
regularization_strengths = [1e5]

cl = SoftmaxClassifier()
loss_hist = []
for lr, rs in itertools.product(learning_rates, regularization_strengths):
    _ = cl.train(X_train, y_train, lr, rs, num_iters=4000, batch_size=400, verbose=True)
    loss, _ = cl.loss(X_val, y_val, rs)
    pred_t = cl.predict(X_train)
    pred_v = cl.predict(X_val)
    #embed()
    train_match = np.where(pred_t == y_train)
    train_accuracy = float(len(train_match[0])) / len(y_train)
    val_match = np.where(pred_v == y_val)
    val_accuracy = float(len(val_match[0])) / len(y_val)
    results[(lr, rs)] = (train_accuracy, val_accuracy)
    loss_hist.append(loss)
    # print("For lr,rs = ",lr,rs, "Loss  value = ",loss)
Example No. 19
X_train, X_val, y_train, y_val = cross_validation.train_test_split(X_train, y_train, test_size=0.1)

# TODO: Use the validation set to tune hyperparameters for softmax classifier
# choose learning rate and regularization strength (use the code from softmax_hw.py)
batch_sizes = [200, 300, 400]
iterations = [1000, 2000, 3000]
learning_rates = [5e-7, 1e-6, 5e-6]
regularization_strengths = [1e2, 1e3, 1e4, 1e5]
best_val = -1
best_softmax = None

for batch_idx, batch_size in enumerate(batch_sizes):
  for it_idx, iteration in enumerate(iterations):
    for learningrate in learning_rates:
      for regularization in regularization_strengths:
        softmax = SoftmaxClassifier()

        softmax.train(X_train, y_train, learningrate, reg=regularization,
                      num_iters=iteration, batch_size=batch_size, verbose=True)
        y_pred_val = softmax.predict(X_val)
        current_val = np.mean(y_pred_val == y_val)
        if current_val > best_val:
          best_val = current_val
          best_softmax = softmax
          best_learning_rate = learningrate
          best_reg = regularization
          best_iteration = iteration
          best_batch_size = batch_size

print "best batch size is ", best_batch_size
print "best iteration is ", best_iteration
print "best reg is ", best_reg