def main(): """ Example of how to load and parse MNIST data. """ train_set, test_set = load_data() # train_set is a two-element tuple. The first element, i.e., # train_set[0] is a 60,000 x 784 numpy matrix. There are 60k # rows in the matrix, each row corresponding to a single example. # There are 784 columns, each corresponding to the value of a # single pixel in the 28x28 image. print "\nDimensions of training set feature matrix:", print train_set[FEATURE].shape # The labels for each example are maintained separately in train_set[1]. # This is a 60,000 x 1 numpy matrix, where each element is the label # for the corresponding training example. print "\nDimensions of training set label matrix:", train_set[LABEL].shape # Example of how to access a individual training example (in this case, # the third example, i.e., the training example at index 2). We could # also just use print to output it to the screen, but pretty_print formats # the data in a nicer way: if you squint, you should be able to make out # the number 4 in the matrix data. print "\nFeatures of third training example:\n" pretty_print(train_set[FEATURE][2]) # And here's the label that goes with that training example print "\nLabel of first training example:", train_set[LABEL][2], "\n"
def final_test():
    train_set, test_set = load_data()
    X = train_set[FEATURE]
    Y = train_set[LABEL]
    X_test = test_set[FEATURE]
    Y_test = test_set[LABEL]

    # Logistic regression trained by SGD on the full 60k training set,
    # scored once on the held-out 10k test set.
    clf = SGDClassifier(loss='log', alpha=0.0002, shuffle=False, n_iter=50)
    print "Training..."
    clf.fit(X, Y)
    print "Scoring..."
    score = clf.score(X_test, Y_test, sample_weight=None)
    print score
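# Note: SGDClassifier(loss='log', ..., n_iter=50) targets an older
# scikit-learn API. A sketch of the equivalent call on recent releases
# (assumption: scikit-learn >= 1.1), where n_iter was replaced by max_iter
# and loss='log' was renamed 'log_loss':
#
#   clf = SGDClassifier(loss='log_loss', alpha=0.0002, shuffle=False,
#                       max_iter=50)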
import pickle


def main():
    train_set, test_set = load_data()
    train_set_size = len(train_set[FEATURE])

    # Per-digit running sums of each of the 784 pixel values, plus a count
    # of how many examples of each digit were seen.
    heat_map = dict((i, [0 for _ in xrange(784)]) for i in xrange(10))
    count_map = [0] * 10  # was list(range(10)), which started the counts at 0..9
    for feature, label in zip(train_set[FEATURE], train_set[LABEL]):
        for i in xrange(784):
            heat_map[label][i] += feature[i]
        count_map[label] += 1

    # Average the pixel sums to get the mean image for each digit.
    for num, psum in heat_map.items():
        heat_map[num] = [k / count_map[num] for k in psum]

    afile = open(r'heatMap.pkl', 'wb')
    pickle.dump(heat_map, afile)
    afile.close()
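# A minimal sketch (not part of the script above) of how the pickled
# per-digit averages could be reloaded and rendered; assumes matplotlib is
# available and 'heatMap.pkl' was written by main() above.
def show_heat_map(digit):
    import matplotlib.pyplot as plt
    with open('heatMap.pkl', 'rb') as f:
        heat_map = pickle.load(f)
    # Fold the flat 784-element average back into a 28x28 grid.
    rows = [heat_map[digit][r * 28:(r + 1) * 28] for r in xrange(28)]
    plt.imshow(rows, cmap='gray')
    plt.title('Mean image for digit %d' % digit)
    plt.show()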
def main(): """ Example of how to load and parse MNIST data. """ train_set, test_set = load_data() # train_set is a two-element tuple. The first element, i.e., # train_set[0] is a 60,000 x 784 numpy matrix. There are 60k # rows in the matrix, each row corresponding to a single example. # There are 784 columns, each corresponding to the value of a # single pixel in the 28x28 image. print "\nDimensions of training set feature matrix:", print train_set[FEATURE].shape # The labels for each example are maintained separately in train_set[1]. # This is a 60,000 x 1 numpy matrix, where each element is the label # for the corresponding training example. print "\nDimensions of training set label matrix:", train_set[LABEL].shape # Example of how to access a individual training example (in this case, # the third example, i.e., the training example at index 2). We could # also just use print to output it to the screen, but pretty_print formats # the data in a nicer way: if you squint, you should be able to make out # the number 4 in the matrix data. print "\nFeatures of third training example:\n" #pretty_print(train_set[FEATURE][2]) # And here's the label that goes with that training example print "\nLabel of the third training example:", train_set[LABEL][10], "\n" img = Image.new("RGB",(28,28)) px = img.load() a = (train_set[FEATURE][10]) #pretty_print(a) for x in xrange(28): for y in xrange(28): v = int((a[y*28 + x])*255) px[x,y] = (v,v,v) img.save('10.png')
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 9 12:42:35 2018

@author: Nancy
"""
from read_mnist import load_data  # make sure read_mnist is commented/uncommented to provide correct data
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# Load the MNIST digits dataset.
train_set, valid_set, test_set = load_data()

best_k = 0
max_accuracy = 0

# Loop over various values of `k` for the k-Nearest Neighbor classifier.
for k in range(1, 30, 2):
    # Train the k-Nearest Neighbor classifier with the current value of `k`.
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(train_set[0], train_set[1])

    # Evaluate the model and update the best accuracy and corresponding k.
    score = model.score(valid_set[0], valid_set[1])
    if score > max_accuracy:
        max_accuracy = score
        best_k = k

# Report the value of k that has the largest validation accuracy.
print("Best k: %d (validation accuracy: %.4f)" % (best_k, max_accuracy))
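# A follow-up sketch: retrain with the chosen k and report per-class metrics
# on the held-out test set, using the classification_report import above
# (assumption: the test set is touched only once k has been fixed).
model = KNeighborsClassifier(n_neighbors=best_k)
model.fit(train_set[0], train_set[1])
predictions = model.predict(test_set[0])
print(classification_report(test_set[1], predictions))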
import time

import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state

from read_mnist import load_data

FEATURE = 0
LABEL = 1

# Author: Arthur Mensch <*****@*****.**>
# License: BSD 3 clause

# Turn down for faster convergence
t0 = time.time()
train_samples = 5000

train_set, test_set = load_data()
X_train, y_train = train_set[FEATURE][:train_samples], train_set[LABEL][:train_samples]
X_test, y_test = test_set[FEATURE][-10000:], test_set[LABEL][-10000:]

# Turn up tolerance for faster convergence
clf = LogisticRegression(C=50. / train_samples, multi_class='multinomial',
                         penalty='l1', solver='saga', tol=0.1)
clf.fit(X_train, y_train)
sparsity = np.mean(clf.coef_ == 0) * 100
score = clf.score(X_test, y_test)
# print('Best C % .4f' % clf.C_)
print("Sparsity with L1 penalty: %.2f%%" % sparsity)
print("Test score with L1 penalty: %.4f" % score)

coef = clf.coef_.copy()
plt.figure(figsize=(10, 5))
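# A sketch of how the figure might be completed (the snippet ends at
# plt.figure); this mirrors the upstream scikit-learn example credited in
# the header, drawing each class's 784 weights as a 28x28 image.
scale = np.abs(coef).max()
for i in range(10):
    ax = plt.subplot(2, 5, i + 1)
    ax.imshow(coef[i].reshape(28, 28), interpolation='nearest',
              cmap=plt.cm.RdBu, vmin=-scale, vmax=scale)
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_xlabel('Class %i' % i)
plt.suptitle('Classification vector for each digit')
print('Example run in %.3f s' % (time.time() - t0))
plt.show()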
import numpy as np
from utils import visualise
from read_mnist import load_data
import random

y_train, x_train, y_test, x_test = load_data()
print("Train data label dim: {}".format(y_train.shape))
print("Train data features dim: {}".format(x_train.shape))
print("Test data label dim: {}".format(y_test.shape))
print("Test data features dim: {}".format(x_test.shape))

# Uncomment to visualise dataset
# visualise(x_train)


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_grad(x):
    # The derivative of the sigmoid is the elementwise product
    # s(x) * (1 - s(x)), not a matrix product.
    s = sigmoid(x)
    return s * (1 - s)


def softmax(x):
    for i, f in enumerate(x):
        f -= np.max(f)  # shift for numerical stability
        p = np.exp(f) / np.sum(np.exp(f))
        x[i, :] = p
    return x


def cross_entropy(X, y):
    """
    X is the output from fully connected layer (num_examples x num_classes)
    y is labels (num_examples x 1)
    """
    m = y.shape[0]
    p = softmax(X)
    # Mean negative log-likelihood of the correct class for each example.
    log_likelihood = -np.log(p[np.arange(m), y.reshape(-1)])
    return np.sum(log_likelihood) / m
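# A vectorized alternative to the softmax loop above (a sketch): it applies
# the same per-row stability shift, but returns a new array instead of
# mutating its argument the way softmax() does.
def softmax_vectorized(x):
    shifted = x - np.max(x, axis=1, keepdims=True)  # per-row stability shift
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)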
def main(): """ Example of how to load and parse MNIST data. """ train_set, test_set = load_data() # train_set is a two-element tuple. The first element, i.e., # train_set[0] is a 60,000 x 784 numpy matrix. There are 60k # rows in the matrix, each row corresponding to a single example. # There are 784 columns, each corresponding to the value of a # single pixel in the 28x28 image. print "\nDimensions of training set feature matrix:", print train_set[FEATURE].shape # The labels for each example are maintained separately in train_set[1]. # This is a 60,000 x 1 numpy matrix, where each element is the label # for the corresponding training example. #print "\nDimensions of training set label matrix:", train_set[LABEL].shape # Example of how to access a individual training example (in this case, # the third example, i.e., the training example at index 2). We could # also just use print to output it to the screen, but pretty_print formats # the data in a nicer way: if you squint, you should be able to make out # the number 4 in the matrix data. #print "\nFeatures of third training example:\n" #pretty_print(train_set[FEATURE][2]) # And here's the label that goes with that training example #print "\nLabel of first training example:", train_set[LABEL][2], "\n" # The test_set is organized in the same way, but only contains 10k # examples. Don't touch this data until your model is frozen! Perform all # cross-validation, model selection, hyperparameter tuning etc. on the 60k # training set. Use the test set simply for reporting performance. # cross validation # http://scikit-learn.org/stable/modules/cross_validation.html #Nearest Neighbor #http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html#sklearn.neighbors.KNeighborsClassifier.predict_proba X= train_set[FEATURE] Y=train_set[LABEL] kf= KFold(60000,6) #want kfold(60000,6) #print(kf) #print len(kf) print "creating cross validation sets...." for train_index, test_index in kf: #print("TRAIN:", train_index, "TEST:", test_index) X_train, X_test = X[train_index], X[test_index] y_train, y_test = Y[train_index], Y[test_index] #print X_train #print X_train.shape #print X_test.shape print "Making classifier...." neigh = KNeighborsClassifier(n_neighbors=3) print "Fitting Classifier..." neigh.fit(X_train, y_train) "Calculating test scores..." print neigh.score(X_test, y_test, sample_weight=None)
def main(): """ Example of how to load and parse MNIST data. """ train_set, test_set = load_data() # train_set is a two-element tuple. The first element, i.e., # train_set[0] is a 60,000 x 784 numpy matrix. There are 60k # rows in the matrix, each row corresponding to a single example. # There are 784 columns, each corresponding to the value of a # single pixel in the 28x28 image. print "\nDimensions of training set feature matrix:", print train_set[FEATURE].shape # The labels for each example are maintained separately in train_set[1]. # This is a 60,000 x 1 numpy matrix, where each element is the label # for the corresponding training example. #print "\nDimensions of training set label matrix:", train_set[LABEL].shape # Example of how to access a individual training example (in this case, # the third example, i.e., the training example at index 2). We could # also just use print to output it to the screen, but pretty_print formats # the data in a nicer way: if you squint, you should be able to make out # the number 4 in the matrix data. #print "\nFeatures of third training example:\n" #pretty_print(train_set[FEATURE][2]) # And here's the label that goes with that training example #print "\nLabel of first training example:", train_set[LABEL][2], "\n" # The test_set is organized in the same way, but only contains 10k # examples. Don't touch this data until your model is frozen! Perform all # cross-validation, model selection, hyperparameter tuning etc. on the 60k # training set. Use the test set simply for reporting performance. # cross validation # http://scikit-learn.org/stable/modules/cross_validation.html #Nearest Neighbor #http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html#sklearn.neighbors.KNeighborsClassifier.predict_proba X= train_set[FEATURE] Y=train_set[LABEL] kf= KFold(60000,6) #want kfold(60000,6) #print(kf) #print len(kf) print "creating cross validation sets...." alpha_scores = [[],[]] for mult in [0.0001]: for const in [2]: alph = mult * const print "Testing alpha = " + str(alph) k = 0 scores = [] for train_index, test_index in kf: print " k = " + str(k) X_train, X_test = X[train_index], X[test_index] y_train, y_test = Y[train_index], Y[test_index] #clf = LogisticRegression(penalty='l2', dual=False, solver="lbfgs") clf = SGDClassifier(loss='log',alpha=alph, shuffle=False, n_iter=50) clf.fit(X_train, y_train) score = clf.score(X_test, y_test, sample_weight=None) print score scores.append(score) k+=1 avg_score = sum(scores)/(float(len(scores))) print " Average Score: "+ str(avg_score) alpha_scores[0].append(alph) alpha_scores[1].append(avg_score) for i in range(len(alpha_scores[0])): print "alpha: " + str(alpha_scores[0][i]) + " score: " + str(alpha_scores[1][i]) fig = plt.figure() ax = plt.gca() ax.plot(alpha_scores[0],alpha_scores[1], c='red') ax.set_xscale('log') plt.title("Alpha Exploration - Log Regression") plt.xlabel("Alpha") plt.ylabel("Score") plt.show()