def prepare_data():
    # Prepare training and testing data.
    training_data = list(read(dataset='training', path='./'))
    testing_data = list(read(dataset='testing', path='./'))
    split = len(training_data)

    label, pixels = training_data[0]
    train_data = np.zeros(shape=(len(training_data), 1) + pixels.shape)
    train_label = np.zeros(shape=(len(training_data), 10))
    for n in range(len(training_data)):
        train_label[n, training_data[n][0]] = 1
        train_data[n, 0, :, :] = training_data[n][1] / 255.0

    Te_label, Te_pixels = testing_data[0]
    test_data = np.zeros(shape=(len(testing_data), 1) + Te_pixels.shape)
    test_label = np.zeros(shape=(len(testing_data), 10))
    for n in range(len(testing_data)):
        test_label[n, testing_data[n][0]] = 1
        test_data[n, 0, :, :] = testing_data[n][1] / 255.0

    # Downsample training data. np.random.randint replaces the deprecated
    # np.random.random_integers; its upper bound is exclusive, so this draws
    # the same range of indices.
    n_train_samples = 30000
    train_idxs = np.random.randint(0, split, n_train_samples)
    train_data = train_data[train_idxs, ...]
    train_label = train_label[train_idxs, ...]
    return train_data, train_label, test_data, test_label
def main(): lb, im = mnist.read(dataset="training") lbt, imt = mnist.read(dataset="testing") im = im.reshape((len(im),len(im[0])*len(im[0]))) imt = imt.reshape((len(imt),len(imt[0])*len(imt[0]))) im.astype(float) imt.astype(float) # validation set imv = im[:500] lbv = lb[:500] imtt = im[500:] lbtt = lb[500:] tag = np.zeros(len(lbv),dtype=int) if READ: sigma = [] mu = [] for i in range(10): ts = np.loadtxt('sigma'+str(i)+'.out') tm = np.loadtxt('mu'+str(i)+'.out') sigma.append(ts) mu.append(tm) sigma = np.array(sigma) mu = np.array(mu) sigma_inv = [] for i in range(len(sigma)): sigma_inv.append(np.linalg.inv(sigma[i])) sigma_inv = np.array(sigma_inv) else: data = zip(lbtt, imtt) datat = zip(lbv, imv) # group data into 10 groups data_grouped = [list(v) for l,v in groupby(sorted(data, key=lambda x:x[0]), lambda x:x[0])] sigma, mu = cov.cov(data_grouped, 10000) sigma = np.array(sigma) mu = np.array(mu) sigma_inv = [] for i in range(len(sigma)): sigma_inv.append(np.linalg.inv(sigma[i])) sigma_inv = np.array(sigma_inv) #print np.linalg.slogdet(sigma[0]) pi = class_prob(lb) cnt = 0 for imt_data in imv: p = -sys.maxint ind = -1 pj = class_gauss(imt_data, sigma, sigma_inv, mu) for j in range(len(pj)): temp = math.log(pi[j])+pj[j] if (p < temp): p = temp ind = j print lbv[cnt],':',ind if lbv[cnt] == ind: tag[cnt] = 1 cnt += 1 print 'hit:',sum(tag) print 'total:', len(tag) print 'miss rate:', 1.0-float(sum(tag))/len(tag)
def trainNetwork(self):
    training_set = mnist.read(path="./mnist")
    vectorized_training_set = mnist.read(path="./mnist")
    self.network.train(vectorized_training_set, epochs=self.epochs,
                       batchsize=1, eta=self.eta / 60000)
    self.Status_Label.setText("Training finished!")
    self.StatusLamp.setGreen()
def main():
    hidden_dim = 200
    batch_size = 100
    print("hidden_dim: " + str(hidden_dim) + ", batch_size: " + str(batch_size))
    training_set = np.array(
        list(mnist.read(dataset="training", path="/home/ywang/mnist")))
    print("size of the training set: " + str(len(training_set)))
    testing_set = np.array(
        list(mnist.read(dataset="testing", path="/home/ywang/mnist")))
    print("size of the testing set: " + str(len(testing_set)))
    np.random.seed(0)
    p = 1.0
    backstitch_alpha = 0.3
    print("backstitch alpha: " + str(backstitch_alpha))
    ratio = 0.5
    training_subset = training_set[np.random.binomial(1, p, len(training_set)) == 1]
    print("number of the actual training examples: " + str(len(training_subset)))
    # resize the images to make the number of input features 4 times smaller
    #training_images = np.stack([training_subset[i][0] for i in range(len(training_subset))])
    training_images = np.stack([
        ndimage.zoom(training_subset[i][0], ratio)
        for i in range(len(training_subset))
    ])
    print("image size is: " + str(training_images.shape[1]) + " by " +
          str(training_images.shape[2]))
    training_images = np.reshape(training_images, [training_images.shape[0], -1])
    training_labels = np.stack(
        [training_subset[i][1] for i in range(len(training_subset))])
    training_examples = (training_images, training_labels)
    #testing_images = np.stack([testing_set[i][0] for i in range(len(testing_set))])
    testing_images = np.stack([
        ndimage.zoom(testing_set[i][0], ratio) for i in range(len(testing_set))
    ])
    testing_images = np.reshape(testing_images, [testing_images.shape[0], -1])
    testing_labels = np.stack(
        [testing_set[i][1] for i in range(len(testing_set))])
    testing_examples = (testing_images, testing_labels)
    nnet = NN(num_layers=1,
              input_dim=training_images.shape[1],
              hidden_dim=hidden_dim,
              num_classes=10,
              batch_size=batch_size,
              test_examples=testing_examples,
              nonlin='Tanh',
              update='natural',
              alpha=backstitch_alpha)
    nnet.Train(training_examples)
def test_numerical_gradient_checking(self):
    label, image = next(mnist.read())
    ninput = [pixel / 255 for row in image for pixel in row]
    expected = [1 if i == label else 0 for i in range(10)]
    nnet = NeuralNetwork([784, 16, 16, 10])
    epsilon = 1e-5
    numgrad = [np.empty(wmatrix.shape) for wmatrix in nnet.weight]
    for k, wmatrix in enumerate(nnet.weight):
        for i, w in np.ndenumerate(wmatrix):
            wmatrix[i] = w - epsilon
            nnet.feedforward(ninput)
            a = nnet.get_error(expected)
            wmatrix[i] = w + epsilon
            nnet.feedforward(ninput)
            b = nnet.get_error(expected)
            # Central difference: (f(w + eps) - f(w - eps)) / (2 * eps).
            # The original divided by 2 and then *multiplied* by epsilon.
            numgrad[k][i] = (b - a) / (2 * epsilon)
            wmatrix[i] = w
    error_gradient = nnet.get_error_gradient(expected)
    unit = lambda v: v / norm(v) if (v != 0).any() else np.zeros(v.shape)
    for k in range(len(nnet.weight)):
        ag = error_gradient[k]
        ng = numgrad[k]
        print(f"custom = {norm(unit(ag) - unit(ng))}")
        print(
            f"derived from cs231 = {norm(unit(ag) * norm(ng) - ng) / max(norm(ag), norm(ng))}"
        )
def learn_digits(self):
    train_digits = mnist.read("training")
    k_number = []
    k_label = []
    for i in range(5000):
        k = train_digits.next()
        k_label.append(k[0])
        k_number.append(k[1])
    y = np.array(list(k_label))
    x = np.array(list(k_number))
    print y[0]
    print x[0]
    # Now we prepare train_data and test_data.
    train = x[:5000].reshape(-1, 784).astype(np.float32)
    # Create labels for train and test data
    k = np.arange(10)
    train_labels = y[:5000].astype(np.int)
    # Initiate kNN, train the data, then test it with test data for k=1
    knn = cv2.KNearest()
    knn.train(train, train_labels)
    number = self.edit_image(self.snap())
    number = number.reshape(-1, 784).astype(np.float32)
    nparray = np.array(number)
    ret2, result2, neighbours2, dist2 = knn.find_nearest(nparray, k=5)
    print result2
def main(): lb, im = mnist.read(dataset="training") lbt, imt = mnist.read(dataset="testing") tested = len(lbt) tag = np.zeros(tested,dtype=int) im = im.reshape((len(im),len(im[0])*len(im[0]))) imt = imt.reshape((len(imt),len(imt[0])*len(imt[0]))) #im = im[:100] #imt = imt[:100] im.astype(float) imt.astype(float) data = zip(lb, im) datat = zip(lbt, imt) # group data into 10 groups data_grouped = [list(v) for l,v in groupby(sorted(data, key=lambda x:x[0]), lambda x:x[0])] cov(data_grouped, 1.0)
def load_mnist(dataset_type, n_samples):
    images, raw_targets = read(range(10), dataset_type)
    images = images.reshape(-1, 784)[:n_samples] / 255.0
    raw_targets = raw_targets[:n_samples].flatten()
    # One-hot encode the labels: row n gets a 1 in column raw_targets[n].
    targets = numpy.zeros((n_samples, 10))
    targets[(range(n_samples), raw_targets)] = 1.0
    return images, targets
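# Example usage, as a sketch: it assumes this module's read() can locate the
# MNIST idx files. Images and targets line up row for row, and each target
# row is the one-hot encoding of one digit label.
train_images, train_targets = load_mnist('training', 10000)
assert train_images.shape == (10000, 784)
assert train_targets.shape == (10000, 10)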
def run_mnist():
    # FIXME: running EM on MNIST has the problem that all data collapses to one class.
    # This is because the likelihood for that class is slightly higher than all others.
    # Probably has to do with the variance being lower for one, from k-means,
    # and that being more important than closeness to mean for such high dimensional data?
    # Running it with 0 iterations (i.e. on k-means) works fine; then it finds different orientations of the digits.
    data_per_class = 20
    training_data = list(mnist.read("training"))
    dim_x, dim_y = np.shape(training_data[0][1])
    ones = [d[1] for d in training_data if d[0] == 1]
    fours = [d[1] for d in training_data if d[0] == 4]
    fives = [d[1] for d in training_data if d[0] == 5]
    ones = ones[:data_per_class]
    fours = fours[:data_per_class]
    fives = fives[:data_per_class]
    data = np.array(ones + fours + fives).reshape((-1, dim_x * dim_y))
    solver = EM(data=data, num_classes=3, num_nuisances=3)
    split_data, thetas = solver.fit(max_iter=1)
    for c, class_thetas in enumerate(thetas):
        for n, theta in enumerate(class_thetas):
            print(f"Prior: {theta.prior}, Var: {theta.variance}")
            mnist.show(thetas[c][n].mean.reshape(28, 28))
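# A hedged sketch of one possible mitigation for the collapse described in the
# FIXME above: floor each component's variance so that no class dominates the
# likelihood purely by having a smaller variance. The per-component
# `theta.variance` attribute matches the loop above, but this helper and its
# floor value are illustrative, not part of the original EM solver.
def floor_variances(thetas, min_var=1e-2):
    # Clamp every component variance from below, in place.
    for class_thetas in thetas:
        for theta in class_thetas:
            theta.variance = max(theta.variance, min_var)
    return thetas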
def main(): lb, im = mnist.read(dataset="training") lbt, imt = mnist.read(dataset="testing") tested = len(lbt) if not KDTREE: tag = np.zeros(tested, dtype=int) im = im.reshape((len(im), len(im[0]) * len(im[0]))) imt = imt.reshape((len(imt), len(imt[0]) * len(imt[0]))) for i in range(tested): ind = knn_naive(imt[i], im, 10000) if lb[ind] == lbt[i]: tag[i] = 1 print "Good." else: tag[i] = 0 print "Oh no." print "Predicted", i, ":", lb[ind] print "Actual", i, ":", lbt[i] print "Hit:", np.sum(tag) print "Miss rate:", 1.0 - float(np.sum(tag)) / float(len(lbt)) else: im = im.reshape((len(im), len(im[0]) * len(im[0]))) imt = imt.reshape((len(imt), len(imt[0]) * len(imt[0]))) if not PS: im, lb = randomselector(im, lb, 40000) else: import ps im, lb = ps.prototypeselector(im, lb, 10000) # im,lb = prototypeselector(im,lb,1000) print "Fitting trainning set..." neigh = KNeighborsClassifier(n_neighbors=1) neigh.fit(im, lb) print "Done!" tag = np.zeros(tested, dtype=int) for i in range(tested): tmp = neigh.predict(imt[i]) if tmp[0] == lbt[i]: tag[i] = 1 print "Good." else: tag[i] = 0 print "Oh no." print "Predicted", i, ":", tmp print "Actual", i, ":", lbt[i] print "Hit:", np.sum(tag) print "Miss rate:", 1.0 - float(np.sum(tag)) / float(len(lbt))
def read_mnist(partial=False):
    logger = logging.getLogger("mnist")
    digits1 = [0, 1, 2, 3, 4]
    digits2 = [5, 6, 7, 8, 9]
    # Train on a subset m1 of digits1 and m2 of digits2 of the 60k training data.
    if partial:
        m1 = 5000
        m2 = 5000
    else:
        m1 = 60000
        m2 = 60000
    # read training data
    logger.info("Reading training data ...")
    images, labels = mnist.read(digits1 + digits2,
                                dataset="training",
                                path=os.path.join("examples", "data"))
    logger.info("done.")

    def extract(images, labels):
        images = images / 256.0
        C1 = [k for k in xrange(len(labels)) if labels[k] in digits1]
        C2 = [k for k in xrange(len(labels)) if labels[k] in digits2]
        random.shuffle(C1)
        random.shuffle(C2)
        # Extract the random subsets together as a data matrix X (1 row per datapoint)
        train = C1[:m1] + C2[:m2]
        random.shuffle(train)
        X = array(images[train, :])
        d = array([2 * (k in digits1) - 1 for k in labels[train]])
        return (X, d)

    (X, d) = extract(images, labels)
    logger.info("Reading test data ...")
    timages, tlabels = mnist.read(digits1 + digits2,
                                  dataset="testing",
                                  path=os.path.join("examples", "data"))
    (Xt, dt) = extract(timages, tlabels)
    logger.info("done.")
    return (X, d, Xt, dt)
def evaluate(path='.'):
    params = train(path)
    images, labels = mnist.read(range(10), 'testing', path)
    c = 0
    N = images.shape[0]
    for i in range(N):
        c += int(classify(images[i], params) == labels[i])
    return c / float(N)
def train(path='.'):
    # The model is simply the per-digit mean image.
    params = None
    for i in range(10):
        images, _ = mnist.read([i], 'training', path)
        size = images[0].shape
        if params is None:
            params = np.zeros((10, size[0], size[1]))
        params[i] = images.mean(axis=0)
    return params
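# A hedged sketch of the classify() referenced by evaluate() above: with
# params holding one mean image per digit, predict the digit whose mean is
# nearest in Euclidean distance. The original project may define it
# differently; this is only one consistent reading of train()/evaluate().
def classify(image, params):
    # Squared distance from the image to each of the 10 mean images.
    dists = ((params - image) ** 2).sum(axis=(1, 2))
    return int(np.argmin(dists))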
def test_trained(params=None, head=100, tail=100):
    "Tests a network with params against first `head` and last `tail` examples"
    params = params if params is not None else load_params()
    nnet = NeuralNetwork(DLAYERS, params)
    mnist_db = list(mnist.read())
    print("[KNOWN]")
    test_and_report_against(nnet, mnist_db[:head])  # Training dataset
    print("[UNKNOWN]")
    test_and_report_against(nnet, mnist_db[-tail:])  # Unknown dataset
def load_mnist_all(dataset):
    # Load all the data.
    images, labels = mnist.read(digits, dataset=dataset, path="./mnist/")
    # Turn the cvxopt matrices into a numpy 2d array and a list.
    images = np.array(images)
    labels = list(labels)
    return normalize_mnist(images), labels
def adaboost():
    X_train, y_train = read('train')
    X_test, y_test = read('test')
    X_train = X_train.reshape((X_train.shape[0], -1))
    X_test = X_test.reshape((X_test.shape[0], -1))
    bdt_discrete = AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=10,
                               min_samples_split=20,
                               min_samples_leaf=5),
        n_estimators=500,
        learning_rate=0.5,
        algorithm='SAMME')
    bdt_discrete.fit(X_train, y_train)
    discrete_test_errors = []
    for discrete_train_predict in bdt_discrete.staged_predict(X_test):
        discrete_test_errors.append(
            1. - accuracy_score(discrete_train_predict, y_test))
    return bdt_discrete, discrete_test_errors
class mainWindow():
    times = 1
    timestart = time.clock()
    to_read = [1, 8]  #[0,1,2,3,4,5,6,7,8,9]
    train, labels = mnist.read(to_read)
    data = imresize(train[0], (400, 500))

    def __init__(self):
        self.root = Tkinter.Tk()
        self.frame = Tkinter.Frame(self.root, width=500, height=400)
        self.frame.pack()
        self.canvas = Tkinter.Canvas(self.frame, width=500, height=400)
        self.canvas.place(x=-2, y=-2)
        self.root.after(1, self.start)  # INCREASE THE 0 TO SLOW IT DOWN
        self.root.mainloop()
        self.shakerato = False
        self.smokypixels = True
        self.max_rand = 255
        to_read = [1, 8]  #[0,1,2,3,4,5,6,7,8,9]
        self.train, self.labels = mnist.read(to_read)
        self.data = imresize(self.train[0], (400, 500))

    def change_img(self):
        this_p = numpy.random.randint(len(self.labels))
        self.train[this_p][self.labels[this_p] * (28 / 10):(self.labels[this_p] + 1) * (28 / 10), 0:3] = 255
        self.data = imresize(numpy.fliplr(self.train[this_p]), (400, 500))

    def start(self):
        self.im = Image.fromstring('L', (self.data.shape[1], self.data.shape[0]),
                                   self.data.astype('b').tostring())
        self.photo = ImageTk.PhotoImage(image=self.im)
        self.canvas.create_image(0, 0, image=self.photo, anchor=Tkinter.NW)
        self.root.update()
        self.times += 1
        #### HERE REGULATES TIME BETWEEN IMAGES
        if self.times % 25 == 0:
            #print "%.02f FPS"%(self.times/(time.clock()-self.timestart))
            self.change_img()
        self.root.after(25, self.start)
        #if(self.shakerato == True):
        #    self.data=numpy.roll(self.data,numpy.random.choice(([-1,1])),numpy.random.choice(([-1,1])))
        #elif(self.smokypixels == True):
        ### THIS REGULATES PIXELS UPDATES
        tmp_data = self.data
        x, y = numpy.where(tmp_data <= 150)
        tmp_data[x, y] = 130
        x, y = numpy.where(tmp_data > 133)  #133
        for i in range(len(x) * 2):
            this_p = numpy.random.randint(len(x))
            self.data[x[this_p], y[this_p]] = numpy.random.randint(20)
def save_mnist(dataset, dst):
    label_file = open(Path(dst, "label.txt"), "w")
    generator = read(dataset=dataset, path=mnist_source)
    count = [0 for i in range(10)]
    for label, image in generator:
        filename = "{}_{:04d}.png".format(label, count[label])
        label_file.write("{},{}\n".format(filename, label))
        imwrite(str(Path(dst, filename)), image)
        count[label] += 1
    label_file.close()
def backpropagation_main():
    label, image = next(mnist.read())
    ninput = [pixel / 255 for row in image for pixel in row]
    expected = [1 if i == label else 0 for i in range(10)]
    nnet = NeuralNetwork(DLAYERS, params=None)
    # nnet = NeuralNetwork(DLAYERS, params=load_params())
    for i in range(1000000000000):
        guess = nnet.feedforward(ninput)
        cost = nnet.get_error(expected)
        print(f"[{i + 1}] cost = {cost}, guess = {guess}")
        try:
            nnet.backpropagate(expected)
        except KeyboardInterrupt:
            break
    guess = nnet.feedforward(ninput)
    cost = nnet.get_error(expected)
    print(f"[{i + 1}] cost = {cost}")
    save_params(nnet.params)
def getTrainData(count, digit):
    # count is the number of training examples to collect
    # digit is the specific number between 0 and 9 from the training dataset
    trainingData = np.zeros((count, 400), int)
    i = 0
    for img in read("training"):
        if i < count:
            if img[0] == digit:  # This is the label
                row, col = img[1].shape
                im = img[1]
                im = im.reshape(28, 28)
                im2 = cv2.resize(im, (20, 20), interpolation=cv2.INTER_CUBIC)
                im2 = im2.reshape(1, 400)
                trainingData[i, :] = im2
                i += 1
    # Binarize the 20x20 patches.
    trainingData = (trainingData > 125).astype(int)
    return trainingData
def load_mnist(dataset, digitA, digitB):
    # Use only the data points with labels digitA or digitB.
    images, labels = mnist.read(digits, dataset=dataset, path="./mnist/")
    # Turn the cvxopt matrices into a numpy 2d array and a list.
    images = np.array(images)
    labels = list(labels)
    new_images = []
    new_labels = []
    for i, label in enumerate(labels):
        if label == digitA or label == digitB:
            new_images.append(images[i, :])
            if label == digitA:
                new_labels.append(1.0)
            if label == digitB:
                new_labels.append(-1.0)
    return normalize_mnist(np.array(new_images)), new_labels
import numpy as np
import mnist

# Load the raw MNIST
X_train, y_train = mnist.read(dataset='training')
X_test, y_test = mnist.read(dataset='testing')

# Subsample the data for more efficient code execution in this exercise
num_training = 6000
X_train = X_train[:num_training]
y_train = y_train[:num_training]
num_test = 500
X_test = X_test[:num_test]
y_test = y_test[:num_test]

# Reshape the image data into rows
# Datatype int allows you to subtract images (is otherwise uint8)
X_train = np.reshape(X_train, (X_train.shape[0], -1)).astype('int')
X_test = np.reshape(X_test, (X_test.shape[0], -1)).astype('int')

# As a sanity check, we print out the size of the training and test data.
print('Training data shape: ', X_train.shape)
print('Training labels shape: ', y_train.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

###############################################################################
#                                                                             #
#                 Implement the k Nearest Neighbors algorithm here            #
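# A minimal sketch of the requested classifier, assuming the X_train/y_train/
# X_test arrays prepared above; the helper name knn_predict and the choice
# k=5 are illustrative, not part of the original exercise code.
def knn_predict(X_train, y_train, X_test, k=5):
    predictions = np.empty(len(X_test), dtype=y_train.dtype)
    for i, x in enumerate(X_test):
        # L2 distance from this test image to every training image.
        dists = np.sqrt(((X_train - x) ** 2).sum(axis=1))
        # Majority vote among the k nearest training labels.
        nearest = y_train[np.argsort(dists)[:k]]
        predictions[i] = np.bincount(nearest).argmax()
    return predictions

# Example usage:
# y_pred = knn_predict(X_train, y_train, X_test, k=5)
# print('Accuracy: ', (y_pred == y_test).mean())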
# Do we have a fancy GPU?
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Parameters
num_epochs = 100
num_classes = 10      # i.e. the digits
batch_size = 300      # How many samples per batch to use
learning_rate = 0.001

# Load MNIST data
X_train = []
X_test = []
Y_train = []
Y_test = []
for element in m.read("training"):
    Y_train.append(element[0])
    X_train.append(element[1])
for element in m.read("testing"):
    Y_test.append(element[0])
    X_test.append(element[1])
X_train = np.expand_dims(np.asarray(X_train, dtype=np.float32), axis=1)
X_test = np.expand_dims(np.asarray(X_test, dtype=np.float32), axis=1)
Y_train = np.asarray(Y_train, dtype=np.int64)
Y_test = np.asarray(Y_test, dtype=np.int64)

train_set = torch.utils.data.TensorDataset(torch.from_numpy(X_train),
                                           torch.from_numpy(Y_train))
test_set = torch.utils.data.TensorDataset(torch.from_numpy(X_test),
from mnist import read, show, normalize
import numpy as np
import matplotlib.pyplot as plt

train = list(read('train'))
data = train[0]
label = data[0]
pixels = data[1:]
pixels = np.array(pixels, dtype='uint8')
pixels = pixels.reshape((28, 28))
plt.title('Example of MNIST pattern')
plt.imshow(pixels, cmap='gray')
plt.show()
        train_obj = softmaxObjective(train_x, train_y, theta)
        print('Iteration {}: Training set objective function: {} \r'.format(
            ct, train_obj))
        theta += -(alpha * gradient)
        assert theta.shape == (n, 10)
        ct += 1
    print ""
    print("Training set accuracy: {0:0.2f}%".format(
        100 * softmax_classifier_acuracy(theta, train_x, train_y)))
    print("Testing set accuracy: {0:0.2f}%".format(
        100 * softmax_classifier_acuracy(theta, test_x, test_y)))

cwd = os.getcwd()
train_images = read("training", cwd)  # "training" or "testing"
n = 28 * 28
m = 1000  # Number of training examples, max: 60000
train_y = np.empty([m, 1])
train_x = np.empty([m, n])
i = 0
for image in train_images:
    if i >= m:
        break
    train_x[i, :] = np.reshape(image[1], (1, n))
    train_y[i] = image[0]
    i += 1
train_x = train_x[:i, :]
train_y = train_y[:i]
mean_x = np.mean(train_x, axis=0)
std_x = np.std(train_x, axis=0) + 0.1
""" Implements a Neural Network """ from vectorflux import VectorFlux from mnist import read, show, normalize from vectorflux.layers import Dense from vectorflux.layers.Dropout import Dropout train = list(read('train')) test = list(read('test')) print("Train size: {}".format(len(train))) print("Test size: {}".format(len(test))) # Normalization for values test_x, test_y = normalize(test) train_x, train_y = normalize(train) vf = VectorFlux() vf.add(Dense(800, activation='sigmoid', input_shape=784, optimizer='Momentum')) vf.add(Dropout(0.5, input_shape=800)) vf.add(Dense(800, activation='sigmoid', input_shape=800, optimizer='ADAM')) vf.add(Dense(10, activation='sigmoid', input_shape=800)) vf.train(x_train=train_x, y_train=train_y, x_test=test_x, y_test=test_y, epochs=100000,
def test(self):
    images, labels = mnist.read('testing')
    test(images, labels)
def main(): lbt, imt = mnist.read(dataset="testing") print lbt[4] mnist.show(imt[4])
class GANeuralNetwork(Subject, NeuralNetwork):
    __mnist_db = list(mnist.read())

    _genome: List[float]
    _fitness: float

    def __init__(self, params):
        """
        Precondition: use set_layers_description() before any instantiation
        so dlayers is initialized
        """
        super().__init__(GANeuralNetwork.dlayers, params)
        self._genome = params
        self._fitness = None

    @classmethod
    def create_random(cls):
        return GANeuralNetwork(cls.get_random_params())

    @property
    def genome(self) -> List[float]:
        return self._genome

    @property
    def fitness(self) -> float:
        return self.batch_cost() if not self._fitness else self._fitness

    def batch_cost(self, batch_size=10, random_samples=False):
        "Runs a random minibatch and returns average network cost"
        costs = [None] * batch_size
        db = (
            sample(GANeuralNetwork.__mnist_db, batch_size)
            if random_samples
            else GANeuralNetwork.__mnist_db[:batch_size]
        )
        for i, (label, image) in enumerate(db):
            # TODO: parallelize runs
            # Run network
            ninput = [pixel / 255 for row in image for pixel in row]  # Normalized
            guess = self(self.genome, ninput)
            # Cost calculation
            expected = [1 if i == label else 0 for i in range(10)]
            costs[i] = sum((g - e)**2 for g, e in zip(guess, expected))
        cost = mean(costs)
        self._fitness = -cost
        # print(f"Average cost of {cost} after {batch_size} runs")
        return self._fitness

    # TODO: Think more about this and make it better
    # Maybe a urand in [c +- d] range with c = (min + max) / 2, d = max - min
    @staticmethod
    def mutate(gen):
        return gen + randn()

    @classmethod
    def set_layers_description(cls, dlayers):
        """
        Override of NeuralNetwork method that makes it static;
        dlayers will be used as a static attribute of the GANeuralNetwork class
        """
        cls.dlayers = dlayers

    @classmethod
    def get_random_params(cls):
        return super().get_random_params(cls)
def train(self):
    images, labels = mnist.read('training')
    train(images, labels)
@author: Bruce
"""
import os
import mnist, svmcmpl, cvxopt, random

digits1 = [0]
digits2 = [1]
m1 = 4000
m2 = 4000

# read training data
dpath = os.getcwd() + "\\data\\mnist"
print(dpath)
images, labels = mnist.read(digits1 + digits2, dataset="training", path=dpath)
images = images / 256.

C1 = [k for k in range(len(labels)) if labels[k] in digits1]
C2 = [k for k in range(len(labels)) if labels[k] in digits2]
random.seed()
random.shuffle(C1)
random.shuffle(C2)
train = C1[:m1] + C2[:m2]
random.shuffle(train)
X = images[train, :]
d = cvxopt.matrix([2 * (k in digits1) - 1 for k in labels[train]])
gamma = 4.0
import sys

import numpy as np
import scipy.misc
import imageio

sys.path.append("modules")
import mnist
sys.path.append("extension")
import pyceptron

training_set = mnist.read(dataset="training", path="mnist")
testing_set = mnist.read(dataset="testing", path="mnist")

architecture = [784, 300, 10]
activation = "ReLU"
softmax = 1

network = pyceptron.Network(architecture, activation=activation, softmax=softmax)
network.load_state("states/784_300_10_sgd_ReLU_softmax_229.state")

bad_lbl = list()
bad_img = list()

print(len(testing_set))

# Calculate the training error rate
for i in range(len(testing_set)):
    sample = testing_set[i]
    prediction = network.predict(sample[0])
    prediction = np.argmax(prediction)
    expectation = np.argmax(sample[1])
    if not prediction == expectation:
import mnist
import Image
import os
from pylab import imread, imshow, gray, mean
from numpy import array
from cvxopt import matrix
from scipy.misc import imsave

alphabets = ['a', 'b', 'c', 'A']

# read all the training data and labels
for i in alphabets:
    print 'Reading char=' + str(i) + ', ord=' + str(ord(i))
    images, labels = mnist.read([ord(i)])
    for j in range(0, images.size[0]):
        img_matrix_1d = images[j, :]
        img_matrix = matrix(img_matrix_1d, (28, 28)).trans()
        img_array = array(img_matrix)
        savePath = './raw/training/' + str(ord(i)) + '/'
        if not os.path.exists(savePath):
            os.makedirs(savePath)
        imsave(savePath + str(j) + '.png', img_array)
from scipy.misc import imsave
from sklearn import svm, metrics
from sklearn.cross_validation import LeaveOneOut
from sklearn.externals import joblib
from sklearn.multiclass import OneVsRestClassifier
import prepare_data
import scipy
from numpy import vstack
from numpy import append
import os

# train the alphabet characters
alphabets = []  #['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'] #'a', 'd', 'e', 'c', 'u', 's', 'o', 'f', 't']
alphabets_ord = map(ord, alphabets)
ignoreList = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
ignoreList = map(ord, ignoreList)

images, labels = mnist.read(alphabets_ord, ignoreList=ignoreList)
images = array(images)
labels = array(labels).reshape(1, len(labels))[0]

fonts = ['nothingyoucoulddo', 'comicsans', 'dakota', 'showhands', 'danielbd',
         'danielbk', 'dandelion', 'daniel']

for font in fonts:
    font_images, font_labels = mnist.read(alphabets_ord, './../../data/',
                                          font + '_img.idx',
                                          font + '_label.idx',
                                          ignoreList=ignoreList)
    font_images = array(font_images)
    font_labels = array(font_labels).reshape(1, len(font_labels))[0]
    images = vstack((images, font_images))
    labels = append(labels, font_labels)
'''
images, labels = mnist.read(alphabets_ord, './../../data/', 'comicsans_img.idx', 'comicsans_label.idx')
def __init__(self, config, phase='train'):
    self.config = config
    self.imgs, self.lbls = mnist.read(dataset=phase, path=self.config.mnist_dir)
import os

import mnist
from cv2 import imwrite

src = "raw_mnist"


def initialize_folders():
    for i in range(10):
        folder_name = os.path.join(src, str(i))
        os.makedirs(folder_name, exist_ok=True)


if __name__ == "__main__":
    initialize_folders()
    gen = mnist.read()
    counter = 0
    while True:
        label, img = next(gen)
        dir_name = os.path.join(src, str(label))
        filename = os.path.join(src, str(label),
                                "{}_{:04d}.png".format(label, len(os.listdir(dir_name))))
        imwrite(filename, img)
        counter += 1
        if counter % 1000 == 0:
            print(counter)
def main():
    f = 0.05
    th = 6.27344414456
    #f = 0.1
    #th = 9.25216525759
    #f = 0.15
    #th = 12.6067380436
    #f = 0.2
    #th = 14.6211445211
    lb, im = mnist.read(dataset="training")
    lbt, imt = mnist.read(dataset="testing")
    tested = len(lbt)
    tag = np.zeros(tested, dtype=int)
    im = im.reshape((len(im), len(im[0]) * len(im[0])))
    imt = imt.reshape((len(imt), len(imt[0]) * len(imt[0])))
    if READ:
        sigma = []
        mu = []
        for i in range(10):
            ts = np.loadtxt('sigma' + str(i) + '.out')
            tm = np.loadtxt('mu' + str(i) + '.out')
            sigma.append(ts)
            mu.append(tm)
        sigma = np.array(sigma)
        mu = np.array(mu)
        sigma_inv = []
        for i in range(len(sigma)):
            sigma_inv.append(np.linalg.inv(sigma[i]))
        sigma_inv = np.array(sigma_inv)
    else:
        data = zip(lb, im)
        datat = zip(lbt, imt)
        # group data into 10 groups
        data_grouped = [list(v) for l, v in groupby(sorted(data, key=lambda x: x[0]), lambda x: x[0])]
        sigma, mu = cov.cov(data_grouped, 10000)
        sigma = np.array(sigma)
        mu = np.array(mu)
        sigma_inv = []
        for i in range(len(sigma)):
            sigma_inv.append(np.linalg.inv(sigma[i]))
        sigma_inv = np.array(sigma_inv)
    #print np.linalg.slogdet(sigma[0])
    pi = class_prob(lb)
    cnt = 0
    ab = 0
    for imt_data in imt:
        p = -sys.maxint
        p2 = -sys.maxint
        ind = -1
        ind2 = -1
        pj = class_gauss(imt_data, sigma, sigma_inv, mu)
        for j in range(len(pj)):
            temp = math.log(pi[j]) + pj[j]
            if p < temp:
                # Demote the previous best to second best before replacing it
                # (the original elif dropped it, so p2 could end up wrong).
                p2 = p
                ind2 = ind
                p = temp
                ind = j
            elif p2 < temp:
                p2 = temp
                ind2 = j
        # Abstain when the top two class scores are closer than th.
        if (p - p2) < th:
            ab += 1
            print lbt[cnt], ':', ind, ':', p, '(abstain!)'
        else:
            print lbt[cnt], ':', ind, ':', p
        if lbt[cnt] == ind:
            tag[cnt] = 1
        cnt += 1
    print 'hit:', sum(tag)
    print 'total:', len(tag)
    print 'abstain:', ab
    print 'miss rate:', 1.0 - float(sum(tag)) / (len(tag) - ab)
import argparse

import mnist
import naive_bayes as nb
import numpy as np

parser = argparse.ArgumentParser(description='It is a program for ML HW#2.')
parser.add_argument('train_img_path', help='file path of train img', type=str)
parser.add_argument('train_lbl_path', help='file path of train lbl', type=str)
parser.add_argument('test_img_path', help='file path of test img', type=str)
parser.add_argument('test_lbl_path', help='file path of test lbl', type=str)
parser.add_argument('mode', help='toggle option', type=int)
args = parser.parse_args()

print('train_img_path: {}'.format(args.train_img_path))
print('train_lbl_path: {}'.format(args.train_lbl_path))
print('test_img_path: {}'.format(args.test_img_path))
print('test_lbl_path: {}'.format(args.test_lbl_path))

train_img, train_lbl = mnist.read(args.train_img_path, args.train_lbl_path)
test_img, test_lbl = mnist.read(args.test_img_path, args.test_lbl_path)
print(train_img.shape)

nb.classify(train_img, train_lbl, test_img, test_lbl, args.mode)
# Dense data
#y, x = [1,-1], [[1,0,1], [-1,0,-1]]
# Sparse data
y, x = [1, -1], [{1: 1, 3: 1}, {1: -1, 3: -1}]
prob = svm_problem(y, x)
param = svm_parameter('-t 0 -c 4 -b 1')
m = svm_train(prob, param)
'''
SUBSAMPLE = 10000
digits = [0,1,2,3,4,5,6,7,8,9]
DATAPATH = "./data/mnist/"

train_images, train_labels = mnist.read(digits, dataset='training', path=DATAPATH)
x = np.array(train_images).tolist()  #svm requires a list
y = np.array(train_labels).T.astype(float).tolist()[0]
pprint(len(x))
x = x[:SUBSAMPLE]
y = y[:SUBSAMPLE]
#pprint(x)
#pprint(y)
#pprint(x[0])

prob = svm_problem(y, x)
    return fin


if __name__ == '__main__':
    config = Config()
    n_gpu = pytorch_utils.setgpu('6')
    net = ResNet_CAM()
    checkpoint = torch.load(root_path + 'checkpoints/020.ckpt')  # must come before cuda
    net.load_state_dict(checkpoint['state_dict'])
    net = net.cuda()
    cudnn.benchmark = True
    net = DataParallel(net)
    net.eval()

    imgs, lbls = mnist.read(dataset='test', path=config.mnist_dir)
    idcs = np.random.randint(0, len(lbls), size=(3, ))
    img_pad = np.zeros([3, config.img_size, config.img_size], dtype=np.float32)
    classes = []
    for idx in idcs:
        img = imgs[idx].astype(np.float32)
        ih, iw = img.shape
        img = (img / 255. - config.mnist_mean) / config.mnist_std
        x, y = np.random.randint(0, config.img_size - ih, (2, ), dtype=np.int16)
        for k in range(3):
            img_pad[k, y:y + ih, x:x + iw] = img
        classes.append(lbls[idx])
    data = np.expand_dims(img_pad, 0).copy()
    data = torch.from_numpy(data)
import mnist
import numpy as np
import matplotlib.pyplot as plt

plt.figure()
images = mnist.read()
for i in range(16):
    plt.subplot(4, 4, i + 1)
    im = next(images)[1]
    fig = plt.imshow(1 - im / 128, cmap='Greys')
    fig.axes.get_xaxis().set_visible(False)
    fig.axes.get_yaxis().set_visible(False)
plt.savefig('ex.png', bbox_inches='tight', pad_inches=0, dpi=300)
#fig.show()
import mnist, Image, sys
from pylab import imread, imshow, gray, mean
from numpy import array
from cvxopt import matrix
from pprint import pprint
from scipy.misc import imsave
from sklearn.metrics import classification_report, precision_score, recall_score
from sklearn.cross_validation import LeaveOneOut, StratifiedKFold
from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC, LinearSVC

# train the alphabet characters
alphabets = []  # ['a', 'b', 'c', 'd', 'e', 'A', 'B', 'C', 'D', 'E']
alphabets_ord = map(ord, alphabets)

images, labels = mnist.read(alphabets_ord)
images = array(images)
labels = array(labels).reshape(1, len(labels))[0]
n_samples = len(images)

# read the training data and labels
sys.stdout.write('Reading in training data and labels')
X = images
y = labels
sys.stdout.write(' ... Done!\n')

# split the data into two equal parts respecting label proportions
train, test = iter(StratifiedKFold(y, 2)).next()

#################################################
# set the tuning parameters
from keras.models import Sequential
from keras.layers import Dense
import keras
import numpy as np
import mnist
import time

inp = 784
img, lbl = mnist.read(dataset="training", path="")
img = img[:1000]
lbl = lbl[:1000]

x = []
for u in img:
    v = []
    for l in u:
        v.extend(l / 255.0)
    x.append(v)
x = np.array(x)

y = []
for a in lbl:
    v = [0.0] * 10
    v[a] = 1.0
    y.append(v)
y = np.array(y)

model = Sequential()
model.add(Dense(100, input_dim=inp, activation='sigmoid'))
import mnist, Image, sys
from pylab import imread, imshow, gray, mean
from numpy import array
from cvxopt import matrix
from scipy.misc import imsave
from sklearn import svm, metrics

# train on the digits 1-5
digit = [1, 2, 3, 4, 5]

# read the training data and labels
sys.stdout.write('Reading in training data and labels')
train_images, train_labels = mnist.read(digit, "training", "./../../data")
sys.stdout.write(' ... Done!\n')

# read the test data and labels
sys.stdout.write('Reading in testing data and labels')
test_images, test_labels = mnist.read(digit, "testing", "./../../data")
sys.stdout.write(' ... Done!\n')

# convert to arrays
x_train = array(train_images)
y_train = array(train_labels).reshape(1, len(train_labels))[0]
x_test = array(test_images)
y_test = array(test_labels).reshape(1, len(test_labels))[0]

# create classifier
classifier = svm.LinearSVC()

# train the classifier
import mnist
import Image
from pylab import imread, imshow, gray, mean
from numpy import array
from cvxopt import matrix
from scipy.misc import imsave

# read all the training data and labels
images, labels = mnist.read([0])

# get the first row of pixel data
img0_matrix_1d = images[0, :]
img0_matrix = matrix(img0_matrix_1d, (28, 28))
img0_array = array(img0_matrix)

# save the image
imsave('./raw/mnist_training_digit_4_data_0.png', img0_array)