def test_log_regression(stress=False):
    """
    Grade the student's logistic regression against stored reference weights.

    Trains ``classifier.Classifier`` on a fixed slice of the training images
    with a fixed NumPy random seed, then compares the learned ``theta`` with
    the reference vector unpickled from ``grading_data``.

    Args:
        stress: When true, use the larger configuration (seed 133, k=10,
            training images 100-199, 100 iterations). Otherwise use the
            simple one (seed 142, k=5, first 100 images, 5 iterations).

    Returns:
        A ``(passed, message)`` tuple.
    """
    import classifier

    clf = classifier.Classifier()
    if stress:
        np.random.seed(133)
        k = 10
        images = util.loadTrainImages()[100:200]
        clf.maxIter = 100
        thetaFile = 'grading_data/log_reg_stress.npy'
    else:
        np.random.seed(142)
        k = 5
        images = util.loadTrainImages()[:100]
        clf.maxIter = 5
        thetaFile = 'grading_data/log_reg_simple.npy'

    clf.train(images, k)
    studentTheta = clf.theta.squeeze()

    # Pickle streams are binary data: read them in 'rb' mode, and let the
    # context manager close the handle even when unpickling raises.
    with open(thetaFile, 'rb') as fid:
        theta = pickle.load(fid)

    if studentTheta.shape != theta.shape:
        return False, "Dimension mismatch"

    # Only the Euclidean norms of the two vectors are compared.
    refNorm = np.sqrt(np.sum(theta**2))
    studentNorm = np.sqrt(np.sum(studentTheta**2))
    if np.abs(refNorm - studentNorm) > 1e-3:
        return False, "Parameter vector mismatch"
    return True, "Passed!"
def test_log_regression():
    """
    Sanity-check the logistic regression implementation.

    Seeds NumPy with 33, trains ``classifier.Classifier`` for 5 iterations
    with k=5 on the first 100 training images, and compares the learned
    ``theta`` with the reference vector stored in ``data/log_reg_test.npy``.
    The outcome is printed; nothing is returned.

    Raises:
        AssertionError: If the student and reference vectors differ in shape.
    """
    np.random.seed(33)
    print("Testing implementation of logistic regression...")
    import classifier

    k = 5
    images = util.loadTrainImages()[:100]
    clf = classifier.Classifier()
    clf.maxIter = 5
    clf.train(images, k)

    # Pickle streams are binary data: read them in 'rb' mode, and let the
    # context manager close the handle even when unpickling raises.
    with open('data/log_reg_test.npy', 'rb') as fid:
        theta = pickle.load(fid)

    studentTheta = clf.theta.squeeze()
    assert studentTheta.shape == theta.shape, "Dimension mismatch"

    # Two cheap summary statistics are compared: the sum of the entries and
    # the Euclidean norm.
    if np.abs(np.sum(theta) - np.sum(studentTheta)) > 1e-3:
        print("Parameter vector mismatch, test failed")
        return
    if np.abs(np.sqrt(np.sum(theta**2)) - np.sqrt(np.sum(studentTheta**2))) > 1e-3:
        print("Parameter vector mismatch, test failed")
        return
    print("Logistic regression test passed.")
def run(self):
    """
    Function: Run
    -------------
    Executes the complete pipeline: loads the first 1000 training images and
    all test images, trains a Classifier with K = 25, and reports the
    accuracy on both sets through self.evaluate / self.reportAccuracy.
    Students are not expected to edit this method, only to understand it.
    """
    print("Running the full pipeline!")
    numCentroids = 25
    trainSet = util.loadTrainImages()[:1000]
    testSet = util.loadTestImages()
    model = Classifier()

    print('Training..........')
    model.train(trainSet, numCentroids)
    trainAccuracy = self.evaluate(model.test(trainSet), trainSet)

    print('Testing...........')
    testAccuracy = self.evaluate(model.test(testSet), testSet)

    print('All done. Here is your summary:')
    summary = (
        (trainAccuracy, 'Train Accuracy'),
        (testAccuracy, 'Test Accuracy'),
    )
    for accuracy, label in summary:
        self.reportAccuracy(accuracy, label)
def test_kmeans():
    """
    Sanity-check the K-means implementation in ``featureLearner``.

    Seeds NumPy with 33, runs ``FeatureLearner.runKmeans`` (k=5, 10
    iterations) on the first 100 training images, validates the type and
    shape of the learned centroids, and compares them with the reference
    centroids stored in ``data/kmeans_test.npy``. The outcome is printed;
    nothing is returned.

    Raises:
        AssertionError: If the centroids are not a 2-D NumPy array of shape
            ``(util.patch_dim**2, k)``.
    """
    np.random.seed(33)
    print("Testing K-Means implementation...")
    import featureLearner as fl

    k = 5
    learner = fl.FeatureLearner(k)
    learner.maxIter = 10
    tr = util.loadTrainImages()[:100]
    learner.runKmeans(tr)

    # check the basics
    assert isinstance(learner.centroids, np.ndarray), "centroids should be stored in numpy array"
    assert len(learner.centroids.shape) == 2, "centroids array should be 2-D"
    assert learner.centroids.shape[0] == util.patch_dim**2, "Size of centroids not correct"
    assert learner.centroids.shape[1] == k, "Number of centroids not correct"

    # Load the reference centroids. Pickle streams are binary data, so read
    # in 'rb' mode; the context manager closes the handle even on failure.
    with open('data/kmeans_test.npy', 'rb') as testDat:
        centroids = pickle.load(testDat)

    # check that they are the same (sum of squared element-wise differences)
    diff = np.sum((centroids.reshape([-1]) - learner.centroids.reshape([-1]))**2)
    if diff > 1e-5:
        print("Somethings wrong, your centroids don't match the test centroids")
    else:
        print("K-means test passed")
def run(grad, view, pixels, maxIter, numTrain):
    """
    Train a classifier on either K-means or raw-pixel features and print the
    train and test accuracy.

    Args:
        grad: Forwarded to ``util.Classifier`` as ``gradient``.
        view: When truthy (K-means mode only), display the learned centroids
            with ``util.viewPatches``.
        pixels: The K-means branch runs only when this is exactly ``False``;
            any other value selects the pixel-feature branch.
        maxIter: Iteration count for the classifier; replaced by 100 in the
            pixel-feature branch.
        numTrain: Number of training images used for K-means. The supervised
            classifier always uses at most the first 500 of them.
    """
    numCentroids = 25
    kMeansIters = 20

    unlabeled = util.loadTrainImages()[:numTrain]
    labeled = unlabeled[:500]
    heldOut = util.loadTestImages()

    if pixels is not False:
        maxIter = 100
        trainX, trainY = util.pixelFeatures(labeled)
        testX, testY = util.pixelFeatures(heldOut)
    else:
        # Stack every image's patches side by side into one 2-D array
        # (patchSize x numPatches).
        patchColumns = []
        for image in unlabeled:
            patchColumns.append(np.array(image.getPatches()).transpose())
        patches = np.hstack(patchColumns)

        print("Training K-means using %d images" % numTrain)
        centroids = submission.runKMeans(numCentroids, patches, kMeansIters)
        trainX, trainY = util.kMeansFeatures(labeled, centroids, submission.extractFeatures)
        testX, testY = util.kMeansFeatures(heldOut, centroids, submission.extractFeatures)
        if view:
            util.viewPatches(centroids)

    clf = util.Classifier(maxIter=maxIter, alpha=5e-5, gradient=grad)
    clf.train(trainX, trainY)

    trainPredictions = clf.test(trainX)
    trainCorrect = np.sum(trainY == trainPredictions)
    print("Train accuracy is %f" % (trainCorrect / float(trainY.size)))

    testPredictions = clf.test(testX)
    testCorrect = np.sum(testY == testPredictions)
    print("Test accuracy is %f" % (testCorrect / float(testY.size)))
def run(self):
    """
    Function: Run
    -------------
    Evaluates the solution end to end. A Classifier is trained with K = 25
    on the first 1000 training images; its predictions on the training and
    test images are scored with self.evaluate and the two accuracies are
    passed to self.reportAccuracy. No student code belongs in this method,
    but it is worth reading to understand the pipeline.
    """
    print("Running the full pipeline!")
    centroidCount = 25
    trainingData = util.loadTrainImages()[:1000]
    testingData = util.loadTestImages()
    clf = Classifier()

    print('Training..........')
    clf.train(trainingData, centroidCount)
    predictedTrain = clf.test(trainingData)
    accTrain = self.evaluate(predictedTrain, trainingData)

    print('Testing...........')
    predictedTest = clf.test(testingData)
    accTest = self.evaluate(predictedTest, testingData)

    print('All done. Here is your summary:')
    self.reportAccuracy(accTrain, 'Train Accuracy')
    self.reportAccuracy(accTest, 'Test Accuracy')
def test_feature_extraction():
    """
    Sanity-check ``FeatureLearner.extractFeatures``.

    Installs the reference centroids from ``data/kmeans_test.npy`` on a
    learner marked as trained, extracts features for training image 33, and
    compares them with the reference features in ``data/features_test.npy``.
    The outcome is printed; nothing is returned.

    Raises:
        AssertionError: If the extracted features are not a NumPy array or
            their shape differs from the reference.
    """
    np.random.seed(33)
    print("Testing implementation of feature extraction...")
    import featureLearner as fl

    k = 5
    learner = fl.FeatureLearner(k)
    learner.trained = True
    image = util.loadTrainImages()[33]

    # Load test centroids and features. Pickle streams are binary data, so
    # read in 'rb' mode; the context managers close the handles on failure.
    with open('data/kmeans_test.npy', 'rb') as testDat:
        centroids = pickle.load(testDat)
    with open('data/features_test.npy', 'rb') as testDat:
        features = pickle.load(testDat)

    learner.centroids = centroids
    studentFeats = learner.extractFeatures(image)
    assert isinstance(studentFeats, np.ndarray), "Features should be in an numpy array"
    assert studentFeats.shape == features.shape, "Dimension mismatch"

    if np.abs(np.sum(features) - np.sum(studentFeats)) > 1e-3:
        print("Feature mismatch, test failed")
        return
    # Compare the reference norm with the STUDENT norm. The previous code
    # subtracted the reference norm from itself, so this check never failed.
    if np.abs(np.sqrt(np.sum(features**2)) - np.sqrt(np.sum(studentFeats**2))) > 1e-3:
        print("Feature mismatch, test failed")
        return
    print("Feature extraction test passed")
def test_log_regression():
    """
    Sanity-check the logistic regression implementation.

    With NumPy seeded to 33, a ``classifier.Classifier`` is trained for 5
    iterations (k=5) on the first 100 training images. Its ``theta`` is then
    compared with the reference vector in ``data/log_reg_test.npy``. The
    result is printed; nothing is returned.

    Raises:
        AssertionError: If the student and reference vectors differ in shape.
    """
    np.random.seed(33)
    print("Testing implementation of logistic regression...")
    import classifier

    k = 5
    images = util.loadTrainImages()[:100]
    clf = classifier.Classifier()
    clf.maxIter = 5
    clf.train(images, k)

    # Pickle streams are binary data: read them in 'rb' mode, and let the
    # context manager close the handle even when unpickling raises.
    with open('data/log_reg_test.npy', 'rb') as fid:
        theta = pickle.load(fid)

    studentTheta = clf.theta.squeeze()
    assert studentTheta.shape == theta.shape, "Dimension mismatch"

    # First compare the sums of the entries, then the Euclidean norms.
    sumGap = np.abs(np.sum(theta) - np.sum(studentTheta))
    if sumGap > 1e-3:
        print("Parameter vector mismatch, test failed")
        return
    normGap = np.abs(np.sqrt(np.sum(theta**2)) - np.sqrt(np.sum(studentTheta**2)))
    if normGap > 1e-3:
        print("Parameter vector mismatch, test failed")
        return
    print("Logistic regression test passed.")
def test_kmeans():
    """
    Sanity-check the K-means implementation in ``featureLearner``.

    With NumPy seeded to 33, ``FeatureLearner.runKmeans`` is run (k=5, 10
    iterations) on the first 100 training images. The learned centroids are
    validated for type and shape and then compared with the reference
    centroids in ``data/kmeans_test.npy``. The result is printed; nothing is
    returned.

    Raises:
        AssertionError: If the centroids are not a 2-D NumPy array of shape
            ``(util.patch_dim**2, k)``.
    """
    np.random.seed(33)
    print("Testing K-Means implementation...")
    import featureLearner as fl

    k = 5
    learner = fl.FeatureLearner(k)
    learner.maxIter = 10
    tr = util.loadTrainImages()[:100]
    learner.runKmeans(tr)

    # check the basics
    assert isinstance(learner.centroids, np.ndarray), "centroids should be stored in numpy array"
    assert len(learner.centroids.shape) == 2, "centroids array should be 2-D"
    assert learner.centroids.shape[0] == util.patch_dim**2, "Size of centroids not correct"
    assert learner.centroids.shape[1] == k, "Number of centroids not correct"

    # Load the reference centroids. Pickle streams are binary data, so read
    # in 'rb' mode; the context manager closes the handle even on failure.
    with open('data/kmeans_test.npy', 'rb') as testDat:
        centroids = pickle.load(testDat)

    # check that they are the same (sum of squared element-wise differences)
    diff = np.sum((centroids.reshape([-1]) - learner.centroids.reshape([-1]))**2)
    if diff > 1e-5:
        print("Somethings wrong, your centroids don't match the test centroids")
    else:
        print("K-means test passed")
def run(grad, view, pixels, maxIter, numTrain):
    """
    Train a supervised classifier on K-means or raw-pixel features and print
    its accuracy on the training and test images.

    Args:
        grad: Forwarded to ``util.Classifier`` as ``gradient``.
        view: When truthy (K-means mode only), display the learned centroids
            with ``util.viewPatches`` before features are extracted.
        pixels: The K-means branch runs only when this is exactly ``False``;
            any other value selects the pixel-feature branch.
        maxIter: Iteration count for the classifier; replaced by 100 in the
            pixel-feature branch.
        numTrain: Number of training images used for K-means. The supervised
            classifier always uses at most the first 500 of them.
    """
    numCentroids = 25
    kMeansIters = 20

    unlabeled = util.loadTrainImages()[:numTrain]
    labeled = unlabeled[:500]
    heldOut = util.loadTestImages()

    if pixels is not False:
        maxIter = 100
        trainX, trainY = util.pixelFeatures(labeled)
        testX, testY = util.pixelFeatures(heldOut)
    else:
        # Stack every image's patches side by side into one 2-D array
        # (patchSize x numPatches).
        patchColumns = []
        for image in unlabeled:
            patchColumns.append(np.array(image.getPatches()).transpose())
        patches = np.hstack(patchColumns)

        print("Training K-means using %d images..." % numTrain)
        centroids = submission.runKMeans(numCentroids, patches, kMeansIters)
        print("Finished running K-means")
        if view:
            util.viewPatches(centroids)
        trainX, trainY = util.kMeansFeatures(labeled, centroids, submission.extractFeatures)
        testX, testY = util.kMeansFeatures(heldOut, centroids, submission.extractFeatures)

    clf = util.Classifier(maxIter=maxIter, alpha=5e-5, gradient=grad)
    print("Training supervised classifier with %d images..." % len(labeled))
    clf.train(trainX, trainY)

    trainPredictions = clf.test(trainX)
    trainCorrect = np.sum(trainY == trainPredictions)
    print("Train accuracy is %f" % (trainCorrect / float(trainY.size)))

    testPredictions = clf.test(testX)
    testCorrect = np.sum(testY == testPredictions)
    print("Test accuracy is %f" % (testCorrect / float(testY.size)))
def test_kmeans(stress=False):
    """
    Grade the student's K-means implementation against reference centroids.

    Runs ``studentLearner.FeatureLearner.runKmeans`` with a fixed NumPy seed
    on a fixed slice of the training images, validates the learned centroids
    (type, rank, shape, no NaNs), and compares them with the reference
    centroids unpickled from ``grading_data``.

    Args:
        stress: When true, use the larger configuration (seed 133, k=10, 20
            iterations, training images 100-199). Otherwise use the simple
            one (seed 142, k=5, 10 iterations, first 100 images).

    Returns:
        A ``(passed, message)`` tuple.
    """
    import studentLearner as fl

    if stress:
        np.random.seed(133)
        k = 10
        learner = fl.FeatureLearner(k)
        learner.maxIter = 20
        testfile = 'grading_data/kmeans_stress.npy'
        tr = util.loadTrainImages()[100:200]
    else:
        np.random.seed(142)
        k = 5
        learner = fl.FeatureLearner(k)
        learner.maxIter = 10
        testfile = 'grading_data/kmeans_simple.npy'
        tr = util.loadTrainImages()[:100]

    # Load the reference centroids. Pickle streams are binary data, so read
    # in 'rb' mode; the context manager closes the handle even on failure.
    with open(testfile, 'rb') as testDat:
        centroids = pickle.load(testDat)

    learner.runKmeans(tr)

    # check the basics
    if not isinstance(learner.centroids, np.ndarray):
        return False, "Centroids should be stored in numpy array"
    if len(learner.centroids.shape) != 2:
        return False, "centroids array should be 2D"
    if learner.centroids.shape[0] != util.patch_dim**2:
        return False, "Size of centroids not correct"
    if learner.centroids.shape[1] != k:
        return False, "Number of centroids not correct"
    if not np.all(~np.isnan(learner.centroids)):
        return False, "NaNs detected in centroids"

    # check that they are the same (sum of squared element-wise differences)
    diff = np.sum((centroids.reshape([-1]) - learner.centroids.reshape([-1]))**2)
    if diff > 1e-5:
        return False, "Centroid mismatch"
    return True, "Passed!"
def kmeans_only(view=False):
    """
    Run just the K-means stage: fit a ``featureLearner.FeatureLearner`` with
    25 centroids on the first 1000 training images.

    Args:
        view: When truthy, pass the first 20 columns of the learned centroid
            array to ``util.viewPatches``.
    """
    print("Running only K-Means...")
    import featureLearner as fl

    numCentroids = 25
    learner = fl.FeatureLearner(numCentroids)
    learner.runKmeans(util.loadTrainImages()[:1000])

    if not view:
        return
    util.viewPatches(learner.centroids[:, :20])
def kmeans_only(view=False):
    """
    Run just the K-means stage: fit a ``featureLearner.FeatureLearner`` with
    25 centroids on the first 1000 training images.

    Args:
        view: When truthy, pass the learned centroid array to
            ``util.viewPatches``.
    """
    print("Running only K-Means...")
    import featureLearner as fl

    numCentroids = 25
    learner = fl.FeatureLearner(numCentroids)
    learner.runKmeans(util.loadTrainImages()[:1000])

    if not view:
        return
    util.viewPatches(learner.centroids)
def test_feature_extraction(stress=False):
    """
    Grade ``studentLearner.FeatureLearner.extractFeatures`` against stored
    reference features.

    Installs reference centroids on a learner marked as trained, extracts
    features for each selected image, and compares the result for image
    ``i`` with column ``i`` of the reference feature array.

    Args:
        stress: When true, use seed 133, k=10, the ``*_stress`` reference
            files and training images 500-599. Otherwise use seed 142, k=5,
            the ``*_simple`` reference files and training image 99 only.

    Returns:
        A ``(passed, message)`` tuple.
    """
    import studentLearner as fl

    if stress:
        np.random.seed(133)
        k = 10
        testCentroidFile = 'grading_data/kmeans_stress.npy'
        testFeaturesFile = 'grading_data/features_stress.npy'
        images = util.loadTrainImages()[500:600]
    else:
        np.random.seed(142)
        k = 5
        testCentroidFile = 'grading_data/kmeans_simple.npy'
        testFeaturesFile = 'grading_data/features_simple.npy'
        images = [util.loadTrainImages()[99]]

    learner = fl.FeatureLearner(k)
    learner.trained = True

    # Load test centroids and features. Pickle streams are binary data, so
    # read in 'rb' mode; the context managers close the handles on failure.
    with open(testCentroidFile, 'rb') as testDat:
        centroids = pickle.load(testDat)
    with open(testFeaturesFile, 'rb') as testDat:
        features = pickle.load(testDat)

    learner.centroids = centroids
    for i, image in enumerate(images):
        rawFeats = learner.extractFeatures(image)
        # Type-check before calling .squeeze(); otherwise a non-array return
        # value raises AttributeError instead of producing this message.
        if not isinstance(rawFeats, np.ndarray):
            return False, "Features should be in a numpy array"
        studentFeats = rawFeats.squeeze()
        expected = features[:, i].squeeze()
        if studentFeats.shape != expected.shape:
            return False, "Dimension mismatch"
        # Compare the reference norm for this image with the STUDENT norm.
        # The previous code subtracted the reference norm from itself, so
        # the check never failed.
        if np.abs(np.sqrt(np.sum(expected**2)) - np.sqrt(np.sum(studentFeats**2))) > 1e-3:
            return False, "Feature mismatch"
    return True, "Passed!"
def runDev(self):
    """
    Function: runDev
    ----------------
    Runs a reduced version of the pipeline for quick development checks:
    trains a Classifier with K = 5 on the first 100 training images and
    reports the training accuracy only. No test-set evaluation is done, so
    the test images are not loaded.
    """
    print("Running in development mode")
    K = 5
    trainImages = util.loadTrainImages()[:100]
    classifier = Classifier()
    print('Training..........')
    classifier.train(trainImages, K)
    trainPredictions = classifier.test(trainImages)
    trainAccuracy = self.evaluate(trainPredictions, trainImages)
    print('All done. Here is your summary:')
    self.reportAccuracy(trainAccuracy, 'Train Accuracy')
def test_predictions(dummy=True):
    """
    Grade the classifier's prediction step against stored reference labels.

    Trains ``classifier.Classifier`` for a single iteration (seed 133, k=10,
    training images 100-199), then overwrites ``clf.theta`` with reference
    weights and compares the output of ``clf.test`` with the stored
    predictions. If they differ, the comparison is repeated once with the
    alternative ``*_int_end`` reference files (weights with the intercept at
    the end, per the original inline comment).

    Args:
        dummy: Not read by this function.

    Returns:
        A ``(passed, message)`` tuple.
    """
    np.random.seed(133)
    import classifier

    clf = classifier.Classifier()
    images = util.loadTrainImages()[100:200]
    clf.maxIter = 1
    k = 10
    clf.train(images, k)
    predfile = 'grading_data/predictions.npy'

    # Pickle streams are binary data: every reference file is read in 'rb'
    # mode inside a context manager so the handle is closed on failure too.
    with open('grading_data/log_reg_stress.npy', 'rb') as fid:
        theta = pickle.load(fid)
    clf.theta = theta
    studentPreds = clf.test(images)

    with open(predfile, 'rb') as fid:
        preds = pickle.load(fid)
    preds = np.array(preds, dtype=np.int32)
    studentPreds = np.array(studentPreds, dtype=np.int32)

    if studentPreds.size != preds.size:
        return False, "Wrong number of predictions."

    if np.sum(preds == studentPreds) != preds.size:
        # try again with intercept at end just in case
        with open('grading_data/log_reg_stress_int_end.npy', 'rb') as fid:
            theta = pickle.load(fid)
        clf.theta = theta
        studentPreds = np.array(clf.test(images), dtype=np.int32)
        with open('grading_data/predictions_int_end.npy', 'rb') as fid:
            preds = np.array(pickle.load(fid), dtype=np.int32)
        if np.sum(preds == studentPreds) == preds.size:
            return True, "Passed!"
        return False, "Prediction mismatch."
    return True, "Passed!"
def run(self):
    """
    Function: Run
    -------------
    Runs the whole pipeline: trains a Classifier with K = 25 on the first
    1000 training images, scores its predictions on the training images and
    on all test images with self.evaluate, and hands both accuracies to
    self.reportAccuracy.
    """
    print("Running the full pipeline!")
    numCentroids = 25
    trainSet = util.loadTrainImages()[:1000]
    testSet = util.loadTestImages()
    model = Classifier()

    print('Training..........')
    model.train(trainSet, numCentroids)
    trainAccuracy = self.evaluate(model.test(trainSet), trainSet)

    print('Testing...........')
    testAccuracy = self.evaluate(model.test(testSet), testSet)

    print('All done. Here is your summary:')
    self.reportAccuracy(trainAccuracy, 'Train Accuracy')
    self.reportAccuracy(testAccuracy, 'Test Accuracy')
def runDev(self):
    """
    Function: runDev
    ----------------
    This function will run the pipeline in development mode, i.e. it will
    use only 5 centroids and the first 100 training images. Only the
    training accuracy is reported; no test-set evaluation is done, so the
    test images are not loaded.
    """
    print("Running in development mode")
    K = 5
    trainImages = util.loadTrainImages()[:100]
    classifier = Classifier()
    print('Training..........')
    classifier.train(trainImages, K)
    trainPredictions = classifier.test(trainImages)
    trainAccuracy = self.evaluate(trainPredictions, trainImages)
    print('All done. Here is your summary:')
    self.reportAccuracy(trainAccuracy, 'Train Accuracy')
def runDev(self):
    """
    Function: runDev
    ----------------
    This function will run the pipeline in development mode, i.e. it will
    use only 5 centroids and the first 100 training images. Only the
    training accuracy is reported; no test-set evaluation is done, so the
    test images are not loaded.
    """
    print("Running in development mode")
    K = 5
    trainImages = util.loadTrainImages()[:100]
    classifier = Classifier()
    print('Training..........')
    classifier.train(trainImages, K)
    trainPredictions = classifier.test(trainImages)
    trainAccuracy = self.evaluate(trainPredictions, trainImages)
    print('All done. Here is your summary:')
    self.reportAccuracy(trainAccuracy, 'Train Accuracy')
def test_feature_extraction():
    """
    Sanity-check ``FeatureLearner.extractFeatures``.

    Installs the reference centroids from ``data/kmeans_test.npy`` on a
    learner marked as trained, extracts (and squeezes) the features for
    training image 33, and compares them with the reference features in
    ``data/features_test.npy``. The outcome is printed; nothing is returned.

    Raises:
        AssertionError: If the extracted features are not a NumPy array or
            their squeezed shape differs from the reference.
    """
    np.random.seed(33)
    print("Testing implementation of feature extraction...")
    import featureLearner as fl

    k = 5
    learner = fl.FeatureLearner(k)
    learner.trained = True
    image = util.loadTrainImages()[33]

    # Load test centroids and features. Pickle streams are binary data, so
    # read in 'rb' mode; the context managers close the handles on failure.
    with open('data/kmeans_test.npy', 'rb') as testDat:
        centroids = pickle.load(testDat)
    with open('data/features_test.npy', 'rb') as testDat:
        features = pickle.load(testDat)

    learner.centroids = centroids
    rawFeats = learner.extractFeatures(image)
    # Type-check before calling .squeeze(); otherwise a non-array return
    # value raises AttributeError instead of the intended assertion message.
    assert isinstance(rawFeats, np.ndarray), "Features should be in a numpy array"
    studentFeats = rawFeats.squeeze()
    assert studentFeats.shape == features.shape, "Dimension mismatch"

    if np.abs(np.sum(features) - np.sum(studentFeats)) > 1e-3:
        print("Feature mismatch, test failed")
        return
    # Compare the reference norm with the STUDENT norm. The previous code
    # subtracted the reference norm from itself, so this check never failed.
    if np.abs(np.sqrt(np.sum(features**2)) - np.sqrt(np.sum(studentFeats**2))) > 1e-3:
        print("Feature mismatch, test failed")
        return
    print("Feature extraction test passed")
def test_log_regression():
    """
    Sanity-check the logistic regression implementation.

    Seeds NumPy with 33, trains ``classifier.Classifier`` for 5 iterations
    with k=5 on the first 100 training images, and compares ``clf.theta``
    element-wise (sum of squared differences) with the reference vector in
    ``data/log_reg_test.npy``. The difference and the outcome are printed;
    nothing is returned.

    Raises:
        AssertionError: If ``clf.theta`` and the reference differ in shape.
    """
    np.random.seed(33)
    print("Testing implementation of logistic regression...")
    import classifier

    k = 5
    images = util.loadTrainImages()[:100]
    clf = classifier.Classifier()
    clf.maxIter = 5
    clf.train(images, k)

    # Pickle streams are binary data: read them in 'rb' mode, and let the
    # context manager close the handle even when unpickling raises.
    with open('data/log_reg_test.npy', 'rb') as fid:
        theta = pickle.load(fid)

    assert clf.theta.shape == theta.shape, "Dimension mismatch"
    diff = np.sum((clf.theta - theta)**2)
    print("diff: " + str(diff))
    if diff > 1e-5:
        print("Parameter vector mismatch, test failed.")
    else:
        print("Logistic regression test passed.")