예제 #1
0
def test_log_regression(stress=False):
    """Grade logistic-regression training against a stored parameter vector.

    Trains ``classifier.Classifier`` on a slice of the training images and
    compares the Euclidean norm of the resulting ``theta`` with the norm of
    the reference vector unpickled from ``grading_data``.

    Args:
        stress: If true, use seed 133, k=10, training images 100-199 and
            100 iterations; otherwise seed 142, k=5, the first 100
            training images and 5 iterations.

    Returns:
        A ``(passed, message)`` tuple; ``passed`` is False on a shape or
        norm mismatch.
    """
    import classifier
    clf = classifier.Classifier()

    if stress:
        np.random.seed(133)
        k = 10
        images = util.loadTrainImages()[100:200]
        clf.maxIter = 100
        thetaFile = 'grading_data/log_reg_stress.npy'
    else:
        np.random.seed(142)
        k = 5
        images = util.loadTrainImages()[:100]
        clf.maxIter = 5
        thetaFile = 'grading_data/log_reg_simple.npy'

    clf.train(images, k)
    studentTheta = clf.theta.squeeze()

    # Pickle data is binary, so read it in 'rb' mode; the with-block closes
    # the file even if unpickling raises.  Only unpickle trusted files.
    with open(thetaFile, 'rb') as fid:
        theta = pickle.load(fid)

    if studentTheta.shape != theta.shape:
        return False, "Dimension mismatch"

    # Only the L2 norms are compared, so this check ignores element order.
    refNorm = np.sqrt(np.sum(theta**2))
    studentNorm = np.sqrt(np.sum(studentTheta**2))
    if np.abs(refNorm - studentNorm) > 1e-3:
        return False, "Parameter vector mismatch"

    return True, "Passed!"
예제 #2
0
def test_log_regression():
    """Check the logistic-regression trainer against stored parameters.

    Trains ``classifier.Classifier`` (k=5, 5 iterations, seed 33) on the
    first 100 training images and compares both the sum and the Euclidean
    norm of the learned ``theta`` with the reference vector unpickled from
    ``data/log_reg_test.npy``.  Prints the outcome; returns None.

    Raises:
        AssertionError: If the learned and reference shapes differ.
    """
    np.random.seed(33)
    print("Testing implementation of logistic regression...")

    import classifier
    k = 5
    images = util.loadTrainImages()[:100]

    clf = classifier.Classifier()
    clf.maxIter = 5
    clf.train(images, k)

    # Pickle data is binary, so read it in 'rb' mode; the with-block closes
    # the file even if unpickling raises.  Only unpickle trusted files.
    with open('data/log_reg_test.npy', 'rb') as fid:
        theta = pickle.load(fid)

    studentTheta = clf.theta.squeeze()

    assert studentTheta.shape == theta.shape, "Dimension mismatch"

    # First check: the element sums agree.
    if np.abs(np.sum(theta) - np.sum(studentTheta)) > 1e-3:
        print("Parameter vector mismatch, test failed")
        return

    # Second check: the L2 norms agree.
    refNorm = np.sqrt(np.sum(theta**2))
    studentNorm = np.sqrt(np.sum(studentTheta**2))
    if np.abs(refNorm - studentNorm) > 1e-3:
        print("Parameter vector mismatch, test failed")
        return

    print("Logistic regression test passed.")
예제 #3
0
    def run(self):
        """
        Function: Run
        -------------
        Trains a Classifier with K=25 on the first 1000 training images,
        then reports its accuracy on those same training images and on
        the full set of test images.
        """
        print("Running the full pipeline!")
        numK = 25
        trainSet = util.loadTrainImages()[:1000]
        testSet = util.loadTestImages()

        model = Classifier()

        print('Training..........')
        model.train(trainSet, numK)

        # Accuracy on the data the model was fit to.
        trainAccuracy = self.evaluate(model.test(trainSet), trainSet)

        print('Testing...........')
        # Accuracy on the test images.
        testAccuracy = self.evaluate(model.test(testSet), testSet)

        print('All done. Here is your summary:')
        summary = (
            (trainAccuracy, 'Train Accuracy'),
            (testAccuracy, 'Test Accuracy'),
        )
        for accuracy, label in summary:
            self.reportAccuracy(accuracy, label)
예제 #4
0
def test_kmeans():
    """Check the K-means implementation against stored reference centroids.

    Runs ``FeatureLearner.runKmeans`` (k=5, 10 iterations, seed 33) on the
    first 100 training images, validates the type and shape of the
    resulting centroids, and compares them with the centroids unpickled
    from ``data/kmeans_test.npy``.  Prints the outcome; returns None.

    Raises:
        AssertionError: If the centroids are not a 2-D numpy array of
            shape ``(util.patch_dim**2, k)``.
    """
    np.random.seed(33)
    print("Testing K-Means implementation...")

    import featureLearner as fl
    k = 5
    learner = fl.FeatureLearner(k)
    learner.maxIter = 10

    tr = util.loadTrainImages()[:100]
    learner.runKmeans(tr)

    # check the basics
    assert isinstance(learner.centroids, np.ndarray), "centroids should be stored in numpy array"
    assert len(learner.centroids.shape) == 2, "centroids array should be 2-D"
    assert learner.centroids.shape[0] == util.patch_dim**2, "Size of centroids not correct"
    assert learner.centroids.shape[1] == k, "Number of centroids not correct"

    # Load the reference centroids.  Pickle data is binary, so read it in
    # 'rb' mode; the with-block closes the file even if unpickling raises.
    # Only unpickle trusted files.
    with open('data/kmeans_test.npy', 'rb') as testDat:
        centroids = pickle.load(testDat)

    # Sum of squared element-wise differences over the flattened arrays.
    diff = np.sum((centroids.reshape([-1]) - learner.centroids.reshape([-1]))**2)

    if diff > 1e-5:
        print("Somethings wrong, your centroids don't match the test centroids")
    else:
        print("K-means test passed")
예제 #5
0
def run(grad,view,pixels,maxIter,numTrain):
    """Train the supervised classifier and print train/test accuracy.

    Features come either from K-means centroids learned on the first
    ``numTrain`` training images or, when ``pixels`` is anything other
    than the literal ``False``, from ``util.pixelFeatures``.

    Args:
        grad: Passed to ``util.Classifier`` as ``gradient``.
        view: If truthy (K-means branch only), show the learned centroids
            with ``util.viewPatches``.
        pixels: ``False`` selects the K-means branch; any other value
            selects pixel features.
        maxIter: Iteration limit for the classifier; replaced by 100 in
            the pixel-feature branch.
        numTrain: Number of training images used to learn centroids.
    """
    numCentroids = 25
    kMeansIters = 20

    unsupervisedImages = util.loadTrainImages()[:numTrain]
    # The supervised stage uses at most the first 500 of those images.
    supervisedImages = unsupervisedImages[:500]
    heldOutImages = util.loadTestImages()

    def _accuracy(labels, predicted):
        # Fraction of labels that the prediction matches.
        return np.sum(labels==predicted)/float(labels.size)

    if pixels is not False:
        maxIter = 100
        trainX, trainY = util.pixelFeatures(supervisedImages)
        testX, testY = util.pixelFeatures(heldOutImages)
    else:
        # Stack every image's transposed patch array side by side into one 2-D array.
        patchColumns = []
        for image in unsupervisedImages:
            patchColumns.append(np.array(image.getPatches()).transpose())
        patches = np.hstack(patchColumns)
        print("Training K-means using %d images"%numTrain)
        centroids = submission.runKMeans(numCentroids, patches, kMeansIters)
        trainX, trainY = util.kMeansFeatures(
            supervisedImages, centroids, submission.extractFeatures)
        testX, testY = util.kMeansFeatures(
            heldOutImages, centroids, submission.extractFeatures)
        if view:
            util.viewPatches(centroids)

    clf = util.Classifier(maxIter=maxIter, alpha=5e-5, gradient=grad)
    clf.train(trainX, trainY)

    trainAcc = _accuracy(trainY, clf.test(trainX))
    print("Train accuracy is %f"%trainAcc)

    testAcc = _accuracy(testY, clf.test(testX))
    print("Test accuracy is %f"%testAcc)
예제 #6
0
    def run(self):
        """
        Function: Run
        -------------
        Trains a Classifier with K=25 on the first 1000 training images,
        evaluates it on those images and on all test images, and reports
        both accuracies.
        """
        print("Running the full pipeline!")
        numK = 25
        trainSet = util.loadTrainImages()[:1000]
        testSet = util.loadTestImages()

        model = Classifier()

        print('Training..........')
        model.train(trainSet, numK)

        predictedTrain = model.test(trainSet)
        accTrain = self.evaluate(predictedTrain, trainSet)

        print('Testing...........')
        predictedTest = model.test(testSet)
        accTest = self.evaluate(predictedTest, testSet)

        print('All done. Here is your summary:')
        for accuracy, label in ((accTrain, 'Train Accuracy'),
                                (accTest, 'Test Accuracy')):
            self.reportAccuracy(accuracy, label)
예제 #7
0
def test_feature_extraction():
    """Check ``FeatureLearner.extractFeatures`` against stored features.

    Installs the reference centroids from ``data/kmeans_test.npy`` on a
    learner (k=5), extracts features for training image 33, and compares
    their sum and Euclidean norm with the reference features unpickled
    from ``data/features_test.npy``.  Prints the outcome; returns None.

    Raises:
        AssertionError: If the result is not a numpy array or its shape
            differs from the reference.
    """
    np.random.seed(33)
    print("Testing implementation of feature extraction...")

    import featureLearner as fl
    k = 5
    learner = fl.FeatureLearner(k)
    learner.trained = True

    image = util.loadTrainImages()[33]

    # Load reference centroids and features.  Pickle data is binary, so
    # read it in 'rb' mode; the with-blocks close the files even if
    # unpickling raises.  Only unpickle trusted files.
    with open('data/kmeans_test.npy', 'rb') as testDat:
        centroids = pickle.load(testDat)
    with open('data/features_test.npy', 'rb') as testDat:
        features = pickle.load(testDat)

    learner.centroids = centroids

    studentFeats = learner.extractFeatures(image)
    assert isinstance(studentFeats, np.ndarray), "Features should be in an numpy array"
    assert studentFeats.shape == features.shape, "Dimension mismatch"

    # First check: the element sums agree.
    if np.abs(np.sum(features) - np.sum(studentFeats)) > 1e-3:
        print("Feature mismatch, test failed")
        return

    # Second check: the L2 norms agree.  The reference norm must be compared
    # with the student's norm; comparing the reference with itself would
    # always yield 0 and could never fail.
    refNorm = np.sqrt(np.sum(features**2))
    studentNorm = np.sqrt(np.sum(studentFeats**2))
    if np.abs(refNorm - studentNorm) > 1e-3:
        print("Feature mismatch, test failed")
        return

    print("Feature extraction test passed")
예제 #8
0
def test_log_regression():
    """Check the logistic-regression trainer against stored parameters.

    Trains ``classifier.Classifier`` (k=5, 5 iterations, seed 33) on the
    first 100 training images, then compares the sum and the Euclidean
    norm of the learned ``theta`` with the reference vector unpickled from
    ``data/log_reg_test.npy``.  Prints the outcome; returns None.

    Raises:
        AssertionError: If the learned and reference shapes differ.
    """
    np.random.seed(33)
    print("Testing implementation of logistic regression...")

    import classifier
    k = 5
    images = util.loadTrainImages()[:100]

    clf = classifier.Classifier()
    clf.maxIter = 5
    clf.train(images, k)

    # Pickle data is binary, so read it in 'rb' mode; the with-block closes
    # the file even if unpickling raises.  Only unpickle trusted files.
    with open('data/log_reg_test.npy', 'rb') as fid:
        theta = pickle.load(fid)

    studentTheta = clf.theta.squeeze()

    assert studentTheta.shape == theta.shape, "Dimension mismatch"

    # First check: the element sums agree.
    if np.abs(np.sum(theta) - np.sum(studentTheta)) > 1e-3:
        print("Parameter vector mismatch, test failed")
        return

    # Second check: the L2 norms agree.
    if np.abs(np.sqrt(np.sum(theta**2)) - np.sqrt(np.sum(studentTheta**2))) > 1e-3:
        print("Parameter vector mismatch, test failed")
        return

    print("Logistic regression test passed.")
예제 #9
0
def test_kmeans():
    """Check the K-means implementation against stored reference centroids.

    Runs ``FeatureLearner.runKmeans`` (k=5, 10 iterations, seed 33) on the
    first 100 training images, validates the type and shape of the learned
    centroids, and compares them with those unpickled from
    ``data/kmeans_test.npy``.  Prints the outcome; returns None.

    Raises:
        AssertionError: If the centroids are not a 2-D numpy array of
            shape ``(util.patch_dim**2, k)``.
    """
    np.random.seed(33)
    print("Testing K-Means implementation...")

    import featureLearner as fl
    k = 5
    learner = fl.FeatureLearner(k)
    learner.maxIter = 10

    tr = util.loadTrainImages()[:100]
    learner.runKmeans(tr)

    # check the basics
    assert isinstance(learner.centroids,
                      np.ndarray), "centroids should be stored in numpy array"
    assert len(learner.centroids.shape) == 2, "centroids array should be 2-D"
    assert learner.centroids.shape[
        0] == util.patch_dim**2, "Size of centroids not correct"
    assert learner.centroids.shape[1] == k, "Number of centroids not correct"

    # Load the reference centroids.  Pickle data is binary, so read it in
    # 'rb' mode; the with-block closes the file even if unpickling raises.
    # Only unpickle trusted files.
    with open('data/kmeans_test.npy', 'rb') as testDat:
        centroids = pickle.load(testDat)

    # Sum of squared element-wise differences over the flattened arrays.
    diff = np.sum(
        (centroids.reshape([-1]) - learner.centroids.reshape([-1]))**2)

    if diff > 1e-5:
        print("Somethings wrong, your centroids don't match the test centroids")
    else:
        print("K-means test passed")
def run(grad,view,pixels,maxIter,numTrain):
    """Train the supervised classifier and print train/test accuracy.

    Features come either from K-means centroids learned on the first
    ``numTrain`` training images or, when ``pixels`` is anything other
    than the literal ``False``, from ``util.pixelFeatures``.

    Args:
        grad: Passed to ``util.Classifier`` as ``gradient``.
        view: If truthy (K-means branch only), show the learned centroids
            with ``util.viewPatches`` before features are extracted.
        pixels: ``False`` selects the K-means branch; any other value
            selects pixel features.
        maxIter: Iteration limit for the classifier; replaced by 100 in
            the pixel-feature branch.
        numTrain: Number of training images used to learn centroids.
    """
    numCentroids = 25
    kMeansIters = 20

    unsupervisedImages = util.loadTrainImages()[:numTrain]
    # The supervised stage uses at most the first 500 of those images.
    supervisedImages = unsupervisedImages[:500]
    heldOutImages = util.loadTestImages()

    def _accuracy(labels, predicted):
        # Fraction of labels that the prediction matches.
        return np.sum(labels==predicted)/float(labels.size)

    if pixels is not False:
        maxIter = 100
        trainX, trainY = util.pixelFeatures(supervisedImages)
        testX, testY = util.pixelFeatures(heldOutImages)
    else:
        # Stack every image's transposed patch array side by side into one 2-D array.
        patchColumns = []
        for image in unsupervisedImages:
            patchColumns.append(np.array(image.getPatches()).transpose())
        patches = np.hstack(patchColumns)
        print("Training K-means using %d images..." % numTrain)
        centroids = submission.runKMeans(numCentroids, patches, kMeansIters)
        print("Finished running K-means")
        if view:
            util.viewPatches(centroids)
        trainX, trainY = util.kMeansFeatures(
            supervisedImages, centroids, submission.extractFeatures)
        testX, testY = util.kMeansFeatures(
            heldOutImages, centroids, submission.extractFeatures)

    clf = util.Classifier(maxIter=maxIter, alpha=5e-5, gradient=grad)
    print("Training supervised classifier with %d images..."%len(supervisedImages))
    clf.train(trainX, trainY)

    trainAcc = _accuracy(trainY, clf.test(trainX))
    print("Train accuracy is %f"%trainAcc)

    testAcc = _accuracy(testY, clf.test(testX))
    print("Test accuracy is %f"%testAcc)
예제 #11
0
def test_kmeans(stress=False):
    """Grade the student's K-means implementation against stored centroids.

    Runs ``studentLearner.FeatureLearner.runKmeans`` on a slice of the
    training images and compares the learned centroids with the reference
    centroids unpickled from ``grading_data``.

    Args:
        stress: If true, use seed 133, k=10, 20 iterations and training
            images 100-199; otherwise seed 142, k=5, 10 iterations and
            the first 100 training images.

    Returns:
        A ``(passed, message)`` tuple describing the first failed check,
        or ``(True, "Passed!")``.
    """
    import studentLearner as fl

    if stress:
        np.random.seed(133)
        k = 10
        learner = fl.FeatureLearner(k)
        learner.maxIter = 20
        testfile = 'grading_data/kmeans_stress.npy'
        tr = util.loadTrainImages()[100:200]
    else:
        np.random.seed(142)
        k = 5
        learner = fl.FeatureLearner(k)
        learner.maxIter = 10
        testfile = 'grading_data/kmeans_simple.npy'
        tr = util.loadTrainImages()[:100]

    # Load the reference centroids.  Pickle data is binary, so read it in
    # 'rb' mode; the with-block closes the file even if unpickling raises.
    # Only unpickle trusted files.
    with open(testfile, 'rb') as testDat:
        centroids = pickle.load(testDat)

    learner.runKmeans(tr)

    # check the basics
    if not isinstance(learner.centroids, np.ndarray):
        return False, "Centroids should be stored in numpy array"
    if len(learner.centroids.shape) != 2:
        return False, "centroids array should be 2D"
    if learner.centroids.shape[0] != util.patch_dim**2:
        return False, "Size of centroids not correct"
    if learner.centroids.shape[1] != k:
        return False, "Number of centroids not correct"
    if np.isnan(learner.centroids).any():
        return False, "NaNs detected in centroids"

    # Sum of squared element-wise differences over the flattened arrays.
    diff = np.sum((centroids.reshape([-1]) - learner.centroids.reshape([-1]))**2)

    if diff > 1e-5:
        return False, "Centroid mismatch"

    return True, "Passed!"
예제 #12
0
def kmeans_only(view=False):
    """Run only the K-means stage (k=25) on the first 1000 training images.

    Args:
        view: If truthy, display the first 20 columns of the learned
            centroids with ``util.viewPatches``.
    """
    print("Running only  K-Means...")
    import featureLearner as fl
    numCentroids = 25
    featureLearner = fl.FeatureLearner(numCentroids)

    trainImages = util.loadTrainImages()[:1000]
    featureLearner.runKmeans(trainImages)

    if not view:
        return
    util.viewPatches(featureLearner.centroids[:,:20])
예제 #13
0
def kmeans_only(view=False):
    """Run only the K-means stage (k=25) on the first 1000 training images.

    Args:
        view: If truthy, display all learned centroids with
            ``util.viewPatches``.
    """
    print("Running only  K-Means...")
    import featureLearner as fl
    numCentroids = 25
    featureLearner = fl.FeatureLearner(numCentroids)

    trainImages = util.loadTrainImages()[:1000]
    featureLearner.runKmeans(trainImages)

    if not view:
        return
    util.viewPatches(featureLearner.centroids)
예제 #14
0
def test_feature_extraction(stress=False):
    """Grade ``FeatureLearner.extractFeatures`` against stored features.

    Installs reference centroids on a ``studentLearner.FeatureLearner``
    and, for each selected image, compares the shape and Euclidean norm of
    the extracted features with column ``i`` of the reference feature
    array.

    Args:
        stress: If true, use seed 133, k=10, the stress reference files
            and training images 500-599; otherwise seed 142, k=5, the
            simple reference files and training image 99 only.

    Returns:
        A ``(passed, message)`` tuple describing the first failed check,
        or ``(True, "Passed!")``.
    """
    import studentLearner as fl
    if stress:
        np.random.seed(133)
        k = 10
        testCentroidFile = 'grading_data/kmeans_stress.npy'
        testFeaturesFile = 'grading_data/features_stress.npy'
        images = util.loadTrainImages()[500:600]
    else:
        np.random.seed(142)
        k = 5
        testCentroidFile = 'grading_data/kmeans_simple.npy'
        testFeaturesFile = 'grading_data/features_simple.npy'
        images = [util.loadTrainImages()[99]]

    learner = fl.FeatureLearner(k)
    learner.trained = True

    # Load reference centroids and features.  Pickle data is binary, so
    # read it in 'rb' mode; the with-blocks close the files even if
    # unpickling raises.  Only unpickle trusted files.
    with open(testCentroidFile, 'rb') as testDat:
        centroids = pickle.load(testDat)
    with open(testFeaturesFile, 'rb') as testDat:
        features = pickle.load(testDat)

    learner.centroids = centroids

    for i, image in enumerate(images):
        studentFeats = learner.extractFeatures(image)
        # Type-check before calling .squeeze(), so a non-array result yields
        # the failure message instead of an AttributeError.
        if not isinstance(studentFeats, np.ndarray):
            return False, "Features should be in a numpy array"
        studentFeats = studentFeats.squeeze()

        # Column i of the reference array is what the shape check uses for image i.
        expected = features[:, i].squeeze()
        if studentFeats.shape != expected.shape:
            return False, "Dimension mismatch"

        # Compare the reference norm with the student's norm; comparing the
        # reference with itself would always yield 0 and could never fail.
        refNorm = np.sqrt(np.sum(expected**2))
        studentNorm = np.sqrt(np.sum(studentFeats**2))
        if np.abs(refNorm - studentNorm) > 1e-3:
            return False, "Feature mismatch"

    return True, "Passed!"
예제 #15
0
    def runDev(self):
        """
        Function: runDev
        ----------------
        Development-mode run: trains a Classifier with K=5 on the first
        100 training images and reports the accuracy on those images.
        """
        print("Running in development mode")

        numK = 5
        devTrainImages = util.loadTrainImages()[:100]
        # The first 100 test images are loaded but not evaluated below.
        devTestImages = util.loadTestImages()[:100]

        model = Classifier()

        print('Training..........')
        model.train(devTrainImages, numK)
        devPredictions = model.test(devTrainImages)
        devAccuracy = self.evaluate(devPredictions, devTrainImages)

        print('All done. Here is your summary:')
        self.reportAccuracy(devAccuracy, 'Train Accuracy')
예제 #16
0
def test_predictions(dummy=True):
    """Grade ``Classifier.test`` predictions against stored references.

    Trains a classifier briefly (k=10, 1 iteration, seed 133) on training
    images 100-199, replaces its ``theta`` with the stored reference
    parameters, and compares the resulting predictions with the stored
    reference predictions.  On a mismatch the comparison is retried with
    the ``*_int_end`` reference files.

    Args:
        dummy: Not used by this function.

    Returns:
        A ``(passed, message)`` tuple.
    """
    def _unpickle(path):
        # Pickle data is binary, so read it in 'rb' mode; the with-block
        # closes the file even if unpickling raises.  Only unpickle
        # trusted files.
        with open(path, 'rb') as fid:
            return pickle.load(fid)

    np.random.seed(133)
    import classifier
    clf = classifier.Classifier()
    images = util.loadTrainImages()[100:200]
    clf.maxIter = 1
    k = 10
    clf.train(images, k)

    predfile = 'grading_data/predictions.npy'

    # Overwrite the trained parameters with the reference ones so that only
    # the prediction step is being graded.
    clf.theta = _unpickle('grading_data/log_reg_stress.npy')
    studentPreds = clf.test(images)

    preds = np.array(_unpickle(predfile), dtype=np.int32)
    studentPreds = np.array(studentPreds, dtype=np.int32)

    if studentPreds.size != preds.size:
        return False, "Wrong number of predictions."

    if np.sum(preds == studentPreds) != preds.size:
        # try again with intercept at end just in case
        clf.theta = _unpickle('grading_data/log_reg_stress_int_end.npy')
        studentPreds = np.array(clf.test(images), dtype=np.int32)
        preds = np.array(_unpickle('grading_data/predictions_int_end.npy'),
                         dtype=np.int32)
        if np.sum(preds == studentPreds) == preds.size:
            return True, "Passed!"

        return False, "Prediction mismatch."

    return True, "Passed!"
예제 #17
0
    def run(self):
        """
        Function: Run
        -------------
        Trains a Classifier with K=25 on the first 1000 training images,
        evaluates it on those images and on all test images, and reports
        both accuracies.
        """
        print("Running the full pipeline!")
        numK = 25
        trainSet = util.loadTrainImages()[:1000]
        testSet = util.loadTestImages()

        model = Classifier()

        print('Training..........')
        model.train(trainSet, numK)

        accTrain = self.evaluate(model.test(trainSet), trainSet)

        print('Testing...........')
        accTest = self.evaluate(model.test(testSet), testSet)

        print('All done. Here is your summary:')
        self.reportAccuracy(accTrain, 'Train Accuracy')
        self.reportAccuracy(accTest, 'Test Accuracy')
예제 #18
0
    def runDev(self):
        """
        Function: runDev
        ----------------
        Runs the pipeline in development mode: trains a Classifier with
        K=5 on the first 100 training images and reports the accuracy on
        those same images.
        """
        print("Running in development mode")

        numK = 5
        devTrainImages = util.loadTrainImages()[:100]
        # The first 100 test images are loaded but not evaluated below.
        devTestImages = util.loadTestImages()[:100]

        model = Classifier()

        print('Training..........')
        model.train(devTrainImages, numK)
        devAccuracy = self.evaluate(model.test(devTrainImages), devTrainImages)

        print('All done. Here is your summary:')
        self.reportAccuracy(devAccuracy, 'Train Accuracy')
예제 #19
0
    def runDev(self):
        """
        Function: runDev
        ----------------
        Runs the pipeline in development mode: trains a Classifier with
        K=5 on the first 100 training images and reports the accuracy on
        those same images.
        """
        print("Running in development mode")

        numK = 5
        devTrainImages = util.loadTrainImages()[:100]
        # The first 100 test images are loaded but not evaluated below.
        devTestImages = util.loadTestImages()[:100]

        model = Classifier()

        print('Training..........')
        model.train(devTrainImages, numK)
        devPredictions = model.test(devTrainImages)
        devAccuracy = self.evaluate(devPredictions, devTrainImages)

        print('All done. Here is your summary:')
        self.reportAccuracy(devAccuracy, 'Train Accuracy')
예제 #20
0
def test_feature_extraction():
    """Check ``FeatureLearner.extractFeatures`` against stored features.

    Installs the reference centroids from ``data/kmeans_test.npy`` on a
    learner (k=5), extracts features for training image 33, squeezes
    them, and compares their sum and Euclidean norm with the reference
    features unpickled from ``data/features_test.npy``.  Prints the
    outcome; returns None.

    Raises:
        AssertionError: If the result is not a numpy array or its squeezed
            shape differs from the reference.
    """
    np.random.seed(33)
    print("Testing implementation of feature extraction...")

    import featureLearner as fl
    k = 5
    learner = fl.FeatureLearner(k)
    learner.trained = True

    image = util.loadTrainImages()[33]

    # Load reference centroids and features.  Pickle data is binary, so
    # read it in 'rb' mode; the with-blocks close the files even if
    # unpickling raises.  Only unpickle trusted files.
    with open('data/kmeans_test.npy', 'rb') as testDat:
        centroids = pickle.load(testDat)
    with open('data/features_test.npy', 'rb') as testDat:
        features = pickle.load(testDat)

    learner.centroids = centroids

    studentFeats = learner.extractFeatures(image)

    # Type-check before calling .squeeze(), so a non-array result produces
    # the assertion message instead of an AttributeError.
    assert isinstance(studentFeats,
                      np.ndarray), "Features should be in a numpy array"
    studentFeats = studentFeats.squeeze()
    assert studentFeats.shape == features.shape, "Dimension mismatch"

    # First check: the element sums agree.
    if np.abs(np.sum(features) - np.sum(studentFeats)) > 1e-3:
        print("Feature mismatch, test failed")
        return

    # Second check: the L2 norms agree.  The reference norm must be compared
    # with the student's norm; comparing the reference with itself would
    # always yield 0 and could never fail.
    if np.abs(np.sqrt(np.sum(features**2)) -
              np.sqrt(np.sum(studentFeats**2))) > 1e-3:
        print("Feature mismatch, test failed")
        return

    print("Feature extraction test passed")
예제 #21
0
def test_log_regression():
    """Check the logistic-regression trainer against stored parameters.

    Trains ``classifier.Classifier`` (k=5, 5 iterations, seed 33) on the
    first 100 training images and compares the learned ``theta``
    element-wise with the reference unpickled from
    ``data/log_reg_test.npy``.  Prints the squared difference and the
    outcome; returns None.

    Raises:
        AssertionError: If the learned and reference shapes differ.
    """
    np.random.seed(33)
    print("Testing implementation of logistic regression...")

    import classifier
    k = 5
    images = util.loadTrainImages()[:100]

    clf = classifier.Classifier()
    clf.maxIter = 5
    clf.train(images, k)

    # Pickle data is binary, so read it in 'rb' mode; the with-block closes
    # the file even if unpickling raises.  Only unpickle trusted files.
    with open('data/log_reg_test.npy', 'rb') as fid:
        theta = pickle.load(fid)
    assert clf.theta.shape == theta.shape, "Dimension mismatch"

    # Sum of squared element-wise differences between learned and reference.
    diff = np.sum((clf.theta - theta)**2)
    print("diff: " + str(diff))

    if diff > 1e-5:
        print("Parameter vector mismatch, test failed.")
    else:
        print("Logistic regression test passed.")