def __init__(self, master):
    """Lay out the demo window: a top row of panels (control, output,
    user-input image, log) and a bottom row of weight displays, then
    create the perceptron classifier used by the UI."""
    # Outer container for the whole window.
    container = Frame(master, width=700, height=600, bd=1, background="black")
    container.pack()

    # Top row of panels.
    top_row = Frame(container, width=700, height=600, bd=1, background="black")
    top_row.pack(side=TOP)
    # Control panel
    temp.iControl = control.theControl(top_row, LEFT)
    # System output frame
    temp.iOutput = theOutput(top_row, LEFT)
    # User input image
    temp.usrInput = theDisplay(top_row, "User Input Image", LEFT, 235)
    # Log list
    temp.LOG_LIST = theLog(top_row, RIGHT)

    # Bottom row: the two weight displays.
    bottom_row = Frame(container, width=700, height=600, bd=1, background="black")
    bottom_row.pack(side=BOTTOM)
    temp.inputWeight = theDisplay(bottom_row, "", LEFT, 470)
    temp.currentWeight = theDisplay(bottom_row, "", LEFT, 470)

    # Classifier backing the UI.
    temp.iClassifier = perceptron.PerceptronClassifier()
def runClassifier(): # Set up variables according to the command line inputs featureFunction = basicFeatureExtractorDigit legalLabels = range(10) # number of labels # Select classifier classifier = perceptron.PerceptronClassifier(legalLabels) # Load data numTraining = 100 rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT) trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining) rawValidationData = samples.loadDataFile("digitdata/validationimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT) validationLabels = samples.loadLabelsFile("digitdata/validationlabels", TEST_SET_SIZE) rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT) testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE) # Extract features trainingData = map(basicFeatureExtractorDigit, rawTrainingData) validationData = map(basicFeatureExtractorDigit, rawValidationData) testData = map(basicFeatureExtractorDigit, rawTestData) # Conduct training and testing print "Training..." classifier.train(trainingData, trainingLabels, validationData, validationLabels) # print "Validating..." # guesses = classifier.classify(validationData) # correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True) # print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels)) print "Testing..." guesses = classifier.classify(testData) correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True) print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels)) util.pause() analysis(classifier, guesses, testLabels, rawTestData)
def __init__(self, master):
    """Build the perceptron-demo window: title, three image displays,
    control panel, weight-change display, output frame and log, then
    create the classifier used by the UI.

    Fix: removed the ``global`` declarations (c1..c7, iClassifier,
    TRAIN_NUM, POS_WEIGHT, NEG_WEIGHT, Tkinter) — none of those names
    are read or assigned anywhere in this method, so the declarations
    were dead and misleading.
    """
    frame = Frame(master, width=700, height=600, bd=1, background="black")
    frame.pack()

    # Add a title
    temp.iTitle = title.theTitle(frame, TOP)
    # Add setting option
    # temp.iSetting = setting.theSetting(frame,BOTTOM)

    # First row: the three image displays.
    iFrame = Frame(frame, width=700, height=600, bd=1, background="black")
    iFrame.pack(side=TOP)
    # User Input Image
    temp.usrInput = display.theDisplay(iFrame, "User Input Image", LEFT)
    # Input and weight overlap
    temp.inputWeight = display.theDisplay(iFrame, "Input and Weight Overlap", LEFT)
    # Current weight
    temp.currentWeight = display.theDisplay(iFrame, "Current weight", LEFT)

    # Second row: control panel, weight-change display, output and log.
    iFrame = Frame(frame, width=700, height=600, bd=1, background="black")
    iFrame.pack(side=TOP)
    # Control panel
    temp.iControl = control.theControl(iFrame, LEFT)
    # Weight change display
    temp.weightChange = display.theDisplay(iFrame, "Weight Change", LEFT)
    # System output frame
    temp.iOutput = output.theOutput(iFrame, LEFT)
    # Add a log list
    temp.LOG_LIST = log.theLog(iFrame, BOTTOM)

    # Select classifier
    temp.iClassifier = perceptron.PerceptronClassifier()
def readCommand(argv): "Processes the command used to run from the command line." from optparse import OptionParser parser = OptionParser(USAGE_STRING) parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=['mostFrequent', 'perceptron', 'mlp', 'svm'], default='mostFrequent') parser.add_option('-t', '--training', help=default('The size of the training set'), default=TRAINING_SET_SIZE, type="int") parser.add_option('-w', '--weights', help=default('Whether to print weights'), default=False, action="store_true") parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=3, type="int") parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=TEST_SET_SIZE, type="int") options, otherjunk = parser.parse_args(argv) if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk)) args = {} # Set up variables according to the command line input. print "Doing classification" print "--------------------" print "classifier:\t\t" + options.classifier print "training set size:\t" + str(options.training) printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage featureFunction = basicFeatureExtractorDigit legalLabels = range(10) if options.training <= 0: print "Training set size should be a positive integer (you provided: %d)" % options.training print USAGE_STRING sys.exit(2) if (options.classifier == "mostFrequent"): classifier = mostFrequent.MostFrequentClassifier(legalLabels) elif (options.classifier == "mlp"): classifier = mlp.MLPClassifier(legalLabels, options.iterations) elif (options.classifier == "perceptron"): classifier = perceptron.PerceptronClassifier(legalLabels, options.iterations) elif (options.classifier == "svm"): classifier = svm.SVMClassifier(legalLabels) else: print "Unknown classifier:", options.classifier print USAGE_STRING sys.exit(2) args['classifier'] = classifier args['featureFunction'] = featureFunction 
args['printImage'] = printImage return args, options
def readCommand( argv ): "Processes the command used to run from the command line." from optparse import OptionParser parser = OptionParser(USAGE_STRING) parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=['linear_svm', 'nb', 'naiveBayes', 'perceptron'], default='linear_svm') parser.add_option('-d', '--data', help=default('Dataset to use'), choices=['digits', 'faces'], default='faces') parser.add_option('-t', '--training', help=default('The size of the training set'), default=100, type="int") parser.add_option('-k', '--smoothing', help=default("Smoothing parameter (ignored when using --autotune)"), type="float", default=0.1) parser.add_option('-a', '--autotune', help=default("Whether to automatically tune hyperparameters"), default=False, action="store_true") parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=3, type="int") parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=50, type="int") options, otherjunk = parser.parse_args(argv) if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk)) args = {} # Set up variables according to the command line input. 
print "Doing classification" print "--------------------" print "data:\t\t" + options.data print "classifier:\t\t" + options.classifier print "training set size:\t" + str(options.training) print "testing set size:\t"+str(options.test) if(options.data=="digits"): printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage elif(options.data=="faces"): printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage else: print "Unknown dataset", options.data print USAGE_STRING sys.exit(2) if(options.data=="digits"): legalLabels = range(10) else: legalLabels = range(2) if options.training <= 0: print "Training set size should be a positive integer (you provided: %d)" % options.training print USAGE_STRING sys.exit(2) if options.smoothing <= 0: print "Please provide a positive number for smoothing (you provided: %f)" % options.smoothing print USAGE_STRING sys.exit(2) if(options.classifier == "linear_svm"): classifier = linear_svm.LinearClassifier(options.data) elif(options.classifier == "naiveBayes" or options.classifier == "nb"): classifier = naiveBayes.NaiveBayesClassifier(legalLabels) classifier.setSmoothing(options.smoothing) if (options.autotune): print "using automatic tuning for naivebayes" classifier.automaticTuning = True else: print "using smoothing parameter k=%f for naivebayes" % options.smoothing elif(options.classifier == "perceptron"): classifier = perceptron.PerceptronClassifier(legalLabels,options.iterations) else: print "Unknown classifier:", options.classifier print USAGE_STRING sys.exit(2) args['classifier'] = classifier args['printImage'] = printImage return args, options
def runClassifier(args, options, legalLabels): featureFunction = args['featureFunction'] classifier = args['classifier'] printImage = args['printImage'] # Load data numTraining = options.training numTest = options.test if (options.data == "faces"): rawValidationData = samples.loadDataFile("facedata/facedatatrain", numTest, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT) validationLabels = samples.loadLabelsFile( "facedata/facedatatrainlabels", numTest) rawTestData = samples.loadDataFile("facedata/facedatatest", numTest, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT) testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", numTest) else: rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT) validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest) rawTestData = samples.loadDataFile("digitdata/testimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT) testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest) # Extract features print "Extracting features..." 
validationData = map(featureFunction, rawValidationData) testData = map(featureFunction, rawTestData) total = numTraining f_out = open( './results/' + options.classifier + "_" + options.data + '.txt', 'w') # train and classify for portions of the training data, compare performance for i in range(1, 11): print "\n\nUsing", i * 10, "% of training data\n" multiplier = i / 10.0 numTraining = int(total * multiplier) if (options.classifier == "naiveBayes"): classifier = naiveBayes.NaiveBayesClassifier(legalLabels) elif (options.classifier == "perceptron"): classifier = perceptron.PerceptronClassifier( legalLabels, options.iterations) if options.data == "faces": rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT) trainingLabels = samples.loadLabelsFile( "facedata/facedatatrainlabels", numTraining) else: rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT) trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining) trainingData = map(featureFunction, rawTrainingData) # Conduct training and testing start_time = time.time() print "Training..." classifier.train(trainingData, trainingLabels, validationData, validationLabels) end_time = time.time() exec_time = end_time - start_time print "\n\nUsing " + str(numTraining) + " training images" print "Training took " + str(exec_time) + " seconds\n\n" print "Validating..." guesses = classifier.classify(validationData) correct = [ guesses[i] == validationLabels[i] for i in range(len(validationLabels)) ].count(True) print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels)) val_correct = correct print "Testing..." 
guesses = classifier.classify(testData) correct = [ guesses[i] == testLabels[i] for i in range(len(testLabels)) ].count(True) print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels)) test_correct = correct # analysis(classifier, guesses, testLabels, testData, rawTestData, printImage) f_out.write( str(numTraining) + " " + str(exec_time) + " " + str(numTest) + " " + str(val_correct) + " " + str(test_correct) + '\n') f_out.close()
def runClassifier(): global TK_ROOT, SP_CANVAS, LOG_X, LOG_Y TK_ROOT = Tk(className="Classifier Interface") # Create window TK_ROOT.geometry("1024x768") TK_ROOT.grid_rowconfigure(0, weight=1) TK_ROOT.grid_columnconfigure(0, weight=1) SP_CANVAS = Canvas(TK_ROOT, xscrollcommand=None, scrollcommand=None) SP_CANVAS.grid(row=0, column=0, sticky='nesw') SP_CANVAS.create_rectangle(10, 10, 150, 500, fill="white") # Set up variables according to the command line inputs featureFunction = basicFeatureExtractorDigit legalLabels = range(10) # number of labels # Select classifier classifier = perceptron.PerceptronClassifier(legalLabels) # Load data numTraining = 1 rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT, 'train', SP_CANVAS) trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining) loadImage() rawTestData = samples.loadDataFile("digitdata/testingimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT, 'test', SP_CANVAS) testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE) # Extract features print rawTestData trainingData = map(basicFeatureExtractorDigit, rawTrainingData) print "cp3" testData = map(basicFeatureExtractorDigit, rawTestData) # Conduct training and testing SP_CANVAS.create_text(LOG_X, LOG_Y, text="Training...", anchor=NW, font=tkFont.Font(size=-14)) LOG_Y += 15 classifier.train(trainingData, trainingLabels, SP_CANVAS) # print "Validating..." 
# guesses = classifier.classify(validationData) # correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True) # print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels)) SP_CANVAS.create_text(LOG_X, LOG_Y, text="Testing...", anchor=NW, font=tkFont.Font(size=-14)) LOG_Y += 15 guesses = classifier.classify(testData, SP_CANVAS) correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True) print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels)) SP_CANVAS.create_text(LOG_X, LOG_Y + 30, text="Completed...", anchor=NW, font=tkFont.Font(size=-14)) LOG_Y += 15 SP_CANVAS.create_rectangle(200, 300, 201, 301) the_input = raw_input('TYPE HERE:>> ') if match('bye', the_input): return
def readCommand(argv):
    """Processes the command used to run from the command line.

    Parses options, selects the feature extractor and image printer for
    the chosen dataset, validates numeric options, and instantiates the
    requested classifier. Returns (args, options) where args carries
    'classifier', 'featureFunction' and 'printImage'.

    Fixes: the autotune message was a Python-2 print statement (a
    SyntaxError in this otherwise Python-3 function); 'knn' was handled
    by the dispatch below but missing from the optparse choices, so that
    branch was unreachable.
    """
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)
    parser.add_option('-r', '--run',
                      help=default('automatically runs training and test cycle for 5 times'),
                      default=False, action='store_true')
    parser.add_option('-c', '--classifier', help=default('The type of classifier'),
                      choices=['perceptron', 'naiveBayes', 'mira', 'knn'],
                      default='naiveBayes')
    parser.add_option('-d', '--data', help=default('Dataset to use'),
                      choices=['digits', 'faces'], default='digits')
    parser.add_option('-t', '--training',
                      help=default('The ratio of the training set to use'),
                      default=1.0, type="float")
    parser.add_option('-f', '--features', help=default('Whether to use enhanced features'),
                      default=False, action="store_true")
    parser.add_option('-o', '--odds', help=default('Whether to compute odds ratios'),
                      default=False, action="store_true")
    parser.add_option('-1', '--label1', help=default("First label in an odds ratio comparison"),
                      default=0, type="int")
    parser.add_option('-2', '--label2', help=default("Second label in an odds ratio comparison"),
                      default=1, type="int")
    parser.add_option('-k', '--smoothing',
                      help=default("Smoothing parameter (ignored when using --autotune)"),
                      type="float", default=2.0)
    parser.add_option('-a', '--autotune',
                      help=default("Whether to automatically tune hyperparameters"),
                      default=False, action="store_true")
    parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"),
                      default=3, type="int")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Echo the run configuration.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    print("using enhanced features?:\t" + str(options.features))

    # Dataset-specific image printer and feature extractor.
    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
    else:
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    if options.data == "digits":
        legalLabels = range(10)
    else:
        legalLabels = range(2)

    # NOTE(review): options.training is a float ratio here, yet this message
    # formats it with %d — the value is truncated in the message only.
    if options.training <= 0:
        print("Training set size should be a positive integer (you provided: %d)"
              % options.training)
        print(USAGE_STRING)
        sys.exit(2)
    if options.smoothing <= 0:
        print("Please provide a positive number for smoothing (you provided: %f)"
              % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)
    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)"
                  % (options.label1, options.label2))
            print(USAGE_STRING)
            sys.exit(2)

    # Instantiate the requested classifier.
    if options.classifier == "mira":
        classifier = mira.MiraClassifier(legalLabels, options.iterations)
    elif options.classifier == "naiveBayes":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if options.autotune:
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes" % options.smoothing)
    elif options.classifier == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels, options.iterations)
    elif options.classifier == "knn":
        classifier = knn.KNN(legalLabels)
    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)
        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage
    return args, options
def __init__(self, master):
    """Build the perceptron handwriting-recognition window: a banner, a
    bottom settings strip, status/log area, user-input canvas, a button
    column, and three output canvases, then create the classifier.

    Widget handles are published through module-level globals
    (c1..c6, myList, iClassifier) so other callbacks in this module can
    draw on them.
    """
    global c1, c2, c3, c4, c5, myList, legalLabels, iClassifier
    frame = Frame(master, width=700, height=600, bd=1, background="black")
    frame.pack()

    # Banner strip across the top; the title text is drawn on c1 below.
    iframe5 = Frame(frame, bd=0, relief=FLAT, background="black")
    iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="top")
    c1 = Canvas(iframe5, bg='white', width=700, height=50, background="black")
    c1.pack()

    # Settings strip along the bottom: four labelled sub-panels.
    iframe5 = Frame(frame, bd=0, relief=RAISED, background="black", highlightcolor="red")
    iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="bottom")

    iframe7 = Frame(iframe5, bd=0, relief=RAISED, background="black", highlightcolor="red")
    iframe7.pack(expand=1, fill=X, pady=10, padx=5, side="left")
    lbl = Label(iframe7, text="Auto Training Settings ", fg="#3cecff", bg="black", font=("times", 14, "bold"))
    lbl.pack()
    c7 = Canvas(iframe7, width=70, height=50, background="black")
    c7.pack()

    iframe7 = Frame(iframe5, bd=0, relief=RAISED, background="black", highlightcolor="red")
    iframe7.pack(expand=1, fill=X, pady=10, padx=5, side="left")
    lbl = Label(iframe7, text="Validation Settings", fg="#3cecff", bg="black", font=("times", 14, "bold"))
    lbl.pack()
    # NOTE(review): c7 is rebound for each settings panel, so only the last
    # panel's canvas stays reachable — confirm earlier handles are unneeded.
    c7 = Canvas(iframe7, width=70, height=50, background="black")
    c7.pack()

    iframe7 = Frame(iframe5, bd=0, relief=RAISED, background="black", highlightcolor="red")
    iframe7.pack(expand=1, fill=X, pady=10, padx=5, side="left")
    lbl = Label(iframe7, text="User Training Settings ", fg="#3cecff", bg="black", font=("times", 14, "bold"))
    lbl.pack()
    # NOTE(review): c4 is overwritten later by the "Guess Digit" canvas —
    # this settings canvas becomes unreachable via the global.
    c4 = Canvas(iframe7, width=70, height=50, background="black")
    c4.pack()

    iframe7 = Frame(iframe5, bd=0, relief=RAISED, background="black", highlightcolor="red")
    iframe7.pack(expand=1, fill=X, pady=10, padx=5, side="right")
    lbl = Label(iframe7, text="Perceptron Settings", fg="#3cecff", bg="black", font=("times", 14, "bold"))
    lbl.pack()
    c7 = Canvas(iframe7, width=70, height=50, background="black")
    c7.pack()

    # Status Frame
    iframe5 = Frame(frame, bd=0, relief=RAISED, background="black")
    iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="left")
    lbl = Label(iframe5, text="System Status", fg="#3cecff", bg="black", font=("times", 14, "bold"))
    lbl.pack()
    # c2 = Canvas(iframe5, bg='white', width=175, height=400,background="black")
    #c2.pack()
    # Add a log list
    myList = theList(iframe5)

    # User Input Frame
    iframe5 = Frame(frame, bd=0, relief=RAISED, background="black")
    iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="left")
    lb2 = Label(iframe5, text="User Input", fg="#3cecff", bg="black", font=("times", 14, "bold"))
    lb2.pack(side="top")
    c3 = Canvas(iframe5, width=235, height=300, background="black")
    c3.pack()

    # Button column on the right.
    iframe5 = Frame(frame, bd=0, relief=RAISED, background="black")
    iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="right")
    myButton(iframe5)

    # System Output Frame
    iframe5 = Frame(frame, bd=0, relief=RAISED, background="black")
    iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="top")
    lb2 = Label(iframe5, text="Guess Digit", fg="#3cecff", bg="black", font=("times", 14, "bold"))
    lb2.pack(side="top")
    c4 = Canvas(iframe5, bg='white', width=130, height=60, background="black")
    c4.pack()

    iframe5 = Frame(frame, bd=0, relief=RAISED, background="black")
    iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="top")
    lb2 = Label(iframe5, text="Predicted Correct", fg="#3cecff", bg="black", font=("times", 14, "bold"))
    lb2.pack(side="top")
    c5 = Canvas(iframe5, bg='white', width=130, height=60, background="black")
    c5.pack()

    iframe5 = Frame(frame, bd=0, relief=RAISED, background="black")
    iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="top")
    lb2 = Label(iframe5, text="User Input\nCorrectness Statistics", fg="#3cecff", bg="black", font=("times", 14, "bold"))
    lb2.pack(side="top")
    c6 = Canvas(iframe5, bg='white', width=130, height=60, background="black")
    c6.pack()

    # Static captions drawn onto the canvases.
    # NOTE(review): c5 is 130x60 yet text is placed at y=280/296 (and this
    # 'Correctness' caption is drawn twice) — likely off-canvas; confirm.
    c5.create_text(80, 280, text='Correctness', fill="#3cecff", justify=CENTER, font=('times', 14, 'bold'))
    c1.create_text(330, 25, text='Perceptron Handwriting Recognition', fill="#3cecff", justify=CENTER, font=('Times', 20, 'bold'))
    #
    c3.create_text(130, 20, text='User Input', fill="#3cecff", justify=CENTER, font=('times', 14, 'bold'))
    # c3.create_text(130, 320, text='Predicted Correctness', fill="#3cecff", justify=CENTER, font=('times', 14, 'bold'))
    c5.create_text(80, 280, text='Correctness', fill="#3cecff", justify=CENTER, font=('times', 14, 'bold'))
    c5.create_text(80, 296, text='Statistics', fill="#3cecff", justify=CENTER, font=('times', 14, 'bold'))
    c6.create_text(80, 20, text='Statistics', fill="#3cecff", justify=CENTER, font=('times', 14, 'bold'))

    # Classifier backing the UI; legalLabels comes from module scope.
    iClassifier = perceptron.PerceptronClassifier(legalLabels)
def readCommand(argv):
    """Parse the command line, echo the configuration, and build the
    classifier, feature extractor, and image printer requested by the
    user. Returns (args, options)."""
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)
    parser.add_option('-c', '--classifier', help=default('The type of classifier'),
                      choices=['mostFrequent', 'nb', 'naiveBayes', 'perceptron'],
                      default='naiveBayes')
    parser.add_option('-d', '--data', help=default('Dataset to use'),
                      choices=['digits', 'faces'], default='digits')
    parser.add_option('-t', '--training', help=default('The size of the training set'),
                      default=100, type="int")
    parser.add_option('-f', '--features', help=default('Whether to use enhanced features'),
                      default=False, action="store_true")
    parser.add_option('-o', '--odds', help=default('Whether to compute odds ratios'),
                      default=False, action="store_true")
    parser.add_option('-1', '--label1', help=default("First label in an odds ratio comparison"),
                      default=0, type="int")
    parser.add_option('-2', '--label2', help=default("Second label in an odds ratio comparison"),
                      default=1, type="int")
    parser.add_option('-w', '--weights', help=default('Whether to print weights'),
                      default=False, action="store_true")
    parser.add_option('-k', '--smoothing',
                      help=default("Smoothing parameter (ignored when using --autotune)"),
                      type="float", default=2.0)
    parser.add_option('-a', '--autotune',
                      help=default("Whether to automatically tune hyperparameters"),
                      default=False, action="store_true")
    parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"),
                      default=15, type="int")
    parser.add_option('-s', '--test', help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE, type="int")
    parser.add_option('-n', '--analysis', help=default("Shows which data is wrongly predicted"),
                      default=False, action="store_true")
    parser.add_option('-r', '--random',
                      help=default("Trains the data set using random data and calculates averages for percent accuracy and standard deviation"),
                      default=False, action="store_true")

    options, unrecognized = parser.parse_args(argv)
    if len(unrecognized) != 0:
        raise Exception('Command line input not understood: ' + str(unrecognized))
    args = {}  # holds the objects consumed by the caller

    # Echo the run configuration.
    print("Doing classification")
    print("--------------------")
    print("Data:\t\t" + options.data)
    print("Classifier:\t\t" + options.classifier)
    print("Using enhanced features?:\t" + str(options.features))
    if not options.random:
        print("Training set size:\t" + str(options.training))

    # Dataset-specific image printer and feature extractor.
    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
        featureFunction = enhancedFeatureExtractorDigit if options.features else basicFeatureExtractorDigit
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
        featureFunction = enhancedFeatureExtractorFace if options.features else basicFeatureExtractorFace
    else:
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    legalLabels = range(10) if options.data == "digits" else range(2)

    # Sanity checks on the numeric options.
    if options.training <= 0:
        print("Training set size should be a positive integer (you provided: %d)"
              % options.training)
        print(USAGE_STRING)
        sys.exit(2)
    if options.smoothing <= 0:
        print("Please provide a positive number for smoothing (you provided: %f)"
              % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)
    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)"
                  % (options.label1, options.label2))
            print(USAGE_STRING)
            sys.exit(2)

    # Instantiate the requested classifier.
    # NOTE(review): 'mostFrequent' is accepted by the optparse choices above
    # but has no branch here, so selecting it exits via "Unknown classifier"
    # — confirm whether a mostFrequent branch was intended.
    if options.classifier == "naiveBayes" or options.classifier == "nb":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if options.autotune:
            print("Using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("Using smoothing parameter k=%f for naivebayes" % options.smoothing)
    elif options.classifier == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels, options.iterations)
    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)
        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage
    return args, options
def readCommand( argv ): """ Processes the command used to run from the command line. """ import getopt # Set default options options = {'classifier': 'mostfrequent', 'data': 'digits', 'enhancedFeatures': False, 'train': 100, 'odds': False, 'class1': 1, 'class2': 0, 'smoothing': 1, 'automaticTuning' : False, 'maxIterations': 3} args = {} # This dictionary will hold the objects used by the main method # Read input from the command line commands = ['help', 'classifer=', 'data=', 'train=', 'enhancedFeatures', 'odds', 'class1=', 'class2=', 'smoothing=', 'automaticTuning' 'maxIterations='] try: opts = getopt.getopt( argv, "hc:d:t:fo1:2:k:ai:", commands ) except getopt.GetoptError: print USAGE_STRING sys.exit( 2 ) for option, value in opts[0]: if option in ['--help', '-h']: print USAGE_STRING sys.exit( 0 ) if option in ['--classifier', '-c']: options['classifier'] = value if option in ['--data', '-d']: options['data'] = value if option in ['--train', '-t']: options['train'] = int(value) if option in ['--enhancedFeatures', '-f']: options['enhancedFeatures'] = True if option in ['--odds', '-o']: options['odds'] = True if option in ['--class1', '-1']: options['class1'] = int(value) if option in ['--class2', '-2']: options['class2'] = int(value) if option in ['--smoothing', '-k']: options['smoothing'] = float( value ) if option in ['--automaticTuning', '-a']: options['automaticTuning'] = True if option in ['--maxIterations', '-i']: options['maxIterations'] = int(value) # Set up variables according to the command line input. 
print "Doing classification" print "--------------------" print "data:\t\t" + options['data'] print "classifier:\t\t" + options['classifier'] print "using enhanced features?:\t" + str(options['enhancedFeatures']) print "training set size:\t" + str(options['train']) if(options['data']=="digits"): printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage if (options['enhancedFeatures']): featureFunction = enhancedFeatureExtractorDigit else: featureFunction = basicFeatureExtractorDigit elif(options['data']=="faces"): printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage if (options['enhancedFeatures']): featureFunction = enhancedFeatureExtractorFace else: featureFunction = basicFeatureExtractorFace else: print "Unknown dataset", options['data'] print USAGE_STRING sys.exit(2) if(options['data']=="digits"): legalLabels = range(10) else: legalLabels = range(2) if options['train'] <= 0: print "Training set size should be a positive integer (you provided: %d)" % options['train'] print USAGE_STRING sys.exit(2) if options['smoothing'] <= 0: print "Please provide a positive number for smoothing (you provided: %f)" % options['smoothing'] print USAGE_STRING sys.exit(2) if options['odds']: for className in ['class1','class2']: if options[className] not in legalLabels: print "Didn't provide a legal labels for the odds ratio for %s" % className print USAGE_STRING sys.exit(2) if(options['classifier'] == "mostfrequent"): classifier = mostFrequent.MostFrequentClassifier(legalLabels) elif(options['classifier'] == "naivebayes"): classifier = naiveBayes.NaiveBayesClassifier(legalLabels) classifier.setSmoothing(options['smoothing']) if (options['automaticTuning']): print "using automatic tuning for naivebayes" classifier.automaticTuning = True else: print "using smoothing parameter k=%f for naivebayes" % options['smoothing'] elif(options['classifier'] == "perceptron"): classifier = perceptron.PerceptronClassifier(legalLabels,options['maxIterations']) 
elif(options['classifier'] == "mira"): classifier = mira.MiraClassifier(legalLabels, options['maxIterations']) if (options['automaticTuning']): print "using automatic tuning for MIRA" classifier.automaticTuning = True else: print "using default C=0.001 for MIRA" else: print "Unknown classifier:", options['classifier'] print USAGE_STRING sys.exit(2) args['classifier'] = classifier args['featureFunction'] = featureFunction args['printImage'] = printImage return args, options
item["background"] = "blue" item["fg"] = "#FFF" for i in self.button: if self.button[i] != item: self.button[i]["background"] = "black" self.button[i]["fg"] = "#3cecff" # Test code if __name__ == "__main__": root = Tk() frame = Frame(root) temp.LOG_LIST = log.theLog(frame, TOP) iControl = theControl(root, TOP) temp.iClassifier = perceptron.PerceptronClassifier() temp.iTitle = title.theTitle(frame, TOP) # Add setting option #temp.iSetting = setting.theSetting(frame,TOP) # Add a log list temp.LOG_LIST = log.theLog(frame, TOP) # User Input Image temp.iDisplay = display.theDisplay(frame, "hi", TOP) # System Output Frame temp.iOutput = output.theOutput(frame, TOP)
def readCommand(argv):
    """
    Processes the command used to run from the command line.

    Builds the optparse interface, validates the dataset/classifier
    combination, and returns (args, options) where args maps
    'agentToClone', 'classifier', 'featureFunction' and 'printImage'
    to the objects used by the caller.  Exits with status 2 on any
    invalid combination of arguments.
    """
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)

    parser.add_option(
        "-c", "--classifier",
        help=default("The type of classifier"),
        choices=["mostFrequent", "perceptron"],
        default="mostFrequent",
    )
    parser.add_option(
        "-d", "--data",
        help=default("Dataset to use"),
        choices=["digits", "faces", "pacman"],
        default="digits",
    )
    parser.add_option(
        "-t", "--training",
        help=default("The size of the training set"),
        default=100,
        type="int",
    )
    parser.add_option(
        "-f", "--features",
        help=default("Whether to use enhanced features"),
        default=False,
        action="store_true",
    )
    parser.add_option(
        "-o", "--odds",
        help=default("Whether to compute odds ratios"),
        default=False,
        action="store_true",
    )
    parser.add_option(
        "-1", "--label1",
        help=default("First label in an odds ratio comparison"),
        default=0,
        type="int",
    )
    parser.add_option(
        "-2", "--label2",
        help=default("Second label in an odds ratio comparison"),
        default=1,
        type="int",
    )
    parser.add_option(
        "-w", "--weights",
        help=default("Whether to print weights"),
        default=False,
        action="store_true",
    )
    parser.add_option(
        "-k", "--smoothing",
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=2.0,
    )
    parser.add_option(
        "-a", "--autotune",
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true",
    )
    parser.add_option(
        "-i", "--iterations",
        help=default("Maximum iterations to run training"),
        default=3,
        type="int",
    )
    parser.add_option(
        "-s", "--test",
        help=default("Amount of test data to use"),
        default=TEST_SET_SIZE,
        type="int",
    )
    parser.add_option(
        "-g", "--agentToClone",
        help=default("Pacman agent to copy"),
        default=None,
        type="str",
    )

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception(
            "Command line input not understood: " + str(otherjunk)
        )
    args = {}

    # Set up variables according to the command line input.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    print("using enhanced features?:\t" + str(options.features))
    print("training set size:\t" + str(options.training))

    if options.data == "digits":
        printImage = ImagePrinter(
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
        ).printImage
        # BUG FIX: this branch previously ignored --features and always used
        # the basic extractor, contradicting the -f help text and the
        # faces/pacman branches below.  (Assumes enhancedFeatureExtractorDigit
        # is defined alongside the other extractors in this module -- it is
        # referenced the same way by the sibling readCommand variants.)
        if options.features:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
    elif options.data == "faces":
        printImage = ImagePrinter(
            FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT
        ).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
    elif options.data == "pacman":
        printImage = None  # pacman states are not printable as bitmaps
        if options.features:
            featureFunction = enhancedFeatureExtractorPacman
        else:
            featureFunction = basicFeatureExtractorPacman
    else:
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    # Digits use ten numeric classes; pacman/faces use move labels.
    if options.data == "digits":
        legalLabels = list(range(10))
    else:
        legalLabels = ["Stop", "West", "East", "North", "South"]

    # Validate numeric arguments before building a classifier.
    if options.training <= 0:
        print(
            "Training set size should be a positive integer (you provided: %d)"
            % options.training
        )
        print(USAGE_STRING)
        sys.exit(2)
    if options.smoothing <= 0:
        print(
            "Please provide a positive number for smoothing (you provided: %f)"
            % options.smoothing
        )
        print(USAGE_STRING)
        sys.exit(2)
    if options.odds:
        if (
            options.label1 not in legalLabels
            or options.label2 not in legalLabels
        ):
            print(
                "Didn't provide a legal labels for the odds ratio: (%d,%d)"
                % (options.label1, options.label2)
            )
            print(USAGE_STRING)
            sys.exit(2)

    # Instantiate the requested classifier; the pacman dataset uses its
    # dedicated perceptron variant.
    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "perceptron":
        if options.data != "pacman":
            classifier = perceptron.PerceptronClassifier(
                legalLabels, options.iterations
            )
        else:
            classifier = perceptron_pacman.PerceptronClassifierPacman(
                legalLabels, options.iterations
            )
    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)
        sys.exit(2)

    args["agentToClone"] = options.agentToClone
    args["classifier"] = classifier
    args["featureFunction"] = featureFunction
    args["printImage"] = printImage
    return args, options
print USAGE_STRING sys.exit(2) if(options.classifier == "mostFrequent"): classifier = mostFrequent.MostFrequentClassifier(legalLabels) elif(options.classifier == "naiveBayes" or options.classifier == "nb"): classifier = naiveBayes.NaiveBayesClassifier(legalLabels) classifier.setSmoothing(options.smoothing) if (options.autotune): print "using automatic tuning for naivebayes" classifier.automaticTuning = True else: print "using smoothing parameter k=%f for naivebayes" % options.smoothing elif(options.classifier == "perceptron"): if options.data != 'pacman': classifier = perceptron.PerceptronClassifier(legalLabels,options.iterations) else: classifier = perceptron_pacman.PerceptronClassifierPacman(legalLabels,options.iterations) elif(options.classifier == "mira"): if options.data != 'pacman': classifier = mira.MiraClassifier(legalLabels, options.iterations) if (options.autotune): print "using automatic tuning for MIRA" classifier.automaticTuning = True else: print "using default C=0.001 for MIRA" elif(options.classifier == 'minicontest'): import minicontest classifier = minicontest.contestClassifier(legalLabels) else: print "Unknown classifier:", options.classifier
def readCommand( argv ): "Processes the command used to run from the command line." from optparse import OptionParser parser = OptionParser(USAGE_STRING) parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=['naiveBayes', 'perceptron', 'kNN'], default='naiveBayes') parser.add_option('-d', '--data', help=default('Dataset to use'), choices=['digits', 'faces'], default='digits') parser.add_option('-t', '--training', help=default('The size of the training set'), default=100, type="int") parser.add_option('-w', '--weights', help=default('Whether to print weights'), default=False, action="store_true") parser.add_option('-k', '--neighbors', help=default("Numbers of neighbors in k-Nearest Neighbors"), type="int", default=3) parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=3, type="int") parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=TEST_SET_SIZE, type="int") options, otherjunk = parser.parse_args(argv) if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk)) args = {} # Set up variables according to the command line input. 
print "Doing classification" print "--------------------" print "data:\t\t" + options.data print "classifier:\t\t" + options.classifier print "training set size:\t" + str(options.training) if(options.data=="digits"): printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage featureFunction = basicFeatureExtractorDigit elif(options.data=="faces"): printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage featureFunction = basicFeatureExtractorFace else: print "Unknown dataset", options.data print USAGE_STRING sys.exit(2) if(options.data=="digits"): legalLabels = range(10) else: legalLabels = range(2) if options.training <= 0: print "Training set size should be a positive integer (you provided: %d)" % options.training print USAGE_STRING sys.exit(2) if options.neighbors <= 0: print "Neighbors for kNN should be a positive integer (you provided: %d)" % options.neighbors print USAGE_STRING sys.exit(2) if(options.classifier == "naiveBayes"): classifier = naiveBayes.NaiveBayesClassifier(legalLabels) elif(options.classifier == "perceptron"): classifier = perceptron.PerceptronClassifier(legalLabels,options.iterations) elif(options.classifier == "kNN"): classifier = kNN.kNNClassifier(legalLabels,options.neighbors) else: print "Unknown classifier:", options.classifier print USAGE_STRING sys.exit(2) args['classifier'] = classifier args['featureFunction'] = featureFunction args['printImage'] = printImage return args, options
def runClassifier():
    """
    Benchmark driver: trains and evaluates Naive Bayes and Perceptron
    classifiers on both the digit and face datasets at ten increasing
    training-set sizes, repeating each configuration three times, then
    prints average accuracy, a standard-deviation figure and timing for
    every configuration.
    """
    ########################################################################################################################################
    #Edited Code

    #Store info for each iteration
    # Indices 0-9 of each list accumulate correct-guess totals for the ten
    # training-set sizes; indices 10-19 accumulate elapsed process time for
    # the same ten sizes.
    nbDigits = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    nbFaces = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    perceptronDigits = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    ]
    perceptronFaces = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    ]
    # Training-set size for each of the 40 runs: runs 0-19 use digits
    # (nb then perceptron), runs 20-39 use faces (nb then perceptron).
    trainingCounts = {
        0: 500, 1: 1000, 2: 1500, 3: 2000, 4: 2500,
        5: 3000, 6: 3500, 7: 4000, 8: 4500, 9: 5000,
        10: 500, 11: 1000, 12: 1500, 13: 2000, 14: 2500,
        15: 3000, 16: 3500, 17: 4000, 18: 4500, 19: 5000,
        20: 45, 21: 90, 22: 135, 23: 180, 24: 225,
        25: 270, 26: 315, 27: 360, 28: 405, 29: 450,
        30: 45, 31: 90, 32: 135, 33: 180, 34: 225,
        35: 270, 36: 315, 37: 360, 38: 405, 39: 450
    }

    #FaceData
    # NOTE(review): validation data is loaded from the *training* images, so
    # validation is not independent of training -- confirm intended.
    rawFaceTrainingData = samples.loadDataFile("facedata/facedatatrain", 450,
                                               FACE_DATUM_WIDTH,
                                               FACE_DATUM_HEIGHT)
    faceTrainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels",
                                                450)
    rawFaceValidationData = samples.loadDataFile("facedata/facedatatrain", 300,
                                                 FACE_DATUM_WIDTH,
                                                 FACE_DATUM_HEIGHT)
    faceValidationLabels = samples.loadLabelsFile(
        "facedata/facedatatrainlabels", 300)
    rawFaceTestData = samples.loadDataFile("facedata/facedatatest", 149,
                                           FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
    testFaceLabels = samples.loadLabelsFile("facedata/facedatatestlabels", 149)

    #DigitData
    rawDigitTrainingData = samples.loadDataFile("digitdata/trainingimages",
                                                5000, DIGIT_DATUM_WIDTH,
                                                DIGIT_DATUM_HEIGHT)
    digitTrainingLabels = samples.loadLabelsFile("digitdata/traininglabels",
                                                 5000)
    rawDigitValidationData = samples.loadDataFile("digitdata/validationimages",
                                                  1000, DIGIT_DATUM_WIDTH,
                                                  DIGIT_DATUM_HEIGHT)
    digitValidationLabels = samples.loadLabelsFile(
        "digitdata/validationlabels", 1000)
    rawDigitTestData = samples.loadDataFile("digitdata/testimages", 1000,
                                            DIGIT_DATUM_WIDTH,
                                            DIGIT_DATUM_HEIGHT)
    testDigitLabels = samples.loadLabelsFile("digitdata/testlabels", 1000)

    #Automation of test for each classifier and data type
    for x in range(40):
        # Runs 0-9: nb/digits, 10-19: perceptron/digits,
        # 20-29: nb/faces, 30-39: perceptron/faces.
        if x < 10:
            classifierName = "nb"
        elif x < 20:
            classifierName = "perceptron"
        elif x < 30:
            classifierName = "nb"
        else:
            classifierName = "perceptron"
        if x < 20:
            Data = "digits"
        else:
            Data = "faces"
        if (Data == "digits"):
            legalLabels = range(10)
            #featureFunction = enhancedFeatureExtractorDigit
            featureFunction = basicFeatureExtractorDigit
        else:
            legalLabels = range(2)
            #featureFunction = enhancedFeatureExtractorFace
            featureFunction = basicFeatureExtractorFace
        # A fresh classifier is built for every run x (but reused across the
        # three repetitions below).
        if (classifierName == "nb"):
            classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
            classifier.setSmoothing(2.0)
        elif (classifierName == "perceptron"):
            classifier = perceptron.PerceptronClassifier(legalLabels, 3)
        print("Doing classification")
        print("--------------------")
        print("data:\t\t" + Data)
        print("classifier:\t\t " + classifierName)
        # NOTE(review): banner says "enhanced" but the basic extractors are
        # selected above -- confirm which is intended.
        print("using enhanced features")
        print("training set size:\t" + str(trainingCounts[x]))

        # Extract features
        print("Extracting features...")
        # Load data
        if Data == "digits":
            # NOTE(review): startTime is set once per run x, not per
            # repetition h, so the per-repetition timings accumulated below
            # are cumulative; the faces branch never sets startTime at all
            # and reuses whatever value is left over from the digit runs.
            startTime = time.process_time()
            h = 0
            while h < 3:
                print("Iteration %d" % h)
                numTraining = trainingCounts[x]
                rawTrainingData = []
                rawTrainingLabels = []
                i = 0
                # NOTE(review): the full index list is rebuilt and reshuffled
                # for every draw, so training examples are sampled *with*
                # replacement and each draw costs O(n).
                while i < numTraining:
                    k = list(range(0, 5000))
                    random.shuffle(k)
                    j = k.pop()
                    rawTrainingLabels.append(digitTrainingLabels[j])
                    rawTrainingData.append(rawDigitTrainingData[j])
                    i += 1
                trainingData = list(map(featureFunction, rawTrainingData))
                validationData = list(
                    map(featureFunction, rawDigitValidationData))
                testData = list(map(featureFunction, rawDigitTestData))
                print("Training...")
                classifier.train(trainingData, rawTrainingLabels,
                                 validationData, digitValidationLabels)
                print("Validating...")
                guesses = classifier.classify(validationData)
                correct = [
                    guesses[i] == digitValidationLabels[i]
                    for i in range(len(digitValidationLabels))
                ].count(True)
                print(str(correct),
                      ("correct out of " + str(len(digitValidationLabels)) +
                       " (%.1f%%).") %
                      (100.0 * correct / len(digitValidationLabels)))
                print("Testing...")
                guesses = classifier.classify(testData)
                correct = [
                    guesses[i] == testDigitLabels[i]
                    for i in range(len(testDigitLabels))
                ].count(True)
                print(str(correct),
                      ("correct out of " + str(len(testDigitLabels)) +
                       " (%.1f%%).") %
                      (100.0 * correct / len(testDigitLabels)))
                h += 1
                #Gather correct count for each iteration and use to compute standard deviation
                # Only the Data == "digits" sub-branches can execute here;
                # the faces sub-branches are dead code in this copy of the
                # accumulation block.
                if classifierName == "nb":
                    if Data == "digits":
                        nbDigits[x % 10] += correct
                        nbDigits[(x % 10) + 10] += time.process_time() - startTime
                    else:
                        nbFaces[x % 10] += correct
                        nbFaces[(x % 10) + 10] += time.process_time() - startTime
                else:
                    if Data == "digits":
                        perceptronDigits[x % 10] += correct
                        perceptronDigits[(x % 10) + 10] += time.process_time() - startTime
                    else:
                        perceptronFaces[x % 10] += correct
                        perceptronFaces[(x % 10) + 10] += time.process_time() - startTime
        else:
            h = 0
            while h < 3:
                print("Iteration %d" % h)
                numTraining = trainingCounts[x]
                rawTrainingData = []
                rawTrainingLabels = []
                i = 0
                while i < numTraining:
                    k = list(range(0, 450))
                    random.shuffle(k)
                    j = k.pop()
                    rawTrainingLabels.append(faceTrainingLabels[j])
                    rawTrainingData.append(rawFaceTrainingData[j])
                    i += 1
                trainingData = list(map(featureFunction, rawTrainingData))
                validationData = list(
                    map(featureFunction, rawFaceValidationData))
                testData = list(map(featureFunction, rawFaceTestData))
                print("Training...")
                classifier.train(trainingData, rawTrainingLabels,
                                 validationData, faceValidationLabels)
                print("Validating...")
                guesses = classifier.classify(validationData)
                correct = [
                    guesses[i] == faceValidationLabels[i]
                    for i in range(len(faceValidationLabels))
                ].count(True)
                print(str(correct),
                      ("correct out of " + str(len(faceValidationLabels)) +
                       " (%.1f%%).") %
                      (100.0 * correct / len(faceValidationLabels)))
                print("Testing...")
                guesses = classifier.classify(testData)
                correct = [
                    guesses[i] == testFaceLabels[i]
                    for i in range(len(testFaceLabels))
                ].count(True)
                print(str(correct),
                      ("correct out of " + str(len(testFaceLabels)) +
                       " (%.1f%%).") % (100.0 * correct / len(testFaceLabels)))
                h += 1
                #Gather correct count for each iteration and use to compute standard deviation
                # NOTE(review): the timing terms here subtract the stale
                # startTime from the digit runs -- see note above.
                if classifierName == "nb":
                    if Data == "digits":
                        nbDigits[x % 10] += correct
                        nbDigits[(x % 10) + 10] += time.process_time() - startTime
                    else:
                        nbFaces[x % 10] += correct
                        nbFaces[(x % 10) + 10] += time.process_time() - startTime
                else:
                    if Data == "digits":
                        perceptronDigits[x % 10] += correct
                        perceptronDigits[(x % 10) + 10] += time.process_time() - startTime
                    else:
                        perceptronFaces[x % 10] += correct
                        perceptronFaces[(x % 10) + 10] += time.process_time() - startTime

    # NOTE(review): the "standard deviation" blocks below compute
    # sqrt((total - total/3)**2 / N), which is not the sample standard
    # deviation over the three repetitions -- confirm the intended formula.
    #NAIVE BAYES DIGITS
    print(
        "Average Correct Guesses for Naive Bayes Digits Based on Percentage of TrainingData Used"
    )
    print(
        "10%% %d/1000, 20%% %d/1000, 30%% %d/1000, 40%% %d/1000, 50%% %d/1000, 60%% %d/1000, 70%% %d/1000, 80%% %d/1000, 90%% %d/1000, 100%% %d/1000"
        % (nbDigits[0] / 3, nbDigits[1] / 3, nbDigits[2] / 3, nbDigits[3] / 3,
           nbDigits[4] / 3, nbDigits[5] / 3, nbDigits[6] / 3, nbDigits[7] / 3,
           nbDigits[8] / 3, nbDigits[9] / 3))
    print(
        "Standard Deviation for Naive Bayes Digits Based on Percentage of Training Data Used"
    )
    stndDev = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    i = 0
    while i < 10:
        stndDev[i] = nbDigits[i] / 3
        stndDev[i] = nbDigits[i] - stndDev[i]
        stndDev[i] = math.pow(stndDev[i], 2)
        stndDev[i] = stndDev[i] / 1000
        stndDev[i] = math.sqrt(stndDev[i])
        i += 1
    print(
        "10%% %d, 20%% %d, 30%% %d, 40%% %d, 50%% %d, 60%% %d, 70%% %d, 80%% %d, 90%% %d, 100%% %d"
        % (stndDev[0], stndDev[1], stndDev[2], stndDev[3], stndDev[4],
           stndDev[5], stndDev[6], stndDev[7], stndDev[8], stndDev[9]))
    print(
        "Average Time to Complete Each Iteration Based on Percentage of Training Data Used In Seconds"
    )
    print(
        "10%% %d seconds, 20%% %d seconds, 30%% %d seconds, 40%% %d seconds, 50%% %d seconds, 60%% %d seconds, 70%% %d seconds, 80%% %d seconds, 90%% %d seconds, 100%% %d seconds"
        % (nbDigits[10] / 3, nbDigits[11] / 3, nbDigits[12] / 3,
           nbDigits[13] / 3, nbDigits[14] / 3, nbDigits[15] / 3,
           nbDigits[16] / 3, nbDigits[17] / 3, nbDigits[18] / 3,
           nbDigits[19] / 3))

    #NAIVE BAYES FACES
    print(
        "Average Correct Guesses for Naive Bayes Faces Based on Percentage of TrainingData Used"
    )
    print(
        "10%% %d/149, 20%% %d/149, 30%% %d/149, 40%% %d/149, 50%% %d/149, 60%% %d/149, 70%% %d/149, 80%% %d/149, 90%% %d/149, 100%% %d/149"
        % (nbFaces[0] / 3, nbFaces[1] / 3, nbFaces[2] / 3, nbFaces[3] / 3,
           nbFaces[4] / 3, nbFaces[5] / 3, nbFaces[6] / 3, nbFaces[7] / 3,
           nbFaces[8] / 3, nbFaces[9] / 3))
    stndDev = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    i = 0
    while i < 10:
        stndDev[i] = nbFaces[i] / 3
        stndDev[i] = nbFaces[i] - stndDev[i]
        stndDev[i] = math.pow(stndDev[i], 2)
        stndDev[i] = stndDev[i] / 149
        stndDev[i] = math.sqrt(stndDev[i])
        i += 1
    print(
        "Standard Deviation for Naive Bayes Faces Based on Percentage of Training Data Used"
    )
    print(
        "10%% %d, 20%% %d, 30%% %d, 40%% %d, 50%% %d, 60%% %d, 70%% %d, 80%% %d, 90%% %d, 100%% %d"
        % (stndDev[0], stndDev[1], stndDev[2], stndDev[3], stndDev[4],
           stndDev[5], stndDev[6], stndDev[7], stndDev[8], stndDev[9]))
    print(
        "Time to Complete Each Iteration Based on Percentage of Training Data Used In Seconds"
    )
    print(
        "10%% %d, 20%% %d, 30%% %d, 40%% %d, 50%% %d, 60%% %d, 70%% %d, 80%% %d, 90%% %d, 100%% %d"
        % (nbFaces[10] / 3, nbFaces[11] / 3, nbFaces[12] / 3, nbFaces[13] / 3,
           nbFaces[14] / 3, nbFaces[15] / 3, nbFaces[16] / 3, nbFaces[17] / 3,
           nbFaces[18] / 3, nbFaces[19] / 3))

    #PERCEPTRON DIGITS
    print(
        "Average Correct Guesses for Perceptron Digits Based on Percentage of Training Data Used"
    )
    print(
        "10%% %d/1000, 20%% %d/1000, 30%% %d/1000, 40%% %d/1000, 50%% %d/1000, 60%% %d/1000, 70%% %d/1000, 80%% %d/1000, 90%% %d/1000, 100%% %d/1000"
        % (perceptronDigits[0] / 3, perceptronDigits[1] / 3,
           perceptronDigits[2] / 3, perceptronDigits[3] / 3,
           perceptronDigits[4] / 3, perceptronDigits[5] / 3,
           perceptronDigits[6] / 3, perceptronDigits[7] / 3,
           perceptronDigits[8] / 3, perceptronDigits[9] / 3))
    print(
        "Standard Deviation for Perceptron Digits Based on Percentage of Training Data Used"
    )
    stndDev = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    i = 0
    while i < 10:
        stndDev[i] = perceptronDigits[i] / 3
        stndDev[i] = perceptronDigits[i] - stndDev[i]
        stndDev[i] = math.pow(stndDev[i], 2)
        stndDev[i] = stndDev[i] / 1000
        stndDev[i] = math.sqrt(stndDev[i])
        i += 1
    print(
        "10%% %d, 20%% %d, 30%% %d, 40%% %d, 50%% %d, 60%% %d, 70%% %d, 80%% %d, 90%% %d, 100%% %d"
        % (stndDev[0], stndDev[1], stndDev[2], stndDev[3], stndDev[4],
           stndDev[5], stndDev[6], stndDev[7], stndDev[8], stndDev[9]))
    print(
        "Time to Complete Each Iteration Based on Percentage of Training Data Used In Seconds"
    )
    print(
        "10%% %d, 20%% %d, 30%% %d, 40%% %d, 50%% %d, 60%% %d, 70%% %d, 80%% %d, 90%% %d, 100%% %d"
        % (perceptronDigits[10] / 3, perceptronDigits[11] / 3,
           perceptronDigits[12] / 3, perceptronDigits[13] / 3,
           perceptronDigits[14] / 3, perceptronDigits[15] / 3,
           perceptronDigits[16] / 3, perceptronDigits[17] / 3,
           perceptronDigits[18] / 3, perceptronDigits[19] / 3))

    #PERCEPTRON FACES
    print(
        "Average Correct Guesses for Perceptron Faces Based on Percentage of Training Data Used"
    )
    print(
        "10%% %d/149, 20%% %d/149, 30%% %d/149, 40%% %d/149, 50%% %d/149, 60%% %d/149, 70%% %d/149, 80%% %d/149, 90%% %d/149, 100%% %d/149"
        % (perceptronFaces[0] / 3, perceptronFaces[1] / 3,
           perceptronFaces[2] / 3, perceptronFaces[3] / 3,
           perceptronFaces[4] / 3, perceptronFaces[5] / 3,
           perceptronFaces[6] / 3, perceptronFaces[7] / 3,
           perceptronFaces[8] / 3, perceptronFaces[9] / 3))
    print(
        "Standard Deviation for Perceptron Faces Based on Percentage of Training Data Used"
    )
    stndDev = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    i = 0
    while i < 10:
        stndDev[i] = perceptronFaces[i] / 3
        stndDev[i] = perceptronFaces[i] - stndDev[i]
        stndDev[i] = math.pow(stndDev[i], 2)
        stndDev[i] = stndDev[i] / 149
        stndDev[i] = math.sqrt(stndDev[i])
        i += 1
    print(
        "10%% %d, 20%% %d, 30%% %d, 40%% %d, 50%% %d, 60%% %d, 70%% %d, 80%% %d, 90%% %d, 100%% %d"
        % (stndDev[0], stndDev[1], stndDev[2], stndDev[3], stndDev[4],
           stndDev[5], stndDev[6], stndDev[7], stndDev[8], stndDev[9]))
    print(
        "Time to Complete Each Iteration Based on Percentage of Training Data Used In Seconds"
    )
    print(
        "10%% %d, 20%% %d, 30%% %d, 40%% %d, 50%% %d, 60%% %d, 70%% %d, 80%% %d, 90%% %d, 100%% %d"
        % (perceptronFaces[10] / 3, perceptronFaces[11] / 3,
           perceptronFaces[12] / 3, perceptronFaces[13] / 3,
           perceptronFaces[14] / 3, perceptronFaces[15] / 3,
           perceptronFaces[16] / 3, perceptronFaces[17] / 3,
           perceptronFaces[18] / 3, perceptronFaces[19] / 3))
def readCommand(argv): "Processes the command used to run from the command line." from optparse import OptionParser parser = OptionParser(USAGE_STRING) parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=['perceptron'], default='perceptron') parser.add_option('-t', '--training', help=default('The size of the training set'), default=1000, type="int") parser.add_option('-f', '--features', help=default('Whether to use enhanced features'), default=False, action="store_true") parser.add_option( '-k', '--smoothing', help=default("Smoothing parameter (ignored when using --autotune)"), type="float", default=2.0) parser.add_option( '-a', '--autotune', help=default("Whether to automatically tune hyperparameters"), default=False, action="store_true") parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=3, type="int") parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=TEST_SET_SIZE, type="int") parser.add_option( '-v', '--validate', help=default("Whether to validate when training (for graphs)"), default=False, action="store_true") options, otherjunk = parser.parse_args(argv) if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk)) args = {} # Set up variables according to the command line input. 
print "Doing classification" print "--------------------" print "classifier:\t\t" + options.classifier print "using enhanced features?:\t" + str(options.features) print "training set size:\t" + str(options.training) printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage if (options.features): featureFunction = enhancedFeatureExtractorDigit else: featureFunction = basicFeatureExtractorDigit legalLabels = range(10) if options.training <= 0: print "Training set size should be a positive integer (you provided: %d)" % options.training print USAGE_STRING sys.exit(2) if options.smoothing <= 0: print "Please provide a positive number for smoothing (you provided: %f)" % options.smoothing print USAGE_STRING sys.exit(2) if (options.classifier == "perceptron"): classifier = perceptron.PerceptronClassifier(legalLabels, options.iterations) else: print "Unknown classifier:", options.classifier print USAGE_STRING sys.exit(2) args['classifier'] = classifier args['featureFunction'] = featureFunction args['printImage'] = printImage return args, options
def readCommand(argv):
    """
    Processes the command used to run from the command line.

    Parses argv with optparse, validates the dataset/classifier choices,
    and returns (args, options) where args maps 'classifier',
    'featureFunction' and 'printImage' to the objects used by the caller.
    Exits with status 2 on any invalid combination of arguments.
    """
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)
    parser.add_option(
        '-c', '--classifier',
        help=default('The type of classifier'),
        choices=['mostFrequent', 'nb', 'naiveBayes', 'perceptron', 'knn'],
        default='mostFrequent')
    parser.add_option('-d', '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces'],
                      default='digits')
    parser.add_option('-t', '--training',
                      help=default('The size of the training set'),
                      default=100,
                      type="int")
    parser.add_option('-f', '--features',
                      help=default('Whether to use enhanced features'),
                      default=False,
                      action="store_true")
    parser.add_option('-o', '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False,
                      action="store_true")
    parser.add_option('-1', '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0,
                      type="int")
    parser.add_option('-2', '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1,
                      type="int")
    parser.add_option('-w', '--weights',
                      help=default('Whether to print weights'),
                      default=False,
                      action="store_true")
    parser.add_option(
        '-k', '--smoothing',
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=K_VALUE)
    parser.add_option(
        '-a', '--autotune',
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true")
    parser.add_option('-i', '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=MAX_ITERATIONS,
                      type="int")
    parser.add_option('-s', '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE,
                      type="int")
    parser.add_option(
        '-q', '--index',
        help=default(
            "index of data whose predicted label and actual label you want to display"
        ),
        default=-1,
        type="int")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t", options.data)
    print("classifier:\t\t", options.classifier)
    print("training set size:\t" + str(options.training))
    if (options.data == "digits"):
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH,
                                  DIGIT_DATUM_HEIGHT).printImage
        if (options.features):
            featureFunction = enhancedFeatureExtractorDigit
        else:
            print('using basicFeatureExtractorDigit for digits')
            featureFunction = basicFeatureExtractorDigit
    elif (options.data == "faces"):
        printImage = ImagePrinter(FACE_DATUM_WIDTH,
                                  FACE_DATUM_HEIGHT).printImage
        if (options.features):
            featureFunction = enhancedFeatureExtractorFace
        else:
            # BUG FIX: message previously said "basicFeatureExtractorDigit
            # for faces"; the face extractor is what is actually selected.
            print('using basicFeatureExtractorFace for faces')
            featureFunction = basicFeatureExtractorFace
    else:
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    # Ten digit classes, or face / not-face.
    if (options.data == "digits"):
        legalLabels = range(10)
    else:
        legalLabels = range(2)

    if options.training <= 0:
        # BUG FIX: the format string was previously passed alongside the
        # value as a second print() argument, so the literal "%d" was
        # printed instead of the number; use %-interpolation.
        print(
            "Training set size should be a positive integer (you provided: %d)"
            % options.training)
        print(USAGE_STRING)
        sys.exit(2)
    if options.smoothing <= 0:
        # BUG FIX: same %-interpolation fix as above.
        print(
            "Please provide a positive number for smoothing (you provided: %f)"
            % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)
    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)"
                  % (options.label1, options.label2))
            print(USAGE_STRING)
            sys.exit(2)

    # Instantiate the requested classifier.
    if (options.classifier == "mostFrequent"):
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif (options.classifier == "naiveBayes" or options.classifier == "nb"):
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if (options.autotune):
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            # BUG FIX: same %-interpolation fix as above.
            print("using smoothing parameter k=%f for naivebayes"
                  % options.smoothing)
    elif (options.classifier == "perceptron"):
        classifier = perceptron.PerceptronClassifier(legalLabels,
                                                     options.iterations)
    elif (options.classifier == 'knn'):
        # NOTE(review): options.smoothing (the float -k parameter) is reused
        # here as the kNN constructor argument -- confirm that is intended
        # rather than a dedicated neighbour count.
        if (options.data == "digits"):
            classifier = knn.KNNClassifier(legalLabels, options.smoothing)
        else:
            classifier = knn_faces.KNNClassifierFaces(legalLabels,
                                                      options.smoothing)
    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)
        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage
    return args, options
def read_command(argv):
    """Parse command-line arguments and configure the classification run.

    Returns a pair ``(args, options)``: ``args`` maps 'classifier',
    'feature_function', 'print_image' and 'agent_to_clone' to the
    configured objects; ``options`` is the raw optparse result.
    Exits the process with status 2 on invalid input.
    """
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)
    parser.add_option('-c', '--classifier',
                      help=default('The type of classifier'),
                      choices=['most_frequent', 'nb', 'naive_bayes',
                               'perceptron', 'perceptron_numpy', 'logistic',
                               'minicontest'],
                      default='most_frequent')
    parser.add_option('-d', '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces', 'pacman'],
                      default='digits')
    parser.add_option('-t', '--training',
                      help=default('The size of the training set'),
                      default=100, type="int")
    parser.add_option('-f', '--features',
                      help=default('Whether to use enhanced features'),
                      default=False, action="store_true")
    parser.add_option('-o', '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False, action="store_true")
    parser.add_option('-1', '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0, type="int")
    parser.add_option('-2', '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1, type="int")
    parser.add_option('-w', '--weights',
                      help=default('Whether to print weights'),
                      default=False, action="store_true")
    parser.add_option('-n', '--num_weights',
                      help=default("Num Weights to Print (when --weights enabled), default: 100"),
                      default=100, type="int")
    parser.add_option('-k', '--smoothing',
                      help=default("Smoothing parameter (ignored when using --autotune)"),
                      type="float", default=2.0)
    parser.add_option('-a', '--autotune',
                      help=default("Whether to automatically tune hyperparameters"),
                      default=False, action="store_true")
    parser.add_option('-i', '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3, type="int")
    parser.add_option('-s', '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE, type="int")
    parser.add_option('-g', '--agent_to_clone',
                      help=default("Pacman agent to copy"),
                      default=None, type="str")
    parser.add_option('-l', '--learning_rates',
                      help=default("Learning rates to use for gradient descent, can be a comma separated list or single value"),
                      default=[0.2], type="str",
                      action='callback', callback=learning_rate_callback)

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    if not options.classifier == 'minicontest':
        print("using enhanced features?:\t" + str(options.features))
    else:
        print("using minicontest feature extractor")
    print("training set size:\t" + str(options.training))

    # Pick the image printer and feature extractor for the dataset.
    if options.data == "digits":
        print_image = ImagePrinter(DIGIT_DATUM_WIDTH,
                                   DIGIT_DATUM_HEIGHT).print_image
        if options.features:
            feature_function = enhanced_feature_extractor_digit
        else:
            feature_function = basic_feature_extractor_digit
        if options.classifier == 'minicontest':
            feature_function = contest_feature_extractor_digit
    elif options.data == "faces":
        print_image = ImagePrinter(FACE_DATUM_WIDTH,
                                   FACE_DATUM_HEIGHT).print_image
        if options.features:
            feature_function = enhanced_feature_extractor_face
        else:
            feature_function = basic_feature_extractor_face
    elif options.data == "pacman":
        print_image = None
        if options.features:
            feature_function = enhanced_feature_extractor_pacman
        else:
            feature_function = basic_feature_extractor_pacman
    else:
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    if options.data == "digits":
        legal_labels = list(range(10))
    else:
        # NOTE(review): 'faces' also falls into this branch and gets the
        # pacman move labels — confirm whether faces should use range(2)
        # instead (the other harness variants in this file do).
        legal_labels = ['Stop', 'West', 'East', 'North', 'South']

    # Validate numeric options before building anything expensive.
    if options.training <= 0:
        print("Training set size should be a positive integer (you provided: %d)"
              % options.training)
        print(USAGE_STRING)
        sys.exit(2)
    if options.smoothing <= 0:
        print("Please provide a positive number for smoothing (you provided: %f)"
              % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)
    if options.odds:
        if options.label1 not in legal_labels or options.label2 not in legal_labels:
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)"
                  % (options.label1, options.label2))
            print(USAGE_STRING)
            sys.exit(2)

    # Construct the requested classifier.
    if options.classifier == "most_frequent":
        classifier = most_frequent.MostFrequentClassifier(legal_labels)
    elif options.classifier == "naive_bayes" or options.classifier == "nb":
        classifier = naive_bayes.NaiveBayesClassifier(legal_labels)
        classifier.set_smoothing(options.smoothing)
        if options.autotune:
            print("using automatic tuning for naivebayes")
            classifier.automatic_tuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes"
                  % options.smoothing)
    elif options.classifier == "perceptron":
        if options.data != 'pacman':
            classifier = perceptron.PerceptronClassifier(legal_labels,
                                                         options.iterations)
        else:
            classifier = perceptron_pacman.PerceptronClassifierPacman(
                legal_labels, options.iterations)
    elif options.classifier == "perceptron_numpy":
        # BUG FIX: previously `classifier` was left unbound for pacman data,
        # causing a NameError below; fail fast with a clear message instead.
        if options.data != 'pacman':
            classifier = perceptron_numpy.OptimizedPerceptronClassifier(
                legal_labels, options.iterations)
        else:
            print("perceptron_numpy does not support the pacman dataset")
            print(USAGE_STRING)
            sys.exit(2)
    elif options.classifier == "logistic":
        # BUG FIX: same unbound-classifier hazard as perceptron_numpy.
        if options.data != 'pacman':
            classifier = logistic.SoftmaxClassifier(legal_labels,
                                                    options.iterations)
            classifier.learning_rates = options.learning_rates
        else:
            print("logistic does not support the pacman dataset")
            print(USAGE_STRING)
            sys.exit(2)
    elif options.classifier == 'minicontest':
        import minicontest
        classifier = minicontest.contest_classifier(legal_labels)
    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)
        sys.exit(2)

    args['agent_to_clone'] = options.agent_to_clone
    args['classifier'] = classifier
    args['feature_function'] = feature_function
    args['print_image'] = print_image
    return args, options
def runClassifier(): global TK_ROOT, SP_CANVAS, LOG_X, LOG_Y # Set up variables according to the command line inputs featureFunction = basicFeatureExtractorDigit legalLabels = range(10) # number of labels # Select classifier classifier = perceptron.PerceptronClassifier(legalLabels) # Load data numTraining = 1 loadImage() rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT, 'train', SP_CANVAS) trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining) rawTestData = samples.loadDataFile("digitdata/testingimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT, 'test', SP_CANVAS) testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE) # Extract features print rawTestData trainingData = map(basicFeatureExtractorDigit, rawTrainingData) print "cp3" testData = map(basicFeatureExtractorDigit, rawTestData) # Conduct auto training SP_CANVAS.create_text(LOG_X, LOG_Y, text="Auto Training...", anchor=NW, font=tkFont.Font(size=-14)) LOG_Y += 15 classifier.train(trainingData, trainingLabels, SP_CANVAS) # Auto Testing # print "Validating..." # guesses = classifier.classify(validationData) # correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True) # print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels)) # User Input Testing SP_CANVAS.create_text(LOG_X, LOG_Y, text="Recognizing...", anchor=NW, font=tkFont.Font(size=-14)) LOG_Y += 15 guesses = classifier.classify(testData, SP_CANVAS, "usr") # Completion Notify SP_CANVAS.create_text(LOG_X, LOG_Y + 30, text="Completed...", anchor=NW, font=tkFont.Font(size=-14)) LOG_Y += 15
def runClassifier(): """ Harness code for running different classifiers on the face or digit data. This is the main function for classification, and is designed to be invoked from the command line (outside the Python interpreter). Usage: > python dataClassifier.py OR > python dataClassifier.py <data> <classifierName> OR > python dataClassifier.py <data> <classifierName> <featureFunction> OR > python dataClassifier.py <data> <classifierName> <featureFunction> <numTrainingExamples> OR > python dataClassifier.py <data> <classifierName> <featureFunction> <numTrainingExamples> <odds class1 class2> For example: > python dataClassifier.py digits naivebayes basic 1000 would run the naive Bayes classifier on 1000 training examples using the basicFeatureExtractor function, and then test the classifier on the test data. """ print "Doing classification" print "--------------------" # Assign default values for arguments if they are not provided. if (len(sys.argv) == 1): print "No data specified; using digits." sys.argv.append("digits") if (len(sys.argv) == 2): print "No classifier specified; using default." sys.argv.append("mostfrequent") if (len(sys.argv) == 3): print "No feature extraction function specified; using default." sys.argv.append("basic") if (len(sys.argv) == 4): print "No training set size specified; using default." sys.argv.append("100") if (len(sys.argv) == 5): print "Not doing odds ratio computation." sys.argv.append("noodds") # Set up variables according to the command line input. 
print "data:\t\t" + sys.argv[1] print "classifier:\t\t" + sys.argv[2] print "feature extractor:\t" + sys.argv[3] print "training set size:\t" + sys.argv[4] if ((sys.argv[1] == "digits") & (sys.argv[3] == "basic")): featureFunction = basicFeatureExtractorDigit elif ((sys.argv[1] == "faces") & (sys.argv[3] == "basic")): featureFunction = basicFeatureExtractorFace elif ((sys.argv[1] == "digits") & (sys.argv[3] == "enhanced")): featureFunction = enhancedFeatureExtractorDigit elif ((sys.argv[1] == "faces") & (sys.argv[3] == "enhanced")): featureFunction = enhancedFeatureExtractorFace else: print "Unknown feature function:", sys.argv[2] return if (sys.argv[1] == "digits"): # if digits detect legalLabels = range(10) else: # if face detect legalLabels = range(2) if (sys.argv[2] == "mostfrequent"): classifier = mostFrequent.MostFrequentClassifier(legalLabels) elif (sys.argv[2] == "naivebayes"): classifier = naiveBayes.NaiveBayesClassifier(legalLabels) elif (sys.argv[2] == "perceptron"): classifier = perceptron.PerceptronClassifier(legalLabels) else: print "Unknown classifier:", sys.argv[2] return # Load data numTraining = int(sys.argv[4]) if (sys.argv[1] == "faces"): rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT) trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining) rawValidationData = samples.loadDataFile("facedata/facedatatrain", TEST_SET_SIZE, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT) validationLabels = samples.loadLabelsFile( "facedata/facedatatrainlabels", TEST_SET_SIZE) rawTestData = samples.loadDataFile("facedata/facedatatest", TEST_SET_SIZE, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT) testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", TEST_SET_SIZE) else: rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT) trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining) 
rawValidationData = samples.loadDataFile("digitdata/validationimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT) validationLabels = samples.loadLabelsFile("digitdata/validationlabels", TEST_SET_SIZE) rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT) testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE) # Extract features print "Extracting features..." trainingData = map(featureFunction, rawTrainingData) validationData = map(featureFunction, rawValidationData) testData = map(featureFunction, rawTestData) # Conduct training and testing print "Training..." classifier.train(trainingData, trainingLabels, validationData, validationLabels) print "Validating..." guesses = classifier.classify(validationData) correct = [ guesses[i] == validationLabels[i] for i in range(len(validationLabels)) ].count(True) print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels)) print "Testing..." 
guesses = classifier.classify(testData) correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True) print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels)) util.pause() analysis(classifier, guesses, testLabels, rawTestData) # do odds ratio computation if specified at command line if ((sys.argv[5] == "odds") & (len(sys.argv) == 8)): features_class1, features_class2, features_odds = classifier.findHighOddsFeatures( int(sys.argv[6]), int(sys.argv[7])) if (sys.argv[1] == "faces"): printImage(features_class1, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT) printImage(features_class2, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT) printImage(features_odds, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT) else: printImage(features_class1, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT) printImage(features_class2, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT) printImage(features_odds, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
def readCommand(argv): "Processes the command used to run from the command line." from optparse import OptionParser parser = OptionParser(USAGE_STRING) parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=['perceptron'], default='perceptron') parser.add_option('-d', '--data', help=default('Dataset to use'), choices=['digits', 'pacman'], default='digits') parser.add_option('-t', '--training', help=default('The size of the training set'), default=100, type="int") parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=3, type="int") parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=TEST_SET_SIZE, type="int") parser.add_option('-g', '--agentToClone', help=default("Pacman agent to copy"), default=None, type="str") options, otherjunk = parser.parse_args(argv) if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk)) args = {} # Set up variables according to the command line input. 
print "Doing classification" print "--------------------" print "data:\t\t" + options.data print "classifier:\t\t" + options.classifier print "training set size:\t" + str(options.training) if (options.data == "digits"): printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage featureFunction = basicFeatureExtractorDigit if (options.classifier == 'minicontest'): featureFunction = contestFeatureExtractorDigit elif (options.data == "pacman"): printImage = None featureFunction = basicFeatureExtractorPacman else: print "Unknown dataset", options.data print USAGE_STRING sys.exit(2) if (options.data == "digits"): legalLabels = range(10) else: legalLabels = ['Stop', 'West', 'East', 'North', 'South'] if options.training <= 0: print "Training set size should be a positive integer (you provided: %d)" % options.training print USAGE_STRING sys.exit(2) if (options.classifier == "perceptron"): if options.data != 'pacman': classifier = perceptron.PerceptronClassifier( legalLabels, options.iterations) else: classifier = perceptron_pacman.PerceptronClassifierPacman( legalLabels, options.iterations) else: print "Unknown classifier:", options.classifier print USAGE_STRING sys.exit(2) args['agentToClone'] = options.agentToClone args['classifier'] = classifier args['featureFunction'] = featureFunction args['printImage'] = printImage return args, options
def readCommand(argv):
    """Processes the command used to run from the command line.

    Returns (args, options) where args carries the constructed classifier,
    feature extractor and image printer. Exits with status 2 on invalid
    input.
    """
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)
    parser.add_option('-c', '--classifier',
                      help=default('The type of classifier'),
                      choices=['mostFrequent', 'nb', 'naiveBayes',
                               'perceptron', 'mira'],
                      default='perceptron')
    parser.add_option('-d', '--data', help=default('Dataset to use'),
                      choices=['digits', 'faces'], default='digits')
    parser.add_option('-t', '--training',
                      help=default('The size of the training set'),
                      default=100, type="int")
    parser.add_option('-f', '--features',
                      help=default('Whether to use enhanced features'),
                      default=False, action="store_true")
    parser.add_option('-o', '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False, action="store_true")
    parser.add_option('-1', '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0, type="int")
    parser.add_option('-2', '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1, type="int")
    parser.add_option('-w', '--weights',
                      help=default('Whether to print weights'),
                      default=False, action="store_true")
    parser.add_option('-k', '--smoothing',
                      help=default("Smoothing parameter (ignored when using --autotune)"),
                      type="float", default=2.0)
    parser.add_option('-a', '--autotune',
                      help=default("Whether to automatically tune hyperparameters"),
                      default=False, action="store_true")
    parser.add_option('-i', '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3, type="int")
    parser.add_option('-s', '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE, type="int")
    # NOTE(review): store_true with default=True means these two flags can
    # never be turned off from the command line — confirm whether the
    # defaults should be False.
    parser.add_option('-n', '--analysis',
                      help=default("Shows which data is wrongly predicted"),
                      default=True, action="store_true")
    parser.add_option('-r', '--random',
                      help=default("Trains the data set using random data and calculates averages for percent accuracy and standard deviation"),
                      default=True, action="store_true")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    print("Doing classification")
    print("--------------------")
    print("Data:\t\t" + options.data)
    print("Classifier:\t\t" + options.classifier)
    print("Using enhanced features?:\t" + str(options.features))
    if not options.random:
        print("Training set size:\t" + str(options.training))

    # Pick the image printer and feature extractor for the dataset.
    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH,
                                  DIGIT_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH,
                                  FACE_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
    else:
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    if options.data == "digits":
        legalLabels = range(10)
    else:
        legalLabels = range(2)

    # Validate numeric options.
    if options.training <= 0:
        print("Training set size should be a positive integer (you provided: %d)" % options.training)
        print(USAGE_STRING)
        sys.exit(2)
    if options.smoothing <= 0:
        print("Please provide a positive number for smoothing (you provided: %f)" % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)
    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)" % (options.label1, options.label2))
            print(USAGE_STRING)
            sys.exit(2)

    # Construct the requested classifier.
    if options.classifier == "mostFrequent":
        # BUG FIX: 'mostFrequent' is an accepted --classifier choice but had
        # no construction branch, so selecting it aborted with "Unknown
        # classifier". Mirror the other harness variants in this file.
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "naiveBayes" or options.classifier == "nb":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if options.autotune:
            print("Using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("Using smoothing parameter k=%f for naivebayes" % options.smoothing)
    elif options.classifier == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels,
                                                     options.iterations)
    elif options.classifier == "mira":
        classifier = mira.MiraClassifier(legalLabels, options.iterations)
        if options.autotune:
            print("Using automatic tuning for MIRA")
            classifier.automaticTuning = True
        else:
            print("Using default C=0.001 for MIRA")
    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)
        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage
    return args, options
def readCommand(argv): "Processes the command used to run from the command line." from optparse import OptionParser parser = OptionParser(USAGE_STRING) parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=['perceptron', 'bagging', 'boosting'], default='bagging') parser.add_option('-t', '--training', help=default('The size of the training set'), default=1000, type="int") parser.add_option( '-k', '--smoothing', help=default("Smoothing parameter (ignored when using --autotune)"), type="float", default=2.0) parser.add_option( '-a', '--autotune', help=default("Whether to automatically tune hyperparameters"), default=False, action="store_true") parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=3, type="int") parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=TEST_SET_SIZE, type="int") parser.add_option( '-v', '--validate', help=default("Whether to validate when training (for graphs)"), default=False, action="store_true") parser.add_option( '-r', '--ratio', help=default( 'The ratio of dataset to be used to train 1 weak classifier'), default=0.1, type=float) parser.add_option( '-n', '--num_classifiers', help=default( 'The number of weak classifier to be trained on each subset of dataset' ), default=10, type=int) parser.add_option( '-b', '--boosting_iteration', help=default('Maximum iterations to run adaboost algorithm'), default=2, type=int) options, otherjunk = parser.parse_args(argv) if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk)) args = {} # Set up variables according to the command line input. 
print "Doing classification" print "--------------------" print "classifier:\t\t" + options.classifier if options.classifier == "bagging": print "num of weak classifier:%d" % options.num_classifiers if options.classifier == "boosting": print "num of boosting iterations:%d" % options.boosting_iteration print "training set size:\t" + str(options.training) printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage featureFunction = basicFeatureExtractorDigit legalLabels = [-1, 1] if options.training <= 0: print "Training set size should be a positive integer (you provided: %d)" % options.training print USAGE_STRING sys.exit(2) if options.smoothing <= 0: print "Please provide a positive number for smoothing (you provided: %f)" % options.smoothing print USAGE_STRING sys.exit(2) if (options.classifier == "perceptron"): classifier = perceptron.PerceptronClassifier(legalLabels, options.iterations) elif (options.classifier == "bagging"): classifier = bagging.BaggingClassifier(legalLabels, options.iterations, perceptron.PerceptronClassifier, options.ratio, options.num_classifiers) elif (options.classifier == "boosting"): classifier = boosting.AdaBoostClassifier( legalLabels, options.iterations, perceptron.PerceptronClassifier, options.boosting_iteration) else: print "Unknown classifier:", options.classifier print USAGE_STRING sys.exit(2) args['classifier'] = classifier args['featureFunction'] = featureFunction args['printImage'] = printImage return args, options
def readCommand(argv): "Processes the command used to run from the command line." from optparse import OptionParser parser = OptionParser(USAGE_STRING) parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=[ 'mostFrequent', 'nb', 'naiveBayes', 'perceptron', 'mira', 'minicontest' ], default='mostFrequent') parser.add_option('-d', '--data', help=default('Dataset to use'), choices=['digits', 'faces', 'pacman'], default='digits') parser.add_option('-t', '--training', help=default('The size of the training set'), default=100, type="int") parser.add_option('-f', '--features', help=default('Whether to use enhanced features'), default=False, action="store_true") parser.add_option('-o', '--odds', help=default('Whether to compute odds ratios'), default=False, action="store_true") parser.add_option('-1', '--label1', help=default("First label in an odds ratio comparison"), default=0, type="int") parser.add_option('-2', '--label2', help=default("Second label in an odds ratio comparison"), default=1, type="int") parser.add_option('-w', '--weights', help=default('Whether to print weights'), default=False, action="store_true") parser.add_option( '-k', '--smoothing', help=default("Smoothing parameter (ignored when using --autotune)"), type="float", default=2.0) parser.add_option( '-a', '--autotune', help=default("Whether to automatically tune hyperparameters"), default=False, action="store_true") parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=3, type="int") parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=TEST_SET_SIZE, type="int") parser.add_option('-g', '--agentToClone', help=default("Pacman agent to copy"), default=None, type="str") options, otherjunk = parser.parse_args(argv) if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk)) args = {} # Set up variables according to the command line input. 
print "Doing classification" print "--------------------" print "data:\t\t" + options.data print "classifier:\t\t" + options.classifier if not options.classifier == 'minicontest': print "using enhanced features?:\t" + str(options.features) else: print "using minicontest feature extractor" print "training set size:\t" + str(options.training) if (options.data == "digits"): printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage if (options.features): featureFunction = enhancedFeatureExtractorDigit else: featureFunction = basicFeatureExtractorDigit if (options.classifier == 'minicontest'): featureFunction = contestFeatureExtractorDigit elif (options.data == "faces"): printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage if (options.features): featureFunction = enhancedFeatureExtractorFace else: featureFunction = basicFeatureExtractorFace elif (options.data == "pacman"): printImage = None if (options.features): featureFunction = enhancedFeatureExtractorPacman else: featureFunction = basicFeatureExtractorPacman else: print "Unknown dataset", options.data print USAGE_STRING sys.exit(2) if (options.data == "digits"): legalLabels = range(10) else: legalLabels = ['Stop', 'West', 'East', 'North', 'South'] if options.training <= 0: print "Training set size should be a positive integer (you provided: %d)" % options.training print USAGE_STRING sys.exit(2) if options.smoothing <= 0: print "Please provide a positive number for smoothing (you provided: %f)" % options.smoothing print USAGE_STRING sys.exit(2) if options.odds: if options.label1 not in legalLabels or options.label2 not in legalLabels: print "Didn't provide a legal labels for the odds ratio: (%d,%d)" % ( options.label1, options.label2) print USAGE_STRING sys.exit(2) if (options.classifier == "mostFrequent"): classifier = mostFrequent.MostFrequentClassifier(legalLabels) elif (options.classifier == "naiveBayes" or options.classifier == "nb"): classifier = 
naiveBayes.NaiveBayesClassifier(legalLabels) classifier.setSmoothing(options.smoothing) if (options.autotune): print "using automatic tuning for naivebayes" classifier.automaticTuning = True else: print "using smoothing parameter k=%f for naivebayes" % options.smoothing elif (options.classifier == "perceptron"): if options.data != 'pacman': classifier = perceptron.PerceptronClassifier( legalLabels, options.iterations) else: classifier = perceptron_pacman.PerceptronClassifierPacman( legalLabels, options.iterations) elif (options.classifier == "mira"): if options.data != 'pacman': classifier = mira.MiraClassifier(legalLabels, options.iterations) if (options.autotune): print "using automatic tuning for MIRA" classifier.automaticTuning = True else: print "using default C=0.001 for MIRA" elif (options.classifier == 'minicontest'): import minicontest classifier = minicontest.contestClassifier(legalLabels) else: print "Unknown classifier:", options.classifier print USAGE_STRING sys.exit(2) args['agentToClone'] = options.agentToClone args['classifier'] = classifier args['featureFunction'] = featureFunction args['printImage'] = printImage return args, options
def selfRunClassifier(): print "Doing classification" print "--------------------" data_percent = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100] # print "=================Digits====================" # # # # NaiveBayes part # print "Training by using NaiveBayes Algorithm" # featureFunction = enhancedFeatureExtractorDigit # legalLabels = range(10) # classifier = naiveBayes.NaiveBayesClassifier(legalLabels) # lst_avg_time = [] # lst_avg_acc = [] # lst_std_acc = [] # for percent in data_percent: # print "training set size:\t" + str(percent)+"%" # # print "setSmoothing: k value is ", classifier.k # lst_time = [] # lst_acc = [] # for i in range(5): # start_time = timeit.default_timer() # rawTrainingData = samples.loadDataFile("digitdata/trainingimages", percent,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT) # trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", percent) # rawValidationData = samples.loadDataFile("digitdata/validationimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT) # validationLabels = samples.loadLabelsFile("digitdata/validationlabels", TEST_SET_SIZE) # rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT) # testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE) # # print "Extracting features..." 
# trainingData = map(featureFunction, rawTrainingData) # validationData = map(featureFunction, rawValidationData) # testData = map(featureFunction, rawTestData) # classifier.train(trainingData, trainingLabels, validationData, validationLabels) # elapse = timeit.default_timer() - start_time # lst_time.append(elapse) # guesses = classifier.classify(testData) # correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True) # lst_acc.append(float(correct) / len(testLabels)) # print '--------------------------------------------------------------' # lst_avg_time.append(np.mean(lst_time)) # lst_avg_acc.append(np.mean(lst_acc)) # lst_std_acc.append(np.std(lst_acc)) # analysis(lst_avg_time, lst_avg_acc, lst_std_acc) # # Percentron algorithm # print "Training by using Percentron Algorithm" # lst_avg_time = [] # lst_avg_acc = [] # lst_std_acc = [] # for percent in data_percent: # print "training set size:\t" + str(percent)+"%" # # print "setSmoothing: k value is ", classifier.k # lst_time = [] # lst_acc = [] # for i in range(5): # featureFunction = enhancedFeatureExtractorDigit # legalLabels = range(10) # classifier = perceptron.PerceptronClassifier(legalLabels, 3) # start_time = timeit.default_timer() # rawTrainingData = samples.loadDataFile("digitdata/trainingimages", percent,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT) # trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", percent) # rawValidationData = samples.loadDataFile("digitdata/validationimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT) # validationLabels = samples.loadLabelsFile("digitdata/validationlabels", TEST_SET_SIZE) # rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT) # testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE) # # print "Extracting features..." 
# trainingData = map(featureFunction, rawTrainingData) # validationData = map(featureFunction, rawValidationData) # testData = map(featureFunction, rawTestData) # classifier.train(trainingData, trainingLabels, validationData, validationLabels) # elapse = timeit.default_timer() - start_time # lst_time.append(elapse) # guesses = classifier.classify(testData) # correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True) # lst_acc.append(float(correct) / len(testLabels)) # print '--------------------------------------------------------------' # lst_avg_time.append(np.mean(lst_time)) # lst_avg_acc.append(np.mean(lst_acc)) # lst_std_acc.append(np.std(lst_acc)) # analysis(lst_avg_time, lst_avg_acc, lst_std_acc) # # # # # # # # K nearest neighbour algorithm # print "Training by using KNN Algorithm" # print "Only use 10% test set" # # featureFunction = enhancedFeatureExtractorDigit # # legalLabels = range(10) # # classifier = Knear.KnearestNeighbourClassifier(legalLabels) # lst_avg_time = [] # lst_avg_acc = [] # lst_std_acc = [] # for percent in data_percent: # print "training set size:\t" + str(percent)+"%" # lst_time = [] # lst_acc = [] # for i in range(5): # start_time = timeit.default_timer() # featureFunction = enhancedFeatureExtractorDigit # legalLabels = range(10) # classifier = Knear.KnearestNeighbourClassifier(legalLabels) # rawTrainingData = samples.loadDataFile("digitdata/trainingimages", percent,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT) # trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", percent) # rawValidationData = samples.loadDataFile("digitdata/validationimages", 10,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT) # validationLabels = samples.loadLabelsFile("digitdata/validationlabels", 10) # rawTestData = samples.loadDataFile("digitdata/testimages", 10,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT) # testLabels = samples.loadLabelsFile("digitdata/testlabels", 10) # # print "Extracting features..." 
# trainingData = map(featureFunction, rawTrainingData) # validationData = map(featureFunction, rawValidationData) # testData = map(featureFunction, rawTestData) # # classifier.train(trainingData, trainingLabels, validationData, validationLabels) # elapse = timeit.default_timer() - start_time # # print elapse # lst_time.append(elapse) # guesses = classifier.classify(testData) # correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True) # lst_acc.append(float(correct) / len(testLabels)) # print '--------------------------------------------------------------' # lst_avg_time.append(np.mean(lst_time)) # lst_avg_acc.append(np.mean(lst_acc)) # lst_std_acc.append(np.std(lst_acc)) # analysis(lst_avg_time, lst_avg_acc, lst_std_acc) print "" print "=================Faces====================" # # NaiveBayes Algorithm # print "Training by using NaiveBayes Algorithm" # featureFunction = enhancedFeatureExtractorDigit # legalLabels = range(2) # classifier = naiveBayes.NaiveBayesClassifier(legalLabels) # lst_avg_time = [] # lst_avg_acc = [] # lst_std_acc = [] # for percent in data_percent: # print "training set size:\t" + str(percent)+"%" # lst_time = [] # lst_acc = [] # for i in range(5): # start_time = timeit.default_timer() # rawTrainingData = samples.loadDataFile("facedata/facedatatrain", percent,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT) # trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", percent) # rawValidationData = samples.loadDataFile("facedata/facedatatrain", TEST_SET_SIZE,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT) # validationLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", TEST_SET_SIZE) # rawTestData = samples.loadDataFile("facedata/facedatatest", TEST_SET_SIZE,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT) # testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", TEST_SET_SIZE) # trainingData = map(featureFunction, rawTrainingData) # validationData = map(featureFunction, rawValidationData) # testData = 
map(featureFunction, rawTestData) # classifier.train(trainingData, trainingLabels, validationData, validationLabels) # elapse = timeit.default_timer() - start_time # lst_time.append(elapse) # guesses = classifier.classify(testData) # correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True) # lst_acc.append(float(correct) / len(testLabels)) # print '--------------------------------------------------------------' # lst_avg_time.append(np.mean(lst_time)) # lst_avg_acc.append(np.mean(lst_acc)) # lst_std_acc.append(np.std(lst_acc)) # analysis(lst_avg_time, lst_avg_acc, lst_std_acc) # Perceptron Algorithm print "Training by using Perceptron Algorithm" featureFunction = enhancedFeatureExtractorDigit legalLabels = range(2) classifier = perceptron.PerceptronClassifier(legalLabels, 3) lst_avg_time = [] lst_avg_acc = [] lst_std_acc = [] for percent in data_percent: print "training set size:\t" + str(percent) + "%" lst_time = [] lst_acc = [] for i in range(5): start_time = timeit.default_timer() rawTrainingData = samples.loadDataFile("facedata/facedatatrain", percent, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT) trainingLabels = samples.loadLabelsFile( "facedata/facedatatrainlabels", percent) rawValidationData = samples.loadDataFile("facedata/facedatatrain", TEST_SET_SIZE, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT) validationLabels = samples.loadLabelsFile( "facedata/facedatatrainlabels", TEST_SET_SIZE) rawTestData = samples.loadDataFile("facedata/facedatatest", TEST_SET_SIZE, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT) testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", TEST_SET_SIZE) trainingData = map(featureFunction, rawTrainingData) validationData = map(featureFunction, rawValidationData) testData = map(featureFunction, rawTestData) classifier.train(trainingData, trainingLabels, validationData, validationLabels) elapse = timeit.default_timer() - start_time lst_time.append(elapse) guesses = classifier.classify(testData) correct = [ guesses[i] == 
testLabels[i] for i in range(len(testLabels)) ].count(True) lst_acc.append(float(correct) / len(testLabels)) print '--------------------------------------------------------------' lst_avg_time.append(np.mean(lst_time)) lst_avg_acc.append(np.mean(lst_acc)) lst_std_acc.append(np.std(lst_acc)) analysis(lst_avg_time, lst_avg_acc, lst_std_acc)