def readCommand( argv ) :
    """
    Processes the command used to run from the command line.

    Recognised options are -c/--classifier, -d/--data, -m/--metric and
    -x/--maxdepth. The chosen data set and classifier are echoed to stdout
    before the classifier object is built.

    Returns a pair (args, options). args is a dict holding 'classifier',
    'metric' (None unless the decision tree is selected) and 'maxdepth';
    options is the value object produced by optparse.

    Raises Exception when positional arguments are left over. Prints
    USAGE_STRING and exits with status 2 when the data set, metric or
    classifier cannot be handled.
    """
    from optparse import OptionParser

    parser = OptionParser( USAGE_STRING )
    parser.add_option( '-c', '--classifier',
                       help=default( 'The type of classifier' ),
                       choices=[ 'naiveBayes', 'mostFrequent', 'decisionTree' ],
                       default='mostFrequent' )
    parser.add_option( '-d', '--data',
                       help=default( 'Dataset to use' ),
                       choices=[ 'car', 'ionosphere', 'wine' ],
                       default='car' )
    parser.add_option( '-m', '--metric',
                       help=default( 'Division metric' ),
                       choices=[ 'gini', 'error', 'entropy' ],
                       default='error' )
    # type="int" keeps a user-supplied depth numeric, like the default;
    # without it optparse hands back the raw string.
    parser.add_option( '-x', '--maxdepth',
                       help=default( 'Maximum depth of the tree learned' ),
                       default=200, type="int" )

    options, otherjunk = parser.parse_args( argv )
    if len( otherjunk ) != 0:
        raise Exception( 'Command line input not understood: ' + str( otherjunk ) )
    args = {}

    # Set up variables according to the command line input.
    print( "Doing classification" )
    print( "--------------------" )
    print( "data:\t\t" + options.data )
    print( "classifier:\t\t" + options.classifier )

    # No per-dataset set-up happens here; the name is only validated.
    if options.data not in ( "car", "ionosphere", "wine" ):
        print( "Unknown dataset " + str( options.data ) )
        print( USAGE_STRING )
        sys.exit( 2 )

    legalLabels = [ 0, 1 ]
    metric = None

    if options.classifier == "decisionTree" or options.classifier == "dt":
        classifier = decisionTree.DecisionTreeClassifier( legalLabels )
        if options.metric == 'gini':
            metric = metrics.gini
        elif options.metric == 'error':
            metric = metrics.error
        elif options.metric == 'entropy':
            metric = metrics.entropy
        else:
            print( "Unknown metric: " + str( options.metric ) )
            print( USAGE_STRING )
            sys.exit( 2 )
    elif options.classifier == "mostFrequent" or options.classifier == "mf":
        classifier = mostFrequent.MostFrequentClassifier( legalLabels )
    else:
        # NOTE(review): 'naiveBayes' is an accepted choice for -c but has no
        # branch above, so it ends up here - confirm whether that is intended.
        print( "Unknown classifier: " + str( options.classifier ) )
        print( USAGE_STRING )
        sys.exit( 2 )

    args[ 'classifier' ] = classifier
    args[ 'metric' ] = metric
    args[ 'maxdepth' ] = options.maxdepth

    return args, options
def readCommand(argv):
    """
    Processes the command used to run from the command line.

    Parses the classifier, data set, training/test sizes, feature, odds-ratio,
    smoothing, autotune, iteration and agentToClone options, echoes the
    configuration to stdout and builds the requested classifier.

    Returns a pair (args, options). args is a dict with the keys
    'agentToClone', 'classifier', 'featureFunction' and 'printImage';
    options is the value object produced by optparse.

    Raises Exception when positional arguments are left over. Prints
    USAGE_STRING and exits with status 2 when an option value is rejected.
    """
    from optparse import OptionParser

    parser = OptionParser(USAGE_STRING)
    parser.add_option('-c', '--classifier',
                      help=default('The type of classifier'),
                      choices=['mostFrequent', 'nb', 'naiveBayes',
                               'perceptron', 'mira', 'minicontest'],
                      default='mostFrequent')
    parser.add_option('-d', '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces', 'pacman'],
                      default='digits')
    parser.add_option('-t', '--training',
                      help=default('The size of the training set'),
                      default=100, type="int")
    parser.add_option('-f', '--features',
                      help=default('Whether to use enhanced features'),
                      default=False, action="store_true")
    parser.add_option('-o', '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False, action="store_true")
    parser.add_option('-1', '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0, type="int")
    parser.add_option('-2', '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1, type="int")
    parser.add_option('-w', '--weights',
                      help=default('Whether to print weights'),
                      default=False, action="store_true")
    parser.add_option('-k', '--smoothing',
                      help=default("Smoothing parameter (ignored when using --autotune)"),
                      type="float", default=2.0)
    parser.add_option('-a', '--autotune',
                      help=default("Whether to automatically tune hyperparameters"),
                      default=False, action="store_true")
    parser.add_option('-i', '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3, type="int")
    parser.add_option('-s', '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE, type="int")
    parser.add_option('-g', '--agentToClone',
                      help=default("Pacman agent to copy"),
                      default=None, type="str")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    if not options.classifier == 'minicontest':
        print("using enhanced features?:\t" + str(options.features))
    else:
        print("using minicontest feature extractor")
    print("training set size:\t" + str(options.training))

    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
        # The contest extractor overrides the -f choice (digits only).
        if options.classifier == 'minicontest':
            featureFunction = contestFeatureExtractorDigit
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
    elif options.data == "pacman":
        printImage = None
        if options.features:
            featureFunction = enhancedFeatureExtractorPacman
        else:
            featureFunction = basicFeatureExtractorPacman
    else:
        print("Unknown dataset " + str(options.data))
        print(USAGE_STRING)
        sys.exit(2)

    if options.data == "digits":
        legalLabels = range(10)
    elif options.data == "faces":
        # Faces are a two-class problem; previously they silently received
        # the Pacman move labels below.
        legalLabels = range(2)
    else:
        legalLabels = ['Stop', 'West', 'East', 'North', 'South']

    if options.training <= 0:
        print("Training set size should be a positive integer (you provided: %d)" % options.training)
        print(USAGE_STRING)
        sys.exit(2)

    if options.smoothing <= 0:
        print("Please provide a positive number for smoothing (you provided: %f)" % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)

    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)" % (
                options.label1, options.label2))
            print(USAGE_STRING)
            sys.exit(2)

    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "naiveBayes" or options.classifier == "nb":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if options.autotune:
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes" % options.smoothing)
    elif options.classifier == "perceptron":
        if options.data != 'pacman':
            classifier = perceptron.PerceptronClassifier(
                legalLabels, options.iterations)
        else:
            classifier = perceptron_pacman.PerceptronClassifierPacman(
                legalLabels, options.iterations)
    elif options.classifier == "mira":
        if options.data == 'pacman':
            # No MIRA classifier is built for pacman data; fail with a clear
            # message instead of an UnboundLocalError further down.
            print("The mira classifier is not available for the pacman dataset")
            print(USAGE_STRING)
            sys.exit(2)
        classifier = mira.MiraClassifier(legalLabels, options.iterations)
        if options.autotune:
            print("using automatic tuning for MIRA")
            classifier.automaticTuning = True
        else:
            print("using default C=0.001 for MIRA")
    elif options.classifier == 'minicontest':
        import minicontest
        classifier = minicontest.contestClassifier(legalLabels)
    else:
        print("Unknown classifier: " + str(options.classifier))
        print(USAGE_STRING)
        sys.exit(2)

    args['agentToClone'] = options.agentToClone
    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
def runClassifier():
    """
    Harness code for running different classifiers on the face or digit data.

    This is the main function for classification, and is designed
    to be invoked from the command line (outside the Python interpreter).

    Usage:
      > python dataClassifier.py
      OR
      > python dataClassifier.py <data> <classifierName>
      OR
      > python dataClassifier.py <data> <classifierName> <featureFunction>
      OR
      > python dataClassifier.py <data> <classifierName> <featureFunction> <numTrainingExamples>
      OR
      > python dataClassifier.py <data> <classifierName> <featureFunction> <numTrainingExamples> <odds class1 class2>

    For example:
      > python dataClassifier.py digits naivebayes basic 1000

    would run the naive Bayes classifier on 1000 training examples using the
    basicFeatureExtractor function, and then test the classifier on the test data.

    Missing positional arguments are appended to sys.argv with default
    values. The function returns None; it returns early (after printing a
    message) when the feature function or classifier name is not recognised.
    """
    print("Doing classification")
    print("--------------------")

    # Assign default values for arguments if they are not provided.
    # Each entry fills the argv slot whose index equals the current length,
    # so one missing argument cascades into all the following defaults.
    fallbacks = [
        ("No data specified; using digits.", "digits"),
        ("No classifier specified; using default.", "mostfrequent"),
        ("No feature extraction function specified; using default.", "basic"),
        ("No training set size specified; using default.", "100"),
        ("Not doing odds ratio computation.", "noodds"),
    ]
    for position, (notice, value) in enumerate(fallbacks, 1):
        if len(sys.argv) == position:
            print(notice)
            sys.argv.append(value)

    dataName = sys.argv[1]
    classifierName = sys.argv[2]
    featureName = sys.argv[3]

    # Set up variables according to the command line input.
    print("data:\t\t" + dataName)
    print("classifier:\t\t" + classifierName)
    print("feature extractor:\t" + featureName)
    print("training set size:\t" + sys.argv[4])

    if dataName == "digits" and featureName == "basic":
        featureFunction = basicFeatureExtractorDigit
    elif dataName == "faces" and featureName == "basic":
        featureFunction = basicFeatureExtractorFace
    elif dataName == "digits" and featureName == "enhanced":
        featureFunction = enhancedFeatureExtractorDigit
    elif dataName == "faces" and featureName == "enhanced":
        featureFunction = enhancedFeatureExtractorFace
    else:
        # Report the feature-function argument (argv[3]), not the classifier.
        print("Unknown feature function: " + featureName)
        return

    if dataName == "digits":  # if digits detect
        legalLabels = range(10)
    else:  # if face detect
        legalLabels = range(2)

    if classifierName == "mostfrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif classifierName == "naivebayes":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
    elif classifierName == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels)
    else:
        print("Unknown classifier: " + classifierName)
        return

    # Load data
    numTraining = int(sys.argv[4])

    if dataName == "faces":
        rawTrainingData = samples.loadDataFile(
            "facedata/facedatatrain", numTraining,
            FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile(
            "facedata/facedatatrainlabels", numTraining)
        # NOTE(review): validation data is read from the *training* files
        # here - confirm whether a separate validation set was intended.
        rawValidationData = samples.loadDataFile(
            "facedata/facedatatrain", TEST_SET_SIZE,
            FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile(
            "facedata/facedatatrainlabels", TEST_SET_SIZE)
        rawTestData = samples.loadDataFile(
            "facedata/facedatatest", TEST_SET_SIZE,
            FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile(
            "facedata/facedatatestlabels", TEST_SET_SIZE)
    else:
        rawTrainingData = samples.loadDataFile(
            "digitdata/trainingimages", numTraining,
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile(
            "digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile(
            "digitdata/validationimages", TEST_SET_SIZE,
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile(
            "digitdata/validationlabels", TEST_SET_SIZE)
        rawTestData = samples.loadDataFile(
            "digitdata/testimages", TEST_SET_SIZE,
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile(
            "digitdata/testlabels", TEST_SET_SIZE)

    # Extract features
    print("Extracting features...")
    trainingData = [featureFunction(datum) for datum in rawTrainingData]
    validationData = [featureFunction(datum) for datum in rawValidationData]
    testData = [featureFunction(datum) for datum in rawTestData]

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)

    print("Validating...")
    guesses = classifier.classify(validationData)
    correct = [guesses[i] == validationLabels[i]
               for i in range(len(validationLabels))].count(True)
    print(str(correct) + " " + ("correct out of " + str(len(validationLabels)) + " (%.1f%%).")
          % (100.0 * correct / len(validationLabels)))

    print("Testing...")
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i]
               for i in range(len(testLabels))].count(True)
    print(str(correct) + " " + ("correct out of " + str(len(testLabels)) + " (%.1f%%).")
          % (100.0 * correct / len(testLabels)))

    util.pause()
    analysis(classifier, guesses, testLabels, rawTestData)

    # do odds ratio computation if specified at command line
    if sys.argv[5] == "odds" and len(sys.argv) == 8:
        features_class1, features_class2, features_odds = classifier.findHighOddsFeatures(
            int(sys.argv[6]), int(sys.argv[7]))
        if dataName == "faces":
            width, height = FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT
        else:
            width, height = DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
        for features in (features_class1, features_class2, features_odds):
            printImage(features, width, height)
def readCommand(argv):
    """
    Turn the command-line arguments into a classifier configuration.

    Returns a pair (args, options): args maps 'classifier',
    'featureFunction' and 'printImage' to the objects selected here, and
    options is the optparse value object. Left-over positional arguments
    raise Exception; a rejected value prints USAGE_STRING and exits with
    status 2.
    """
    from optparse import OptionParser

    def bail(message):
        # Shared failure path: report, show the usage text, stop with status 2.
        print(message)
        print(USAGE_STRING)
        sys.exit(2)

    parser = OptionParser(USAGE_STRING)
    option_table = [
        (('-c', '--classifier'),
         dict(help=default('The type of classifier'),
              choices=['mostFrequent', 'nb', 'naiveBayes', 'GDA'],
              default='mostFrequent')),
        (('-d', '--data'),
         dict(help=default('Dataset to use'),
              choices=['digits', 'faces'],
              default='digits')),
        (('-t', '--training'),
         dict(help=default('The size of the training set'),
              default=450, type="int")),
        (('-k', '--smoothing'),
         dict(help=default("Smoothing parameter (ignored when using --autotune)"),
              type="float", default=2.0)),
        (('-a', '--autotune'),
         dict(help=default("Whether to automatically tune hyperparameters"),
              default=False, action="store_true")),
        (('-i', '--iterations'),
         dict(help=default("Maximum iterations to run training"),
              default=3, type="int")),
    ]
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    options, leftovers = parser.parse_args(argv)
    if leftovers:
        raise Exception('Command line input not understood: ' + str(leftovers))

    # Echo the configuration that is about to be used.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    print("training set size:\t" + str(options.training))

    dataset = options.data
    if dataset == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
        featureFunction = basicFeatureExtractorDigit
    elif dataset == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
        featureFunction = basicFeatureExtractorFace
    else:
        bail("Unknown dataset" + " " + str(dataset))

    legalLabels = range(10) if dataset == "digits" else range(2)

    if options.training <= 0:
        bail("Training set size should be a positive integer (you provided: %d)" % options.training)

    if options.smoothing <= 0:
        bail("Please provide a positive number for smoothing (you provided: %f)" % options.smoothing)

    kind = options.classifier
    if kind in ("naiveBayes", "nb"):
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if not options.autotune:
            print("using smoothing parameter k=%f for naivebayes" % options.smoothing)
        else:
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
    elif kind == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif kind == "GDA":
        classifier = gaussianDiscriminantAnalysis.GaussianDiscriminantAnalysisClassifier(
            legalLabels, "GDA")
    else:
        bail("Unknown classifier:" + " " + str(kind))

    args = {
        'classifier': classifier,
        'featureFunction': featureFunction,
        'printImage': printImage,
    }
    return args, options
def readCommand(argv):
    """
    Turn the command-line arguments into a digit-classifier configuration.

    Returns a pair (args, options): args maps 'classifier',
    'featureFunction' and 'printImage' to the objects selected here, and
    options is the optparse value object. Left-over positional arguments
    raise Exception; a rejected value prints USAGE_STRING and exits with
    status 2.
    """
    from optparse import OptionParser

    def bail(message):
        # Shared failure path: report, show the usage text, stop with status 2.
        print(message)
        print(USAGE_STRING)
        sys.exit(2)

    parser = OptionParser(USAGE_STRING)
    option_table = [
        (('-c', '--classifier'),
         dict(help=default('The type of classifier'),
              choices=['mostFrequent', 'perceptron', 'mlp', 'svm'],
              default='mostFrequent')),
        (('-t', '--training'),
         dict(help=default('The size of the training set'),
              default=TRAINING_SET_SIZE, type="int")),
        (('-w', '--weights'),
         dict(help=default('Whether to print weights'),
              default=False, action="store_true")),
        (('-i', '--iterations'),
         dict(help=default("Maximum iterations to run training"),
              default=3, type="int")),
        (('-s', '--test'),
         dict(help=default("Amount of test data to use"),
              default=TEST_SET_SIZE, type="int")),
    ]
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    options, leftovers = parser.parse_args(argv)
    if leftovers:
        raise Exception('Command line input not understood: ' + str(leftovers))

    # Echo the configuration that is about to be used.
    print("Doing classification")
    print("--------------------")
    print("classifier:\t\t" + options.classifier)
    print("training set size:\t" + str(options.training))

    # Only the digit data set is handled by this variant.
    printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
    featureFunction = basicFeatureExtractorDigit
    legalLabels = range(10)

    if options.training <= 0:
        bail("Training set size should be a positive integer (you provided: %d)" % options.training)

    kind = options.classifier
    if kind == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif kind == "mlp":
        classifier = mlp.MLPClassifier(legalLabels, options.iterations)
    elif kind == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels, options.iterations)
    elif kind == "svm":
        classifier = svm.SVMClassifier(legalLabels)
    else:
        bail("Unknown classifier:" + " " + str(kind))

    args = {
        'classifier': classifier,
        'featureFunction': featureFunction,
        'printImage': printImage,
    }
    return args, options
def readCommand(argv):
    """Processes the command used to run from the command line.

    Parses the classifier, data set, size, feature, odds-ratio, smoothing,
    autotune, iteration, neighbour-count, specialMode and analysis options,
    echoes the configuration to stdout and builds the requested classifier.

    Returns a pair (args, options). args is a dict with the keys
    'classifier', 'featureFunction' and 'printImage'; options is the value
    object produced by optparse.

    Raises Exception when positional arguments are left over. Prints
    USAGE_STRING and exits with status 2 when an option value is rejected.
    """
    from optparse import OptionParser

    parser = OptionParser(USAGE_STRING)
    parser.add_option('-c', '--classifier',
                      help=default('The type of classifier'),
                      choices=['mostFrequent', 'nb', 'naiveBayes',
                               'nearestNeighbors', 'perceptron', 'mira',
                               'minicontest'],
                      default='mostFrequent')
    parser.add_option('-d', '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces'],
                      default='digits')
    parser.add_option('-t', '--training',
                      help=default('The size of the training set'),
                      default=100, type="int")
    parser.add_option('-f', '--features',
                      help=default('Whether to use enhanced features'),
                      default=False, action="store_true")
    parser.add_option('-o', '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False, action="store_true")
    parser.add_option('-1', '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0, type="int")
    parser.add_option('-2', '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1, type="int")
    parser.add_option('-w', '--weights',
                      help=default('Whether to print weights'),
                      default=False, action="store_true")
    parser.add_option('-k', '--smoothing',
                      help=default("Smoothing parameter (ignored when using --autotune)"),
                      type="float", default=2.0)
    parser.add_option('-a', '--autotune',
                      help=default("Whether to automatically tune hyperparameters"),
                      default=False, action="store_true")
    parser.add_option('-i', '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3, type="int")
    parser.add_option('-s', '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE, type="int")
    parser.add_option('-n', '--k_number_of_neighbors',
                      help=default("Number of neighbors to search (For nearestNeighbors)"),
                      default=3, type="int")
    parser.add_option('-x', '--specialMode',
                      help=default("Trains in increments of random 10% of data and displays results"),
                      default=False, action="store_true")
    parser.add_option('-y', '--analysis',
                      help=default("Shows which data is wrongly predicted"),
                      default=False, action="store_true")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    if not options.classifier == 'minicontest':
        print("using enhanced features?:\t" + str(options.features))
    else:
        print("using minicontest feature extractor")
    # Printed once, and only outside special mode (the line used to be
    # emitted twice in the normal case).
    if not options.specialMode:
        print("training set size:\t" + str(options.training))

    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
        # The contest extractor overrides the -f choice (digits only).
        if options.classifier == 'minicontest':
            featureFunction = contestFeatureExtractorDigit
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
    else:
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    if options.data == "digits":
        legalLabels = range(10)
    else:
        legalLabels = range(2)

    if options.training <= 0:
        print("Training set size should be a positive integer (you provided: %d)"
              % options.training)
        print(USAGE_STRING)
        sys.exit(2)

    if options.smoothing <= 0:
        print("Please provide a positive number for smoothing (you provided: %f)"
              % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)

    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)"
                  % (options.label1, options.label2))
            print(USAGE_STRING)
            sys.exit(2)

    if options.k_number_of_neighbors <= 0:
        # The option is parsed as an int, so report it with %d.
        print("Please provide a positive number for neighbors (you provided: %d)"
              % options.k_number_of_neighbors)
        print(USAGE_STRING)
        sys.exit(2)

    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "nearestNeighbors":
        classifier = nearestNeighbors.NNClassifier(legalLabels)
    elif options.classifier == "naiveBayes" or options.classifier == "nb":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if options.autotune:
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes" % options.smoothing)
    elif options.classifier == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels, options.iterations)
    elif options.classifier == "mira":
        classifier = mira.MiraClassifier(legalLabels, options.iterations)
        if options.autotune:
            print("using automatic tuning for MIRA")
            classifier.automaticTuning = True
        else:
            print("using default C=0.001 for MIRA")
    elif options.classifier == 'minicontest':
        import minicontest
        classifier = minicontest.contestClassifier(legalLabels)
    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)
        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
def readCommand(argv):
    """Processes the command used to run from the command line.

    Parses the run, classifier, data set, training-ratio, feature,
    odds-ratio, smoothing, autotune and iteration options, echoes the
    configuration to stdout and builds the requested classifier.

    Returns a pair (args, options). args is a dict with the keys
    'classifier', 'featureFunction' and 'printImage'; options is the value
    object produced by optparse.

    Raises Exception when positional arguments are left over. Prints
    USAGE_STRING and exits with status 2 when an option value is rejected.
    """
    from optparse import OptionParser

    parser = OptionParser(USAGE_STRING)
    parser.add_option('-r', '--run',
                      help=default('automatically runs training and test cycle for 5 times'),
                      default=False, action='store_true')
    parser.add_option('-c', '--classifier',
                      help=default('The type of classifier'),
                      choices=['mostFrequent', 'naiveBayes', 'perceptron', 'knn'],
                      default='mostFrequent')
    parser.add_option('-d', '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces'],
                      default='digits')
    parser.add_option('-t', '--training',
                      help=default('The ratio of the training set to use'),
                      default=1.0, type="float")
    parser.add_option('-f', '--features',
                      help=default('Whether to use enhanced features'),
                      default=False, action="store_true")
    parser.add_option('-o', '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False, action="store_true")
    parser.add_option('-1', '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0, type="int")
    parser.add_option('-2', '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1, type="int")
    parser.add_option('-k', '--smoothing',
                      help=default("Smoothing parameter (ignored when using --autotune)"),
                      type="float", default=2.0)
    parser.add_option('-a', '--autotune',
                      help=default("Whether to automatically tune hyperparameters"),
                      default=False, action="store_true")
    parser.add_option('-i', '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3, type="int")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    print("using enhanced features?:\t" + str(options.features))

    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
    else:
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    if options.data == "digits":
        legalLabels = range(10)
    else:
        legalLabels = range(2)

    if options.training <= 0:
        # --training is a float ratio in this variant, so report it as one
        # (%d would truncate e.g. -0.5 to 0).
        print("Training set ratio should be a positive number (you provided: %f)" % options.training)
        print(USAGE_STRING)
        sys.exit(2)

    if options.smoothing <= 0:
        print("Please provide a positive number for smoothing (you provided: %f)" % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)

    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)"
                  % (options.label1, options.label2))
            print(USAGE_STRING)
            sys.exit(2)

    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "naiveBayes":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if options.autotune:
            # Was a Python 2 print statement, a SyntaxError next to the
            # print() calls used everywhere else in this function.
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes" % options.smoothing)
    elif options.classifier == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels, options.iterations)
    elif options.classifier == "knn":
        classifier = knn.KNN(legalLabels)
    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)
        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
def readCommand( argv ):
    """
    Processes the command used to run from the command line.

    Options are read with getopt on top of a dict of defaults. The chosen
    configuration is echoed to stdout and the requested classifier is built.

    Returns a pair (args, options). args is a dict with the keys
    'classifier', 'featureFunction' and 'printImage'; options is the dict
    of option values (defaults overridden by the command line).

    Prints USAGE_STRING and exits with status 2 when getopt rejects the
    command line or an option value is rejected; -h/--help prints
    USAGE_STRING and exits with status 0.
    """
    import getopt

    # Set default options
    options = {'classifier': 'mostfrequent',
               'data': 'digits',
               'enhancedFeatures': False,
               'train': 100,
               'odds': False,
               'class1': 1,
               'class2': 0,
               'smoothing': 1,
               'automaticTuning': False,
               'maxIterations': 3}

    args = {}  # This dictionary will hold the objects used by the main method

    # Read input from the command line.
    # Every long name must match the '--name' tested in the loop below:
    # 'classifier=' used to be misspelled, and a missing comma fused
    # 'automaticTuning' and 'maxIterations=' into a single bogus option.
    commands = ['help',
                'classifier=',
                'data=',
                'train=',
                'enhancedFeatures',
                'odds',
                'class1=',
                'class2=',
                'smoothing=',
                'automaticTuning',
                'maxIterations=']
    try:
        opts = getopt.getopt( argv, "hc:d:t:fo1:2:k:ai:", commands )
    except getopt.GetoptError:
        print(USAGE_STRING)
        sys.exit( 2 )

    # opts[0] is the list of (option, value) pairs; left-over positional
    # arguments in opts[1] are ignored.
    for option, value in opts[0]:
        if option in ['--help', '-h']:
            print(USAGE_STRING)
            sys.exit( 0 )
        if option in ['--classifier', '-c']:
            options['classifier'] = value
        if option in ['--data', '-d']:
            options['data'] = value
        if option in ['--train', '-t']:
            options['train'] = int(value)
        if option in ['--enhancedFeatures', '-f']:
            options['enhancedFeatures'] = True
        if option in ['--odds', '-o']:
            options['odds'] = True
        if option in ['--class1', '-1']:
            options['class1'] = int(value)
        if option in ['--class2', '-2']:
            options['class2'] = int(value)
        if option in ['--smoothing', '-k']:
            options['smoothing'] = float( value )
        if option in ['--automaticTuning', '-a']:
            options['automaticTuning'] = True
        if option in ['--maxIterations', '-i']:
            options['maxIterations'] = int(value)

    # Set up variables according to the command line input.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options['data'])
    print("classifier:\t\t" + options['classifier'])
    print("using enhanced features?:\t" + str(options['enhancedFeatures']))
    print("training set size:\t" + str(options['train']))

    if options['data'] == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
        if options['enhancedFeatures']:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
    elif options['data'] == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
        if options['enhancedFeatures']:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
    else:
        print("Unknown dataset " + str(options['data']))
        print(USAGE_STRING)
        sys.exit(2)

    if options['data'] == "digits":
        legalLabels = range(10)
    else:
        legalLabels = range(2)

    if options['train'] <= 0:
        print("Training set size should be a positive integer (you provided: %d)" % options['train'])
        print(USAGE_STRING)
        sys.exit(2)

    if options['smoothing'] <= 0:
        print("Please provide a positive number for smoothing (you provided: %f)" % options['smoothing'])
        print(USAGE_STRING)
        sys.exit(2)

    if options['odds']:
        for className in ['class1', 'class2']:
            if options[className] not in legalLabels:
                print("Didn't provide a legal labels for the odds ratio for %s" % className)
                print(USAGE_STRING)
                sys.exit(2)

    if options['classifier'] == "mostfrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options['classifier'] == "naivebayes":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options['smoothing'])
        if options['automaticTuning']:
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes" % options['smoothing'])
    elif options['classifier'] == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels, options['maxIterations'])
    elif options['classifier'] == "mira":
        classifier = mira.MiraClassifier(legalLabels, options['maxIterations'])
        if options['automaticTuning']:
            print("using automatic tuning for MIRA")
            classifier.automaticTuning = True
        else:
            print("using default C=0.001 for MIRA")
    else:
        print("Unknown classifier: " + str(options['classifier']))
        print(USAGE_STRING)
        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
def readCommand(argv):
    """Parse the command-line arguments for a classification run.

    Args:
        argv: List of argument strings (program name excluded), handed
            directly to ``OptionParser.parse_args``.

    Returns:
        A pair ``(args, options)``. ``args`` is a dict with the keys
        ``"agentToClone"``, ``"classifier"``, ``"featureFunction"`` and
        ``"printImage"``; ``options`` is the ``optparse`` values object
        holding the parsed options.

    Raises:
        Exception: If positional arguments remain after option parsing.
        SystemExit: With status 2 when an option value fails validation
            (the message and ``USAGE_STRING`` are printed first).
    """
    from optparse import OptionParser

    def reject(message):
        # Shared failure path: explain the problem, show usage, exit(2).
        print(message)
        print(USAGE_STRING)
        sys.exit(2)

    parser = OptionParser(USAGE_STRING)
    parser.add_option(
        "-c",
        "--classifier",
        help=default("The type of classifier"),
        choices=["mostFrequent", "perceptron"],
        default="mostFrequent",
    )
    parser.add_option(
        "-d",
        "--data",
        help=default("Dataset to use"),
        choices=["digits", "faces", "pacman"],
        default="digits",
    )
    parser.add_option(
        "-t",
        "--training",
        help=default("The size of the training set"),
        default=100,
        type="int",
    )
    parser.add_option(
        "-f",
        "--features",
        help=default("Whether to use enhanced features"),
        default=False,
        action="store_true",
    )
    parser.add_option(
        "-o",
        "--odds",
        help=default("Whether to compute odds ratios"),
        default=False,
        action="store_true",
    )
    parser.add_option(
        "-1",
        "--label1",
        help=default("First label in an odds ratio comparison"),
        default=0,
        type="int",
    )
    parser.add_option(
        "-2",
        "--label2",
        help=default("Second label in an odds ratio comparison"),
        default=1,
        type="int",
    )
    parser.add_option(
        "-w",
        "--weights",
        help=default("Whether to print weights"),
        default=False,
        action="store_true",
    )
    parser.add_option(
        "-k",
        "--smoothing",
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=2.0,
    )
    parser.add_option(
        "-a",
        "--autotune",
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true",
    )
    parser.add_option(
        "-i",
        "--iterations",
        help=default("Maximum iterations to run training"),
        default=3,
        type="int",
    )
    parser.add_option(
        "-s",
        "--test",
        help=default("Amount of test data to use"),
        default=TEST_SET_SIZE,
        type="int",
    )
    parser.add_option(
        "-g",
        "--agentToClone",
        help=default("Pacman agent to copy"),
        default=None,
        type="str",
    )

    options, otherjunk = parser.parse_args(argv)
    if otherjunk:
        raise Exception(
            "Command line input not understood: " + str(otherjunk)
        )
    args = {}

    # Echo the chosen configuration before validating it.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    print("using enhanced features?:\t" + str(options.features))
    print("training set size:\t" + str(options.training))

    # Each dataset fixes its image printer, feature extractor and label set.
    if options.data == "digits":
        printImage = ImagePrinter(
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
        ).printImage
        # NOTE: --features is not consulted for digits; only the basic
        # extractor is wired up for this dataset.
        featureFunction = basicFeatureExtractorDigit
        legalLabels = list(range(10))
    elif options.data == "faces":
        printImage = ImagePrinter(
            FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT
        ).printImage
        featureFunction = (
            enhancedFeatureExtractorFace
            if options.features
            else basicFeatureExtractorFace
        )
        # Faces is a two-class problem (labels 0 and 1). It previously fell
        # through to the Pacman move names, which never match face labels.
        legalLabels = [0, 1]
    elif options.data == "pacman":
        printImage = None
        featureFunction = (
            enhancedFeatureExtractorPacman
            if options.features
            else basicFeatureExtractorPacman
        )
        legalLabels = ["Stop", "West", "East", "North", "South"]
    else:
        # Defensive: optparse's ``choices`` should already reject this.
        reject("Unknown dataset %s" % (options.data,))

    if options.training <= 0:
        reject(
            "Training set size should be a positive integer (you provided: %d)"
            % options.training
        )

    if options.smoothing <= 0:
        reject(
            "Please provide a positive number for smoothing (you provided: %f)"
            % options.smoothing
        )

    if options.odds:
        if (
            options.label1 not in legalLabels
            or options.label2 not in legalLabels
        ):
            reject(
                "Didn't provide a legal labels for the odds ratio: (%d,%d)"
                % (options.label1, options.label2)
            )

    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "perceptron":
        # Pacman data uses its own perceptron variant.
        if options.data != "pacman":
            classifier = perceptron.PerceptronClassifier(
                legalLabels, options.iterations
            )
        else:
            classifier = perceptron_pacman.PerceptronClassifierPacman(
                legalLabels, options.iterations
            )
    else:
        # Defensive: optparse's ``choices`` should already reject this.
        reject("Unknown classifier: %s" % (options.classifier,))

    args["agentToClone"] = options.agentToClone
    args["classifier"] = classifier
    args["featureFunction"] = featureFunction
    args["printImage"] = printImage

    return args, options
print USAGE_STRING sys.exit(2) if options.smoothing <= 0: print "Please provide a positive number for smoothing (you provided: %f)" % options.smoothing print USAGE_STRING sys.exit(2) if options.odds: if options.label1 not in legalLabels or options.label2 not in legalLabels: print "Didn't provide a legal labels for the odds ratio: (%d,%d)" % (options.label1, options.label2) print USAGE_STRING sys.exit(2) if(options.classifier == "mostFrequent"): classifier = mostFrequent.MostFrequentClassifier(legalLabels) elif(options.classifier == "naiveBayes" or options.classifier == "nb"): classifier = naiveBayes.NaiveBayesClassifier(legalLabels) classifier.setSmoothing(options.smoothing) if (options.autotune): print "using automatic tuning for naivebayes" classifier.automaticTuning = True else: print "using smoothing parameter k=%f for naivebayes" % options.smoothing elif(options.classifier == "perceptron"): if options.data != 'pacman': classifier = perceptron.PerceptronClassifier(legalLabels,options.iterations) else: classifier = perceptron_pacman.PerceptronClassifierPacman(legalLabels,options.iterations) elif(options.classifier == "mira"): if options.data != 'pacman':