示例#1
0
def readCommand(argv):
    """Parse the command line and build the classifier configuration.

    Args:
        argv: Command-line arguments, handed to ``OptionParser.parse_args``.

    Returns:
        A tuple ``(args, options)``.  ``args`` is a dict holding
        ``'classifier'`` (the classifier instance), ``'metric'`` (the division
        metric function, or None unless a decision tree was requested) and
        ``'maxdepth'`` (an int).  ``options`` is the parsed optparse values
        object.

    Raises:
        Exception: If positional arguments remain after option parsing.
        SystemExit: With status 2 when the dataset, classifier or metric is
            not handled below.
    """
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)

    parser.add_option('-c', '--classifier',
                      help=default('The type of classifier'),
                      choices=['naiveBayes', 'mostFrequent', 'decisionTree'],
                      default='mostFrequent')
    parser.add_option('-d', '--data',
                      help=default('Dataset to use'),
                      choices=['car', 'ionosphere', 'wine'],
                      default='car')
    parser.add_option('-m', '--metric',
                      help=default('Division metric'),
                      choices=['gini', 'error', 'entropy'],
                      default='error')
    # type="int" keeps a depth given on the command line an int, like the
    # default; without it optparse would hand back the raw string.
    parser.add_option('-x', '--maxdepth',
                      help=default('Maximum depth of the tree learned'),
                      default=200,
                      type="int")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    # Every print(...) below takes a single argument so the output is the same
    # whether print is the Python 2 statement or the Python 3 function.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    if options.data not in ("car", "ionosphere", "wine"):
        print("%s %s" % ("Unknown dataset", options.data))
        print(USAGE_STRING)
        sys.exit(2)

    legalLabels = [0, 1]

    metric = None
    if options.classifier in ("decisionTree", "dt"):
        classifier = decisionTree.DecisionTreeClassifier(legalLabels)
        if options.metric == 'gini':
            metric = metrics.gini
        elif options.metric == 'error':
            metric = metrics.error
        elif options.metric == 'entropy':
            metric = metrics.entropy
        else:
            print("%s %s" % ("Unknown metric:", options.metric))
            print(USAGE_STRING)
            sys.exit(2)
    elif options.classifier in ("mostFrequent", "mf"):
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    else:
        # NOTE(review): 'naiveBayes' is accepted by --classifier but no branch
        # builds it, so it ends up here as well -- confirm whether a
        # naive Bayes classifier should be constructed.
        print("%s %s" % ("Unknown classifier:", options.classifier))
        print(USAGE_STRING)
        sys.exit(2)

    args['classifier'] = classifier
    args['metric'] = metric
    args['maxdepth'] = options.maxdepth
    return args, options
示例#2
0
def readCommand(argv):
    """Parse the command line and build the classification configuration.

    Args:
        argv: Command-line arguments, handed to ``OptionParser.parse_args``.

    Returns:
        A tuple ``(args, options)``.  ``args`` is a dict with the keys
        ``'agentToClone'``, ``'classifier'``, ``'featureFunction'`` and
        ``'printImage'`` (None for the pacman dataset); ``options`` is the
        parsed optparse values object.

    Raises:
        Exception: If positional arguments remain after option parsing.
        SystemExit: With status 2 when an option value fails validation.
    """
    from optparse import OptionParser

    def usageError(message):
        # Report the problem, show the usage text and stop with status 2.
        print(message)
        print(USAGE_STRING)
        sys.exit(2)

    parser = OptionParser(USAGE_STRING)

    parser.add_option('-c', '--classifier',
                      help=default('The type of classifier'),
                      choices=[
                          'mostFrequent', 'nb', 'naiveBayes', 'perceptron',
                          'mira', 'minicontest'
                      ],
                      default='mostFrequent')
    parser.add_option('-d', '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces', 'pacman'],
                      default='digits')
    parser.add_option('-t', '--training',
                      help=default('The size of the training set'),
                      default=100,
                      type="int")
    parser.add_option('-f', '--features',
                      help=default('Whether to use enhanced features'),
                      default=False,
                      action="store_true")
    parser.add_option('-o', '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False,
                      action="store_true")
    parser.add_option('-1', '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0,
                      type="int")
    parser.add_option('-2', '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1,
                      type="int")
    parser.add_option('-w', '--weights',
                      help=default('Whether to print weights'),
                      default=False,
                      action="store_true")
    parser.add_option(
        '-k', '--smoothing',
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=2.0)
    parser.add_option(
        '-a', '--autotune',
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true")
    parser.add_option('-i', '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3,
                      type="int")
    parser.add_option('-s', '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE,
                      type="int")
    parser.add_option('-g', '--agentToClone',
                      help=default("Pacman agent to copy"),
                      default=None,
                      type="str")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    # Every print(...) below takes a single argument so the output is the same
    # whether print is the Python 2 statement or the Python 3 function.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    if not options.classifier == 'minicontest':
        print("using enhanced features?:\t" + str(options.features))
    else:
        print("using minicontest feature extractor")
    print("training set size:\t" + str(options.training))

    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH,
                                  DIGIT_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
        # The minicontest extractor overrides the basic/enhanced choice.
        if options.classifier == 'minicontest':
            featureFunction = contestFeatureExtractorDigit
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH,
                                  FACE_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
    elif options.data == "pacman":
        printImage = None
        if options.features:
            featureFunction = enhancedFeatureExtractorPacman
        else:
            featureFunction = basicFeatureExtractorPacman
    else:
        usageError("%s %s" % ("Unknown dataset", options.data))

    if options.data == "digits":
        legalLabels = range(10)
    elif options.data == "faces":
        # Faces are a two-class problem; previously they fell into the
        # Pacman action labels below.
        legalLabels = range(2)
    else:
        legalLabels = ['Stop', 'West', 'East', 'North', 'South']

    if options.training <= 0:
        usageError(
            "Training set size should be a positive integer (you provided: %d)"
            % options.training)

    if options.smoothing <= 0:
        usageError(
            "Please provide a positive number for smoothing (you provided: %f)"
            % options.smoothing)

    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            usageError(
                "Didn't provide a legal labels for the odds ratio: (%d,%d)" %
                (options.label1, options.label2))

    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "naiveBayes" or options.classifier == "nb":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if options.autotune:
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes" %
                  options.smoothing)
    elif options.classifier == "perceptron":
        if options.data != 'pacman':
            classifier = perceptron.PerceptronClassifier(
                legalLabels, options.iterations)
        else:
            classifier = perceptron_pacman.PerceptronClassifierPacman(
                legalLabels, options.iterations)
    elif options.classifier == "mira":
        if options.data == 'pacman':
            # No MIRA classifier is built for pacman data; without this guard
            # `classifier` stays unbound and the code below fails on it.
            usageError(
                "The mira classifier is not available for the pacman dataset")
        classifier = mira.MiraClassifier(legalLabels, options.iterations)
        if options.autotune:
            print("using automatic tuning for MIRA")
            classifier.automaticTuning = True
        else:
            print("using default C=0.001 for MIRA")
    elif options.classifier == 'minicontest':
        import minicontest
        classifier = minicontest.contestClassifier(legalLabels)
    else:
        usageError("%s %s" % ("Unknown classifier:", options.classifier))

    args['agentToClone'] = options.agentToClone

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
示例#3
0
def runClassifier():
    """
    Harness code for running different classifiers on the face or digit data.

    This is the main function for classification, and is designed
    to be invoked from the command line (outside the Python interpreter).

    Usage:
      > python dataClassifier.py
      OR
      > python dataClassifier.py <data> <classifierName>
      OR
      > python dataClassifier.py <data> <classifierName> <featureFunction>
      OR
      > python dataClassifier.py <data> <classifierName> <featureFunction> <numTrainingExamples>
      OR
      > python dataClassifier.py <data> <classifierName> <featureFunction> <numTrainingExamples> <odds class1 class2>

    For example:
      > python dataClassifier.py digits naivebayes basic 1000

    would run the naive Bayes classifier on 1000 training examples using the
    basicFeatureExtractor function, and then test the classifier on the test data.

    Missing trailing arguments are filled in by appending defaults to
    ``sys.argv``.  Returns None; an unknown feature function or classifier
    name prints a message and returns early.
    """

    def reportAccuracy(guesses, labels):
        # Print how many guesses match their labels, with the percentage.
        correct = sum(1 for guess, label in zip(guesses, labels)
                      if guess == label)
        summary = ("correct out of " + str(len(labels)) +
                   " (%.1f%%).") % (100.0 * correct / len(labels))
        print("%s %s" % (correct, summary))

    # Every print(...) below takes a single argument so the output is the same
    # whether print is the Python 2 statement or the Python 3 function.
    print("Doing classification")
    print("--------------------")
    # Assign default values for arguments if they are not provided.  Each
    # append makes the next length check true, so the defaults cascade.
    if len(sys.argv) == 1:
        print("No data specified; using digits.")
        sys.argv.append("digits")
    if len(sys.argv) == 2:
        print("No classifier specified; using default.")
        sys.argv.append("mostfrequent")
    if len(sys.argv) == 3:
        print("No feature extraction function specified; using default.")
        sys.argv.append("basic")
    if len(sys.argv) == 4:
        print("No training set size specified; using default.")
        sys.argv.append("100")
    if len(sys.argv) == 5:
        print("Not doing odds ratio computation.")
        sys.argv.append("noodds")

    # Set up variables according to the command line input.
    print("data:\t\t" + sys.argv[1])
    print("classifier:\t\t" + sys.argv[2])
    print("feature extractor:\t" + sys.argv[3])
    print("training set size:\t" + sys.argv[4])
    dataName = sys.argv[1]
    featureName = sys.argv[3]
    if dataName == "digits" and featureName == "basic":
        featureFunction = basicFeatureExtractorDigit
    elif dataName == "faces" and featureName == "basic":
        featureFunction = basicFeatureExtractorFace
    elif dataName == "digits" and featureName == "enhanced":
        featureFunction = enhancedFeatureExtractorDigit
    elif dataName == "faces" and featureName == "enhanced":
        featureFunction = enhancedFeatureExtractorFace
    else:
        # Report the feature-function argument (argv[3]); this branch is also
        # taken when the data name is neither "digits" nor "faces".
        print("%s %s" % ("Unknown feature function:", sys.argv[3]))
        return

    if dataName == "digits":  # if digits detect
        legalLabels = range(10)
    else:  # if face detect
        legalLabels = range(2)

    if sys.argv[2] == "mostfrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif sys.argv[2] == "naivebayes":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
    elif sys.argv[2] == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels)
    else:
        print("%s %s" % ("Unknown classifier:", sys.argv[2]))
        return

    # Load data
    numTraining = int(sys.argv[4])

    if dataName == "faces":
        width, height = FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT
        trainingImages = "facedata/facedatatrain"
        trainingLabelFile = "facedata/facedatatrainlabels"
        # NOTE(review): face validation reads the training files -- confirm
        # this is intended.
        validationImages = "facedata/facedatatrain"
        validationLabelFile = "facedata/facedatatrainlabels"
        testImages = "facedata/facedatatest"
        testLabelFile = "facedata/facedatatestlabels"
    else:
        width, height = DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
        trainingImages = "digitdata/trainingimages"
        trainingLabelFile = "digitdata/traininglabels"
        validationImages = "digitdata/validationimages"
        validationLabelFile = "digitdata/validationlabels"
        testImages = "digitdata/testimages"
        testLabelFile = "digitdata/testlabels"

    rawTrainingData = samples.loadDataFile(trainingImages, numTraining,
                                           width, height)
    trainingLabels = samples.loadLabelsFile(trainingLabelFile, numTraining)
    rawValidationData = samples.loadDataFile(validationImages, TEST_SET_SIZE,
                                             width, height)
    validationLabels = samples.loadLabelsFile(validationLabelFile,
                                              TEST_SET_SIZE)
    rawTestData = samples.loadDataFile(testImages, TEST_SET_SIZE, width,
                                       height)
    testLabels = samples.loadLabelsFile(testLabelFile, TEST_SET_SIZE)

    # Extract features
    print("Extracting features...")
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData,
                     validationLabels)
    print("Validating...")
    guesses = classifier.classify(validationData)
    reportAccuracy(guesses, validationLabels)
    print("Testing...")
    guesses = classifier.classify(testData)
    reportAccuracy(guesses, testLabels)
    util.pause()
    analysis(classifier, guesses, testLabels, rawTestData)

    # do odds ratio computation if specified at command line
    if sys.argv[5] == "odds" and len(sys.argv) == 8:
        features_class1, features_class2, features_odds = classifier.findHighOddsFeatures(
            int(sys.argv[6]), int(sys.argv[7]))
        # width/height were chosen above for the selected dataset.
        printImage(features_class1, width, height)
        printImage(features_class2, width, height)
        printImage(features_odds, width, height)
def readCommand(argv):
    """Turn command-line arguments into a classifier setup.

    Args:
        argv: Command-line arguments, handed to ``OptionParser.parse_args``.

    Returns:
        A tuple ``(args, options)``.  ``args`` is a dict with the keys
        ``'classifier'``, ``'featureFunction'`` and ``'printImage'``;
        ``options`` is the parsed optparse values object.

    Raises:
        Exception: If positional arguments remain after option parsing.
        SystemExit: With status 2 when an option value fails validation.
    """
    from optparse import OptionParser

    def bail(message):
        # Show the complaint followed by the usage text, then quit (status 2).
        print(message)
        print(USAGE_STRING)
        sys.exit(2)

    parser = OptionParser(USAGE_STRING)
    parser.add_option('-c', '--classifier',
                      help=default('The type of classifier'),
                      choices=['mostFrequent', 'nb', 'naiveBayes', 'GDA'],
                      default='mostFrequent')
    parser.add_option('-d', '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces'],
                      default='digits')
    parser.add_option('-t', '--training',
                      help=default('The size of the training set'),
                      default=450, type="int")
    parser.add_option(
        '-k', '--smoothing',
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float", default=2.0)
    parser.add_option(
        '-a', '--autotune',
        help=default("Whether to automatically tune hyperparameters"),
        default=False, action="store_true")
    parser.add_option('-i', '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3, type="int")

    options, leftovers = parser.parse_args(argv)
    if leftovers:
        raise Exception('Command line input not understood: ' + str(leftovers))

    # Echo the chosen configuration.  Each print(...) takes one argument so the
    # output is the same under the print statement and the print function.
    for line in ("Doing classification",
                 "--------------------",
                 "data:\t\t" + options.data,
                 "classifier:\t\t" + options.classifier,
                 "training set size:\t" + str(options.training)):
        print(line)

    # Per-dataset image printer, feature extractor and label set.
    dataset = options.data
    if dataset == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH,
                                  DIGIT_DATUM_HEIGHT).printImage
        featureFunction = basicFeatureExtractorDigit
        legalLabels = range(10)
    elif dataset == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH,
                                  FACE_DATUM_HEIGHT).printImage
        featureFunction = basicFeatureExtractorFace
        legalLabels = range(2)
    else:
        bail("%s %s" % ("Unknown dataset", dataset))

    # Numeric options must be strictly positive.
    if not options.training > 0:
        bail("Training set size should be a positive integer (you provided: %d)"
             % options.training)
    if not options.smoothing > 0:
        bail("Please provide a positive number for smoothing (you provided: %f)"
             % options.smoothing)

    # Build the requested classifier.
    kind = options.classifier
    if kind in ("naiveBayes", "nb"):
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if not options.autotune:
            print("using smoothing parameter k=%f for naivebayes"
                  % options.smoothing)
        else:
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
    elif kind == "GDA":
        classifier = gaussianDiscriminantAnalysis.GaussianDiscriminantAnalysisClassifier(
            legalLabels, "GDA")
    elif kind == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    else:
        bail("%s %s" % ("Unknown classifier:", kind))

    args = {
        'classifier': classifier,
        'featureFunction': featureFunction,
        'printImage': printImage,
    }
    return args, options
示例#5
0
def readCommand(argv):
    """Turn command-line arguments into a digit-classifier setup.

    Args:
        argv: Command-line arguments, handed to ``OptionParser.parse_args``.

    Returns:
        A tuple ``(args, options)``.  ``args`` is a dict with the keys
        ``'classifier'``, ``'featureFunction'`` and ``'printImage'``;
        ``options`` is the parsed optparse values object.

    Raises:
        Exception: If positional arguments remain after option parsing.
        SystemExit: With status 2 when the training size is not positive or
            the classifier name is not handled.
    """
    from optparse import OptionParser

    parser = OptionParser(USAGE_STRING)
    parser.add_option('-c', '--classifier',
                      help=default('The type of classifier'),
                      choices=['mostFrequent', 'perceptron', 'mlp', 'svm'],
                      default='mostFrequent')
    parser.add_option('-t', '--training',
                      help=default('The size of the training set'),
                      default=TRAINING_SET_SIZE, type="int")
    parser.add_option('-w', '--weights',
                      help=default('Whether to print weights'),
                      default=False, action="store_true")
    parser.add_option('-i', '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3, type="int")
    parser.add_option('-s', '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE, type="int")

    options, leftovers = parser.parse_args(argv)
    if leftovers:
        raise Exception('Command line input not understood: ' + str(leftovers))

    # Echo the chosen configuration.  Each print(...) takes one argument so the
    # output is the same under the print statement and the print function.
    for line in ("Doing classification",
                 "--------------------",
                 "classifier:\t\t" + options.classifier,
                 "training set size:\t" + str(options.training)):
        print(line)

    # This variant always uses the digit printer, extractor and labels 0-9.
    printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
    featureFunction = basicFeatureExtractorDigit
    legalLabels = range(10)

    if not options.training > 0:
        print("Training set size should be a positive integer (you provided: %d)"
              % options.training)
        print(USAGE_STRING)
        sys.exit(2)

    # Lambdas defer construction so only the selected classifier is built.
    factories = {
        "mostFrequent":
            lambda: mostFrequent.MostFrequentClassifier(legalLabels),
        "mlp":
            lambda: mlp.MLPClassifier(legalLabels, options.iterations),
        "perceptron":
            lambda: perceptron.PerceptronClassifier(legalLabels,
                                                    options.iterations),
        "svm":
            lambda: svm.SVMClassifier(legalLabels),
    }
    build = factories.get(options.classifier)
    if build is None:
        print("%s %s" % ("Unknown classifier:", options.classifier))
        print(USAGE_STRING)

        sys.exit(2)
    classifier = build()

    args = {
        'classifier': classifier,
        'featureFunction': featureFunction,
        'printImage': printImage,
    }
    return args, options
示例#6
0
def readCommand(argv):
    """Process the command used to run from the command line.

    Args:
        argv: Command-line arguments, handed to ``OptionParser.parse_args``.

    Returns:
        A tuple ``(args, options)``.  ``args`` is a dict with the keys
        ``'classifier'``, ``'featureFunction'`` and ``'printImage'``;
        ``options`` is the parsed optparse values object.

    Raises:
        Exception: If positional arguments remain after option parsing.
        SystemExit: With status 2 when an option value fails validation.
    """
    from optparse import OptionParser

    def usageError(*message):
        # Report the problem, show the usage text and stop with status 2.
        print(*message)
        print(USAGE_STRING)
        sys.exit(2)

    parser = OptionParser(USAGE_STRING)

    parser.add_option('-c', '--classifier',
                      help=default('The type of classifier'),
                      choices=[
                          'mostFrequent', 'nb', 'naiveBayes',
                          'nearestNeighbors', 'perceptron', 'mira',
                          'minicontest'
                      ],
                      default='mostFrequent')
    parser.add_option('-d', '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces'],
                      default='digits')
    parser.add_option('-t', '--training',
                      help=default('The size of the training set'),
                      default=100,
                      type="int")
    parser.add_option('-f', '--features',
                      help=default('Whether to use enhanced features'),
                      default=False,
                      action="store_true")
    parser.add_option('-o', '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False,
                      action="store_true")
    parser.add_option('-1', '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0,
                      type="int")
    parser.add_option('-2', '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1,
                      type="int")
    parser.add_option('-w', '--weights',
                      help=default('Whether to print weights'),
                      default=False,
                      action="store_true")
    parser.add_option(
        '-k', '--smoothing',
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=2.0)
    parser.add_option(
        '-a', '--autotune',
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true")
    parser.add_option('-i', '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3,
                      type="int")
    parser.add_option('-s', '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE,
                      type="int")
    parser.add_option(
        '-n', '--k_number_of_neighbors',
        help=default("Number of neighbors to search (For nearestNeighbors)"),
        default=3,
        type="int")
    parser.add_option(
        '-x', '--specialMode',
        help=default(
            "Trains in increments of random 10% of data and displays results"),
        default=False,
        action="store_true")
    parser.add_option('-y', '--analysis',
                      help=default("Shows which data is wrongly predicted"),
                      default=False,
                      action="store_true")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    if not options.classifier == 'minicontest':
        print("using enhanced features?:\t" + str(options.features))
    else:
        print("using minicontest feature extractor")
    # Printed once, and only outside special mode; the line used to be
    # emitted a second time unconditionally.
    if not options.specialMode:
        print("training set size:\t" + str(options.training))

    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH,
                                  DIGIT_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
        # The minicontest extractor overrides the basic/enhanced choice.
        if options.classifier == 'minicontest':
            featureFunction = contestFeatureExtractorDigit
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH,
                                  FACE_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
    else:
        usageError("Unknown dataset", options.data)

    if options.data == "digits":
        legalLabels = range(10)
    else:
        legalLabels = range(2)

    if options.training <= 0:
        usageError(
            "Training set size should be a positive integer (you provided: %d)"
            % options.training)

    if options.smoothing <= 0:
        usageError(
            "Please provide a positive number for smoothing (you provided: %f)"
            % options.smoothing)

    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            usageError(
                "Didn't provide a legal labels for the odds ratio: (%d,%d)" %
                (options.label1, options.label2))

    if options.k_number_of_neighbors <= 0:
        # The option is parsed as an int, so format it with %d rather than %f.
        usageError(
            "Please provide a positive number for neighbors (you provided: %d)"
            % options.k_number_of_neighbors)

    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "nearestNeighbors":
        # NOTE(review): k_number_of_neighbors is validated above but not
        # passed here -- presumably the caller reads it from `options`; confirm.
        classifier = nearestNeighbors.NNClassifier(legalLabels)
    elif options.classifier == "naiveBayes" or options.classifier == "nb":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if options.autotune:
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes" %
                  options.smoothing)
    elif options.classifier == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels,
                                                     options.iterations)
    elif options.classifier == "mira":
        classifier = mira.MiraClassifier(legalLabels, options.iterations)
        if options.autotune:
            print("using automatic tuning for MIRA")
            classifier.automaticTuning = True
        else:
            print("using default C=0.001 for MIRA")
    elif options.classifier == 'minicontest':
        import minicontest
        classifier = minicontest.contestClassifier(legalLabels)
    else:
        usageError("Unknown classifier:", options.classifier)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
示例#7
0
def readCommand(argv):
    """Parse the command line and build the objects needed for a run.

    Args:
        argv: Command-line arguments, excluding the program name.

    Returns:
        A ``(args, options)`` tuple. ``args`` is a dict holding the
        ``'classifier'`` instance, the ``'featureFunction'`` and the
        ``'printImage'`` callable; ``options`` is the parsed optparse
        values object.

    Raises:
        Exception: If positional arguments are left over after parsing.
        SystemExit: With status 2 when an option value fails validation.
    """
    from optparse import OptionParser

    parser = OptionParser(USAGE_STRING)

    parser.add_option('-r', '--run',
                      help=default('automatically runs training and test cycle for 5 times'),
                      default=False, action='store_true')
    parser.add_option('-c', '--classifier', help=default('The type of classifier'),
                      choices=['mostFrequent', 'naiveBayes', 'perceptron', 'knn'],
                      default='mostFrequent')
    parser.add_option('-d', '--data', help=default('Dataset to use'),
                      choices=['digits', 'faces'], default='digits')
    parser.add_option('-t', '--training', help=default('The ratio of the training set to use'),
                      default=1.0, type="float")
    parser.add_option('-f', '--features', help=default('Whether to use enhanced features'),
                      default=False, action="store_true")
    parser.add_option('-o', '--odds', help=default('Whether to compute odds ratios'),
                      default=False, action="store_true")
    parser.add_option('-1', '--label1', help=default("First label in an odds ratio comparison"),
                      default=0, type="int")
    parser.add_option('-2', '--label2', help=default("Second label in an odds ratio comparison"),
                      default=1, type="int")
    parser.add_option('-k', '--smoothing',
                      help=default("Smoothing parameter (ignored when using --autotune)"),
                      type="float", default=2.0)
    parser.add_option('-a', '--autotune',
                      help=default("Whether to automatically tune hyperparameters"),
                      default=False, action="store_true")
    parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"),
                      default=3, type="int")

    options, otherjunk = parser.parse_args(argv)
    if otherjunk:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    print("using enhanced features?:\t" + str(options.features))

    # Each dataset fixes the image printer, the feature extractor and the
    # set of labels a classifier may output.  The labels are materialised as
    # a list so classifiers get a real sequence rather than a lazy range.
    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
        legalLabels = list(range(10))
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
        legalLabels = list(range(2))
    else:
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    if options.training <= 0:
        # --training is a float ratio, so report it with %f; %d would
        # truncate e.g. -0.5 to 0 and hide what the user actually typed.
        print("Training set ratio should be a positive number (you provided: %f)" % options.training)
        print(USAGE_STRING)
        sys.exit(2)

    if options.smoothing <= 0:
        print("Please provide a positive number for smoothing (you provided: %f)" % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)

    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)" % (options.label1, options.label2))
            print(USAGE_STRING)
            sys.exit(2)

    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "naiveBayes":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if options.autotune:
            # This was a bare `print` followed by a bare string expression,
            # which printed nothing; emit the message for real.
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes" % options.smoothing)
    elif options.classifier == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels, options.iterations)
    elif options.classifier == "knn":
        classifier = knn.KNN(legalLabels)
    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)
        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
def readCommand( argv ):
  """
  Processes the command used to run from the command line.

  argv: list of command-line arguments, without the program name.

  Returns (args, options): args is a dict holding the 'classifier',
  'featureFunction' and 'printImage' objects used by the main method;
  options is a dict of the parsed option values.

  Prints USAGE_STRING and exits with status 2 on an unrecognized option or
  an option value that fails validation, and with status 0 for --help.
  A non-numeric value given to a numeric option raises ValueError from
  int()/float().
  """
  import getopt

  # Set default options
  options = {'classifier': 'mostfrequent',
             'data': 'digits',
             'enhancedFeatures': False,
             'train': 100,
             'odds': False,
             'class1': 1,
             'class2': 0,
             'smoothing': 1,
             'automaticTuning' : False,
             'maxIterations': 3}

  args = {} # This dictionary will hold the objects used by the main method

  # Long option names; a trailing '=' means the option takes a value.
  # 'classifier=' used to be misspelled and 'automaticTuning' lacked its
  # trailing comma, which fused it with 'maxIterations=' into one bogus name,
  # so none of those three long options could be used.
  long_options = ['help',
                  'classifier=',
                  'data=',
                  'train=',
                  'enhancedFeatures',
                  'odds',
                  'class1=',
                  'class2=',
                  'smoothing=',
                  'automaticTuning',
                  'maxIterations=']
  try:
    # Leftover positional arguments are ignored.
    parsed, _leftover = getopt.getopt( argv, "hc:d:t:fo1:2:k:ai:", long_options )
  except getopt.GetoptError:
    print(USAGE_STRING)
    sys.exit( 2 )

  for option, value in parsed:
    if option in ['--help', '-h']:
      print(USAGE_STRING)
      sys.exit( 0 )
    elif option in ['--classifier', '-c']:
      options['classifier'] = value
    elif option in ['--data', '-d']:
      options['data'] = value
    elif option in ['--train', '-t']:
      options['train'] = int(value)
    elif option in ['--enhancedFeatures', '-f']:
      options['enhancedFeatures'] = True
    elif option in ['--odds', '-o']:
      options['odds'] = True
    elif option in ['--class1', '-1']:
      options['class1'] = int(value)
    elif option in ['--class2', '-2']:
      options['class2'] = int(value)
    elif option in ['--smoothing', '-k']:
      options['smoothing'] = float( value )
    elif option in ['--automaticTuning', '-a']:
      options['automaticTuning'] = True
    elif option in ['--maxIterations', '-i']:
      options['maxIterations'] = int(value)

  # Set up variables according to the command line input.
  # Every print below takes one pre-built string, so the output is the same
  # whether print is the Python 2 statement or the Python 3 function.
  print("Doing classification")
  print("--------------------")
  print("data:\t\t" + options['data'])
  print("classifier:\t\t" + options['classifier'])
  print("using enhanced features?:\t" + str(options['enhancedFeatures']))
  print("training set size:\t" + str(options['train']))

  # The dataset fixes the image printer, the feature extractor and the legal
  # labels.  list() keeps the labels a real list on Python 3 as well.
  if options['data'] == "digits":
    printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
    if options['enhancedFeatures']:
      featureFunction = enhancedFeatureExtractorDigit
    else:
      featureFunction = basicFeatureExtractorDigit
    legalLabels = list(range(10))
  elif options['data'] == "faces":
    printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
    if options['enhancedFeatures']:
      featureFunction = enhancedFeatureExtractorFace
    else:
      featureFunction = basicFeatureExtractorFace
    legalLabels = list(range(2))
  else:
    print("Unknown dataset " + options['data'])
    print(USAGE_STRING)
    sys.exit(2)

  if options['train'] <= 0:
    print("Training set size should be a positive integer (you provided: %d)" % options['train'])
    print(USAGE_STRING)
    sys.exit(2)

  if options['smoothing'] <= 0:
    print("Please provide a positive number for smoothing (you provided: %f)" % options['smoothing'])
    print(USAGE_STRING)
    sys.exit(2)

  if options['odds']:
    for className in ['class1','class2']:
      if options[className] not in legalLabels:
        print("Didn't provide a legal labels for the odds ratio for %s" % className)
        print(USAGE_STRING)
        sys.exit(2)

  if options['classifier'] == "mostfrequent":
    classifier = mostFrequent.MostFrequentClassifier(legalLabels)
  elif options['classifier'] == "naivebayes":
    classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
    classifier.setSmoothing(options['smoothing'])
    if options['automaticTuning']:
      print("using automatic tuning for naivebayes")
      classifier.automaticTuning = True
    else:
      print("using smoothing parameter k=%f for naivebayes" % options['smoothing'])
  elif options['classifier'] == "perceptron":
    classifier = perceptron.PerceptronClassifier(legalLabels,options['maxIterations'])
  elif options['classifier'] == "mira":
    classifier = mira.MiraClassifier(legalLabels, options['maxIterations'])
    if options['automaticTuning']:
      print("using automatic tuning for MIRA")
      classifier.automaticTuning = True
    else:
      print("using default C=0.001 for MIRA")
  else:
    print("Unknown classifier: " + options['classifier'])
    print(USAGE_STRING)
    sys.exit(2)

  args['classifier'] = classifier
  args['featureFunction'] = featureFunction
  args['printImage'] = printImage

  return args, options
def readCommand(argv):
    """Parse the command line and build the objects needed for a run.

    Args:
        argv: Command-line arguments, excluding the program name.

    Returns:
        A ``(args, options)`` tuple. ``args`` is a dict with the keys
        ``"agentToClone"``, ``"classifier"``, ``"featureFunction"`` and
        ``"printImage"`` (``None`` for the pacman dataset); ``options`` is
        the parsed optparse values object.

    Raises:
        Exception: If positional arguments are left over after parsing.
        SystemExit: With status 2 when an option value fails validation.
    """
    from optparse import OptionParser

    parser = OptionParser(USAGE_STRING)

    parser.add_option(
        "-c",
        "--classifier",
        help=default("The type of classifier"),
        choices=[
            "mostFrequent",
            "perceptron",
        ],
        default="mostFrequent",
    )
    parser.add_option(
        "-d",
        "--data",
        help=default("Dataset to use"),
        choices=["digits", "faces", "pacman"],
        default="digits",
    )
    parser.add_option(
        "-t",
        "--training",
        help=default("The size of the training set"),
        default=100,
        type="int",
    )
    parser.add_option(
        "-f",
        "--features",
        help=default("Whether to use enhanced features"),
        default=False,
        action="store_true",
    )
    parser.add_option(
        "-o",
        "--odds",
        help=default("Whether to compute odds ratios"),
        default=False,
        action="store_true",
    )
    parser.add_option(
        "-1",
        "--label1",
        help=default("First label in an odds ratio comparison"),
        default=0,
        type="int",
    )
    parser.add_option(
        "-2",
        "--label2",
        help=default("Second label in an odds ratio comparison"),
        default=1,
        type="int",
    )
    parser.add_option(
        "-w",
        "--weights",
        help=default("Whether to print weights"),
        default=False,
        action="store_true",
    )
    parser.add_option(
        "-k",
        "--smoothing",
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=2.0,
    )
    parser.add_option(
        "-a",
        "--autotune",
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true",
    )
    parser.add_option(
        "-i",
        "--iterations",
        help=default("Maximum iterations to run training"),
        default=3,
        type="int",
    )
    parser.add_option(
        "-s",
        "--test",
        help=default("Amount of test data to use"),
        default=TEST_SET_SIZE,
        type="int",
    )
    parser.add_option(
        "-g",
        "--agentToClone",
        help=default("Pacman agent to copy"),
        default=None,
        type="str",
    )

    options, otherjunk = parser.parse_args(argv)
    if otherjunk:
        raise Exception(
            "Command line input not understood: " + str(otherjunk)
        )
    args = {}

    # Set up variables according to the command line input.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    print("using enhanced features?:\t" + str(options.features))
    print("training set size:\t" + str(options.training))

    # The dataset determines the image printer, the feature extractor and
    # the set of labels a classifier may output.
    if options.data == "digits":
        printImage = ImagePrinter(
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
        ).printImage
        # Digits always use the basic extractor here; --features is ignored.
        featureFunction = basicFeatureExtractorDigit
        legalLabels = list(range(10))
    elif options.data == "faces":
        printImage = ImagePrinter(
            FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT
        ).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
        # Faces previously fell through to the Pacman action names, which
        # can never match integer face labels.
        # NOTE(review): assumes face labels are 0/1 -- confirm against the
        # face label files.
        legalLabels = list(range(2))
    elif options.data == "pacman":
        printImage = None
        if options.features:
            featureFunction = enhancedFeatureExtractorPacman
        else:
            featureFunction = basicFeatureExtractorPacman
        legalLabels = ["Stop", "West", "East", "North", "South"]
    else:
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    if options.training <= 0:
        print(
            "Training set size should be a positive integer (you provided: %d)"
            % options.training
        )
        print(USAGE_STRING)
        sys.exit(2)

    if options.smoothing <= 0:
        print(
            "Please provide a positive number for smoothing (you provided: %f)"
            % options.smoothing
        )
        print(USAGE_STRING)
        sys.exit(2)

    if options.odds:
        if (
            options.label1 not in legalLabels
            or options.label2 not in legalLabels
        ):
            print(
                "Didn't provide a legal labels for the odds ratio: (%d,%d)"
                % (options.label1, options.label2)
            )
            print(USAGE_STRING)
            sys.exit(2)

    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "perceptron":
        # Pacman data gets its own perceptron variant.
        if options.data != "pacman":
            classifier = perceptron.PerceptronClassifier(
                legalLabels, options.iterations
            )
        else:
            classifier = perceptron_pacman.PerceptronClassifierPacman(
                legalLabels, options.iterations
            )
    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)
        sys.exit(2)

    args["agentToClone"] = options.agentToClone

    args["classifier"] = classifier
    args["featureFunction"] = featureFunction
    args["printImage"] = printImage

    return args, options
示例#10
0
        print USAGE_STRING
        sys.exit(2)
    
    if options.smoothing <= 0:
        print "Please provide a positive number for smoothing (you provided: %f)" % options.smoothing
        print USAGE_STRING
        sys.exit(2)
    
    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            print "Didn't provide a legal labels for the odds ratio: (%d,%d)" % (options.label1, options.label2)
            print USAGE_STRING
            sys.exit(2)

if(options.classifier == "mostFrequent"):
    classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif(options.classifier == "naiveBayes" or options.classifier == "nb"):
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if (options.autotune):
            print "using automatic tuning for naivebayes"
            classifier.automaticTuning = True
        else:
            print "using smoothing parameter k=%f for naivebayes" %  options.smoothing
elif(options.classifier == "perceptron"):
    if options.data != 'pacman':
        classifier = perceptron.PerceptronClassifier(legalLabels,options.iterations)
        else:
            classifier = perceptron_pacman.PerceptronClassifierPacman(legalLabels,options.iterations)
elif(options.classifier == "mira"):
    if options.data != 'pacman':