def readContestData(trainingSize=100, testSize=100):
    """Load the raw contest games and their labels from ``pacmandata``.

    ``trainingSize`` is forwarded to ``samples.loadPacmanData`` for the
    training file; ``testSize`` is forwarded for both the validation file
    and the test file.

    Returns a 9-tuple ordered as (trainingData, trainingLabels,
    validationData, validationLabels, rawTrainingData, rawValidationData,
    testData, testLabels, rawTestData).  The three non-raw data entries are
    always new empty lists: this function extracts no features.
    """
    rootdata = 'pacmandata'

    def load(filename, count):
        # Every contest file lives directly under the same root directory.
        return samples.loadPacmanData(rootdata + filename, count)

    rawTrainingData, trainingLabels = load('/contest_training.pkl', trainingSize)
    rawValidationData, validationLabels = load('/contest_validation.pkl', testSize)
    rawTestData, testLabels = load('/contest_test.pkl', testSize)

    return ([], trainingLabels, [], validationLabels, rawTrainingData,
            rawValidationData, [], testLabels, rawTestData)
# Example no. 2
# 0
def readSuicideData(trainingSize=100, testSize=100):
    """Load the raw "suicide" games and their labels from ``pacmandata``.

    ``trainingSize`` is forwarded to ``samples.loadPacmanData`` for the
    training file; ``testSize`` is forwarded for both the validation file
    and the test file.

    Returns a 9-tuple ordered as (trainingData, trainingLabels,
    validationData, validationLabels, rawTrainingData, rawValidationData,
    testData, testLabels, rawTestData).  trainingData, validationData and
    testData are returned as empty lists; no features are extracted here.
    """
    rootdata = "pacmandata"
    trainingPath = rootdata + "/suicide_training.pkl"
    validationPath = rootdata + "/suicide_validation.pkl"
    testPath = rootdata + "/suicide_test.pkl"

    rawTrainingData, trainingLabels = samples.loadPacmanData(trainingPath, trainingSize)
    rawValidationData, validationLabels = samples.loadPacmanData(validationPath, testSize)
    rawTestData, testLabels = samples.loadPacmanData(testPath, testSize)

    # Placeholders only: the feature lists are handed back empty.
    trainingData, validationData, testData = [], [], []

    result = (trainingData, trainingLabels, validationData, validationLabels,
              rawTrainingData, rawValidationData, testData, testLabels,
              rawTestData)
    return result
def runClassifier(args, options):
    """Train the configured classifier and report validation/test accuracy.

    Args:
        args: dict with the keys 'featureFunction' (callable applied to every
            raw datum), 'classifier' (object providing ``train`` and
            ``classify``) and 'printImage' (callable used to display
            features).  For the pacman data set the optional keys
            'agentToClone', 'trainingData' and 'validationData' select which
            saved games are loaded.
        options: parsed options object; reads ``training``, ``test``,
            ``data``, ``odds``, ``weights``, ``classifier``, ``label1`` and
            ``label2``.

    Any ``options.data`` value other than "pacman" loads the digit data set.
    """
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test

    if options.data == "pacman":
        agentToClone = args.get('agentToClone', None)
        trainingPath, validationPath, testPath = MAP_AGENT_TO_PATH_OF_SAVED_GAMES.get(
            agentToClone, (None, None, None))
        # Precedence: the cloned agent's own saved games, then a path given
        # explicitly in args, then the ContestAgent games.  The fallback is
        # only looked up when it is actually needed.
        trainingPath = (trainingPath or args.get('trainingData', False)
                        or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][0])
        validationPath = (validationPath or args.get('validationData', False)
                          or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][1])
        testPath = testPath or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][2]
        rawTrainingData, trainingLabels = samples.loadPacmanData(trainingPath, numTraining)
        rawValidationData, validationLabels = samples.loadPacmanData(validationPath, numTest)
        rawTestData, testLabels = samples.loadPacmanData(testPath, numTest)
    else:
        rawTrainingData = samples.loadDataFile(
            "digitdata/trainingimages", numTraining,
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile(
            "digitdata/validationimages", numTest,
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
        rawTestData = samples.loadDataFile(
            "digitdata/testimages", numTest,
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)

    # Extract features.  List comprehensions keep these real lists on every
    # Python version (a bare map() is a one-shot iterator on Python 3).
    print("Extracting features...")
    trainingData = [featureFunction(datum) for datum in rawTrainingData]
    validationData = [featureFunction(datum) for datum in rawValidationData]
    testData = [featureFunction(datum) for datum in rawTestData]

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    print("Validating...")
    guesses = classifier.classify(validationData)
    _reportAccuracy(guesses, validationLabels)
    print("Testing...")
    guesses = classifier.classify(testData)
    _reportAccuracy(guesses, testLabels)
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # Do the odds ratio computation if specified at the command line.  Only
    # the naive Bayes classifier is handled here, so a single heading is
    # enough.  Logical `and` replaces bitwise `&`, which misbehaves for
    # non-boolean flag values.
    if options.odds and options.classifier in ("naiveBayes", "nb"):
        label1, label2 = options.label1, options.label2
        features_odds = classifier.findHighOddsFeatures(label1, label2)
        print("=== Features with highest odd ratio of label %d over label %d ===" % (label1, label2))
        printImage(features_odds)

    if options.weights and options.classifier == "perceptron":
        for l in classifier.legalLabels:
            features_weights = classifier.findHighWeightFeatures(l)
            print("=== Features with high weight for label %d ===" % l)
            printImage(features_weights)


def _reportAccuracy(guesses, labels):
    """Print how many guesses equal their label, as a count and a percentage.

    ``guesses[i]`` is compared with ``labels[i]`` for every label, so a
    guesses sequence shorter than labels raises IndexError.  An empty label
    list is reported as 0.0% instead of dividing by zero.
    """
    total = len(labels)
    correct = sum(1 for i, label in enumerate(labels) if guesses[i] == label)
    percent = 100.0 * correct / total if total else 0.0
    print("%d correct out of %d (%.1f%%)." % (correct, total, percent))
def runClassifier(args, options):
    """Train the configured classifier and report validation/test accuracy.

    Args:
        args: dict with the keys 'featureFunction' (callable applied to every
            raw datum), 'classifier' (object providing ``train`` and
            ``classify``) and 'printImage' (callable used to display
            features).  For the pacman data set the optional keys
            'agentToClone', 'trainingData' and 'validationData' select which
            saved games are loaded.
        options: parsed options object; reads ``training``, ``test``,
            ``data``, ``odds``, ``weights``, ``classifier``, ``label1`` and
            ``label2``.

    Any ``options.data`` value other than "pacman" loads the digit data set.
    """
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test

    if options.data == "pacman":
        agentToClone = args.get('agentToClone', None)
        trainingPath, validationPath, testPath = MAP_AGENT_TO_PATH_OF_SAVED_GAMES.get(
            agentToClone, (None, None, None))
        # Precedence: the cloned agent's own saved games, then a path given
        # explicitly in args, then the ContestAgent games.  The fallback is
        # only looked up when it is actually needed.
        trainingPath = (trainingPath or args.get('trainingData', False)
                        or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][0])
        validationPath = (validationPath or args.get('validationData', False)
                          or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][1])
        testPath = testPath or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][2]
        rawTrainingData, trainingLabels = samples.loadPacmanData(
            trainingPath, numTraining)
        rawValidationData, validationLabels = samples.loadPacmanData(
            validationPath, numTest)
        rawTestData, testLabels = samples.loadPacmanData(testPath, numTest)
    else:
        rawTrainingData = samples.loadDataFile(
            "digitdata/trainingimages", numTraining,
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels",
                                                numTraining)
        rawValidationData = samples.loadDataFile(
            "digitdata/validationimages", numTest,
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels",
                                                  numTest)
        rawTestData = samples.loadDataFile(
            "digitdata/testimages", numTest,
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)

    # Extract features.  List comprehensions keep these real lists on every
    # Python version (a bare map() is a one-shot iterator on Python 3).
    print("Extracting features...")
    trainingData = [featureFunction(datum) for datum in rawTrainingData]
    validationData = [featureFunction(datum) for datum in rawValidationData]
    testData = [featureFunction(datum) for datum in rawTestData]

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData,
                     validationLabels)
    print("Validating...")
    guesses = classifier.classify(validationData)
    _reportAccuracy(guesses, validationLabels)
    print("Testing...")
    guesses = classifier.classify(testData)
    _reportAccuracy(guesses, testLabels)
    analysis(classifier, guesses, testLabels, testData, rawTestData,
             printImage)

    # Do the odds ratio computation if specified at the command line.  Only
    # the naive Bayes classifier is handled here, so a single heading is
    # enough.  Logical `and` replaces bitwise `&`, which misbehaves for
    # non-boolean flag values.
    if options.odds and options.classifier in ("naiveBayes", "nb"):
        label1, label2 = options.label1, options.label2
        features_odds = classifier.findHighOddsFeatures(label1, label2)
        print("=== Features with highest odd ratio of label %d over label %d ===" % (
            label1, label2))
        printImage(features_odds)

    if options.weights and options.classifier == "perceptron":
        for l in classifier.legalLabels:
            features_weights = classifier.findHighWeightFeatures(l)
            print("=== Features with high weight for label %d ===" % l)
            printImage(features_weights)


def _reportAccuracy(guesses, labels):
    """Print how many guesses equal their label, as a count and a percentage.

    ``guesses[i]`` is compared with ``labels[i]`` for every label, so a
    guesses sequence shorter than labels raises IndexError.  An empty label
    list is reported as 0.0% instead of dividing by zero.
    """
    total = len(labels)
    correct = sum(1 for i, label in enumerate(labels) if guesses[i] == label)
    percent = 100.0 * correct / total if total else 0.0
    print("%d correct out of %d (%.1f%%)." % (correct, total, percent))
# Example no. 5
# 0
def runClassifier(args, options):
    """Train the configured classifier and report validation/test accuracy.

    Args:
        args: dict with the keys 'featureFunction' (callable applied to every
            raw datum) and 'classifier' (object providing ``train`` and
            ``classify``).  For the pacman data set the optional keys
            'agentToClone', 'trainingData' and 'validationData' select which
            saved games are loaded.
        options: parsed options object; reads ``training``, ``test`` and
            ``data``.

    Any ``options.data`` value other than "pacman" loads the digit data set.
    """
    featureFunction = args['featureFunction']
    classifier = args['classifier']

    # Load data
    numTraining = options.training
    numTest = options.test

    if options.data == "pacman":
        agentToClone = args.get('agentToClone', None)
        trainingPath, validationPath, testPath = MAP_AGENT_TO_PATH_OF_SAVED_GAMES.get(
            agentToClone, (None, None, None))
        # Precedence: the cloned agent's own saved games, then a path given
        # explicitly in args, then the ContestAgent games.  The fallback is
        # only looked up when it is actually needed.
        trainingPath = (trainingPath or args.get('trainingData', False)
                        or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][0])
        validationPath = (validationPath or args.get('validationData', False)
                          or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][1])
        testPath = testPath or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][2]
        rawTrainingData, trainingLabels = samples.loadPacmanData(
            trainingPath, numTraining)
        rawValidationData, validationLabels = samples.loadPacmanData(
            validationPath, numTest)
        rawTestData, testLabels = samples.loadPacmanData(testPath, numTest)
    else:
        rawTrainingData = samples.loadDataFile(
            "digitdata/trainingimages", numTraining,
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels",
                                                numTraining)
        rawValidationData = samples.loadDataFile(
            "digitdata/validationimages", numTest,
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels",
                                                  numTest)
        rawTestData = samples.loadDataFile(
            "digitdata/testimages", numTest,
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)

    # Extract features.  List comprehensions keep these real lists on every
    # Python version (a bare map() is a one-shot iterator on Python 3).
    print("Extracting features...")
    trainingData = [featureFunction(datum) for datum in rawTrainingData]
    validationData = [featureFunction(datum) for datum in rawValidationData]
    testData = [featureFunction(datum) for datum in rawTestData]

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData,
                     validationLabels)
    print("Validating...")
    guesses = classifier.classify(validationData)
    _reportAccuracy(guesses, validationLabels)
    print("Testing...")
    guesses = classifier.classify(testData)
    _reportAccuracy(guesses, testLabels)


def _reportAccuracy(guesses, labels):
    """Print how many guesses equal their label, as a count and a percentage.

    ``guesses[i]`` is compared with ``labels[i]`` for every label, so a
    guesses sequence shorter than labels raises IndexError.  An empty label
    list is reported as 0.0% instead of dividing by zero.
    """
    total = len(labels)
    correct = sum(1 for i, label in enumerate(labels) if guesses[i] == label)
    percent = 100.0 * correct / total if total else 0.0
    print("%d correct out of %d (%.1f%%)." % (correct, total, percent))
# Example no. 6
# 0
def runClassifier(args, options):
    """Train the classifier on the selected data set and report test accuracy.

    The loaded data is handed to the classifier as loaded; no feature
    function is applied in this function.

    Args:
        args: dict with the key 'classifier' (object providing ``train`` and
            ``classify``).  For the pacman data set the optional keys
            'agentToClone', 'trainingData' and 'validationData' select which
            saved games are loaded.
        options: parsed options object; reads ``data``, ``training`` and
            ``validation``.  For the "digits" data set, ``training`` and
            ``validation`` are set on the object to 2000 and 1000 when they
            are None.

    Raises:
        ValueError: if ``options.data`` is neither "pacman" nor "digits".
    """
    classifier = args['classifier']

    # Load data
    if options.data == "pacman":
        agentToClone = args.get('agentToClone', None)
        trainingPath, validationPath, testPath = MAP_AGENT_TO_PATH_OF_SAVED_GAMES.get(
            agentToClone, (None, None, None))
        # Precedence: the cloned agent's own saved games, then a path given
        # explicitly in args, then the ContestAgent games.  The fallback is
        # only looked up when it is actually needed.
        trainingPath = (trainingPath or args.get('trainingData', False)
                        or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][0])
        validationPath = (validationPath or args.get('validationData', False)
                          or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][1])
        testPath = testPath or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][2]

        trainingData, trainingLabels = samples.loadPacmanData(
            trainingPath, options.training)
        validationData, validationLabels = samples.loadPacmanData(
            validationPath, options.validation)
        # No size limit is passed for the test games.
        testData, testLabels = samples.loadPacmanData(testPath, None)

    elif options.data == "digits":
        if options.training is None:
            options.training = 2000
        if options.validation is None:
            options.validation = 1000
        numTest = 1000

        trainingData = samples.loadDataFile(
            "digitdata/trainingimages", options.training,
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels",
                                                options.training)

        validationData = samples.loadDataFile(
            "digitdata/validationimages", options.validation,
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels",
                                                  options.validation)

        testData = samples.loadDataFile(
            "digitdata/testimages", numTest,
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)

    else:
        raise ValueError('unrecognized dataset %r' % options.data)

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData,
                     validationLabels)
    print("Testing...")
    guesses = classifier.classify(testData)
    _reportAccuracy(guesses, testLabels)


def _reportAccuracy(guesses, labels):
    """Print how many guesses equal their label, as a count and a percentage.

    ``guesses[i]`` is compared with ``labels[i]`` for every label, so a
    guesses sequence shorter than labels raises IndexError.  An empty label
    list is reported as 0.0% instead of dividing by zero.
    """
    total = len(labels)
    correct = sum(1 for i, label in enumerate(labels) if guesses[i] == label)
    percent = 100.0 * correct / total if total else 0.0
    print("%d correct out of %d (%.1f%%)." % (correct, total, percent))