def crossValidationPositions():
    '''
    Performs 10 fold cross validation on the total joint position dataset.

    The full position dataset is normalized and split into k = 10 segments.
    For each segment in turn, a new RandomForest is trained on the other
    nine segments and evaluated on the held-out one. The fold index is
    printed as progress output, and every (true label, predicted label)
    pair from every fold is accumulated into one confusion matrix that is
    printed once at the end.
    '''
    theData = generateAllPositionTrainingData()
    # The returned means / standard deviations are not needed in this
    # function; the call is kept because it presumably normalizes theData
    # in place (verify against TrainingData.normalizeData).
    theData.normalizeData()

    k = 10
    # Partition the data into k subsets
    dataSets = theData.getKSegments(k)

    # One confusion matrix shared by all folds.
    results = confusionMatrix(labels)
    for i in range(k):
        print(i)

        # Held-out segment for this fold.
        testSet = dataSets[i]

        # Build the training set from every other segment. Only the
        # segments that are trained on are deep-copied; the held-out
        # segment is used as-is below.
        trainingSet = TrainingData("CrossVal")
        trainingList = copy.deepcopy(dataSets[:i] + dataSets[i + 1:])
        for elem in trainingList:
            trainingSet.combineWithNewData(elem)

        # Train the classifier on the training set
        testForest = RandomForest(trainingSet)
        testForest.train()

        # Evaluate the classifier on the held-out segment
        for samp in testSet.getData():
            resultLabel = testForest.classify(samp)
            trueLabel = samp.getLabel()
            results.update(trueLabel, resultLabel)

    results.printMatrix()
def twoVsOneAngles():
    '''
    Trains a random forest on the data from participants 1 and 2
    and tests it on participant 3.
    The data used here uses the angle features.

    Prints "Training" before and "Done!" after training, then classifies
    every test sample and prints the confusion matrix of true labels
    versus predicted labels.
    '''
    theData = generateTwoAngleTrainingData()
    testForest = RandomForest(theData)

    # Single-argument print(...) produces the same output on Python 2 and
    # is also valid on Python 3.
    print("Training")
    testForest.train()
    print("Done!")

    testList = generateOneTestAngleData()
    results = confusionMatrix(labels)
    for samp in testList:
        resultLabel = testForest.classify(samp)
        trueLabel = samp.getLabel()
        results.update(trueLabel, resultLabel)

    results.printMatrix()
def oneVsTwoPositions():
    '''
    Trains a random forest on the data from participant 1
    and tests it on participant 2 and 3.
    The data used here uses the position features.

    The means and standard deviations returned by normalizing the training
    data are handed to the test-data generator, presumably so the test
    samples are normalized with the training statistics (verify against
    generateTwoTestPositionData). Prints "Training" / "Done!" around
    training and finally the confusion matrix of true versus predicted
    labels.
    '''
    theData = generateOneTrainPositionData()
    means, stdDevs = theData.normalizeData()
    testForest = RandomForest(theData)

    # Single-argument print(...) produces the same output on Python 2 and
    # is also valid on Python 3.
    print("Training")
    testForest.train()
    print("Done!")

    testList = generateTwoTestPositionData(means, stdDevs)
    results = confusionMatrix(labels)
    for samp in testList:
        resultLabel = testForest.classify(samp)
        trueLabel = samp.getLabel()
        results.update(trueLabel, resultLabel)

    results.printMatrix()
from Dataset import Dataset
from RandomForest import RandomForest
import ClassifierStats as stats

# Train a random forest on one dataset's training split and print its
# accuracy on the matching test split.
dataset_path = 'synthetic.social'

# Both splits are read from ../data and share the dataset name as file stem.
train_file = '../data/{}.train'.format(dataset_path)
test_file = '../data/{}.test'.format(dataset_path)
train = Dataset.from_file(train_file)
test = Dataset.from_file(test_file)

model = RandomForest(
    num_trees=100,
    max_depth=100,
    bagging_data_fraction=0.4,
)
model.train(train)

# Score the predictions for the test split against its labels.
predictions = model.classify(test)
accuracy = stats.accuracy(test.labels, predictions)
print(accuracy)