def _toDenseMatrix(featureVectors, columnOf, numColumns):
    """Copy per-row feature mappings into a dense, zero-filled numpy matrix.

    Args:
        featureVectors: container supporting ``len()`` and integer indexing
            over ``0 .. len - 1``; each element maps feature name -> value.
        columnOf: dict mapping a feature name to its column index.
        numColumns: width of the resulting matrix.

    Returns:
        ``numpy.ndarray`` of shape ``(len(featureVectors), numColumns)``.
        Cells that no feature writes to stay 0.

    Raises:
        KeyError: if a feature name is missing from ``columnOf``.
    """
    matrix = np.zeros((len(featureVectors), numColumns))
    # Index by position instead of iterating the container directly: all that
    # is known about it here is that it supports len() and integer lookup.
    for row in range(len(featureVectors)):
        vector = featureVectors[row]
        for feature in vector:
            matrix[row][columnOf[feature]] = vector[feature]
    return matrix


def features1():
    """Build dense training/testing matrices and report bench_svm statistics.

    Loads the training and testing files, extracts feature vectors for both,
    lays them out over one shared column ordering, passes the two matrices
    and the training composer names to ``bench_svm`` and hands the resulting
    predictions to ``printStatistics`` together with the testing composer
    names.

    The column ordering is the training feature names followed by the testing
    feature names. A name present in both lists therefore occupies two
    columns; values are always written to the first one and the later
    duplicate stays all-zero. The width is kept at the full concatenated
    length on purpose so the matrices have the same shape as before.

    NOTE(review): ``labels`` is not defined in this function; it is expected
    to be a module-level name. The meaning of the leading ``4`` passed to
    ``extractFeatures.initFeatureVectors`` is not visible here - confirm
    against that module.
    """
    trainingSet, _, _, _ = loadData.loadTrainingFiles()
    trainingFeatureVectors, trainingFeatureNames, trainingComposerNames = \
        extractFeatures.initFeatureVectors(4, trainingSet, len(labels) + 1)

    testingSet, _, _, _ = loadData.loadTestingFiles()
    testingFeatureVectors, testingFeatureNames, testingComposerNames = \
        extractFeatures.initFeatureVectors(4, testingSet, len(labels) + 1)

    nameList = list(trainingFeatureNames) + list(testingFeatureNames)
    numColumns = len(nameList)

    # One pass to build name -> column. setdefault keeps the FIRST position
    # of a repeated name, which is exactly what nameList.index(name) returned,
    # but each lookup is now O(1) instead of a linear scan per cell.
    columnOf = {}
    for position, name in enumerate(nameList):
        columnOf.setdefault(name, position)

    print('begin convertion')
    trainingData = _toDenseMatrix(trainingFeatureVectors, columnOf, numColumns)
    testingData = _toDenseMatrix(testingFeatureVectors, columnOf, numColumns)
    print('done convertion')

    predictions = bench_svm(trainingData, testingData, trainingComposerNames)
    printStatistics(predictions, testingComposerNames)
def features2():
    """Run bench_svm on separately loaded training and testing files.

    Loads the training files and the testing files, turns each into feature
    data plus composer names via ``extractFeatures.initFeatureVectors``,
    passes both feature sets and the training composer names to
    ``bench_svm`` and reports the predictions through ``printStatistics``.

    NOTE(review): another ``features2`` is defined later in this file. Python
    binds the name to the most recent ``def``, so once the module has finished
    loading this version can no longer be reached by name.
    """
    # Training side.
    loadedTraining = loadData.loadTrainingFiles()
    trainPieces, _, trainComposerCount, trainPieceCount = loadedTraining
    trainFeatures, trainLabels = extractFeatures.initFeatureVectors(
        trainPieces, trainComposerCount, trainPieceCount)

    # Testing side, processed the same way.
    loadedTesting = loadData.loadTestingFiles()
    testPieces, _, testComposerCount, testPieceCount = loadedTesting
    testFeatures, testLabels = extractFeatures.initFeatureVectors(
        testPieces, testComposerCount, testPieceCount)

    predicted = bench_svm(trainFeatures, testFeatures, trainLabels)
    # The number of training composers is forwarded as a third argument here.
    printStatistics(predicted, testLabels, trainComposerCount)
def features2():
    """Split one data set into train/test, run bench_svm, report and plot.

    Steps, in order:
      1. Load the training files and extract feature data and composer names.
      2. Split both into training and testing parts with ``train_test_split``
         (``test_size=0.20``) and print the four resulting lengths.
      3. Get predictions from ``bench_svm``.
      4. Call ``dimReduction`` with a leading argument of 2 on the split data.
      5. Print statistics for the predictions, then plot the reduced training
         set against the training composer names.

    NOTE(review): ``train_test_split`` is presumably scikit-learn's; no seed
    is passed, so the split may differ between runs - confirm. The literal
    ``[0, 1, 2, 3, 4]`` handed to ``plotData`` is hard-coded; its meaning is
    not visible from this function.
    """
    # Load the data and derive features.
    pieces, _, composerCount, pieceCount = loadData.loadTrainingFiles()
    features, composerLabels = extractFeatures.initFeatureVectors(
        pieces, composerCount, pieceCount)

    # Partition into a training and a testing portion.
    partition = train_test_split(features, composerLabels, test_size=0.20)
    trainFeatures, testFeatures, trainLabels, testLabels = partition

    # Joined by hand so the emitted line is the same space-separated text
    # under the Python 2 print statement and the Python 3 print function.
    summary = ('training', len(trainFeatures), len(trainLabels),
               'testing', len(testFeatures), len(testLabels))
    print(' '.join(str(item) for item in summary))

    # Predict, reduce, then report and plot.
    predicted = bench_svm(trainFeatures, testFeatures, trainLabels)
    # Only the reduced training set is used below; the reduced testing set
    # is discarded.
    reducedTrain, _ = dimReduction(
        2, trainFeatures, testFeatures, trainLabels, testLabels)

    printStatistics(predicted, testLabels)
    plotData(reducedTrain, trainLabels, [0, 1, 2, 3, 4])