def saveWeakClassifierInformation(targetPath, trainingPath, testPath, parameterByName, extraBySectionByOption):
    """Persist metadata describing a trained weak classifier.

    Records which classifier module produced it, the training parameters,
    and the sample counts of the training and test sets, then merges in any
    caller-supplied sections before writing via store.saveInformation.

    targetPath -- where the information file is written
    trainingPath -- path of the training set (used only for its sample count)
    testPath -- path of the test set (used only for its sample count)
    parameterByName -- classifier parameters, stored under the 'classifier' section
    extraBySectionByOption -- extra sections that extend or override the defaults
    """
    # Assemble the default sections
    informationBySectionByOption = {
        'classifierModule': {'moduleName': classifier.__name__},
        'classifier': parameterByName,
        'trainingSet': {'sampleCount': classifier.getSampleCount(trainingPath)},
        'testSet': {'sampleCount': classifier.getSampleCount(testPath)},
    }
    # Caller-supplied sections win on key collisions
    informationBySectionByOption.update(extraBySectionByOption)
    # Write everything out
    store.saveInformation(targetPath, informationBySectionByOption)
def train(targetClassifierPath, trainingPath, testPath, parameterByName): # Set paths store.makeFolderSafely(targetClassifierPath) # Count the number of training samples trainingCount = classifier.getSampleCount(trainingPath) # Initialize weights uniformly over training samples sampleWeights = numpy.array([1 / float(trainingCount)] * trainingCount) # Initialize alphas = []; boostCount = parameterByName['boost count'] # For each boost, for boostIndex in xrange(boostCount): # Show feedback print '--- Boosting %d/%d ---' % (boostIndex + 1, boostCount) # Normalize sampleWeights sampleWeights = sampleWeights / float(sum(sampleWeights)) # Weight samples weightedTrainingPath, sampleMultipliers = weightSamples(trainingPath, sampleWeights) # Train weak classifier weakClassifierPath = os.path.join(targetClassifierPath, 'weak%d' % boostIndex) weakResultByName = classifier.train(weakClassifierPath, weightedTrainingPath, testPath, parameterByName) shutil.rmtree(os.path.split(weightedTrainingPath)[0]) # Test weak classifier on training set predictedLabels = classifier.test(weakClassifierPath, trainingPath) actualLabels = classifier.loadLabels(trainingPath) errorDistribution = predictedLabels != actualLabels # Compute training error weighted according to sampleWeights weightedError = numpy.dot(sampleWeights, errorDistribution) # Prevent zero division or zero log if weightedError == 0: weightedError = 1e-9 elif weightedError == 1: weightedError = 1 - 1e-9 # Get alpha from training error alpha = 0.5 * numpy.log(float(1 - weightedError) / weightedError) coefficients = [numpy.exp(alpha) if isWrong else numpy.exp(-alpha) for isWrong in errorDistribution] # Save weakClassifier performance print 'weighted training error = %s' % (100 * weightedError) print 'alpha = %s' % alpha weakResultByName['alpha'] = alpha weakResultByName['weighted training error'] = weightedError weakResultByName['sample multipliers'] = ' '.join(str(x) for x in sampleMultipliers) weakResultByName['error 
distribution'] = ''.join(str(int(x)) for x in errorDistribution) saveWeakClassifierInformation(weakClassifierPath, trainingPath, testPath, parameterByName, {'performance': weakResultByName}) # Update sampleWeights sampleWeights = coefficients * sampleWeights # Append alphas.append(alpha) # Save classifier save(targetClassifierPath, alphas) # Return evaluation return evaluate(targetClassifierPath, testPath)
def getSampleCount(filePath):
    """Return the number of samples in the set at filePath.

    Thin wrapper that delegates directly to the underlying classifier module.
    """
    return classifier.getSampleCount(filePath)