sortedperfdata, perfdata = evaluate.joust(contenders, runset, folds=3)
easy.showROCPlot(perfdata)

# Extract only optimal ROC points
rocData_full, optimalIndices = easy.discardSuboptimal(perfdata)
#rocData_full,optimalIndices = easy.discardSuboptimal(perfdata,"sboxes/BOW_Trainer_127_0_0_1")

# Prepare detector data of optimal ROC points: retrain each optimal
# contender on the whole runset and store the trained result next to
# the first three entries of that contender's ROC row.
rocData_optimal = []
for idx in optimalIndices:
    trainer = contenders[idx].getTrainer()
    detectorData = easy.train(
        trainer, runset, trainerProperties=contenders[idx].trainerProps)
    rocRow = rocData_full[idx]
    rocData_optimal.append([detectorData, rocRow[0], rocRow[1], rocRow[2]])

# ROC zip
rocZip = easy.makeROCdata(rocData_optimal)

# Connect to the detector named by strDetector and query its properties
detector = easy.getDetector(strDetector)
detectorProps = easy.getDetectorProperties(detector)

# specify either a desired falseAlarmRate or recall, but not both:
priority = "recall"
def crossValidate( contender, runset, folds=10, printVerbose=False ):
    '''Run a k-fold cross-validation of one contender on a runset.

    The runset may only contain items with "pos" and "neg" purposes.
    The items of each purpose are shuffled once, then for every fold
    the runset is split into a training and an evaluation part, the
    contender's trainer is trained on the former and its detector is
    run on the latter.

    Arguments:
      contender     object providing getTrainer(), getDetector(),
                    trainerProps, detectorProps, foundMap and name
      runset        the labeled data to split into folds
      folds         requested number of folds; reduced (with a printed
                    warning) to the size of the smaller purpose if
                    there are fewer items than folds
      printVerbose  passed as printArtifacts to easy.printRunSetInfo

    Returns an EvaluationResult constructed from the number of folds
    actually used, a TestResult holding the tp/tn/fp/fn counts summed
    over all folds, and the summed second element ("nores") of the
    per-fold confusion tables.

    Raises ValueError if folds is smaller than 1, and RuntimeError if
    the runset has purposes other than positive and negative or fewer
    than 2 items for either purpose.'''

    # a fold count below 1 would otherwise end in a division by zero
    # (folds == 0) or in an empty, meaningless result (folds < 0)
    if folds < 1:
        raise ValueError("folds must be at least 1, got " + str(folds))

    # sanity checks:
    # only positive and negative purposes,
    # count number of entries for each purpose
    runset_pos = asList( runset, purpose="pos" )
    runset_neg = asList( runset, purpose="neg" )
    num_items = ( len(runset_pos), len(runset_neg) )
    # check that there are no other purposes
    all_items = len( asList( runset ) )
    if sum(num_items) != all_items:
        raise RuntimeError("crossValidate can only handle Positive and Negative purposes")
    if min(num_items) < 2:
        raise RuntimeError("need more than 1 labeled item per purpose to cross validate")

    # make sure there are enough items for xval to make sense
    if folds > min(num_items):
        # folds is an int: it must be converted before concatenation
        print("warning: cannot do "+str(folds)+"-fold validation with only "
              +str(num_items)+" labeled items")
        folds = min(num_items)

    # calculate the size of the training and evaluation sets.
    # if the number of labeled items in the runset divided
    # by the number of folds isn't an even
    # division, use more items for the evaluation
    chunksize = ( num_items[0] // folds, num_items[1] // folds )
    trainsize = ( chunksize[0] * (folds-1), chunksize[1] * (folds-1) )
    evalsize = ( num_items[0]-trainsize[0], num_items[1]-trainsize[1] )
    print( "Will perform a {0}-fold cross-validation with {1} training samples and "
           "{2} evaluation samples".format( folds, trainsize, evalsize ) )

    # randomize the order of the elements in the runset, once and for all folds;
    # the indices must be real lists because random.shuffle works in place
    # and a Python 3 range object cannot be modified
    rndidx = ( list( range( num_items[0] ) ), list( range( num_items[1] ) ) )
    random.shuffle( rndidx[0] )
    random.shuffle( rndidx[1] )

    confusionTables = []
    for fold in range( folds ):
        # split the runset
        trainset, evalset = splitRunSet( runset_pos, runset_neg,
                                         fold, chunksize, evalsize, rndidx )
        print( "-------- fold number {0} --------".format(fold) )

        # training
        print( "---- training:" )
        easy.printRunSetInfo( trainset, printArtifacts=printVerbose )
        trainer = contender.getTrainer()
        model = easy.train( trainer, trainset,
                            trainerProperties=contender.trainerProps )

        # detection
        print( "---- evaluation:" )
        easy.printRunSetInfo( evalset, printArtifacts=printVerbose )
        detector = contender.getDetector()
        detections = easy.detect( detector, model, evalset,
                                  detectorProperties=contender.detectorProps )
        confusionTables.append(
            getConfusionTable( detections, origSet=evalset,
                               foundMap=contender.foundMap ) )

    # calculate statistics of our tuple TestResult,nores:
    # entry[0] carries the tp/tn/fp/fn counts, entry[1] the nores count
    sumTestResult = TestResult()
    sumNoRes = 0
    for entry in confusionTables:
        sumTestResult.tp += entry[0].tp
        sumTestResult.tn += entry[0].tn
        sumTestResult.fp += entry[0].fp
        sumTestResult.fn += entry[0].fn
        sumNoRes += entry[1]
    r = EvaluationResult( folds, sumTestResult, sumNoRes,
                          detail=None, name=contender.name )
    return r
# it is the client's responsibility to upload them if not. # The putResult contains information about which files were actually transferred. # fileserver = easy.getFileServer( "FileService:default -p 10110 " + host ) putResult = easy.putAllFiles( fileserver, rs1 ) modelfile = "detectors/haarcascade_frontalface_alt.xml" if not fileserver.exists( easy.getCvacPath(modelfile) ): easy.putFile( fileserver, easy.getCvacPath(modelfile) ) # # detect remotely: note the host specification # print("------- Remote detection, local result display: -------") detector = easy.getDetector( "OpenCVCascadeDetector:default -p 10102 "+host ) results = easy.detect( detector, modelfile, rs1 ) easy.printResults( results ) # # Example 2: # Train on a remote machine, obtain the model file, and test locally. # Assume the files are on the remote machine, or transfer with putAllFiles. # trainer = easy.getTrainer( "bowTrain:default -p 10103 "+ host) # remote trainset = easy.createRunSet( "trainImg" ); trainedModel = easy.train( trainer, trainset ) easy.getFile( fileserver, trainedModel.file ) # downloads the model from remote print("{0}".format(trainedModel)) detector = easy.getDetector( "bowTest:default -p 10104" ) # local service testset = easy.createRunSet("testImg","UNPURPOSED" ) results = easy.detect( detector, trainedModel, testset )
# pick a subset: all license plates
license_plates = categories['license plate']
print("There are {0} license plate labels.".format( len(license_plates) ))

# another subset: all labels starting with "car".
# The labels of every matching category are merged into one flat list
# (extend, not append) so that `cars` has the same shape as
# `license_plates` and len(cars) counts labels rather than categories.
cars = []
for key, car_labels in categories.items():
    if key.startswith("car"):
        cars.extend( car_labels )
print("There are {0} car-related labels.".format( len(cars) ))

# Note that Labels are cached in the CorpusServer, but the corpus currently
# needs to re-mirror if the CorpusServer is restarted because Labels are
# not stored to disk.  Images are stored to disk.

quit()  # done for now

# Train a detector on license plates
trainer = easy.getTrainer( "BOW_Trainer:default -p 10103 ")
trainset = easy.createRunSet( license_plates, "pos" )
easy.printRunSetInfo( trainset )
licenseplateModel = easy.train( trainer, trainset )

# test the license plate detector on the known locations of cars;
# this will only try to detect within the boundaries of cars.
testset = easy.createRunSet( cars, "unpurposed" )
detector = easy.getDetector( "BOW_Detector:default -p 10104" )
results = easy.detect( detector, licenseplateModel, testset )
# printResults lives in the easy module; the bare name is not defined here
easy.printResults( results )
# Assemble one multiclass training set out of three sub-folders of
# trainImg; each sub-folder gets its own class ID (0, 1, 2).
trainset = cvac.RunSet()
caset = easy.createRunSet("trainImg/ca")
krset = easy.createRunSet("trainImg/kr")
usset = easy.createRunSet("trainImg/us")
for classID, subset in enumerate([caset, krset, usset]):
    purpose = cvac.Purpose(cvac.PurposeType.MULTICLASS, classID)
    easy.addToRunSet(trainset, subset, purpose)
easy.printRunSetInfo(trainset, printLabels=True)

#
# Connect to the trainer for a Bag of Words algorithm, then
# train with the given runset
#
print("starting training, this might take a few minutes...")
trainer = easy.getTrainer("BOW_Trainer")
trainedModel = easy.train(trainer, trainset)

#
# Display information about the file in which the model is stored;
# this is generally no concern to algorithm users and only of
# interest to algorithm developers since it is algorithm-specific
#
zipfname = easy.getFSPath(trainedModel)
print("{0}".format(zipfname))
zipf = zipfile.ZipFile(zipfname)
print("Training model stored in file {0}".format(zipfname))
# print("file contents:\n{0}".format(zipf.namelist()))

#
# test the trained model on a separate set of images
#
Easy! mini tutorial
Repeatedly train and evaluate for efficient label use; bootstrap.
matz 6/21/2013
'''

import os
import easy

#
# Create a training set from one sample each of 9 corporate logos
#
trainset1 = easy.createRunSet( "corporate_logos" )

# train, round 1
trainer = easy.getTrainer( "BOW_Trainer")
model1 = easy.train( trainer, trainset1 )

# evaluate the model on a separate test set, images and videos
# in DataDir/testImg
testset1 = easy.createRunSet( "testImg", "UNPURPOSED" )
easy.printRunSetInfo( testset1 )
detector = easy.getDetector( "BOW_Detector" )
result1 = easy.detect( detector, model1, testset1 )
easy.printResults(result1)

# sort the images of the test set into subfolders of
# "testresults1" corresponding to the found labels;
# if multiple labels were found per original, consider only
# the label with the highest confidence
easy.sortIntoFolders( result1, outfolder="testresults1", multi="highest")
Easy! mini tutorial
Repeatedly train and evaluate for efficient label use; bootstrap.
matz 6/21/2013
'''

import os
import easy

#
# Create a training set from one sample each of 9 corporate logos
#
trainset1 = easy.createRunSet("corporate_logos")

# train, round 1
trainer = easy.getTrainer("BOW_Trainer")
model1 = easy.train(trainer, trainset1)

# evaluate the model on a separate test set, images and videos
# in DataDir/testImg
testset1 = easy.createRunSet("testImg", "UNPURPOSED")
easy.printRunSetInfo(testset1)
detector = easy.getDetector("BOW_Detector")
result1 = easy.detect(detector, model1, testset1)
easy.printResults(result1)

# sort the images of the test set into subfolders of
# "testresults1" corresponding to the found labels;
# if multiple labels were found per original, consider only
# the label with the highest confidence
easy.sortIntoFolders(result1, outfolder="testresults1", multi="highest")
# pick a subset: all license plates
license_plates = categories['license plate']
print("There are {0} license plate labels.".format(len(license_plates)))

# another subset: all labels starting with "car".
# The labels of every matching category are merged into one flat list
# (extend, not append) so that `cars` has the same shape as
# `license_plates` and len(cars) counts labels rather than categories.
cars = []
for key, car_labels in categories.items():
    if key.startswith("car"):
        cars.extend(car_labels)
print("There are {0} car-related labels.".format(len(cars)))

# Note that Labels are cached in the CorpusServer, but the corpus currently
# needs to re-mirror if the CorpusServer is restarted because Labels are
# not stored to disk.  Images are stored to disk.

quit()  # done for now

# Train a detector on license plates
trainer = easy.getTrainer("BOW_Trainer:default -p 10103 ")
trainset = easy.createRunSet(license_plates, "pos")
easy.printRunSetInfo(trainset)
licenseplateModel = easy.train(trainer, trainset)

# test the license plate detector on the known locations of cars;
# this will only try to detect within the boundaries of cars.
testset = easy.createRunSet(cars, "unpurposed")
detector = easy.getDetector("BOW_Detector:default -p 10104")
results = easy.detect(detector, licenseplateModel, testset)
# printResults lives in the easy module; the bare name is not defined here
easy.printResults(results)
classmap = res['classmap'] # # Make sure all files in the RunSet are available on the remote site; # it is the client's responsibility to upload them if not. # host = "-h localhost" #host = "-h vision.nps.edu" fileserver = easy.getFileServer( "FileService:default -p 10110 " + host ) putResult = easy.putAllFiles( fileserver, runset ) # # Connect to a trainer service, train on the RunSet # trainer = easy.getTrainer( "bowTrain:default -p 10103 " + host ) trainedModel = easy.train( trainer, runset ) print("Training model stored in file: " + easy.getFSPath( trainedModel.file )) # # Connect to a detector service, # test on the training RunSet for validation purposes; # The detect call takes the detector, the trained model, the # runset, and a mapping from purpose to label name # detector = easy.getDetector( "bowTest:default -p 10104 " + host ) results = easy.detect( detector, trainedModel, runset ) easy.printResults( results, foundMap=classmap ) easy.deleteAllFiles( fileserver, putResult['uploaded'] ) quit()
contenders.append(c1) sortedperfdata, perfdata = evaluate.joust(contenders, runset, folds=3) easy.showROCPlot(perfdata) ''' Extract only optimal ROC points ''' rocData_full, optimalIndices = easy.discardSuboptimal(perfdata) #rocData_full,optimalIndices = easy.discardSuboptimal(perfdata,"sboxes/BOW_Trainer_127_0_0_1") ''' Prepare detector data of optimal ROC points ''' rocData_optimal = [] for idx in optimalIndices: trainer = contenders[idx].getTrainer() detectorData = easy.train( trainer, runset, \ trainerProperties=contenders[idx].trainerProps ) rocData_optimal.append([detectorData,\ rocData_full[idx][0],rocData_full[idx][1],\ rocData_full[idx][2]]) ''' ROC zip ''' rocZip = easy.makeROCdata(rocData_optimal) ''' Extract only optimal ROC points ''' detector = easy.getDetector(strDetector) detectorProps = easy.getDetectorProperties(detector) #specify either a desired falseAlarmRate or recall, but not both: priority = "recall" if priority == "falseAlarmRate":