def run(self, className, outfilename, param, dsname, gtname, evalconfig):
    """Run every evaluation listed in `evalconfig` for one model configuration.

    Loads the groundtruth and the dataset, drops groundtruth entries whose
    point name is not in the dataset, obtains the trainer through
    `getTrainer` and then runs one N-fold evaluation per entry of
    `evalconfig`. For evaluation number `i`, the parameters are dumped as
    YAML to `<outfilename>_<i>.param` and the resulting confusion object is
    saved to `<outfilename>_<i>.result`. An evaluation whose `.result` file
    already exists is skipped.

    Args:
        className: class name forced onto the loaded GroundTruth.
        outfilename: prefix used for the `.param` and `.result` files.
        param: model parameters; 'classifier' is always read, and
            'preprocessing' is read when an evaluation is actually run.
        dsname: filename of the dataset to load.
        gtname: filename of the groundtruth to load.
        evalconfig: iterable of evaluation parameter dicts, each providing
            an 'nfold' entry.

    Raises:
        Exception: any failure is logged together with `param` and
            `evalconfig`, then re-raised unchanged.
    """
    try:
        classifier = param['classifier']
        gt = GroundTruth(classifier)
        gt.load(gtname)

        # force the GroundTruth class name to be the one specified by our project file, not
        # the one in the original groundtruth file
        gt.className = className

        ds = DataSet()
        ds.load(dsname)

        # some points may have failed to be analyzed, remove those from the GroundTruth.
        # The names are put in a set once so that each lookup below is O(1)
        # instead of a linear scan per groundtruth entry.
        pnames = set(ds.pointNames())
        # iterate over a copy of the keys, as entries are deleted inside the loop
        for pid in list(gt.keys()):
            if pid not in pnames:
                log.warning('Removing %s from GroundTruth as it could not be found in the merged dataset' % pid)
                del gt[pid]

        # NOTE(review): the dataset returned by getTrainer is not used; the
        # evaluation below runs on `ds` -- confirm this is intended.
        trainerFun, trainingparam, _newds = getTrainer(classifier, param, ds)

        # run all the evaluations specified in the evaluation config
        for i, evalparam in enumerate(evalconfig):
            # if we already ran this evaluation, no need to run it again...
            resultFilename = outfilename + '_%d.result' % i
            if exists(resultFilename):
                log.info('File %s already exists. Skipping evaluation...' % resultFilename)
                continue

            log.info('Running evaluation %d for: %s with classifier %s and dataset %s'
                     % (i, outfilename, param['classifier'], param['preprocessing']))
            log.info(' PID: %d, parameters: %s' % (os.getpid(), json.dumps(param)))

            # run evaluation
            confusion = evaluateNfold(evalparam['nfold'], ds, gt, trainerFun, **trainingparam)

            # write evaluation params & result
            with open(outfilename + '_%d.param' % i, 'w') as f:
                yaml.dump({'model': param, 'evaluation': evalparam}, f)

            confusion.save(resultFilename)

    except Exception:
        # log the full configuration for context, then let the caller see the original error
        log.error('While doing evaluation with param = %s\nevaluation = %s' % (param, evalconfig))
        raise
def run(self, className, outfilename, param, dsname, gtname, evalconfig):
    """Run every evaluation listed in `evalconfig` for one model configuration.

    Loads the groundtruth and the dataset, drops groundtruth entries whose
    point name is not in the dataset, obtains the trainer through
    `getTrainer` and then runs one N-fold evaluation per entry of
    `evalconfig`. For evaluation number `i`, the parameters are dumped as
    YAML to `<outfilename>_<i>.param` and the resulting confusion object is
    saved to `<outfilename>_<i>.result`. An evaluation whose `.result` file
    already exists is skipped.

    Args:
        className: class name forced onto the loaded GroundTruth.
        outfilename: prefix used for the `.param` and `.result` files.
        param: model parameters; 'classifier' is always read, and
            'preprocessing' is read when an evaluation is actually run.
        dsname: filename of the dataset to load.
        gtname: filename of the groundtruth to load.
        evalconfig: iterable of evaluation parameter dicts, each providing
            an 'nfold' entry.

    Raises:
        Exception: any failure is logged together with `param` and
            `evalconfig`, then re-raised unchanged.
    """
    try:
        classifier = param['classifier']
        gt = GroundTruth(classifier)
        gt.load(gtname)

        # force the GroundTruth class name to be the one specified by our project file, not
        # the one in the original groundtruth file
        gt.className = className

        ds = DataSet()
        ds.load(dsname)

        # some points may have failed to be analyzed, remove those from the GroundTruth.
        # The names are put in a set once so that each lookup below is O(1)
        # instead of a linear scan per groundtruth entry.
        pnames = set(ds.pointNames())
        # iterate over a copy of the keys, as entries are deleted inside the loop
        for pid in list(gt.keys()):
            if pid not in pnames:
                log.warning('Removing %s from GroundTruth as it could not be found in the merged dataset' % pid)
                del gt[pid]

        # NOTE(review): the dataset returned by getTrainer is not used; the
        # evaluation below runs on `ds` -- confirm this is intended.
        trainerFun, trainingparam, _newds = getTrainer(classifier, param, ds)

        # run all the evaluations specified in the evaluation config
        for i, evalparam in enumerate(evalconfig):
            # if we already ran this evaluation, no need to run it again...
            resultFilename = outfilename + '_%d.result' % i
            if exists(resultFilename):
                log.info('File %s already exists. Skipping evaluation...' % resultFilename)
                continue

            log.info('Running evaluation %d for: %s with classifier %s and dataset %s'
                     % (i, outfilename, param['classifier'], param['preprocessing']))
            log.info(' PID: %d, parameters: %s' % (os.getpid(), json.dumps(param)))

            # run evaluation
            confusion = evaluateNfold(evalparam['nfold'], ds, gt, trainerFun, **trainingparam)

            # write evaluation params & result
            with open(outfilename + '_%d.param' % i, 'w') as f:
                yaml.dump({'model': param, 'evaluation': evalparam}, f)

            confusion.save(resultFilename)

    except Exception:
        # log the full configuration for context, then let the caller see the original error
        log.error('While doing evaluation with param = %s\nevaluation = %s' % (param, evalconfig))
        raise
def validate(basedir):
    """Check that the collection rooted at `basedir` has a consistent layout.

    The checks run in this order and stop at the first failure:

    1. `basedir` exists and contains `metadata/config.yaml` with a
       'version' entry.
    2. The config has a non-empty 'audioFormats' section with as many
       entries as there are entries in the `audio/` folder.
    3. Each audio format has its `audio/<format>` folder, names a
       'filelist' (a YAML mapping stored in `metadata/`), and every file in
       that list exists on disk.
    4. Every ID of each groundtruth file listed under 'groundTruth' is
       present in the filelist of every audio format.

    Progress is reported on standard output.

    Args:
        basedir: path of the collection's base directory.

    Raises:
        Exception: describing the first check that failed.
    """

    def load_yaml(path):
        # Close the file deterministically instead of leaking the handle.
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary Python objects and is rejected by PyYAML >= 6; consider
        # yaml.safe_load if these metadata files only hold plain data.
        with open(path) as stream:
            return yaml.load(stream.read())

    # let's start with some basic check
    print('Checking basic directory layout...')
    if not exists(basedir):
        raise Exception('The specified base directory does not exist')

    # check required metadata files are there and that they're valid
    configFile = join(basedir, 'metadata', 'config.yaml')
    if not exists(configFile):
        raise Exception('config.yaml could not be found in the metadata/ folder')

    config = load_yaml(configFile)

    # an empty config.yaml is parsed as None: report it as a missing version
    # rather than crashing on the membership test
    if not config or 'version' not in config:
        raise Exception('config.yaml doesn\'t have a version number')

    # check that the specified audioFormats correspond to the audio/ subfolders
    print('Checking available audio formats...')
    # .get() so that a missing section yields the explicit error below
    # instead of a bare KeyError
    audioFormats = config.get('audioFormats')
    if not audioFormats:
        raise Exception('audioFormats not specified in config.yaml')

    audioFolders = [basename(f) for f in glob.glob(join(basedir, 'audio', '*'))]
    if len(audioFolders) != len(audioFormats):
        raise Exception('Some audio folders are not described in the audioFormats section of the config.yaml')
    print('Found formats:', str(audioFolders))

    # check the audio formats are valid, in particular that they have a valid filelist.
    # Parsed filelists are kept so the groundtruth check below does not have
    # to re-read them for every groundtruth file.
    filelists = {}
    for fmt, desc in audioFormats.items():
        print("\nChecking format '%s':" % fmt)
        # TODO: at some point in the future we should also check for valid values in desc
        if not exists(join(basedir, 'audio', fmt)):
            raise Exception('%s is listed as an audio format, but doesn\'t appear in the audio/ folder' % fmt)

        if 'filelist' not in desc:
            raise Exception('Audio format "%s" does not define a filelist' % fmt)

        filelist = load_yaml(join(basedir, 'metadata', desc['filelist']))
        filelists[fmt] = filelist
        print(' filelist OK, lists %d files' % len(filelist))

        for filename in filelist.values():
            fullpath = join(basedir, 'audio', fmt, filename)
            if not exists(fullpath):
                raise Exception('For format "%s": file "%s" appears in filelist, however there is no corresponding "%s"'
                                % (fmt, filename, fullpath))

    # check that the groundtruth files do actually exist if they are specified
    print('\nChecking groundtruth files...')
    groundTruth = config.get('groundTruth', {})
    print('Found groundtruth files:', str(list(groundTruth.keys())))

    for name, gtfile in groundTruth.items():
        print("\nChecking groundtruth '%s':" % name)
        gt = GroundTruth('')
        gt.load(join(basedir, 'metadata', gtfile))

        # check that the IDs used in the groundtruth files exist in all the filelists
        for afname in audioFormats:
            flist = filelists[afname]
            for gid in gt:
                if gid not in flist:
                    raise Exception("ID '%s' is in GroundTruth '%s', but could not be found in filelist for audio format '%s'"
                                    % (gid, gtfile, afname))

        print(' gt filelist OK, found classes:', str(sorted(set(gt.values()))))
def validate(basedir):
    """Check that the collection rooted at `basedir` has a consistent layout.

    The checks run in this order and stop at the first failure:

    1. `basedir` exists and contains `metadata/config.yaml` with a
       'version' entry.
    2. The config has a non-empty 'audioFormats' section with as many
       entries as there are entries in the `audio/` folder.
    3. Each audio format has its `audio/<format>` folder, names a
       'filelist' (a YAML mapping stored in `metadata/`), and every file in
       that list exists on disk.
    4. Every ID of each groundtruth file listed under 'groundTruth' is
       present in the filelist of every audio format.

    Progress is reported on standard output. Every print call takes a single
    pre-formatted string, so the text is the same whether `print` is the
    Python 2 statement or the Python 3 function.

    Args:
        basedir: path of the collection's base directory.

    Raises:
        Exception: describing the first check that failed.
    """

    def load_yaml(path):
        # Close the file deterministically instead of leaking the handle.
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary Python objects and is rejected by PyYAML >= 6; consider
        # yaml.safe_load if these metadata files only hold plain data.
        with open(path) as stream:
            return yaml.load(stream.read())

    # let's start with some basic check
    print('Checking basic directory layout...')
    if not exists(basedir):
        raise Exception('The specified base directory does not exist')

    # check required metadata files are there and that they're valid
    configFile = join(basedir, 'metadata', 'config.yaml')
    if not exists(configFile):
        raise Exception('config.yaml could not be found in the metadata/ folder')

    config = load_yaml(configFile)

    # an empty config.yaml is parsed as None: report it as a missing version
    # rather than crashing on the membership test
    if not config or 'version' not in config:
        raise Exception('config.yaml doesn\'t have a version number')

    # check that the specified audioFormats correspond to the audio/ subfolders
    print('Checking available audio formats...')
    # .get() so that a missing section yields the explicit error below
    # instead of a bare KeyError
    audioFormats = config.get('audioFormats')
    if not audioFormats:
        raise Exception('audioFormats not specified in config.yaml')

    audioFolders = [basename(f) for f in glob.glob(join(basedir, 'audio', '*'))]
    if len(audioFolders) != len(audioFormats):
        raise Exception('Some audio folders are not described in the audioFormats section of the config.yaml')
    print('Found formats: %s' % str(audioFolders))

    # check the audio formats are valid, in particular that they have a valid filelist.
    # Parsed filelists are kept so the groundtruth check below does not have
    # to re-read them for every groundtruth file.
    filelists = {}
    for fmt, desc in audioFormats.items():
        print("\nChecking format '%s':" % fmt)
        # TODO: at some point in the future we should also check for valid values in desc
        if not exists(join(basedir, 'audio', fmt)):
            raise Exception('%s is listed as an audio format, but doesn\'t appear in the audio/ folder' % fmt)

        if 'filelist' not in desc:
            raise Exception('Audio format "%s" does not define a filelist' % fmt)

        filelist = load_yaml(join(basedir, 'metadata', desc['filelist']))
        filelists[fmt] = filelist
        print(' filelist OK, lists %d files' % len(filelist))

        for filename in filelist.values():
            fullpath = join(basedir, 'audio', fmt, filename)
            if not exists(fullpath):
                raise Exception('For format "%s": file "%s" appears in filelist, however there is no corresponding "%s"'
                                % (fmt, filename, fullpath))

    # check that the groundtruth files do actually exist if they are specified
    print('\nChecking groundtruth files...')
    groundTruth = config.get('groundTruth', {})
    # list(...) keeps the printed form a plain list on Python 3 as well
    print('Found groundtruth files: %s' % str(list(groundTruth.keys())))

    for name, gtfile in groundTruth.items():
        print("\nChecking groundtruth '%s':" % name)
        gt = GroundTruth('')
        gt.load(join(basedir, 'metadata', gtfile))

        # check that the IDs used in the groundtruth files exist in all the filelists
        for afname in audioFormats:
            flist = filelists[afname]
            for gid in gt:
                if gid not in flist:
                    raise Exception("ID '%s' is in GroundTruth '%s', but could not be found in filelist for audio format '%s'"
                                    % (gid, gtfile, afname))

        print(' gt filelist OK, found classes: %s' % str(sorted(set(gt.values()))))