def __init__(self, dirpath, groundTruth=None):
    """Load a collection rooted at *dirpath*.

    Reads the collection's YAML config, builds the per-format map of
    absolute audio file paths, then loads the ground truth (if any).
    """
    # The collection name is the last component of its directory path.
    self.name = split(dirpath)[-1]
    self._basepath = dirpath
    self._config = yaml.loadfile(self.configFilePath())
    self._files = {}
    # One entry per configured audio format. Loop variable renamed so it
    # no longer shadows the builtin `type`; the `list()` wrapper around
    # .items() was unnecessary for plain iteration.
    for fmt, props in self._config['audioFormats'].items():
        self._files[fmt] = self.absolutePathFiles(fmt, props['filelist'])
    self.loadGroundTruth(groundTruth)
def relativePathFiles(self, audioFormat=None):
    """Return the id -> relative-path map for *audioFormat*.

    When no format is given, fall back to the first configured one,
    warning if more than one was available.
    """
    if audioFormat is None:
        # dict.keys() is a non-indexable view on Python 3 — materialize
        # it before taking element 0. (Original py2 code indexed it
        # directly, which breaks on py3.)
        audioFormats = list(self._config["audioFormats"].keys())
        audioFormat = audioFormats[0]
        if len(audioFormats) > 1:
            # print() function for Python 3 compatibility, consistent
            # with the rest of the file.
            print("WARNING: only taking audio format: %s out of: %s" % (audioFormat, audioFormats))
    filelist = self._config["audioFormats"][audioFormat]["filelist"]
    return yaml.loadfile(join(self._basepath, "metadata", filelist))
def __init__(self, dirpath, groundTruth=None):
    """Load a collection rooted at *dirpath*.

    Reads the collection's YAML config, builds the per-format map of
    absolute audio file paths, then loads the ground truth (if any).
    """
    # The collection name is the last component of its directory path.
    self.name = split(dirpath)[-1]
    self._basepath = dirpath
    self._config = yaml.loadfile(self.configFilePath())
    self._files = {}
    # Loop variable renamed so it no longer shadows the builtin `type`.
    for fmt, props in self._config["audioFormats"].items():
        self._files[fmt] = self.absolutePathFiles(fmt, props["filelist"])
    self.loadGroundTruth(groundTruth)
def relativePathFiles(self, audioFormat=None):
    """Return the mapping of file ids to collection-relative audio paths
    for *audioFormat* (defaults to the first configured format)."""
    formats_cfg = self._config['audioFormats']
    if audioFormat is None:
        available = list(formats_cfg.keys())
        # Default to the first configured format; warn when the choice
        # was ambiguous.
        audioFormat = available[0]
        if len(available) > 1:
            print('WARNING: only taking audio format: %s out of: %s' % (audioFormat, available))
    return yaml.loadfile(join(self._basepath, 'metadata', formats_cfg[audioFormat]['filelist']))
def trainSVMfolds(collections, folds):
    """Train one SVM model per fold for every collection, reusing the
    best parameter set previously found for that collection. Models that
    already exist on disk are skipped."""
    gaia2.verbose = False
    for cname, collec in collections.items():
        print('Training models for all folds for collection', cname)
        # Load the best parameters found for this collection.
        paramfile = glob.glob('test/evaldata/essentia_svm_models/%s*.param' % CN(collec))[0]
        params = yaml.loadfile(paramfile)['model']
        if params.pop('classifier') != 'svm':
            raise Exception('Can only use this script on SVM config parameters.')
        preproc = params.pop('preprocessing')
        # Load the original preprocessed dataset.
        ds = DataSet()
        ds.load('%s/%s-harm-%s.db' % (WORK_DIR, CN(collec), preproc))
        # Prefix the descriptor with 'highlevel.' — this is how it will
        # appear in the final Essentia sigfile.
        gt = collec.groundTruth.copy()
        gt.className = 'highlevel.' + CN(collec)
        # Create and train datasets for all folds.
        print('Training for model:', gt.className)
        for fold_idx, fold in enumerate(folds[cname]):
            print(' - fold', fold_idx)
            model_filename = '%s/%s_%d.model' % (WORK_DIR, CN(collec), fold_idx)
            if os.path.exists(model_filename):
                print('already computed')
                continue
            # Training set = full dataset minus the points in this fold.
            dsf = ds.copy()
            held_out = set(fold) & set(ds.pointNames())
            dsf.removePoints(list(held_out))
            # Train the SVM and persist the resulting model.
            model = trainSVM(dsf, gt, **params)
            model.save(model_filename)
def trainSVMfolds(collections, folds):
    """Train one SVM model per fold for every collection, reusing the
    best parameter set previously found for that collection. Models that
    already exist on disk are skipped.

    Python 2 `print` statements converted to print() calls for
    consistency with the Python 3 code elsewhere in this file.
    """
    gaia2.verbose = False
    for cname in collections.keys():
        collec = collections[cname]
        print('Training models for all folds for collection', cname)
        # load best parameters
        filename = glob.glob('test/evaldata/essentia_svm_models/%s*.param' % CN(collec))[0]
        params = yaml.loadfile(filename)['model']
        if params.pop('classifier') != 'svm':
            raise Exception('Can only use this script on SVM config parameters.')
        preproc = params.pop('preprocessing')
        # load original preprocessed dataset
        ds = DataSet()
        ds.load('%s/%s-harm-%s.db' % (WORK_DIR, CN(collec), preproc))
        # add 'highlevel.' in front of the descriptor, this is what will appear in the final Essentia sigfile
        gt = collec.groundTruth.copy()
        gt.className = 'highlevel.' + CN(collec)
        # create and train datasets for all folds
        print('Training for model:', gt.className)
        for i, fold in enumerate(folds[cname]):
            print(' - fold', i)
            model_filename = '%s/%s_%d.model' % (WORK_DIR, CN(collec), i)
            if os.path.exists(model_filename):
                print('already computed')
                continue
            # remove points from the fold to have a training dataset
            dsf = ds.copy()
            toRemove = set(fold) & set(ds.pointNames())
            dsf.removePoints(list(toRemove))
            # train SVM
            h = trainSVM(dsf, gt, **params)
            h.save(model_filename)
def absolutePathFiles(self, audioFormat, filelist):
    """Returns a map with the given list of audio files with their path expanded."""
    flist = yaml.loadfile(join(self._basepath, 'metadata', filelist))
    # Hoist the loop-invariant directory lookup out of the comprehension.
    # NOTE(review): assumes audioDirectory() is side-effect free — confirm.
    audio_dir = self.audioDirectory(audioFormat)
    # Dict comprehension instead of dict(genexp); key renamed so it no
    # longer shadows the builtin `id`.
    return {fid: join(audio_dir, fpath) for fid, fpath in flist.items()}
# NOTE(review): this save call looks like the tail of a preceding function
# definition collapsed out of view — confirm its enclosing scope and
# indentation before relying on this formatting.
ds_harm_proc.save(ds_harm_filename)

if __name__ == '__main__':
    c = loadCollections()
    # Ensure the working directory exists; an OSError here presumably
    # means it already does — TODO confirm no other failure is expected.
    try:
        os.mkdir(WORK_DIR)
    except OSError:
        pass
    # need to do some prep work before to harmonize all datasets layouts. This won't be
    # necessary anymore in the future when all is nicely generated with a single coherent
    # script, but at the moment we have to work with the data we have...
    harmonizeDatasets(c)
    cachedFolds = False
    foldsFile = '%s/folds.yaml' % WORK_DIR
    # Reuse cached folds when present; otherwise generate and cache them.
    if os.path.exists(foldsFile):
        folds = yaml.loadfile(foldsFile)
    else:
        print('Generating folds for all collections...')
        folds = generateFolds(c, NFOLDS)
        yaml.dump(folds, open(foldsFile, 'w'))
    print('Training SVM models for their corresponding folds...')
    trainSVMfolds(c, folds)
    print('Generating the evaluation datasets from the models...')
    generateEvaluationDatasets(c, folds)
def absolutePathFiles(self, audioFormat, filelist):
    """Returns a map with the given list of audio files with their path expanded."""
    relative = yaml.loadfile(join(self._basepath, "metadata", filelist))
    # Build the id -> absolute-path map entry by entry.
    result = {}
    for file_id, fpath in relative.items():
        result[file_id] = join(self.audioDirectory(audioFormat), fpath)
    return result
if __name__ == '__main__':
    # Entry point: harmonize datasets, build (or reload) the folds, train
    # the per-fold SVM models, then generate the evaluation datasets.
    # Python 2 `print` statements converted to print() calls for
    # consistency with the Python 3 code elsewhere in this file.
    c = loadCollections()
    try:
        os.mkdir(WORK_DIR)
    except OSError:
        pass  # working directory already exists
    # need to do some prep work before to harmonize all datasets layouts. This won't be
    # necessary anymore in the future when all is nicely generated with a single coherent
    # script, but at the moment we have to work with the data we have...
    harmonizeDatasets(c)
    cachedFolds = False
    foldsFile = '%s/folds.yaml' % WORK_DIR
    # Reuse cached folds when present; otherwise generate and cache them.
    if os.path.exists(foldsFile):
        folds = yaml.loadfile(foldsFile)
    else:
        print('Generating folds for all collections...')
        folds = generateFolds(c, NFOLDS)
        # `with` block fixes the unclosed file handle of the original.
        with open(foldsFile, 'w') as f:
            yaml.dump(folds, f)
    print('Training SVM models for their corresponding folds...')
    trainSVMfolds(c, folds)
    print('Generating the evaluation datasets from the models...')
    generateEvaluationDatasets(c, folds)