def generateProject():
    parser = OptionParser(usage='%prog [options] groundtruth_file filelist_file project_file datasets_dir results_dir')

    options, args = parser.parse_args()

    try:
        groundtruth_file = args[0]
        filelist_file = args[1]
        project_file = args[2]
        datasets_dir = args[3]
        results_dir = args[4]
    except:
        parser.print_help()
        sys.exit(1)

    gt = yaml.load(open(groundtruth_file, 'r'))
    try:
        className = gt['className']
        groundTruth = gt['groundTruth']
    except:
        print groundtruth_file, "groundtruth file has incorrect format"
        sys.exit(2)

    fl = yaml.load(open(filelist_file, 'r'))

    gt_trackids = groundTruth.keys()
    fl_trackids = fl.keys()

    # check that there are no duplicate ids
    if len(gt_trackids) != len(set(gt_trackids)):
        print groundtruth_file, "contains duplicate track ids"
        sys.exit(3)

    if len(fl_trackids) != len(set(fl_trackids)):
        print filelist_file, "contains duplicate track ids"
        sys.exit(3)

    # check that the filelist is consistent with the groundtruth (no files missing)
    if set(gt_trackids) != set(fl_trackids):
        print "track ids found in", groundtruth_file, "are inconsistent with", filelist_file
        sys.exit(4)

    # write the project file
    with open(project_file, 'w') as pfile:
        pfile.write(PROJECT_TEMPLATE % {
            'className': className,
            'datasetsDirectory': abspath(datasets_dir),
            'resultsDirectory': abspath(results_dir),
            'filelist': abspath(filelist_file),
            'groundtruth': abspath(groundtruth_file)
        })

    print 'Successfully written', project_file

def testValues(self):
    collection = yaml.load(open(testdata.TEST_DATABASE_FILES, 'r').read())

    # prepend 'data/' to the filenames
    for pid, filename in collection.items():
        collection[pid] = 'data/' + filename

    cvar.verbose = False
    ds = DataSet.mergeFiles(collection)
    cvar.verbose = True

    self.assertAlmostEqual(ds.point('Panic-The Smiths.mp3').value('danceability'), 0.5691167712)
    self.assertAlmostEqual(ds.point('11 Go.mp3').value('energy.mean'), 0.0231081359)
    self.assertAlmostEqual(ds.point('03 The Chopper [Shy FX Remix].mp3').value('chords_number_rate'), 0.0551007539)
    self.assertEqual(ds.point('08 I Can\'t Dance - Genesis.mp3').label('key_key'), 'D#')
    self.assertEqual(ds.point('06 Booo!.mp3').label('chords_mode'), 'major')

    ds.save(testdata.TEST_DATABASE)

def testValues(self):
    collection = yaml.load(open(testdata.TEST_DATABASE_FILES, 'r').read())

    # prepend 'data/' to the filenames
    for pid, filename in list(collection.items()):
        collection[pid] = 'data/' + filename

    cvar.verbose = False
    ds = DataSet.mergeFiles(collection)
    cvar.verbose = True

    self.assertAlmostEqual(ds.point('Panic-The Smiths.mp3').value('danceability'), 0.5691167712)
    self.assertAlmostEqual(ds.point('11 Go.mp3').value('energy.mean'), 0.0231081359)
    self.assertAlmostEqual(ds.point('03 The Chopper [Shy FX Remix].mp3').value('chords_number_rate'), 0.0551007539)
    self.assertEqual(ds.point('08 I Can\'t Dance - Genesis.mp3').label('key_key'), 'D#')
    self.assertEqual(ds.point('06 Booo!.mp3').label('chords_mode'), 'major')

    ds.save(testdata.TEST_DATABASE)

def convertJsonToSig(filelist_file, result_filelist_file):
    fl = yaml.load(open(filelist_file, 'r'))

    result_fl = fl
    errors = []
    for trackid, json_file in fl.iteritems():
        try:
            data = json.load(open(json_file))

            # remove descriptors that would otherwise break gaia_fusion due to incompatible layouts
            if 'tags' in data['metadata']:
                del data['metadata']['tags']
            if 'sample_rate' in data['metadata']['audio_properties']:
                del data['metadata']['audio_properties']['sample_rate']

            sig_file = os.path.splitext(json_file)[0] + '.sig'
            yaml.dump(data, open(sig_file, 'w'))
            result_fl[trackid] = sig_file
        except:
            errors += [json_file]

    yaml.dump(result_fl, open(result_filelist_file, 'w'))

    print "Failed to convert", len(errors), "files:"
    for e in errors:
        print e

    return len(errors) == 0

def convertJsonToSig(filelist_file, result_filelist_file):
    fl = yaml.load(open(filelist_file, 'r'))

    result_fl = fl
    errors = []
    for trackid, json_file in fl.items():
        try:
            data = json.load(open(json_file))

            # remove descriptors that would otherwise break gaia_fusion due to incompatible layouts
            if 'tags' in data['metadata']:
                del data['metadata']['tags']
            if 'sample_rate' in data['metadata']['audio_properties']:
                del data['metadata']['audio_properties']['sample_rate']
            if 'lossless' in data['metadata']['audio_properties']:
                del data['metadata']['audio_properties']['lossless']

            sig_file = os.path.splitext(json_file)[0] + '.sig'
            yaml.safe_dump(data, open(sig_file, 'w'))
            result_fl[trackid] = sig_file
        except:
            errors += [json_file]

    yaml.dump(result_fl, open(result_filelist_file, 'w'))

    print("Failed to convert", len(errors), "files:")
    for e in errors:
        print(e)

    return len(errors) == 0

def load(self, filename):
    with open(filename) as f:
        data = yaml.load(f.read())

    # convert the data we just loaded into a defaultdict
    self.matrix = defaultdict(lambda: defaultdict(list))
    for k, v in data.items():
        self.matrix[k] = defaultdict(list, v)

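# Illustrative sketch of the two-level mapping the load() method above expects
# (the class names and track ids below are hypothetical); the nested defaultdicts
# make unknown keys resolve to empty lists.
from collections import defaultdict
import yaml

data = yaml.safe_load("""
classical:
  classical: [track1, track2]
  jazz: [track3]
jazz:
  jazz: [track4]
""")

matrix = defaultdict(lambda: defaultdict(list))
for k, v in data.items():
    matrix[k] = defaultdict(list, v)

print(matrix['classical']['jazz'])  # ['track3']
print(matrix['rock']['pop'])        # [] -- unseen keys default to an empty list
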
def __call__(self, *args, **kwargs):
    if kwargs:
        raise NotImplementedError('Cannot use keyword arguments with YamlRPC at the moment...')

    if VERBOSE:
        serializeStart = time.time()

    try:
        q = yaml.dump({'method': self.methodName,
                       'params': list(args),
                       'id': 'gloubi-boulga'})
    except:
        raise RuntimeError('Could not serialize YAML request; most likely one of the arguments could not be serialized:\n%s' % list(args))

    if VERBOSE:
        responseTime = time.time() - serializeStart
        print('serialized request in %f seconds' % responseTime)

    # we don't want the '+'-quoting
    params = urlencode({'q': q}).replace('+', ' ')
    headers = {'Content-type': 'application/x-www-form-urlencoded',
               'Accept': 'text/plain'}

    if VERBOSE:
        startTime = time.time()

    conn = http_client.HTTPConnection(self.endPoint)
    try:
        conn.request('POST', '/', params, headers)
    except Exception as e:
        raise RuntimeError('request failed', self.endPoint, self.methodName, args, e)

    response = conn.getresponse()

    if VERBOSE:
        responseTime = time.time() - startTime
        print('received answer in %f seconds' % responseTime)

    #print response.status, response.reason

    startParseTime = time.time()
    result = yaml.load(response.read())

    if VERBOSE:
        responseTime = time.time() - startParseTime
        print('parsed answer in %f seconds' % responseTime)
        responseTime = time.time() - serializeStart
        print('total time: %f seconds' % responseTime)

    if 'error' in result:
        raise RuntimeError(result['error']['message'])

    return result['result']

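# Illustrative sketch of the request payload built above, using a hypothetical
# method name and arguments: the call is serialized to a YAML document and
# form-encoded under a single 'q' parameter before being POSTed to the endpoint.
import yaml
try:
    from urllib.parse import urlencode   # Python 3
except ImportError:
    from urllib import urlencode         # Python 2

q = yaml.dump({'method': 'nnSearchById',        # hypothetical method name
               'params': ['track_42.mp3', 10],  # hypothetical arguments
               'id': 'gloubi-boulga'})

params = urlencode({'q': q}).replace('+', ' ')  # same '+'-unquoting as in __call__
print(params)
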
def generateProject(groundtruth_file, filelist_file, project_file, datasets_dir, results_dir):
    gt = yaml.load(open(groundtruth_file, 'r'))
    try:
        className = gt['className']
        groundTruth = gt['groundTruth']
    except:
        print(groundtruth_file, "groundtruth file has incorrect format")
        sys.exit(2)

    fl = yaml.load(open(filelist_file, 'r'))

    gt_trackids = list(groundTruth.keys())
    fl_trackids = list(fl.keys())

    # check that there are no duplicate ids
    if len(gt_trackids) != len(set(gt_trackids)):
        print(groundtruth_file, "contains duplicate track ids")
        sys.exit(3)

    if len(fl_trackids) != len(set(fl_trackids)):
        print(filelist_file, "contains duplicate track ids")
        sys.exit(3)

    # check that the filelist is consistent with the groundtruth (no files missing)
    if set(gt_trackids) != set(fl_trackids):
        print("track ids found in", groundtruth_file, "are inconsistent with", filelist_file)
        sys.exit(4)

    # write the project file
    with open(project_file, 'w') as pfile:
        pfile.write(PROJECT_TEMPLATE % {
            'className': className,
            'datasetsDirectory': abspath(datasets_dir),
            'resultsDirectory': abspath(results_dir),
            'filelist': abspath(filelist_file),
            'groundtruth': abspath(groundtruth_file)
        })

    print('Successfully written', project_file)

def __call__(self, *args, **kwargs):
    # pre-check for errors that can happen very often and where one good error message
    # would be really nice to have
    if (self.methodName.startswith('nnSearch') and
        self.methodName not in ('nnSearchById', 'nnSearchByIdWithFilter',
                                'nnSearchByExample', 'nnSearchByExampleWithFilter')):
        raise AttributeError('You need to use either nnSearchById{WithFilter} or nnSearchByExample{WithFilter}')

    # pre-processing for certain specific methods
    if self.methodName.startswith('nnSearchByExample'):
        args = (args[0].toBase64(),) + args[1:]

    # in the case of an nnSearch request, we shouldn't do the query immediately but rather
    # return a proxy object that allows queries to be chained using the search_space argument.
    # the actual query should only be resolved when the user calls the get() method on this
    # proxy object
    if self.methodName.startswith('nnSearch'):
        return ResultSet(self.endPoint, self.methodName, args, kwargs)

    # actual processing by the server
    result = YamlRPCMethod.__call__(self, *args, **kwargs)

    # post-processing for certain specific methods
    if self.methodName == 'layout':
        result = yaml.load(result)

    elif self.methodName == 'getPoint':
        try:
            import gaia2
        except ImportError:
            raise ImportError('You need to have the gaia2 python module installed in order to be able to retrieve single points')
        p = gaia2.Point()
        p.fromBase64(result)
        result = p

    elif self.methodName == 'getPoints':
        try:
            import gaia2
        except ImportError:
            raise ImportError('You need to have the gaia2 python module installed in order to be able to retrieve points')
        ds = gaia2.DataSet()
        ds.fromBase64(result)
        result = ds

    return result

def generateProject(groundtruth_file, filelist_file, project_file, datasets_dir, results_dir):
    gt = yaml.load(open(groundtruth_file, 'r'))
    try:
        className = gt['className']
        groundTruth = gt['groundTruth']
    except:
        print groundtruth_file, "groundtruth file has incorrect format"
        sys.exit(2)

    fl = yaml.load(open(filelist_file, 'r'))

    gt_trackids = groundTruth.keys()
    fl_trackids = fl.keys()

    # check that there are no duplicate ids
    if len(gt_trackids) != len(set(gt_trackids)):
        print groundtruth_file, "contains duplicate track ids"
        sys.exit(3)

    if len(fl_trackids) != len(set(fl_trackids)):
        print filelist_file, "contains duplicate track ids"
        sys.exit(3)

    # check that the filelist is consistent with the groundtruth (no files missing)
    if set(gt_trackids) != set(fl_trackids):
        print "track ids found in", groundtruth_file, "are inconsistent with", filelist_file
        sys.exit(4)

    # write the project file
    with open(project_file, 'w') as pfile:
        pfile.write(PROJECT_TEMPLATE % {
            'className': className,
            'datasetsDirectory': abspath(datasets_dir),
            'resultsDirectory': abspath(results_dir),
            'filelist': abspath(filelist_file),
            'groundtruth': abspath(groundtruth_file)
        })

    print 'Successfully written', project_file

def load(self, yamlfile):
    with open(yamlfile) as f:
        data = yaml.load(f.read())

    try:
        ver = data['version']
        tp = data['type']
    except:
        raise Exception("Groundtruth file should contain at least the 'version' and the 'type' fields")

    # only single-class v1.0 groundtruth files are supported
    if ver != 1.0 or tp != 'singleClass':
        raise Exception("Groundtruth can only load 'singleClass v1.0' files")

    self.className = data['className']
    self.clear()
    self.update(data['groundTruth'])

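# Illustrative sketch of a minimal single-class groundtruth file that the load()
# method above would accept; the class name, track ids and labels are made up,
# while the field names and the version/type values come from the code.
import yaml

groundtruth = {
    'version': 1.0,
    'type': 'singleClass',
    'className': 'genre',           # hypothetical class name
    'groundTruth': {
        'track_0001': 'classical',  # trackid -> label
        'track_0002': 'jazz',
    },
}

with open('groundtruth_example.yaml', 'w') as f:
    yaml.safe_dump(groundtruth, f)
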
def convertJsonToSig():
    parser = OptionParser(usage='%prog [options] filelist_file result_filelist_file\n' +
"""
Converts json files found in filelist_file into *.sig yaml files compatible
with Gaia. The result files are written to the same directory where the
original files were located.
""")

    options, args = parser.parse_args()

    try:
        filelist_file = args[0]
        result_filelist_file = args[1]
    except:
        parser.print_help()
        sys.exit(1)

    fl = yaml.load(open(filelist_file, 'r'))

    result_fl = fl
    errors = []
    for trackid, json_file in fl.iteritems():
        try:
            data = json.load(open(json_file))

            # remove descriptors that would otherwise break gaia_fusion due to incompatible layouts
            if 'tags' in data['metadata']:
                del data['metadata']['tags']
            if 'sample_rate' in data['metadata']['audio_properties']:
                del data['metadata']['audio_properties']['sample_rate']

            sig_file = os.path.splitext(json_file)[0] + '.sig'
            yaml.dump(data, open(sig_file, 'w'))
            result_fl[trackid] = sig_file
        except:
            errors += [json_file]

    yaml.dump(result_fl, open(result_filelist_file, 'w'))

    print "Failed to convert", len(errors), "files:"
    for e in errors:
        print e

    return len(errors)

def readResults(self, dir):
    """Reads all the result files contained in the given directory and generates
    the associated ConfusionMatrix for each one."""
    resultFiles = glob.glob(join(dir, '*.result'))
    progress = TextProgress(len(resultFiles))

    for i, filename in enumerate(resultFiles):
        cm = ConfusionMatrix()
        cm.load(filename)

        paramFile = splitext(filename)[0] + '.param'
        params = yaml.load(open(paramFile).read())

        self.results += [(filename, cm, params)]

        progress.update(i + 1)

def evaluateModels(extractor, resultsDir):
    if not isExecutable(extractor):
        print '%s does not seem to be an executable extractor... Exiting...' % extractor
        sys.exit(1)

    makedir(resultsDir)

    collections_file = join(gaia2.rootdir(), 'mtgdb', 'mtgdb_collections.yaml')
    collections = [c['name'] for c in yaml.load(open(collections_file))]

    # do all the evaluations
    for collection in collections:
        evaluateCollection(collection, extractor, resultsDir)

    # print a report of the evaluations
    for collection in collections:
        checkErrors(collection, resultsDir)

def get_essentia_versions(filelist):
    versions = set()
    for v in filelist.values():
        try:
            version = yaml.load(open(v)).get('metadata', {}).get('version', {}).get('essentia', {})
            if version:
                # keep only the base version, plus the beta suffix if there is one
                # (e.g. "2.1-beta2-dev" -> "2.1-beta2")
                parsed_version = version.split('-')
                essentia_version = parsed_version[0]
                if len(parsed_version) > 1 and parsed_version[1].startswith('beta'):
                    essentia_version += '-{}'.format(parsed_version[1])
                versions.add(essentia_version)
            else:
                versions.add('no_essentia_version_field')
        except IOError:
            print('Error retrieving the Essentia version of {}'.format(v))
    return versions

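# Illustrative driver for the helper above, with a hypothetical track id and file
# path; it assumes get_essentia_versions() is in scope and a PyYAML version that
# allows yaml.load() without an explicit Loader, as the function itself does.
import os
import tempfile
import yaml

sig = {'metadata': {'version': {'essentia': '2.1-beta2-dev'}}}
path = os.path.join(tempfile.mkdtemp(), 'track1.sig')
yaml.safe_dump(sig, open(path, 'w'))

filelist = {'track1': path}             # trackid -> descriptor file
print(get_essentia_versions(filelist))  # expected: {'2.1-beta2'}
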
def mergeAll(pointList, outputFilename, chunkSize, transfoFile, select=None, exclude=None):
    # TODO: validation of the yaml file format? (ie: pre-2.3 yaml files should be rejected)
    totalPoints = len(fastyaml.load(open(pointList).read()))
    begin, end = 0, chunkSize
    partfiles = []
    partfileTemplate = outputFilename + '_%d_%d.partdb'

    # keep this information for future reference as it won't be accessible anymore
    # once the dataset is merged
    excluded = []
    if exclude:
        try:
            p = gaia2.Point()
            p.load(list(gaia2.fastyaml.loadfile(pointList).items())[0][1])
            excluded = p.layout().descriptorNames(exclude)
        except:
            raise

    # merge each chunk separately
    # this includes removevl and fixlength, which should yield smaller files than just after
    # merging, so it should then be possible to load all of them together to merge them
    while begin < totalPoints:
        end = min(end, totalPoints)
        partfile = partfileTemplate % (begin, end)
        partfiles += [partfile]

        mergeChunk(pointList, partfile, transfoFile, begin, end, select, exclude)
        begin, end = end, end + chunkSize

        horizontalLine()

    # make sure all histories are the same, if not do whatever it takes to reach that point
    # also "simplify" the histories so that they are the minimum history representation required
    # to get to the layout of the final dataset
    print('Harmonizing chunks so that they all have the same layout & history...')
    vldescs, nandescs, rdescs = harmonizeChunks(partfiles)
    rdescs = rdescs | set(excluded)
    horizontalLine()

    # merge all those partfiles together
    print('Assembling full dataset together...')
    dstotal = DataSet()

    for pfile in partfiles:
        print('Merging partfile', pfile)
        ds = DataSet()
        ds.load(pfile)
        dstotal.appendDataSet(ds)

    dstotal.save(outputFilename)

    # print a nice informative summary of what has been done to the dataset
    horizontalLine()

    msg = '''
Final dataset information
-------------------------

Number of points: %s

Descriptors removed:
  - because they were of variable length: %s
  - because they were either constant, contained NaN or contained Inf: %s
  - because they were removed explicitly: %s

Your dataset has been saved at %s'''

    # remove leading dot
    vldescs = sorted(d[1:] for d in vldescs)
    nandescs = sorted(d[1:] for d in nandescs)
    rdescs = sorted(d[1:] for d in rdescs)

    print(msg % (str(dstotal.size()), ', '.join(vldescs), ', '.join(nandescs), ', '.join(rdescs), outputFilename))

    # clean up temporary files
    for pfile in partfiles:
        os.remove(pfile)
        os.remove(pfile + '.raw')

def testLibyaml1024CharLimit(self):
    l = PointLayout()
    l.add('a' * 2000, RealType)

    from gaia2 import fastyaml as yaml
    yaml.load(l.toYaml())

# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the Affero GNU General Public License
# version 3 along with this program. If not, see http://www.gnu.org/licenses/

import gaia2
import gaia2.fastyaml as yaml
import environment
import collection
from os.path import join


COLLECTIONS_FILE = join(gaia2.filedir(), 'mtgdb_collections.yaml')
ALL_MTGDB_COLLECTIONS = dict((c['name'], c) for c in yaml.load(open(COLLECTIONS_FILE).read()))


class MtgdbCollection(collection.Collection):
    """An mtgdb.MtgdbCollection instance is a collection on the MTG-DB server that has been
    classified as stable and may be accessed directly by its name."""

    def __init__(self, name, groundTruth=None):
        try:
            collection = ALL_MTGDB_COLLECTIONS[name]
        except KeyError:
            raise ValueError('Collection "%s" is not known by Gaia. Available collections are: %s'
                             % (name, ALL_MTGDB_COLLECTIONS.keys()))

        super(MtgdbCollection, self).__init__(join(environment.MTGDB_AUDIO, collection['location']),
                                              groundTruth)

        self.name = name
        self._properties = collection

def mergeAll(pointList, outputFilename, chunkSize, transfoFile, select=None, exclude=None):
    # TODO: validation of the yaml file format? (ie: pre-2.3 yaml files should be rejected)
    totalPoints = len(fastyaml.load(open(pointList).read()))
    begin, end = 0, chunkSize
    partfiles = []
    partfileTemplate = outputFilename + '_%d_%d.partdb'

    # keep this information for future reference as it won't be accessible anymore
    # once the dataset is merged
    excluded = []
    if exclude:
        try:
            p = gaia2.Point()
            p.load(gaia2.fastyaml.loadfile(pointList).items()[0][1])
            excluded = p.layout().descriptorNames(exclude)
        except:
            raise

    # merge each chunk separately
    # this includes removevl and fixlength, which should yield smaller files than just after
    # merging, so it should then be possible to load all of them together to merge them
    while begin < totalPoints:
        end = min(end, totalPoints)
        partfile = partfileTemplate % (begin, end)
        partfiles += [partfile]

        mergeChunk(pointList, partfile, transfoFile, begin, end, select, exclude)
        begin, end = end, end + chunkSize

        horizontalLine()

    # make sure all histories are the same, if not do whatever it takes to reach that point
    # also "simplify" the histories so that they are the minimum history representation required
    # to get to the layout of the final dataset
    print 'Harmonizing chunks so that they all have the same layout & history...'
    vldescs, nandescs, rdescs = harmonizeChunks(partfiles)
    rdescs = rdescs | set(excluded)
    horizontalLine()

    # merge all those partfiles together
    print 'Assembling full dataset together...'
    dstotal = DataSet()

    for pfile in partfiles:
        print 'Merging partfile', pfile
        ds = DataSet()
        ds.load(pfile)
        dstotal.appendDataSet(ds)

    dstotal.save(outputFilename)

    # print a nice informative summary of what has been done to the dataset
    horizontalLine()

    msg = '''
Final dataset information
-------------------------

Number of points: %s

Descriptors removed:
  - because they were of variable length: %s
  - because they were either constant, contained NaN or contained Inf: %s
  - because they were removed explicitly: %s

Your dataset has been saved at %s'''

    # remove leading dot
    vldescs = sorted(d[1:] for d in vldescs)
    nandescs = sorted(d[1:] for d in nandescs)
    rdescs = sorted(d[1:] for d in rdescs)

    print msg % (str(dstotal.size()), ', '.join(vldescs), ', '.join(nandescs), ', '.join(rdescs), outputFilename)

    # clean up temporary files
    for pfile in partfiles:
        os.remove(pfile)
        os.remove(pfile + '.raw')

def generate_project(groundtruth_file, filelist_file, project_file, datasets_dir, results_dir,
                     seed=None, cluster_mode=False, template=None, force_consistency=False):
    gt = yaml.load(open(groundtruth_file, 'r'))

    try:
        className = gt['className']
        groundTruth = gt['groundTruth']
    except:
        print(groundtruth_file, "groundtruth file has incorrect format")
        sys.exit(2)

    fl = yaml.load(open(filelist_file, 'r'))

    gt_trackids = list(groundTruth.keys())
    fl_trackids = list(fl.keys())

    # check that there are no duplicated ids
    if len(gt_trackids) != len(set(gt_trackids)):
        print(groundtruth_file, "contains duplicated track ids")
        sys.exit(3)

    if len(fl_trackids) != len(set(fl_trackids)):
        print(filelist_file, "contains duplicated track ids")
        sys.exit(3)

    # check that the filelist is consistent with the groundtruth (no files missing)
    if set(gt_trackids) != set(fl_trackids):
        print("track ids found in", groundtruth_file, "are inconsistent with", filelist_file)
        sys.exit(4)

    if force_consistency:
        print('Checking Essentia version in the descriptor files to ensure consistency...')
        versions = get_essentia_versions(fl)

        if len(versions) > 1:
            raise Exception("Couldn't find a unique Essentia version in the dataset. "
                            "This exception is thrown because you are using the flag `force-consistency`")
        print('ok!')

    if not template:
        print('No classification project template specified.')
        essentia_version = DEFAULT_VERSION

        if not force_consistency:
            print('Analyzing the dataset to figure out which project template file to use...')
            versions = get_essentia_versions(fl)

        if len(versions) == 1:
            essentia_version = list(versions)[0]
        else:
            print("Couldn't find a unique essentia version in the dataset.")

        template_version = VERSION_MAP.get(essentia_version, DEFAULT_VERSION)

        print('Using classification project template "{}"'.format(template_version))
        template = 'classification_project_template_{}.yaml'.format(template_version)

    project_template = open(join(filedir(), template)).read()

    # if no seed is specified, use the current clock value
    if seed is None:
        import time
        seed = time.time()

    # write the project file
    with open(project_file, 'w') as pfile:
        pfile.write(project_template % {
            'className': className,
            'datasetsDirectory': abspath(datasets_dir),
            'resultsDirectory': abspath(results_dir),
            'filelist': abspath(filelist_file),
            'groundtruth': abspath(groundtruth_file),
            'seed': seed,
            'clusterMode': cluster_mode
        })

    print('Successfully written', project_file)

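# Hypothetical invocation of generate_project() with illustrative paths; with
# seed=None the current clock value is used, and the project template is picked
# automatically from the Essentia versions found in the descriptor files.
generate_project('groundtruth.yaml', 'filelist.yaml', 'project.yaml',
                 'datasets/', 'results/',
                 seed=None, cluster_mode=False, force_consistency=False)
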
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the Affero GNU General Public License
# version 3 along with this program. If not, see http://www.gnu.org/licenses/

from __future__ import absolute_import
import gaia2
import gaia2.fastyaml as yaml
from . import environment
from . import collection
from os.path import join


COLLECTIONS_FILE = join(gaia2.filedir(), 'mtgdb_collections.yaml')
ALL_MTGDB_COLLECTIONS = dict((c['name'], c) for c in yaml.load(open(COLLECTIONS_FILE).read()))


class MtgdbCollection(collection.Collection):
    """An mtgdb.MtgdbCollection instance is a collection on the MTG-DB server that has been
    classified as stable and may be accessed directly by its name."""

    def __init__(self, name, groundTruth=None):
        try:
            collection = ALL_MTGDB_COLLECTIONS[name]
        except KeyError:
            raise ValueError('Collection "%s" is not known by Gaia. Available collections are: %s'
                             % (name, list(ALL_MTGDB_COLLECTIONS.keys())))

        super(MtgdbCollection, self).__init__(join(environment.MTGDB_AUDIO, collection['location']),
                                              groundTruth)

        self.name = name
        self._properties = collection
