def testMerge(self):
    """Merging two complementary selections of a dataset must be equivalent
    to selecting both descriptor groups at once, both for the merged dataset
    itself and for the Merge applier's mapDataSet."""
    #setDebugLevel(GAlgorithms)
    base = testdata.loadTestDB()
    meanView = transform(base, 'select', { 'descriptorNames': '*.mean' })
    varView = transform(base, 'select', { 'descriptorNames': '*.var' })
    bothView = transform(base, 'select', { 'descriptorNames': [ '*.mean', '*.var'] })
    merged = mergeDataSets(meanView, varView)

    # we need to do this because to add a Point we need it with the
    # original layout, not the FixedLength one
    testdata.resetSettings()
    originalDS = testdata.loadTestDB()
    refPoint = originalDS.samplePoint()

    # check that points can still be removed and re-added normally
    merged.removePoint(refPoint.name())
    merged.addPoint(refPoint)

    # the merged dataset's contents must match the direct double-select
    self.compareDataSets(bothView, merged)

    # the mapDataSet function of the Merge applier must reproduce the result
    remapped = merged.history().mapDataSet(originalDS)
    self.compareDataSets(bothView, remapped)
    self.compareDataSets(merged, remapped)
def select(self, dbfile, pca_covered_variance=75, highlevel=True): ''' dbfile: the path to the gaia dataset pca_covered_variance: the pca transofrmation should keep at least this variance highlevel:include high level descriptors ''' if not os.path.exists("transformed_dbs"): os.mkdir("transformed_dbs") prefix = dbfile[dbfile.rfind("/") + 1:dbfile.rfind(".")] print dbfile ds = gaia2.DataSet() ds.load(dbfile) cleaner = gaia2.AnalyzerFactory.create('cleaner') cleanDB = cleaner.analyze(ds).applyToDataSet(ds) if highlevel: to_remove = ['*.dmean2', '*.dvar2', '*.min', '*.max', '*cov'] else: to_remove = [ '.highlevel.*', '*.dmean2', '*.dvar2', '*.min', '*.max', '*cov' ] fselectDB = gaia2.transform(cleanDB, 'remove', {'descriptorNames': to_remove}) # NORMALIZE, PCA & Friends normalize = gaia2.AnalyzerFactory.create('normalize') normalizedDB = normalize.analyze(fselectDB).applyToDataSet(fselectDB) pcavar = gaia2.AnalyzerFactory.create( 'pca', { 'coveredVariance': pca_covered_variance, 'resultName': 'pca%dtransform' % pca_covered_variance }) pcaDB = pcavar.analyze(normalizedDB).applyToDataSet(normalizedDB) mfccDB = gaia2.transform( cleanDB, 'select', { 'descriptorNames': [ '*mfcc*', '.highlevel.*', '.rhythm.bpm', '.rhythm.onset_rate' ] }) finalDB = gaia2.mergeDataSets(mfccDB, pcaDB) outfile = "transformed_dbs/" + prefix + ".db" finalDB.save(outfile)
def addRCA(ds, groundTruth, dim, selectConfig=None):
    """Return ds merged with an RCA-transformed view of itself.

    ds: input dataset
    groundTruth: class labels used to build the RCA class map
    dim: target dimension of the RCA projection
    selectConfig: optional config dict for a preliminary 'select' transform
                  (default changed from a shared mutable {} to None; falsy
                  configs are skipped either way, so callers are unaffected)
    """
    #ds_rca = transform(ds, 'fixlength') # should be unnecessary
    ds_rca = ds
    if selectConfig:
        ds_rca = transform(ds_rca, 'select', selectConfig)

    ds_rca = transform(ds_rca, 'gaussianize')

    # if dimension is too high, we need to preprocess before with a PCA,
    # otherwise RCA doesn't work
    l = ds_rca.layout()
    descdim = l.descriptorLocation(l.descriptorNames()).dimension(RealType)
    if descdim > 80:
        ds_rca = transform(ds_rca, 'pca', { 'resultName': 'pca%d' % 80,
                                            'dimension': 80 })

    ds_rca = transform(ds_rca, 'rca', { 'resultName': 'rca%d' % dim,
                                        'dimension': dim,
                                        'classMap': pmap(groundTruth) })
    return mergeDataSets(ds, ds_rca)