Пример #1
0
    def testMerge(self):
        """Verify that merging two 'select'-ed halves of a dataset is
        equivalent to a single 'select' of both descriptor groups, and
        that the merged dataset still accepts points and remaps datasets."""
        #setDebugLevel(GAlgorithms)
        base = testdata.loadTestDB()

        # Two disjoint views plus the combined reference view.
        means_only = transform(base, 'select', { 'descriptorNames': '*.mean' })
        vars_only  = transform(base, 'select', { 'descriptorNames': '*.var' })
        reference  = transform(base, 'select', { 'descriptorNames': [ '*.mean', '*.var'] })

        merged = mergeDataSets(means_only, vars_only)

        # Reload with reset settings: adding a Point requires the original
        # layout, not the FixedLength one.
        testdata.resetSettings()
        original = testdata.loadTestDB()
        point = original.samplePoint()

        # The merged dataset must still support normal point removal/insertion.
        merged.removePoint(point.name())
        merged.addPoint(point)

        # Contents of the merged dataset must match the direct combined select.
        self.compareDataSets(reference, merged)

        # Exercise the mapDataSet function of the Merge applier.
        remapped = merged.history().mapDataSet(original)

        self.compareDataSets(reference, remapped)
        self.compareDataSets(merged, remapped)
Пример #2
0
    def select(self, dbfile, pca_covered_variance=75, highlevel=True):
        '''
        Run the standard clean / feature-select / normalize / PCA pipeline on a
        gaia dataset, merge the PCA result with selected raw descriptors, and
        save it under "transformed_dbs/<basename>.db".

        dbfile: the path to the gaia dataset
        pca_covered_variance: the pca transformation should keep at least this variance
        highlevel: include high level descriptors
        '''
        if not os.path.exists("transformed_dbs"):
            os.mkdir("transformed_dbs")
        # Output name = input file name without directory or extension.
        # (os.path handles extension-less names and non-"/" separators, unlike
        # manual rfind slicing which truncated a char when no "." was present.)
        prefix = os.path.splitext(os.path.basename(dbfile))[0]
        print(dbfile)
        ds = gaia2.DataSet()
        ds.load(dbfile)
        cleaner = gaia2.AnalyzerFactory.create('cleaner')
        cleanDB = cleaner.analyze(ds).applyToDataSet(ds)

        # Drop noisy/redundant statistics; additionally drop all highlevel
        # descriptors when they were not requested.
        if highlevel:
            to_remove = ['*.dmean2', '*.dvar2', '*.min', '*.max', '*cov']
        else:
            to_remove = [
                '.highlevel.*', '*.dmean2', '*.dvar2', '*.min', '*.max', '*cov'
            ]

        fselectDB = gaia2.transform(cleanDB, 'remove',
                                    {'descriptorNames': to_remove})

        # NORMALIZE, PCA & Friends
        normalize = gaia2.AnalyzerFactory.create('normalize')
        normalizedDB = normalize.analyze(fselectDB).applyToDataSet(fselectDB)

        pcavar = gaia2.AnalyzerFactory.create(
            'pca', {
                'coveredVariance': pca_covered_variance,
                'resultName': 'pca%dtransform' % pca_covered_variance
            })
        pcaDB = pcavar.analyze(normalizedDB).applyToDataSet(normalizedDB)

        # Keep a few raw descriptors (mfcc, highlevel, rhythm) alongside the
        # PCA-reduced space; note these come from cleanDB, i.e. pre-normalization.
        mfccDB = gaia2.transform(
            cleanDB, 'select', {
                'descriptorNames': [
                    '*mfcc*', '.highlevel.*', '.rhythm.bpm',
                    '.rhythm.onset_rate'
                ]
            })

        finalDB = gaia2.mergeDataSets(mfccDB, pcaDB)
        outfile = "transformed_dbs/" + prefix + ".db"
        finalDB.save(outfile)
Пример #3
0
def addRCA(ds, groundTruth, dim, selectConfig=None, maxInputDim=80):
    """Merge an RCA-reduced view of *ds* back into *ds*.

    ds: the source DataSet.
    groundTruth: class labels; converted with pmap() into the RCA class map.
    dim: target dimension of the RCA projection.
    selectConfig: optional config dict for a preliminary 'select' transform
        (None/empty skips it; defaulting to None avoids the shared
        mutable-default-argument pitfall of the previous `selectConfig={}`).
    maxInputDim: if the dataset has more real-valued dimensions than this,
        a PCA down to maxInputDim is applied first (default 80, the
        previously hard-coded limit).
    """
    #ds_rca = transform(ds, 'fixlength') # should be unnecessary
    ds_rca = ds
    if selectConfig:
        ds_rca = transform(ds_rca, 'select', selectConfig)

    ds_rca = transform(ds_rca, 'gaussianize')

    # If the dimension is too high we need to preprocess with a PCA first,
    # otherwise RCA doesn't work.
    layout = ds_rca.layout()
    descdim = layout.descriptorLocation(layout.descriptorNames()).dimension(RealType)
    if descdim > maxInputDim:
        ds_rca = transform(ds_rca, 'pca', { 'resultName': 'pca%d' % maxInputDim,
                                            'dimension': maxInputDim })

    ds_rca = transform(ds_rca, 'rca', { 'resultName': 'rca%d' % dim,
                                        'dimension': dim,
                                        'classMap': pmap(groundTruth) })

    return mergeDataSets(ds, ds_rca)