Пример #1
0
    def testRCA(self):
        """Check the 'rca' transformation with both ways of passing the groundtruth.

        The test database is cleaned, normalized and reduced with 'pca'
        (dimension 15) before 'rca' (dimension 10) is applied. The groundtruth
        is given first as a file name ('classFile'), then as an already loaded
        map ('classMap'); in both cases the 10 nearest neighbors of a reference
        track are compared with testdata.RCA_GENRE_RESULTS.
        """
        ds = testdata.loadTestDB()
        ds = transform(ds, 'removevl')
        ds = transform(ds, 'fixlength')
        ds = transform(ds, 'remove', { 'descriptorNames': '*cov' })
        ds = transform(ds, 'cleaner')
        ds = transform(ds, 'normalize')
        ds = transform(ds, 'pca', { 'resultName': 'pca15',
                                    'dimension': 15 })
        ds_rca = transform(ds, 'rca', { 'resultName': 'rca10',
                                        'dimension': 10,
                                        'classFile': testdata.RCA_GENRE_GT })

        v = View(ds_rca)
        dist = MetricFactory.create('euclidean', ds_rca.layout())
        self.compareResults(v.nnSearch('01 Cigarettes And Alcohol - Oasis.mp3', dist).get(10),
                            testdata.RCA_GENRE_RESULTS)

        # try by passing directly the groundtruth map
        import gaia2.fastyaml as yaml
        # read the groundtruth through a context manager so that the file
        # handle is closed as soon as its content has been read
        with open(testdata.RCA_GENRE_GT) as groundtruth_file:
            class_map = yaml.load(groundtruth_file.read())
        ds_rca = transform(ds, 'rca', { 'resultName': 'rca10',
                                        'dimension': 10,
                                        'classMap': class_map })

        v = View(ds_rca)
        dist = MetricFactory.create('euclidean', ds_rca.layout())
        self.compareResults(v.nnSearch('01 Cigarettes And Alcohol - Oasis.mp3', dist).get(10),
                            testdata.RCA_GENRE_RESULTS)
Пример #2
0
    def testKullbackLeibler(self):
        """Run a 'kullbackleibler' nn-search on a dataset built from renamed copies.

        The fixed-length test database is added `ncopy` times to a new dataset,
        and the distances of the first 6*ncopy results for the sample point are
        compared with reference values.
        """
        ds = transform(testdata.loadTestDB(), 'fixlength')

        # creates a test with more than 1000 points otherwise the test is useless because
        # we split the workload in chunks of 1000 points when computing the distance
        dstest = DataSet()
        ncopy = 20
        for cidx in range(ncopy):
            points = list(ds.points())
            for p in points:
                p.setName(p.name() + '-%d' % cidx)
            dstest.addPoints(points)

        # test whether KL doesn't break with multithreading (did in 2.2.1)
        v = View(dstest)
        dist = MetricFactory.create('kullbackleibler',
                                    dstest.layout(),
                                    { 'descriptorName': 'mfcc' })

        results = v.nnSearch(ds.samplePoint(), dist).get(6*ncopy)
        expected = [ 0.0 ]*2*ncopy + [ 6.1013755798339844 ]*ncopy
        expected += [ 6.4808731079101562 ]*2*ncopy + [ 6.7828292846679688 ]*ncopy

        # zip() stops at the shorter sequence: without this check a truncated
        # result list would make the loop below pass without comparing everything
        self.assertEqual(len(results), len(expected))

        for r, e in zip(results, expected):
            self.assertAlmostEqual(r[1], e, 5)
Пример #3
0
    def testAngleDistance(self):
        """Check 'CosineAngle' distances between points of the created dataset.

        The test also expects the distance involving 'p0' to raise, unless the
        metric is created with a 'defaultValue', which is then the result.
        """
        dataset = transform(createDataSet(), 'fixlength')
        angle = MetricFactory.create('CosineAngle', dataset.layout())

        # (first point, second point, expected distance)
        expectations = (
            ('p1', 'p1', 0.0),
            ('p1', 'p2', 0.5),
            ('p1', 'p3', 1.0),
            ('p1', 'p4', 0.25),
            ('p1', 'p5', 0.25),
            ('p4', 'p5', 0.5),
        )
        for first, second, expected in expectations:
            self.assertEqual(angle(dataset.point(first), dataset.point(second)),
                             expected)

        self.assertRaises(Exception, angle, dataset.point('p0'), dataset.point('p1'))

        angleWithDefault = MetricFactory.create('CosineAngle', dataset.layout(),
                                                { 'defaultValue': 0.5 })
        self.assertEqual(angleWithDefault(dataset.point('p0'), dataset.point('p1')), 0.5)
Пример #4
0
    def testExponentialCompress(self):
        """Check 'ExponentialCompress' wrapped around the euclidean distance."""
        dataset = transform(createDataSet(), 'fixlength')
        compress = MetricFactory.create('ExponentialCompress',
                                        dataset.layout(),
                                        { 'distance': 'euclidean' })

        def fromP1(name):
            # distance between 'p1' and the point called `name`
            return compress(dataset.point('p1'), dataset.point(name))

        self.assertEqual(fromP1('p1'), 0.0)
        # expected values are 1-exp(-1) and 1-exp(-2) respectively
        self.assertAlmostEqual(fromP1('p0'), 0.63212056)
        self.assertAlmostEqual(fromP1('p3'), 0.86466472)
Пример #5
0
def evaluate_1NN(dataset, groundTruth, distance, params=None):
    """Evaluate a classifier based on nearest-neighbor search over `dataset`.

    Args:
        dataset: dataset the View is built on; also passed to evaluate().
        groundTruth: groundtruth passed to evaluate().
        distance: name of the metric created through MetricFactory.
        params: optional parameters for the metric; an empty map when omitted.

    Returns:
        The result of evaluate() (bound to the name `confusion` in the original
        code — presumably a confusion matrix, to be confirmed against evaluate()).
    """
    from gaia2 import View, MetricFactory

    # A fresh dict per call: a `{}` default would be a single object shared by
    # every call that relies on the default.
    if params is None:
        params = {}

    view = View(dataset, MetricFactory.create(distance, dataset.layout(), params))

    def classifier(p):
        return search(dataset, view, p)

    return evaluate(dataset, classifier, groundTruth)
Пример #6
0
def evaluate_1NN(dataset, groundTruth, distance, params=None):
    """Evaluate a nearest-neighbor based classifier on `dataset`.

    A View using the metric named `distance` is created on the dataset, and
    evaluate() is called with a classifier that delegates to search().

    Args:
        dataset: dataset to search in and to evaluate on.
        groundTruth: groundtruth handed over to evaluate().
        distance: name of the metric to create with MetricFactory.
        params: optional metric parameters; defaults to an empty map.

    Returns:
        Whatever evaluate() returns (presumably a confusion matrix — confirm
        against the definition of evaluate()).
    """
    from gaia2 import View, MetricFactory

    # Avoid a mutable default argument: build the empty map at call time so it
    # can never be shared between calls.
    metricParams = {} if params is None else params
    metric = MetricFactory.create(distance, dataset.layout(), metricParams)
    view = View(dataset, metric)

    classifier = lambda p: search(dataset, view, p)

    confusion = evaluate(dataset, classifier, groundTruth)
    return confusion
Пример #7
0
def train_1NN(dataset, groundTruth, distance, params, dropBestResult=False):
    """Return a classifier that labels a point after one of its nearest neighbors.

    The classifier searches `dataset` with the metric named `distance` and
    returns, as a string, the `groundTruth` entry of the neighbor found.

    Args:
        dataset: dataset the search is performed in.
        groundTruth: mapping indexed by point name.
        distance: name of the metric created through MetricFactory.
        params: parameters for the metric.
        dropBestResult: when true, use the second search result instead of
            the first one.
    """
    view = View(dataset, MetricFactory.create(distance, dataset.layout(), params))

    # position of the neighbor to use among the 2 results requested
    if dropBestResult:
        neighborRank = 1
    else:
        neighborRank = 0

    def classify(p):
        neighbors = view.nnSearch(p).get(2)
        neighborId = neighbors[neighborRank][0]
        neighbor = dataset.point(neighborId)
        return str(groundTruth[neighbor.name()])

    return classify
Пример #8
0
def train_1NN(dataset, groundTruth, distance, params, dropBestResult = False):
    """Build a nearest-neighbor classifier over `dataset`.

    The returned callable takes a point, searches its neighbors using the
    metric named `distance` (created with `params`) and returns str() of the
    `groundTruth` value stored under the name of the selected neighbor: the
    first search result, or the second one when `dropBestResult` is true.
    """
    metric = MetricFactory.create(distance, dataset.layout(), params)
    view = View(dataset, metric)
    # 0 selects the best result, 1 skips it
    rank = int(bool(dropBestResult))

    def groundTruthOfNeighbor(p):
        pointId = view.nnSearch(p).get(2)[rank][0]
        return groundTruth[dataset.point(pointId).name()]

    return lambda p: str(groundTruthOfNeighbor(p))
Пример #9
0
    def testForceIdentity(self):
        """Check that 'forceidentity' returns 0 when a point is compared with itself.

        The wrapped 'cosinesimilarity' metric is created with a 'defaultValue'
        of 0.5, which is what it yields for the point used here. Wrapped in
        'forceidentity', the result is expected to be 0.0 for (p, p) and still
        0.5 for a copy of the point carrying another name.
        """
        l = PointLayout()
        l.add('a', RealType, FixedLength, 1)

        p = Point()
        p.setLayout(l)

        # assertEqual is used throughout: the assertEquals alias is deprecated
        # and no longer exists in Python 3.12
        cd = MetricFactory.create('cosinesimilarity', p.layout(), { 'defaultValue': 0.5 })
        self.assertEqual(cd(p, p), 0.5)

        ficd = MetricFactory.create('forceidentity',
                                    p.layout(),
                                    { 'distance': 'cosinesimilarity',
                                      'params': { 'defaultValue': 0.5 }
                                      })

        self.assertEqual(ficd(p, p), 0.0)

        p2 = Point(p)
        p2.setName('p2')
        self.assertEqual(ficd(p, p2), 0.5)
Пример #10
0
    def testParserStillInValidStateAfterParserError(self):
        '''ticket #20: parser is in invalid state after parser error

        Runs a valid filtered search, then one with a malformed filter, then
        the valid one again: the last search must still go through.
        '''
        ds = testdata.createSimpleDataSet()
        dist = MetricFactory.create('null', ds.layout())
        v = View(ds)

        v.nnSearch(ds.samplePoint(), dist, 'WHERE true').get(1)
        clause = 'WHERE label.tonal_key_mode.value = \\"major"'
        try:
            v.nnSearch(ds.samplePoint(), dist, clause).get(1)
        except Exception:
            # expected: the malformed filter fails to compile. Only ordinary
            # errors are swallowed, so that KeyboardInterrupt and SystemExit
            # still propagate.
            pass
        v.nnSearch(ds.samplePoint(), dist, 'WHERE true').get(1)
Пример #11
0
    def testParsedVsConstructedFilters(self):
        """Compare filters parsed from a string with filters built from objects.

        Each case is a (query point, filter string, filter object) triple; a
        search using the string and a search using the object are expected to
        have the same search space.
        """
        dataset = transform(testdata.loadTestDB(), 'fixlength')

        sample = dataset.samplePoint()
        track = dataset.point('Higher State of Consciousness.mp3')

        def bpm(operator, value):
            # filter on the tempotap_bpm descriptor
            return Filter('tempotap_bpm', operator, value)

        def inKey(key, mode):
            # filter matching both the given key and the given mode
            return AndFilter([
                Filter('key_key', '==', key),
                Filter('key_mode', '==', mode)
            ])

        cases = [
            (sample, '', ''),
            (track, '', ''),
            (track,
             'WHERE value.tempotap_bpm.value > 140',
             Filter('tempotap_bpm.value', '>', 140)),
            (sample, 'WHERE value.tempotap_bpm > 110', bpm('>', 110)),
            (sample, 'WHERE value.tempotap_bpm > -10', bpm('>', -10)),
            (sample, 'WHERE value.tempotap_bpm > 23000', bpm('>', 23000)),
            (sample,
             'WHERE value.tempotap_bpm > 120 AND value.tempotap_bpm < 130',
             AndFilter([bpm('>', 120), bpm('<', 130)])),
            (sample,
             'WHERE value.tempotap_bpm BETWEEN 130 AND 120',
             bpm('between', [130, 120])),
            (sample, 'WHERE label.key_key = "C"', Filter('key_key', '==', 'C')),
            (track,
             '''WHERE ((label.key_key = "A" AND label.key_mode = "major") OR
                                   (label.key_key = "E" AND label.key_mode = "minor"))
                                  AND value.tempotap_bpm < 90''',
             AndFilter([
                 OrFilter([inKey('A', 'major'), inKey('E', 'minor')]),
                 bpm('<', 90)
             ])),
        ]

        euclidean = MetricFactory.create('euclidean', dataset.layout(),
                                         {'descriptorNames': '*.mean'})
        view = View(dataset)

        for queryPoint, clause, constructed in cases:
            parsedSearch = view.nnSearch(queryPoint, euclidean, clause)
            constructedSearch = view.nnSearch(queryPoint, euclidean, constructed)
            self.assertEqualSearchSpace(parsedSearch, constructedSearch)
Пример #12
0
def testValidPoint(dataset, clause, fromList=None):
    """Tell whether `clause` is true for the sample point of `dataset`.

    The sample point is searched with the 'null' metric and a filter made of
    `clause` (preceded by a FROM part when `fromList` is given). Getting one
    result means the clause was true, getting none means it was false.

    Returns:
        True if the search returned exactly one result, False otherwise.
    """
    view = View(dataset)
    nullDistance = MetricFactory.create('null', dataset.layout())

    wherePart = 'WHERE ' + clause
    query = 'FROM ' + fromList + ' ' + wherePart if fromList else wherePart

    found = view.nnSearch(dataset.samplePoint(), nullDistance, query).get(1)
    return len(found) == 1
Пример #13
0
    def testWeightedPearson(self):
        """Check the 'WeightedPearson' distance of two points against a reference value."""
        dataset = testdata.createSimpleDataSet()
        # need to have 2 values before fixing length
        dataset.point('p')['a.1'] = [ 0, 0 ]
        first = transform(dataset, 'fixlength').point('p')
        second = Point(first)

        weights = { '1': 0.3, 'c': 0.7 }
        pearson = MetricFactory.create('WeightedPearson',
                                       first.layout(),
                                       { 'weights': weights })

        first['a.1'] = [ 0.12, 2.71 ]
        first['c'] = 4.32
        second['1'] = [ 0.46, 1.12 ]
        second['c'] = 2.4242

        distance = pearson(first, second)
        self.assertAlmostEqual(distance, 0.038222129799, 6)
Пример #14
0
    def testDeleteUnderlyingDataSet(self):
        """Search through a View after the local name of its dataset was deleted."""
        selection = {'descriptorNames': ['*.mean', '*.var']}

        dataset = testdata.loadTestDB()
        for transfoName in ('fixlength', 'cleaner', 'normalize'):
            dataset = transform(dataset, transfoName, selection)
        euclidean = MetricFactory.create('euclidean', dataset.layout(), selection)

        view = View(dataset)
        del dataset

        # The search used to be expected to raise here; this doesn't throw
        # anymore, as the View keeps a ref to the dataset
        view.nnSearch('01 Respect.mp3', euclidean)
Пример #15
0
    def testRhythmDistance(self):
        """Check the 'Rhythm' distance on descriptor 'a.1' for a few values."""
        first = testdata.createSimpleDataSet().point('p')
        second = Point(first)

        rhythmParams = { 'descriptorName': 'a.1',
                         'indexRange': [ 1, 2, 4, 8 ],
                         'alpha': 0.8 }
        rhythm = MetricFactory.create('Rhythm', first.layout(), rhythmParams)

        first['a.1'] = 3
        second['a.1'] = 2
        self.assertAlmostEqual(rhythm(first, first), 0.0)
        # the same value is expected whatever the order of the arguments
        for a, b in ((first, second), (second, first)):
            self.assertAlmostEqual(rhythm(a, b), 0.4)

        # change the value of the first point only and compare again
        for value, expected in ((3.14, 0.344), (6.23, 0.45312)):
            first['a.1'] = value
            self.assertAlmostEqual(rhythm(first, second), expected)
Пример #16
0
    def testSubspaceSearch(self):
        """Check that filtering inside a previous search result equals a combined filter.

        Searching with both conditions at once is compared with searching with
        one condition inside the result of a search using the other one.
        """
        dataset = transform(testdata.loadTestDB(), 'fixlength')
        euclidean = MetricFactory.create('euclidean', dataset.layout(),
                                         {'descriptorNames': '*.mean'})
        view = View(dataset)
        query = 'Higher State of Consciousness.mp3'

        isKeyA = 'WHERE label.key_key = "A"'
        isMinor = 'WHERE label.key_mode = "minor"'

        # reference: both conditions in a single filter
        combined = view.nnSearch(
            query, euclidean,
            'WHERE label.key_key = "A" AND label.key_mode = "minor"')
        onlyKeyA = view.nnSearch(query, euclidean, isKeyA)
        onlyMinor = view.nnSearch(query, euclidean, isMinor)

        # one condition applied within the result of the other one
        keyAWithinMinor = view.nnSearch(query, onlyMinor, euclidean, isKeyA)
        minorWithinKeyA = view.nnSearch(query, onlyKeyA, euclidean, isMinor)

        self.assertEqualSearchSpace(combined, keyAWithinMinor)
        self.assertEqualSearchSpace(combined, minorWithinKeyA)
Пример #17
0
    def testCreatedInputSpace(self):
        """Search inside input spaces obtained by removing or by adding points.

        One input space is a search result from which points were removed, the
        other one is an InputSpace filled explicitly; both are expected to give
        the same neighbors. thresholdLimit() is checked on the second one.
        """
        dataset = testdata.createSimpleDataSet()
        dataset.point('p')['a.1'] = 23.0

        for index in range(5):
            extra = Point()
            extra.setName('p%d' % index)
            extra.setLayout(dataset.originalLayout())
            extra['a.1'] = float(index)
            dataset.addPoint(extra)

        dataset = transform(dataset, 'fixlength')
        euclidean = MetricFactory.create('euclidean', dataset.layout())
        view = View(dataset)

        query = dataset.point('p')

        removedFrom = view.nnSearch(query, euclidean)
        removedFrom.removePoints(['p2', 'p4'])

        created = InputSpace()
        created.addPoints(dataset, ['p', 'p0', 'p1', 'p3'])

        withinRemoved = view.nnSearch(query, removedFrom, euclidean)
        withinCreated = view.nnSearch(query, created, euclidean)

        neighbors = (('p', 0.), ('p3', 20.), ('p1', 22.), ('p0', 23.))

        for inputSpace in (withinRemoved, withinCreated):
            self.assertEqual(neighbors,
                             view.nnSearch(query, inputSpace, euclidean).get(10))

        # test thresholdLimit method: each limit keeps the given number of
        # leading neighbors
        for limit, kept in ((10, 1), (20, 2), (22.01, 3)):
            limited = view.nnSearch(query, withinCreated, euclidean).thresholdLimit(limit)
            self.assertEqual(neighbors[:kept], limited.get(10))
Пример #18
0
    def testComplete(self):
        """Check that loading the Gaia 2.0 transformation history raises.

        The Gaia 2.0 dataset is loaded, then loading the Gaia 2.0 history into
        a TransfoChain is expected to fail.

        This test used to continue after an unconditional `return` with code
        that could never run: it mapped the dataset (and a point of the current
        test database) through the history and compared nn-search results with
        testdata.GAIA_20_BACKWARDS_COMPAT_RESULTS. That unreachable part has
        been removed, as the history it relies on cannot be loaded.
        """
        ds = DataSet()
        ds.load(testdata.GAIA_20_BACKWARDS_COMPAT_DATASET)

        h = TransfoChain()

        self.assertRaises(Exception, h.load, testdata.GAIA_20_BACKWARDS_COMPAT_HISTORY)
Пример #19
0
    def testSimple(self):
        """Check euclidean nn-search results on a few copies of a single point.

        Three copies of point 'p' are added: an unmodified one, one with 'a.1'
        set to 1, and one with both 'a.1' and 'a.2' set to 1.
        """
        dataset = testdata.createSimpleDataSet()

        # (name of the copy, descriptor values to overwrite)
        variants = (
            ('p2', ()),
            ('p3', (('a.1', 1),)),
            ('p4', (('a.1', 1), ('a.2', 1))),
        )
        copies = []
        for name, overrides in variants:
            duplicate = Point(dataset.point('p'))
            duplicate.setName(name)
            for descriptor, value in overrides:
                duplicate[descriptor] = value
            copies.append(duplicate)

        # add the copies only once all of them have been created
        for duplicate in copies:
            dataset.addPoint(duplicate)

        dataset = transform(dataset, 'fixlength')
        euclidean = MetricFactory.create('euclidean', dataset.layout())
        view = View(dataset)

        neighbors = view.nnSearch('p', euclidean).get(10)
        # the first two results are at distance 0, in any order
        for rank in (0, 1):
            self.assertEqual(neighbors[rank][1], 0.0)
        self.assertSearchResultEqual(neighbors[2], ('p3', 1.0))
        self.assertSearchResultEqual(neighbors[3], ('p4', math.sqrt(2)))
Пример #20
0
 def search(ds, p):
     """Return up to 5 euclidean nearest neighbors of `p` in `ds`.

     The point is first mapped through the history of the dataset.
     """
     mapped = ds.history().mapPoint(p)
     euclidean = MetricFactory.create('euclidean', ds.layout())
     neighbors = View(ds).nnSearch(mapped, euclidean)
     return neighbors.get(5)
Пример #21
0
def search(dataset, id, n):
    """Return up to `n` euclidean nearest neighbors of `id` in `dataset`."""
    view, euclidean = View(dataset), MetricFactory.create('euclidean', dataset.layout())
    neighbors = view.nnSearch(id, euclidean)
    return neighbors.get(n)
Пример #22
0
    def testRegressionIndexing(self):
        """Check that indexing a view does not change the search space of queries.

        Every query is run on a plain view and on a view indexed on
        'tempotap_bpm', 'key_key' and 'key_mode'; both are expected to give the
        same search space. This is done with standard views, then with frozen
        views.
        """
        dataset = testdata.loadTestDB()
        dataset = transform(dataset, 'removevl')
        dataset = transform(dataset, 'fixlength')

        sample = dataset.samplePoint().name()
        track = 'Higher State of Consciousness.mp3'

        # (id of the query point, filter)
        queries = [
            (sample, ''),
            (track, ''),
            (sample, 'WHERE (True AND True) and (true and TRUE)'),
            (sample, 'WHERE (false AND True) OR (true and false)'),
            (track, 'WHERE value.tempotap_bpm.value > 140'),
            (sample, 'WHERE true AND value.tempotap_bpm.value > 140'),
            (sample, 'WHERE value.tempotap_bpm > 110'),
            (sample, 'WHERE value.tempotap_bpm > -10'),
            (sample, 'WHERE value.tempotap_bpm < -10'),
            (sample, 'WHERE value.tempotap_bpm > 23000'),
            (sample, 'WHERE value.tempotap_bpm < 23000'),
            (sample, 'WHERE value.tempotap_bpm > 120 AND value.tempotap_bpm < 130'),
            (sample, 'WHERE value.tempotap_bpm BETWEEN 120 AND 130'),
            (sample, 'WHERE value.tempotap_bpm BETWEEN 130 AND 120'),
            (sample, 'WHERE value.tempotap_bpm BETWEEN 120 AND 120'),
            (sample, 'WHERE value.tempotap_bpm BETWEEN -2.3 AND 4096'),
            (sample, "WHERE value.tempotap_bpm BETWEEN -2.3 AND -1.4"),
            (sample, "WHERE value.tempotap_bpm BETWEEN 2048 AND 4096"),
            (sample, 'WHERE label.key_key = "C"'),
            (sample, 'WHERE label.key_key != "C"'),
            (sample, 'WHERE label.key_key = "X"'),
            (sample, 'WHERE label.key_key != "X"'),
            (sample, 'WHERE label.key_key != "C" AND label.key_mode != "major"'),
            (track,
             '''WHERE ((label.key_key = "A" AND label.key_mode = "major") OR
                                   (label.key_key = "E" AND label.key_mode = "minor"))
                                  AND value.tempotap_bpm < 90'''),
        ]

        def assertSameWithIndex(plainView, indexedView, metric):
            # index the second view, then run every query on both views
            for descriptor in ('tempotap_bpm', 'key_key', 'key_mode'):
                indexedView.indexOn(descriptor)

            for pointId, clause in queries:
                self.assertEqualSearchSpace(
                    plainView.nnSearch(pointId, metric, clause),
                    indexedView.nnSearch(pointId, metric, clause))

        # test with standard views
        euclidean = MetricFactory.create('euclidean', dataset.layout(),
                                         {'descriptorNames': '*.mean'})
        assertSameWithIndex(View(dataset), View(dataset), euclidean)

        # test with frozen views
        realDescriptors = dataset.layout().descriptorNames(RealType)
        onlyReal = transform(dataset, 'select',
                             {'descriptorNames': realDescriptors})
        normalized = transform(onlyReal, 'normalize')
        reduced = transform(normalized, 'pca',
                            {'resultName': 'pca', 'dimension': 25})

        frozen = FrozenDataSet()
        frozen.fromDataSet(reduced)
        frozen.setReferenceDataSet(dataset)
        frozenEuclidean = FrozenDistanceFactory.create('Euclidean',
                                                       frozen.layout(),
                                                       {'descriptorName': 'pca'})

        assertSameWithIndex(FrozenView(frozen), FrozenView(frozen),
                            frozenEuclidean)