Example #1
    def search_dataset(self, query_point, number_of_results, preset_name):
        preset_name = str(preset_name)
        query_point = str(query_point)
        logger.debug("NN search for point with name %s (preset = %s)" % (query_point, preset_name))
        size = self.original_dataset.size()
        if size < SIMILARITY_MINIMUM_POINTS:
            msg = "Not enough datapoints in the dataset (%s < %s)." % (size, SIMILARITY_MINIMUM_POINTS)
            logger.debug(msg)
            return {"error": True, "result": msg}

        if query_point.endswith(".yaml"):
            # The point doesn't exist in the dataset, so build a temporary
            # point, apply the dataset's transformation history to it, and
            # search with the mapped point.
            p = Point()
            p.load(query_point)
            p1 = self.original_dataset.history().mapPoint(p)
            similar_sounds = self.view.nnSearch(p1, self.metrics[preset_name]).get(int(number_of_results))
        else:
            if not self.original_dataset.contains(query_point):
                msg = "Sound with id %s doesn't exist in the dataset." % query_point
                logger.debug(msg)
                return {"error": True, "result": msg}
                # raise Exception("Sound with id %s doesn't exist in the dataset." % query_point)

            similar_sounds = self.view.nnSearch(query_point, self.metrics[preset_name]).get(int(number_of_results))

        return {"error": False, "result": similar_sounds}
Example #2
    def add_point(self, point_location, point_name):
        if self.original_dataset.contains(str(point_name)):
            self.original_dataset.removePoint(str(point_name))
        try:
            p = Point()
            p.load(str(point_location))
            p.setName(str(point_name))
            self.original_dataset.addPoint(p)
            size = self.original_dataset.size()
            logger.debug("Added point with name %s. Index has now %i points." % (str(point_name), size))
        except Exception as e:
            msg = "Point with name %s could NOT be added (%s). Index has now %i points." % (str(point_name), str(e), self.original_dataset.size())
            logger.debug(msg)
            return {"error": True, "result": msg}

        # If adding this point brings us up to the minimum number of points required for similarity,
        # prepare and normalize the dataset, save the index, and create the view and distance metrics.
        #   This will almost never happen: only the first time the similarity server starts, when
        #   there is no index yet and the first SIMILARITY_MINIMUM_POINTS points are added.
        if size == SIMILARITY_MINIMUM_POINTS:
            self.__prepare_original_dataset()
            self.__normalize_original_dataset()
            self.save_index(msg="(reaching 2000 points)")

            # build metrics for the different similarity presets
            self.__build_metrics()
            # create view
            self.view = View(self.original_dataset)

        return {"error": False, "result": True}
Example #3
    def testEnumerateKey(self):
        db = testdata.loadTestDB()

        testdata.useEnumerate = True
        dbe = testdata.loadTestDB()

        # also make sure we can map single points correctly;
        # we load the point separately instead of taking it from the dataset
        # to ensure it has a different enum map
        p = Point()
        p.load('data/dataset_small/Vocal and Acapella/04 Blue Skies.mp3.sig')
        print(p.name())

        # also create a transform that forwards enums after the enumerate transform
        dbes = transform(dbe, 'select', { 'descriptorNames': '*key*' })
        pe = dbes.history().mapPoint(p)

        self.assertEqual(p['key_mode'], pe['key_mode'])
        self.assertEqual(p['key_key'],  pe['key_key'])

        self.assertNotEqual(db.layout(), dbe.layout())

        for p in db.points():
            pe = dbe.point(p.name())

            self.assertEqual(p.label('key_key'),
                             pe.label('key_key'))

            self.assertEqual(p.label('key_mode'),
                             pe.label('key_mode'))
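For context, the testdata.useEnumerate flag makes loadTestDB() return a dataset with Gaia's 'enumerate' transform applied, which (as the assertions above rely on) replaces string descriptors such as key_key and key_mode with enumerated values while preserving their labels. A minimal sketch of applying that transform directly, with a hypothetical dataset file:

    from gaia2 import DataSet, transform

    ds = DataSet()
    ds.load('test.db')  # hypothetical dataset file
    # Enumerate all key-related string descriptors, matching the test above.
    dse = transform(ds, 'enumerate', {'descriptorNames': ['*key*']})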
Example #4
    def testSecondChanceForLayoutEquality(self):
        '''ticket #21: points try to morph to adapt to dataset if they cannot be naturally inserted'''
        ds = DataSet()
        p = Point()

        p.setName('Paris Hilton')
        p.load('data/04 - Cansei de Ser Sexy - Meeting Paris Hilton.mp3.sig')
        ds.addPoint(p)

        p.setName('2005')
        p.load('data/11_2005-fwyh.mp3.sig')
        ds.addPoint(p)

        self.assertEqual(ds.point('2005')['title'], '2005')
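Two things make this test work: load() replaces the Point's contents in place (and the dataset keeps its own copy from addPoint(), so 'Paris Hilton' is unaffected by the second load), and, presumably, the second file's layout differs from the dataset's, so the point only gets in because it is morphed to match the dataset layout, which is exactly the "second chance" behaviour that ticket #21 asks for.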
Example #5
    def add_point(self, point_location, point_name):

        if self.original_dataset.contains(str(point_name)):
            self.original_dataset.removePoint(str(point_name))

        p = Point()
        if os.path.exists(str(point_location)):
            try:
                p.load(str(point_location))
                p.setName(str(point_name))
                if self.original_dataset.size() <= sim_settings.SIMILARITY_MINIMUM_POINTS:
                    # Add point to original_dataset because PCA dataset has not been created yet
                    self.original_dataset.addPoint(p)
                    msg = 'Added point with name %s. Index has now %i points.' % \
                          (str(point_name), self.original_dataset.size())
                    logger.info(msg)
                else:
                    # Add point to PCA dataset because it has been already created.
                    # PCA dataset will take care of adding the point to the original dataset as well.
                    self.pca_dataset.addPoint(p)
                    msg = 'Added point with name %s. Index has now %i points (pca index has %i points).' % \
                          (str(point_name), self.original_dataset.size(), self.pca_dataset.size())
                    logger.info(msg)

            except Exception as e:
                msg = 'Point with name %s could NOT be added (%s).' % (
                    str(point_name), str(e))
                logger.info(msg)
                return {
                    'error': True,
                    'result': msg,
                    'status_code': sim_settings.SERVER_ERROR_CODE
                }
        else:
            msg = 'Point with name %s could NOT be added because analysis file does not exist (%s).' % \
                  (str(point_name), str(point_location))
            logger.info(msg)
            return {
                'error': True,
                'result': msg,
                'status_code': sim_settings.SERVER_ERROR_CODE
            }

        if self.original_dataset.size() == sim_settings.SIMILARITY_MINIMUM_POINTS:
            # Do enumerate
            try:
                self.original_dataset = transform(
                    self.original_dataset, 'enumerate',
                    {'descriptorNames': ['.tonal.chords_progression']})
            except Exception:  # TODO: exception still too broad here...
                logger.info(
                    'WARNING: enumerate transformation to .tonal.chords_progression could not be performed.'
                )

        # If adding this point brings us up to the minimum number of points required for similarity,
        # get the dataset ready for search: prepare the dataset, normalize it, save it, and create the
        # view and distance metrics. This will only happen once, when the size of the dataset reaches
        # SIMILARITY_MINIMUM_POINTS.
        if self.original_dataset.size() == sim_settings.SIMILARITY_MINIMUM_POINTS and not self.indexing_only_mode:
            self.__prepare_original_dataset()
            self.__normalize_original_dataset()
            self.transformations_history = self.original_dataset.history().toPython()
            self.save_index(msg="(reaching %i points)" %
                            sim_settings.SIMILARITY_MINIMUM_POINTS)

            # TODO: the code below is repeated from __load_dataset() method, should be moved into a util function
            # Build metrics for the different similarity presets, create a Gaia view
            self.__build_metrics()
            self.view = View(self.original_dataset)

            # Compute PCA and create pca view and metric
            # NOTE: this step may take a long time if the dataset is big, but it only needs to be performed once
            # when the similarity server is loaded.
            self.pca_dataset = transform(
                self.original_dataset, 'pca', {
                    'descriptorNames': sim_settings.PCA_DESCRIPTORS,
                    'dimension': sim_settings.PCA_DIMENSIONS,
                    'resultName': 'pca'
                })
            self.pca_dataset.setReferenceDataSet(self.original_dataset)
            self.view_pca = View(self.pca_dataset)
            self.__build_pca_metric()

        return {'error': False, 'result': msg}
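For reference, a minimal sketch of the PCA step in isolation; the descriptor patterns and target dimension below are hypothetical stand-ins for the sim_settings values:

    from gaia2 import View, transform

    # 'original_dataset' is assumed to be a prepared, normalized gaia2 DataSet.
    pca_dataset = transform(original_dataset, 'pca', {
        'descriptorNames': ['*.mean', '*.var'],  # hypothetical patterns
        'dimension': 100,                        # hypothetical target dimension
        'resultName': 'pca'
    })
    # Link back to the full-resolution dataset so that points added to the
    # PCA dataset are propagated to the original one as well.
    pca_dataset.setReferenceDataSet(original_dataset)
    pca_view = View(pca_dataset)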
Example #6
    def testNamespaceClash(self):
        # The test passes as long as loading a .sig file with clashing
        # descriptor namespaces does not raise.
        p = Point()
        p.load('data/namespace_clash.sig')
Example #7
    def add_point(self, point_location, point_name):

        if self.original_dataset.contains(str(point_name)):
            self.original_dataset.removePoint(str(point_name))

        p = Point()
        if os.path.exists(str(point_location)):
            try:
                p.load(str(point_location))
                p.setName(str(point_name))
                if self.original_dataset.size() <= settings.SIMILARITY_MINIMUM_POINTS:
                    # Add point to original_dataset
                    self.original_dataset.addPoint(p)
                    msg = 'Added point with name %s. Index has now %i points.' % (
                        str(point_name), self.original_dataset.size())
                    logger.info(msg)
                else:
                    # Add point to the pca dataset (as it has already been created). The pca dataset
                    # will take care of adding it to the original dataset too.
                    self.pca_dataset.addPoint(p)
                    msg = 'Added point with name %s. Index has now %i points (pca index has %i points).' % (
                        str(point_name), self.original_dataset.size(),
                        self.pca_dataset.size())
                    logger.info(msg)

            except Exception as e:
                msg = 'Point with name %s could NOT be added (%s).' % (
                    str(point_name), str(e))
                logger.info(msg)
                return {
                    'error': True,
                    'result': msg,
                    'status_code': settings.SERVER_ERROR_CODE
                }
        else:
            msg = 'Point with name %s could NOT be added because analysis file does not exist (%s).' % (
                str(point_name), str(point_location))
            logger.info(msg)
            return {
                'error': True,
                'result': msg,
                'status_code': settings.SERVER_ERROR_CODE
            }

        if self.original_dataset.size() == settings.SIMILARITY_MINIMUM_POINTS:
            # Do enumerate
            try:
                self.original_dataset = transform(
                    self.original_dataset, 'enumerate',
                    {'descriptorNames': ['.tonal.chords_progression']})
            except Exception:
                logger.info(
                    'WARNING: enumerate transformation to .tonal.chords_progression could not be performed.'
                )

        # If adding this point brings us up to the minimum number of points required for similarity,
        # save the index and create the view and distance metrics.
        #   This will almost never happen: only the first time the similarity server starts, when
        #   there is no index yet and the first SIMILARITY_MINIMUM_POINTS points are added.
        if self.original_dataset.size() == settings.SIMILARITY_MINIMUM_POINTS and not self.indexing_only_mode:
            #self.__prepare_original_dataset()
            #self.__normalize_original_dataset()
            self.transformations_history = self.original_dataset.history().toPython()
            self.save_index(msg="(reaching 2000 points)")

            # build metrics for the different similarity presets
            self.__build_metrics()
            # create view
            self.view = View(self.original_dataset)
            # do pca and create pca view and metric
            self.pca_dataset = transform(
                self.original_dataset, 'pca', {
                    'descriptorNames': settings.PCA_DESCRIPTORS,
                    'dimension': settings.PCA_DIMENSIONS,
                    'resultName': 'pca'
                })
            self.pca_dataset.setReferenceDataSet(self.original_dataset)
            self.view_pca = View(self.pca_dataset)
            self.__build_pca_metric()

        return {'error': False, 'result': msg}
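Compared with Example #5, this variant leaves the __prepare_original_dataset() and __normalize_original_dataset() calls commented out, so the index is saved without the preparation and normalization passes; the rest of the flow (enumerate transform, metrics, view, and PCA dataset) is the same.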