Example #1
 def initialize(self):
     """Handle more expensive initialization."""
     self.gaia_db = self.initialize_gaia_db()
     try:
         self.metric = DistanceFunctionFactory.create(
             'euclidean', self.gaia_db.layout())
     except Exception as ex:
         # Creating the metric can fail if the dataset layout is not ready
         # for it yet; transform the dataset and retry
         print(repr(ex))
         self.gaia_db = self.transform(self.gaia_db)
         self.metric = DistanceFunctionFactory.create(
             'euclidean', self.gaia_db.layout())
         self.transformed = True
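These snippets come from a Gaia-based audio similarity service and all rely on the gaia2 Python bindings: a DataSet holds the indexed points, its layout() describes the available descriptors, and DistanceFunctionFactory.create builds a distance metric against that layout. A minimal setup sketch, assuming the bindings are installed (the dataset path is hypothetical):

    from gaia2 import DataSet, DistanceFunctionFactory

    dataset = DataSet()
    dataset.load('/path/to/index.db')  # hypothetical path
    # A metric must be created against the layout of the dataset it will query
    metric = DistanceFunctionFactory.create('euclidean', dataset.layout())

The try/except in Example #1 covers the case where the metric cannot be built for the raw layout: the dataset is transformed first and the creation retried.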
Example #2
 def __build_pca_metric(self):
     logger.info('Building metric for preset pca')
     with open(settings.PRESET_DIR + "pca.yaml") as f:
         preset_file = yaml.safe_load(f)
     distance = preset_file['distance']['type']
     parameters = preset_file['distance']['parameters']
     search_metric = DistanceFunctionFactory.create(
         str(distance), self.pca_dataset.layout(), parameters)
     self.metrics['pca'] = search_metric
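The preset files themselves are not shown here, but from the way they are indexed, each presumably decodes to a mapping carrying a distance type and its parameters (the concrete values below are hypothetical):

    # Hypothetical result of yaml.safe_load on a preset file such as pca.yaml
    preset_file = {
        'distance': {
            'type': 'euclidean',                       # metric name passed to create()
            'parameters': {'descriptorNames': 'pca'},  # forwarded as the third argument
        }
    }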
Example #3
 def transform_and_save(self, dataset, path):
     """Transform dataset and save to disk."""
     if not self.transformed:
         dataset = self.transform(dataset)
         self.metric = DistanceFunctionFactory.create(
             'euclidean', dataset.layout())
         self.transformed = True
     dataset.save(path)
     return dataset
Example #4
 def __build_metrics(self):
     for preset in PRESETS:
         logger.debug("Building metric for preset %s" % preset)
         name = preset
         path = PRESET_DIR + name + ".yaml"
         with open(path) as f:
             preset_file = yaml.safe_load(f)
         distance = preset_file["distance"]["type"]
         parameters = preset_file["distance"]["parameters"]
         search_metric = DistanceFunctionFactory.create(
             str(distance), self.original_dataset.layout(), parameters)
         self.metrics[name] = search_metric
Example #5
    def __load_datasets(self):
        self.as_dataset.load(self.__get_dataset_path(clust_settings.INDEX_NAME_AS))
        self.as_view = View(self.as_dataset)
        # self.as_metric = DistanceFunctionFactory.create('euclidean', self.as_dataset.layout())
        # self.as_metric = DistanceFunctionFactory.create('CosineSimilarity', self.as_dataset.layout())
        # self.as_metric = DistanceFunctionFactory.create('CosineAngle', self.as_dataset.layout())
        self.as_metric = DistanceFunctionFactory.create('Manhattan', self.as_dataset.layout())

        self.tag_dataset.load(self.__get_dataset_path(clust_settings.INDEX_NAME_TAG))
        self.tag_view = View(self.tag_dataset)
        self.tag_metric = DistanceFunctionFactory.create('euclidean', self.tag_dataset.layout())

        self.fs_dataset.load(self.__get_dataset_path(clust_settings.INDEX_NAME_FS))
        self.fs_view = View(self.fs_dataset)
        self.fs_metric = DistanceFunctionFactory.create('euclidean', self.fs_dataset.layout(), {'descriptorNames': 'pca'})

        # self.gaia_similiarity = GaiaWrapperSimilarity()

        self.__load_ac_descriptors_dataset()
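With the datasets loaded, a View wraps each one for querying and the metric decides how distances are computed. A minimal query sketch, assuming '1234' is the id of a point already present in tag_dataset and that gaia returns (point id, distance) pairs:

    # Hypothetical query: the ten nearest neighbours of an existing point
    results = self.tag_view.nnSearch('1234', self.tag_metric).get(10)
    for point_id, distance in results:
        print(point_id, distance)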
Example #6
 def __build_metrics(self):
     for preset in sim_settings.PRESETS:
         if preset != 'pca':  # PCA metric is built only after pca dataset is created so it should not be built here
             logger.info('Building metric for preset %s' % preset)
             name = preset
             path = sim_settings.PRESET_DIR + name + ".yaml"
             with open(path) as f:
                 preset_file = yaml.safe_load(f)
             distance = preset_file['distance']['type']
             parameters = preset_file['distance']['parameters']
             search_metric = DistanceFunctionFactory.create(
                 str(distance), self.original_dataset.layout(), parameters)
             self.metrics[name] = search_metric
Example #7
 def __load_ac_descriptors_dataset(self):
     self.ac_dataset.load(self.__get_dataset_path('FS_AC_descriptors_normalized'))  # TODO: add this in clustering settings
     self.ac_view = View(self.ac_dataset)
     self.ac_metric = DistanceFunctionFactory.create('euclidean', self.ac_dataset.layout(), 
         {'descriptorNames': [
             'ac_brightness', 
             'ac_boominess', 
             'ac_depth', 
             'ac_hardness', 
             'ac_roughness', 
             'ac_sharpness', 
             'ac_warmth'
         ]})
Example #8
    def api_search(self, target_type, target, filter, preset_name,
                   metric_descriptor_names, num_results, offset, in_ids):

        # Check if index has sufficient points
        size = self.original_dataset.size()
        if size < sim_settings.SIMILARITY_MINIMUM_POINTS:
            msg = 'Not enough datapoints in the dataset (%s < %s).' % (
                size, sim_settings.SIMILARITY_MINIMUM_POINTS)
            logger.info(msg)
            return {
                'error': True,
                'result': msg,
                'status_code': sim_settings.SERVER_ERROR_CODE
            }

        # Get some dataset parameters that will be useful later
        trans_hist = self.transformations_history
        layout = self.original_dataset.layout()
        pca_layout = self.pca_dataset.layout()
        # Get normalization coefficients from the most recent 'normalize'
        # transformation in the history
        coeffs = None
        for i in range(0, len(trans_hist)):
            if trans_hist[-(i + 1)]['Analyzer name'] == 'normalize':
                coeffs = trans_hist[-(i + 1)]['Applier parameters']['coeffs']
                break

        # Process target
        if target:
            if target_type == 'sound_id':
                query_point = str(target)
                if not self.original_dataset.contains(query_point):
                    msg = "Sound with id %s doesn't exist in the dataset and can not be set as similarity target." \
                          % query_point
                    logger.info(msg)
                    return {
                        'error': True,
                        'result': msg,
                        'status_code': sim_settings.NOT_FOUND_CODE
                    }
                else:
                    query = query_point

            elif target_type == 'descriptor_values':
                # Transform input params to the normalized feature space and add them to a query point
                # If there are no params specified in the target, the point is set as empty (probably random sounds
                # are returned)
                feature_names = []
                query = Point()
                query.setLayout(layout)
                try:
                    for param in target.keys():
                        # Only add numerical parameters. Non numerical ones (like key) are only used as filters
                        if param in coeffs.keys():
                            feature_names.append(str(param))
                            value = target[param]
                            if coeffs:
                                a = coeffs[param]['a']
                                b = coeffs[param]['b']
                                if len(a) == 1:
                                    norm_value = a[0] * value + b[0]
                                else:
                                    norm_value = []
                                    for i in range(0, len(a)):
                                        norm_value.append(a[i] * value[i] +
                                                          b[i])
                                query.setValue(str(param), norm_value)
                            else:
                                query.setValue(str(param), value)
                except Exception:
                    return {
                        'error': True,
                        'result':
                        'Invalid target (descriptor values could not be correctly parsed)',
                        'status_code': sim_settings.BAD_REQUEST_CODE
                    }

                # Overwrite metric with present descriptors in target
                metric = DistanceFunctionFactory.create(
                    'euclidean', layout, {'descriptorNames': feature_names})

            elif target_type == 'file':
                # Target is specified as the attached file
                # Create a point with the data in 'descriptors_data' and search for it
                target_file_parsing_type = '-'

                try:
                    # Try directly loading the file
                    p, query = Point(), Point()
                    p.loadFromString(yaml.dump(target))
                    if preset_name == 'pca':
                        query = self.pca_dataset.history().mapPoint(
                            p)  # map point to pca dataset
                    else:
                        query = self.original_dataset.history().mapPoint(
                            p)  # map point to original dataset
                    target_file_parsing_type = 'mapPoint'

                except Exception as e:
                    logger.info(
                        'Unable to create gaia point from uploaded file (%s). '
                        'Trying adding descriptors one by one.' % e)

                    # If does not work load descriptors one by one
                    try:
                        query = Point()
                        #query.setLayout(layout)

                        feature_names = []
                        get_nested_descriptor_names(target, feature_names)
                        feature_names = [
                            '.%s' % item for item in feature_names
                        ]
                        nonused_features = []

                        for param in feature_names:
                            if param in coeffs.keys():
                                value = get_nested_dictionary_value(
                                    param[1:].split('.'), target)
                                if coeffs:
                                    try:
                                        a = coeffs[param]['a']
                                        b = coeffs[param]['b']
                                        if len(a) == 1:
                                            norm_value = a[0] * value + b[0]
                                        else:
                                            norm_value = []
                                            for i in range(0, len(a)):
                                                norm_value.append(a[i] *
                                                                  value[i] +
                                                                  b[i])
                                        query.setValue(str(param[1:]),
                                                       norm_value)
                                    except Exception:
                                        nonused_features.append(param)
                                else:
                                    query.setValue(str(param[1:]), value)
                            else:
                                nonused_features.append(param)

                        if preset_name == 'pca':
                            query = self.pca_dataset.history().mapPoint(
                                query)  # map point to pca dataset
                        else:
                            query = self.original_dataset.history().mapPoint(
                                query)  # map the rebuilt point to the original dataset

                        target_file_parsing_type = 'walkDict'

                    except Exception as e:
                        logger.info(
                            'Unable to create gaia point from uploaded file and adding descriptors one by '
                            'one (%s)' % e)
                        return {
                            'error':
                            True,
                            'result':
                            'Unable to create gaia point from uploaded file. Probably the '
                            'file does not have the required layout. Are you using the '
                            'correct version of Essentia\'s Freesound extractor?',
                            'status_code':
                            sim_settings.SERVER_ERROR_CODE
                        }
        else:
            query = Point()  # Empty target
            if preset_name == 'pca':
                query.setLayout(pca_layout)
            else:
                query.setLayout(layout)

        # Process filter
        if filter:
            filter = parse_filter_list(filter, coeffs)
        else:
            filter = ""  # Empty filter

        # log
        log_message = 'Similarity search'
        if target:
            if target_type == 'sound_id':
                log_target = '%s (sound id)' % str(target)
            elif target_type == 'descriptor_values':
                log_target = '%s (descriptor values)' % str(target)
            elif target_type == 'file':
                log_target = 'uploaded file (%s)' % target_file_parsing_type
            log_message += ' with target: %s' % log_target
        if filter:
            log_message += ' with filter: %s' % str(filter)
        logger.info(log_message)

        # if in_ids is specified, edit the filter accordingly
        if in_ids:
            if not filter:
                filter = 'WHERE point.id IN ("' + '", "'.join(in_ids) + '")'
            else:
                filter += ' AND point.id IN ("' + '", "'.join(in_ids) + '")'

        # Set query metric
        metric = self.metrics[preset_name]
        if metric_descriptor_names:
            metric = DistanceFunctionFactory.create(
                'euclidean', layout,
                {'descriptorNames': metric_descriptor_names})

        # Do query!
        try:
            if target_type == 'descriptor_values' and target:
                search = self.view.nnSearch(query, metric, str(filter))
            else:
                if preset_name == 'pca':
                    search = self.view_pca.nnSearch(query, metric, str(filter))
                else:
                    search = self.view.nnSearch(query, metric, str(filter))
            results = search.get(num_results, offset=offset)
            count = search.size()
        except Exception as e:
            logger.info('Similarity search failed (%s)' % e)
            return {
                'error': True,
                'result': 'Similarity server error',
                'status_code': sim_settings.SERVER_ERROR_CODE
            }

        note = None
        if target_type == 'file':
            if target_file_parsing_type == 'walkDict':
                note = 'The layout of the given analysis file differed from what we expected. Similarity results ' \
                       'might not be accurate. Was the file generated with the last version of Essentia\'s ' \
                       'Freesound extractor?'

        return {
            'error': False,
            'result': {
                'results': results,
                'count': count,
                'note': note
            }
        }
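For reference, a hypothetical call to api_search and the shape of its response; gaia stands for an instance of the wrapper class and 'lowlevel' for one of the configured preset names (both names are assumptions):

    response = gaia.api_search(
        target_type='sound_id',
        target='123456',               # id of a point already in the dataset
        filter=None,
        preset_name='lowlevel',        # assumed preset name
        metric_descriptor_names=None,  # keep the preset's own metric
        num_results=10,
        offset=0,
        in_ids=None,
    )
    if not response['error']:
        print(response['result']['count'], response['result']['results'])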
Example #9
    def query_dataset(self, query_parameters, number_of_results):

        size = self.original_dataset.size()
        if size < SIMILARITY_MINIMUM_POINTS:
            msg = "Not enough datapoints in the dataset (%s < %s)." % (size, SIMILARITY_MINIMUM_POINTS)
            logger.debug(msg)
            return {"error": True, "result": msg}
            # raise Exception('Not enough datapoints in the dataset (%s < %s).' % (size, SIMILARITY_MINIMUM_POINTS))

        trans_hist = self.original_dataset.history().toPython()
        layout = self.original_dataset.layout()

        # Get normalization coefficients to transform the input data (get info from the last transformation which has been a normalization)
        coeffs = None
        for i in range(0, len(trans_hist)):
            if trans_hist[-(i + 1)]["Analyzer name"] == "normalize":
                coeffs = trans_hist[-(i + 1)]["Applier parameters"]["coeffs"]
                break

        ##############
        # PARSE TARGET
        ##############

        # Transform input params to the normalized feature space and add them to a query point
        # If there are no params specified in the target, the point is set as empty (probably random sounds are returned)
        q = Point()
        q.setLayout(layout)
        feature_names = []
        # If some target has been specified...
        if query_parameters["target"].keys():
            for param in query_parameters["target"].keys():
                # Only add numerical parameters. Non numerical ones (like key) are only used as filters
                if param in coeffs.keys():
                    feature_names.append(str(param))
                    value = query_parameters["target"][param]
                    if coeffs:
                        a = coeffs[param]["a"]
                        b = coeffs[param]["b"]
                        if len(a) == 1:
                            norm_value = a[0] * value + b[0]
                        else:
                            norm_value = []
                            for i in range(0, len(a)):
                                norm_value.append(a[i] * value[i] + b[i])
                        # text = str(type(param)) + " " + str(type(norm_value))
                        q.setValue(str(param), norm_value)
                    else:
                        q.setValue(str(param), value)

        ##############
        # PARSE FILTER
        ##############

        filter = ""
        # If some filter has been specified...
        if query_parameters["filter"]:
            # A plain string filter is used as-is; a structured filter list is parsed
            if isinstance(query_parameters["filter"], str):
                filter = query_parameters["filter"]
            else:
                filter = self.parse_filter_list(query_parameters["filter"], coeffs)

        #############
        # DO QUERY!!!
        #############

        logger.debug(
            "Content based search with target: " + str(query_parameters["target"]) + " and filter: " + str(filter)
        )
        metric = DistanceFunctionFactory.create("euclidean", layout, {"descriptorNames": feature_names})
        # Looks like that depending on the version of gaia, variable filter must go after or before the metric
        # For the gaia version we have currently (sep 2012) in freesound: nnSearch(query,filter,metric)
        # results = self.view.nnSearch(q,str(filter),metric).get(int(number_of_results)) # <- Freesound
        results = self.view.nnSearch(q, metric, str(filter)).get(int(number_of_results))

        return {"error": False, "result": results}
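Examples #8 and #9 both map raw descriptor values into the normalized feature space using per-descriptor coefficients taken from the dataset's 'normalize' transformation: norm = a * value + b, applied element-wise for vector descriptors. The same logic factored into a helper, as a sketch (normalize_value is not part of the original code):

    def normalize_value(value, coeff):
        """Apply the affine normalization norm = a * value + b.

        `coeff` is one entry of the coeffs dict above, e.g.
        {'a': [0.5], 'b': [0.1]} for a scalar descriptor; vector
        descriptors carry one (a, b) pair per dimension.
        """
        a, b = coeff['a'], coeff['b']
        if len(a) == 1:
            return a[0] * value + b[0]
        return [a[i] * value[i] + b[i] for i in range(len(a))]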