Exemplo n.º 1
0
def api_search(search_form, target_file=None, extra_parameters=False, merging_strategy='merge_optimized'):
    """Run the content-based part of an API search described by ``search_form``.

    Two situations are handled by the code in this block:

    * The form carries no query, no filter, no descriptors filter, no target
      and no ``target_file`` was given: an empty result is returned without
      calling the similarity service.
    * The form carries neither a query nor a filter: the request is forwarded
      to ``similarity_api_search`` using the paging values of the form.

    Requests that do carry a query or a filter are not handled by the code
    visible in this block.

    Args:
        search_form: object exposing a ``cleaned_data`` mapping with the keys
            ``query``, ``filter``, ``descriptors_filter``, ``target``,
            ``page`` and ``page_size``.
        target_file: optional target file passed through to
            ``similarity_api_search``.
        extra_parameters: not used by the code in this block.
        merging_strategy: not used by the code in this block.

    Returns:
        A 7-tuple ``(ids, count, distance_to_target_data, None, note, None, None)``.
        ``ids`` holds the first element of every result returned by the
        similarity search; ``distance_to_target_data`` is ``dict(results)``
        when a target (or target file) was given and ``None`` otherwise.

    Raises:
        ServerErrorException: similarity error with status 500 or an
            unrecognised status, or any other unexpected failure.
        BadRequestException: similarity error with status 400.
        NotFoundException: similarity error with status 404.
    """
    data = search_form.cleaned_data
    # ``is None`` is the identity test PEP 8 asks for; the original used ``== None``.
    no_text_search = data['query'] is None and data['filter'] is None

    if no_text_search and not data['descriptors_filter'] and not data['target'] and not target_file:
        # No input data for search, return empty results
        return [], 0, None, None, None, None, None

    if no_text_search:
        # Standard content-based search
        try:
            results, count, note = similarity_api_search(
                target=data['target'],
                filter=data['descriptors_filter'],
                num_results=data['page_size'],
                offset=(data['page'] - 1) * data['page_size'],
                target_file=target_file)

            gaia_ids = [result[0] for result in results]
            distance_to_target_data = None
            if data['target'] or target_file:
                # Keep the distance of every sound to the target so it can be accessed by the serializer.
                # Only done when a target is specified (otherwise there is no meaningful distance value).
                distance_to_target_data = dict(results)

            return gaia_ids, count, distance_to_target_data, None, note, None, None
        except SimilarityException as e:
            # Translate the status code reported by the similarity service into an API exception.
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message)
            elif e.status_code == 400:
                raise BadRequestException(msg=e.message)
            elif e.status_code == 404:
                raise NotFoundException(msg=e.message)
            else:
                raise ServerErrorException(msg='Similarity server error: %s' % e.message)
        except Exception:
            raise ServerErrorException(msg='The similarity server could not be reached or some unexpected error occurred.')
Exemplo n.º 2
0
def get_gaia_results(search_form,
                     target_file,
                     page_size,
                     max_pages,
                     start_page=1,
                     valid_ids=None,
                     offset=None):
    """Collect similarity search results page by page.

    Pages of ``page_size`` results are requested from
    ``similarity_api_search`` until either as many ids as the reported total
    count have been gathered or ``max_pages`` requests have been made.

    Args:
        search_form: object exposing a ``cleaned_data`` mapping with the keys
            ``target`` and ``descriptors_filter``.
        target_file: optional target file passed through to the search.
        page_size: number of results requested per page.
        max_pages: maximum number of page requests to perform.
        start_page: 1-based page used to derive the first offset when
            ``offset`` is not given.
        valid_ids: passed to the search as ``in_ids``.
        offset: offset of the first request; a falsy value (``None`` or 0)
            means "derive it from ``start_page``".

    Raises:
        ServerErrorException: similarity error with status 500 or an
            unrecognised status.
        BadRequestException: similarity error with status 400.
        NotFoundException: similarity error with status 404.

    NOTE(review): no ``return`` statement is visible in this block, so the
    accumulated ids, count, distances and note are not handed back here.
    Confirm against the complete function.
    """
    gaia_ids = list()
    gaia_count = None
    distance_to_target_data = dict()
    note = None

    try:
        # The original only recomputed the offset while it was falsy, so after
        # it first became non-zero every later request fetched the same page
        # again. Keep a running offset instead; the first request is unchanged.
        next_offset = offset if offset else (start_page - 1) * page_size
        n_page_requests = 1
        # Iterate over gaia result pages. ``gaia_count`` is tested for None
        # first so that it is never compared with an int while still unknown.
        while (gaia_count is None
               or len(gaia_ids) < gaia_count) and n_page_requests <= max_pages:
            results, count, note = similarity_api_search(
                target=search_form.cleaned_data['target'],
                filter=search_form.cleaned_data['descriptors_filter'],
                num_results=page_size,
                offset=next_offset,
                target_file=target_file,
                in_ids=valid_ids)

            gaia_ids += [result[0] for result in results]
            gaia_count = count
            if search_form.cleaned_data['target'] or target_file:
                # Keep the distance of every sound to the target so it can later be added to the results
                distance_to_target_data.update(dict(results))

            next_offset += page_size
            n_page_requests += 1

    except SimilarityException as e:
        # Translate the status code reported by the similarity service into an API exception.
        if e.status_code == 500:
            raise ServerErrorException(msg=e.message)
        elif e.status_code == 400:
            raise BadRequestException(msg=e.message)
        elif e.status_code == 404:
            raise NotFoundException(msg=e.message)
        else:
            raise ServerErrorException(msg='Similarity server error: %s' %
                                       e.message)
Exemplo n.º 3
0
def api_search(search_form,
               target_file=None,
               extra_parameters=False,
               merging_strategy='merge_optimized'):
    """Resolve an API search that only uses content-based (similarity) input.

    When the form has no query, filter, descriptors filter or target and no
    ``target_file`` is supplied, an empty result is returned immediately.
    When the form has neither query nor filter, the search is delegated to
    ``similarity_api_search`` with the form's ``page`` and ``page_size``.
    Forms that carry a query or a filter are not handled by the code visible
    in this block.

    Args:
        search_form: object exposing a ``cleaned_data`` mapping with the keys
            ``query``, ``filter``, ``descriptors_filter``, ``target``,
            ``page`` and ``page_size``.
        target_file: optional target file forwarded to the similarity search.
        extra_parameters: not used by the code in this block.
        merging_strategy: not used by the code in this block.

    Returns:
        A 7-tuple ``(ids, count, distance_to_target_data, None, note, None, None)``
        where ``ids`` is the first element of each similarity result and
        ``distance_to_target_data`` is ``dict(results)`` if a target or
        target file was given, otherwise ``None``.

    Raises:
        ServerErrorException: similarity error with status 500 or an
            unrecognised status, or any other unexpected failure.
        BadRequestException: similarity error with status 400.
        NotFoundException: similarity error with status 404.
    """
    cleaned = search_form.cleaned_data
    # Identity comparison with None (PEP 8) instead of ``== None``.
    without_text_search = cleaned['query'] is None and cleaned['filter'] is None

    if (without_text_search
            and not cleaned['descriptors_filter']
            and not cleaned['target']
            and not target_file):
        # No input data for search, return empty results
        return [], 0, None, None, None, None, None

    if without_text_search:
        # Standard content-based search
        try:
            page_size = cleaned['page_size']
            results, count, note = similarity_api_search(
                target=cleaned['target'],
                filter=cleaned['descriptors_filter'],
                num_results=page_size,
                offset=(cleaned['page'] - 1) * page_size,
                target_file=target_file)

            gaia_ids = [result[0] for result in results]
            distance_to_target_data = None
            if cleaned['target'] or target_file:
                # Keep the distance of every sound to the target so the serializer can use it.
                # Only meaningful when a target is specified.
                distance_to_target_data = dict(results)

            return gaia_ids, count, distance_to_target_data, None, note, None, None
        except SimilarityException as e:
            # Translate the status code reported by the similarity service into an API exception.
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message)
            elif e.status_code == 400:
                raise BadRequestException(msg=e.message)
            elif e.status_code == 404:
                raise NotFoundException(msg=e.message)
            else:
                raise ServerErrorException(msg='Similarity server error: %s' %
                                           e.message)
        except Exception:
            raise ServerErrorException(
                msg='The similarity server could not be reached or some unexpected error occurred.'
            )
def get_gaia_results(search_form, target_file, page_size, max_pages, start_page=1, valid_ids=None, offset=None):
    """Gather similarity search results over one or more pages.

    ``similarity_api_search`` is called repeatedly with ``page_size`` results
    per request. Iteration stops once the number of collected ids reaches the
    total count reported by the service, or after ``max_pages`` requests.

    Args:
        search_form: object exposing a ``cleaned_data`` mapping with the keys
            ``target`` and ``descriptors_filter``.
        target_file: optional target file forwarded to the search.
        page_size: number of results requested per page.
        max_pages: maximum number of page requests to perform.
        start_page: 1-based page used to compute the first offset when
            ``offset`` is falsy.
        valid_ids: forwarded to the search as ``in_ids``.
        offset: offset of the first request; ``None`` or 0 means the offset
            is derived from ``start_page``.

    Raises:
        ServerErrorException: similarity error with status 500 or an
            unrecognised status.
        BadRequestException: similarity error with status 400.
        NotFoundException: similarity error with status 404.

    NOTE(review): this block contains no ``return`` statement, so the
    collected data is not returned from here. Confirm against the complete
    function.
    """
    gaia_ids = list()
    gaia_count = None
    distance_to_target_data = dict()
    note = None

    try:
        # Track the offset explicitly and advance it on every request. The
        # original froze the offset as soon as it became non-zero and kept
        # requesting the same page; the first request is unchanged.
        request_offset = offset if offset else (start_page - 1) * page_size
        n_page_requests = 1
        # Iterate over gaia result pages. Check for None first so the count is
        # never ordered against an int before it is known.
        while (gaia_count is None or len(gaia_ids) < gaia_count) and n_page_requests <= max_pages:
            results, count, note = similarity_api_search(
                target=search_form.cleaned_data["target"],
                filter=search_form.cleaned_data["descriptors_filter"],
                num_results=page_size,
                offset=request_offset,
                target_file=target_file,
                in_ids=valid_ids,
            )

            gaia_ids += [result[0] for result in results]
            gaia_count = count
            if search_form.cleaned_data["target"] or target_file:
                # Keep the distance of every sound to the target so it can later be added to the results
                distance_to_target_data.update(dict(results))

            request_offset += page_size
            n_page_requests += 1

    except SimilarityException as e:
        # Translate the status code reported by the similarity service into an API exception.
        if e.status_code == 500:
            raise ServerErrorException(msg=e.message)
        elif e.status_code == 400:
            raise BadRequestException(msg=e.message)
        elif e.status_code == 404:
            raise NotFoundException(msg=e.message)
        else:
            raise ServerErrorException(msg="Similarity server error: %s" % e.message)
Exemplo n.º 5
0
def api_search(search_form, target_file=None):
    """Run the content-based part of an API search described by ``search_form``.

    If the form has no query, filter, descriptors filter or target and no
    ``target_file`` is given, an empty result is returned. If it has neither
    query nor filter, the search is delegated to ``similarity_api_search``
    using the form's ``page`` and ``page_size``. Forms carrying a query or a
    filter are not handled by the code visible in this block.

    Args:
        search_form: object exposing a ``cleaned_data`` mapping with the keys
            ``query``, ``filter``, ``descriptors_filter``, ``target``,
            ``page`` and ``page_size``.
        target_file: optional target file forwarded to the similarity search.

    Returns:
        A 5-tuple ``(ids, count, distance_to_target_data, None, note)`` where
        ``ids`` is the first element of each similarity result and
        ``distance_to_target_data`` is ``dict(results)`` if a target or
        target file was given, otherwise ``None``.

    Raises:
        ServerErrorException: similarity error with status 500 or an
            unrecognised status, or any other unexpected failure (the
            message is only included when ``settings.DEBUG`` is true).
        InvalidUrlException: similarity error with status 400.
        NotFoundException: similarity error with status 404.
    """
    distance_to_target_data = None
    data = search_form.cleaned_data
    no_text_search = not data['query'] and not data['filter']

    if no_text_search and not data['descriptors_filter'] and not data['target'] and not target_file:
        # No input data for search, return empty results
        return [], 0, None, None, None

    if no_text_search:
        # Standard content-based search
        try:
            results, count, note = similarity_api_search(
                target=data['target'],
                filter=data['descriptors_filter'],
                num_results=data['page_size'],
                offset=(data['page'] - 1) * data['page_size'],
                target_file=target_file)

            gaia_ids = [result[0] for result in results]
            if data['target'] or target_file:
                # Keep the distance of every sound to the target so it can be accessed by the serializer.
                # Only done when a target is specified (otherwise there is no meaningful distance value).
                distance_to_target_data = dict(results)

            return gaia_ids, count, distance_to_target_data, None, note
        except SimilarityException as e:
            # Translate the status code reported by the similarity service into an API exception.
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message)
            elif e.status_code == 400:
                raise InvalidUrlException(msg=e.message)
            elif e.status_code == 404:
                raise NotFoundException(msg=e.message)
            else:
                raise ServerErrorException(msg=e.message)
        except Exception as e:
            if settings.DEBUG:
                # Generic exceptions have no ``message`` attribute on Python 3; fall back to str().
                raise ServerErrorException(msg=getattr(e, 'message', None) or str(e))
            else:
                raise ServerErrorException()
Exemplo n.º 6
0
def api_search(search_form, target_file=None, max_repeat=False, max_solr_filter_ids=False):
    """Run an API search described by ``search_form``.

    The code in this block handles the case with no input at all (an empty
    result is returned) and the case with neither query nor filter (the
    search is delegated to ``similarity_api_search``).

    In combined search queries we need to merge solr and gaia results.
    MERGE_STRATEGY determines which strategy we follow to approach this:
    - 'merge_all': merge all strategy will get all results from solr and all results from gaia and then combine the ids
      in a unique list. The advantage of this strategy is that it returns the exact total number of matches for the query.
      The disadvantage is that depending on the query it can become really slow, and sometimes throwing timeouts.
    - 'filter_solr_results': in this strategy we first get gaia results and then perform a solr query restricted to the
      results returned by gaia. Given that filtering in solr results must be done using OR clauses in a filter id field,
      we can not pass a very big number of ids as the performance is severely affected. The standard limit of OR clauses in a
      solr query is 1024 (parameter <maxBooleanClauses>1024</maxBooleanClauses> in solrconfig.xml). Therefore, the query can
      return a maximum of 1024 results. We actually set this parameter using MAX_SOLR_FILTER_IDS, so we can control the performance.
      This strategy is faster than 'merge_all' and the response time is under control, but we can not get all possible query matches.
    - 'filter_solr_results_repeat': is like the previous strategy but repeating the whole process MAX_REPEAT times so
      that we increase the probability of obtaining matches.

    Args:
        search_form: object exposing a ``cleaned_data`` mapping with the keys
            ``query``, ``filter``, ``descriptors_filter``, ``target``,
            ``page`` and ``page_size``.
        target_file: optional target file forwarded to the similarity search.
        max_repeat: optional override for MAX_REPEAT (default 7), converted
            with ``int`` and capped at twice the default.
        max_solr_filter_ids: optional override for MAX_SOLR_FILTER_IDS
            (default 350), converted with ``int`` and capped at twice the
            default.

    Returns:
        A 5-tuple ``(ids, count, distance_to_target_data, None, note)`` where
        ``ids`` is the first element of each similarity result and
        ``distance_to_target_data`` is ``dict(results)`` if a target or
        target file was given, otherwise ``None``.

    Raises:
        ServerErrorException: similarity error with status 500 or an
            unrecognised status, or any other unexpected failure.
        BadRequestException: similarity error with status 400.
        NotFoundException: similarity error with status 404.
    """
    MERGE_STRATEGY = "filter_solr_results_repeat"
    MAX_SOLR_FILTER_IDS = 350
    if max_solr_filter_ids:
        # Callers may adjust the limit, but never above twice the default.
        MAX_SOLR_FILTER_IDS = min(int(max_solr_filter_ids), MAX_SOLR_FILTER_IDS * 2)
    MAX_REPEAT = 7
    if max_repeat:
        # Callers may adjust the limit, but never above twice the default.
        MAX_REPEAT = min(int(max_repeat), MAX_REPEAT * 2)

    distance_to_target_data = None
    data = search_form.cleaned_data
    no_text_search = not data["query"] and not data["filter"]

    if (
        no_text_search
        and not data["descriptors_filter"]
        and not data["target"]
        and not target_file
    ):
        # No input data for search, return empty results
        return [], 0, None, None, None

    if no_text_search:
        # Standard content-based search
        try:
            results, count, note = similarity_api_search(
                target=data["target"],
                filter=data["descriptors_filter"],
                num_results=data["page_size"],
                offset=(data["page"] - 1) * data["page_size"],
                target_file=target_file,
            )

            gaia_ids = [result[0] for result in results]
            distance_to_target_data = None
            if data["target"] or target_file:
                # Keep the distance of every sound to the target so it can be accessed by the serializer.
                # Only done when a target is specified (otherwise there is no meaningful distance value).
                distance_to_target_data = dict(results)

            return gaia_ids, count, distance_to_target_data, None, note
        except SimilarityException as e:
            # Translate the status code reported by the similarity service into an API exception.
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message)
            elif e.status_code == 400:
                raise BadRequestException(msg=e.message)
            elif e.status_code == 404:
                raise NotFoundException(msg=e.message)
            else:
                raise ServerErrorException(msg="Similarity server error: %s" % e.message)
        except Exception:
            raise ServerErrorException(
                msg="The similarity server could not be reached or some unexpected error occurred."
            )
Exemplo n.º 7
0
    else:
        # Combined search (there is at least one of query/filter and one of descriptors_filter/target)
        # Get gaia results
        try:
            max_gaia_results = 99999999
            if MERGE_STRATEGY == "filter_solr_results":
                # If using 'filter_solr_results' strategy there is no need to get all gaia results as we will only
                # be able to use MAX_SOLR_FILTER_IDS when filtering in solr
                max_gaia_results = MAX_SOLR_FILTER_IDS
            elif MERGE_STRATEGY == "filter_solr_results_repeat":
                max_gaia_results = MAX_SOLR_FILTER_IDS * MAX_REPEAT

            results, count, note = similarity_api_search(
                target=search_form.cleaned_data["target"],
                filter=search_form.cleaned_data["descriptors_filter"],
                num_results=max_gaia_results,
                offset=0,
                target_file=target_file,
            )
            gaia_ids = [id[0] for id in results]
            distance_to_target_data = None
            if search_form.cleaned_data["target"] or target_file:
                # Save sound distance to target into view class so it can be accessed by the serializer
                # We only do that when a target is specified (otherwise there is no meaningful distance value)
                distance_to_target_data = dict(results)

        except SimilarityException, e:
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message)
            elif e.status_code == 400:
                raise BadRequestException(msg=e.message)
Exemplo n.º 8
0
                    # If grouping option is on, store grouping info in a dictionary that we can add when serializing sounds
                    more_from_pack_data.update(dict([(int(element['id']), [element['more_from_pack'], element['pack_id'], element['pack_name'], element['other_ids']]) for element in result.docs]))
                current_page += 1
        except SolrException, e:
            raise InvalidUrlException(msg='Solr exception: %s' % e.message)
        except Exception, e:
            if settings.DEBUG:
                raise ServerErrorException(msg=e.message)
            else:
                raise ServerErrorException()

        # Get gaia results
        try:
            results, count, note = similarity_api_search(target=search_form.cleaned_data['target'],
                                                         filter=search_form.cleaned_data['descriptors_filter'],
                                                         num_results=99999999,  # Return all sounds in one page
                                                         offset=0,
                                                         target_file=target_file)
            gaia_ids = [id[0] for id in results]
            distance_to_target_data = None
            if search_form.cleaned_data['target'] or target_file:
                # Save sound distance to target into view class so it can be accessed by the serializer
                # We only do that when a target is specified (otherwise there is no meaningful distance value)
                distance_to_target_data = dict(results)

            if search_form.cleaned_data['group_by_pack']:
                # If results were grouped by pack, we need to update the counts of the 'more_from_pack' property, as they do not
                # consider the gaia search result and will not be accurate.
                keys_to_remove = []
                for key, value in more_from_pack_data.items():
                    ids_from_pack_in_gaia_results = list(set(more_from_pack_data[key][3]).intersection(gaia_ids))