Пример #1
0
def add_sounds_to_solr(sounds):
    """Convert each sound to a Solr document and post the whole batch to the index."""
    solr = Solr(settings.SOLR_URL)
    console_logger.info("creating XML")
    docs = [convert_to_solr_document(snd) for snd in sounds]
    console_logger.info("adding %d sounds to solr index" % len(docs))
    console_logger.info("posting to Solr")
    solr.add(docs)
Пример #2
0
def add_sounds_to_solr(sounds):
    """Index the given sounds in Solr, one document per sound."""
    solr = Solr(settings.SOLR_URL)
    console_logger.info("creating XML")
    documents = []
    for sound in sounds:
        documents.append(convert_to_solr_document(sound))
    console_logger.info("adding %d sounds to solr index" % len(documents))
    console_logger.info("posting to Solr")
    solr.add(documents)
Пример #3
0
def get_all_sound_ids_from_solr(limit=False):
    """Return a sorted list of all sound ids present in the Solr index.

    Pages through the index PAGE_SIZE ids at a time, ordered by creation
    date, until all matches (or *limit* ids) have been collected.

    Args:
        limit: optional maximum number of ids to fetch; falsy means no limit.

    Returns:
        Sorted list of the ids found in the index.
    """
    logger.info("getting all sound ids from solr.")
    if not limit:
        limit = 99999999999999  # effectively unbounded sentinel
    solr = Solr(settings.SOLR_URL)
    solr_ids = []
    solr_count = None
    PAGE_SIZE = 2000
    current_page = 1
    # Test "solr_count is None" FIRST: the original order relied on Python 2
    # allowing "len(...) < None" (always False); on Python 3 that comparison
    # raises TypeError before the None check is reached.
    while (solr_count is None
           or len(solr_ids) < solr_count) and len(solr_ids) < limit:
        response = SolrResponseInterpreter(
            solr.select(
                unicode(
                    search_prepare_query('',
                                         '',
                                         search_prepare_sort(
                                             'created asc',
                                             SEARCH_SORT_OPTIONS_WEB),
                                         current_page,
                                         PAGE_SIZE,
                                         include_facets=False))))
        solr_ids += [element['id'] for element in response.docs]
        solr_count = response.num_found
        current_page += 1
    return sorted(solr_ids)
Пример #4
0
    def get_user_tags(self, use_solr=True):
        """Return the user's top-10 tags with usage counts.

        With use_solr=True (default) the counts come from Solr tag facets
        over this user's sounds and the result is a list of
        {'name': tag, 'count': count} dicts, or False on any search error.
        With use_solr=False a DelayedQueryExecuter wrapping an equivalent
        SQL aggregation is returned instead.
        """
        if use_solr:
            query = SolrQuery()
            query.set_dismax_query('')
            filter_query = 'username:\"%s\"' % self.user.username
            query.set_query_options(field_list=["id"], filter_query=filter_query)
            query.add_facet_fields("tag")
            query.set_facet_options("tag", limit=10, mincount=1)
            solr = Solr(settings.SOLR_URL)

            try:
                results = SolrResponseInterpreter(solr.select(unicode(query)))
            except SolrException as e:
                return False
            except Exception as e:
                # Any other failure (e.g. server unreachable) is also reported
                # as False rather than propagated.
                return False

            return [{'name': tag, 'count': count} for tag, count in results.facets['tag']]

        else:
            # Count tags on this user's sounds that are moderated AND
            # processed 'OK', take the 10 most used, then resolve tag names.
            return DelayedQueryExecuter("""
                   SELECT tags_tag.name AS name, X.c AS count
                     FROM ( SELECT tag_id, count(*) as c
                              FROM tags_taggeditem
                         LEFT JOIN sounds_sound ON object_id=sounds_sound.id
                             WHERE tags_taggeditem.user_id=%d AND
                                   sounds_sound.moderation_state='OK' AND
                                   sounds_sound.processing_state='OK'
                          GROUP BY tag_id
                          ORDER BY c
                        DESC LIMIT 10) AS X
                LEFT JOIN tags_tag ON tags_tag.id=X.tag_id
                 ORDER BY tags_tag.name;""" % self.user_id)
Пример #5
0
def tags(request, multiple_tags=None):
    """Tag-browsing view: query Solr for sounds carrying every given tag.

    `multiple_tags` is a '/'-separated tag string (typically from the URL).
    Results are paginated, grouped by pack, sorted by download count, and
    tag facets are collected alongside.

    NOTE(review): this snippet appears truncated -- no return/render is
    visible after the try/except block; confirm against the full source.
    """
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []
    
    # Drop empty segments (e.g. from doubled slashes) and sort for a
    # canonical tag order.
    multiple_tags = sorted(filter(lambda x: x, multiple_tags))
    
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1

    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    if multiple_tags:
        # AND semantics: a sound must carry every requested tag.
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE, rows=settings.SOUNDS_PER_PAGE, field_list=["id"], sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)
    # Collapse sounds that belong to the same pack into a single group.
    query.set_group_field(group_field="grouping_pack")
    query.set_group_options(group_func=None,
        group_query=None,
        group_rows=10,
        group_start=0,
        group_limit=1,
        group_offset=0,
        group_sort=None,
        group_sort_ingroup=None,
        group_format='grouped',
        group_main=False,
        group_num_groups=True,
        group_cache_percent=0,
        group_truncate=True)  # Sets how many results from the same grup are taken into account for computing the facets

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]

        # Fetch all matching Sound objects in one bulk query and attach each
        # to its Solr doc so downstream code can render them.
        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        for d in docs:
            d["sound"] = allsounds[d["id"]]

    except SolrException, e:
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
Пример #6
0
def perform_solr_query(q, current_page):
    """Run *q* against Solr and return the pieces a search view needs.

    Kept as a standalone helper mainly so unit tests can mock it.

    Returns:
        Tuple of (non-grouped match count, facets, paginator, page, docs).
    """
    interpreter = SolrResponseInterpreter(Solr(settings.SOLR_URL).select(unicode(q)))
    paginator = SolrResponseInterpreterPaginator(interpreter, settings.SOUNDS_PER_PAGE)
    requested_page = paginator.page(current_page)
    return (interpreter.non_grouped_number_of_matches, interpreter.facets,
            paginator, requested_page, interpreter.docs)
Пример #7
0
def check_if_sound_exists_in_solr(sound):
    """Return True if a document with *sound*'s id exists in the Solr index."""
    query = search_prepare_query(
        '', 'id:%i' % sound.id,
        search_prepare_sort('created asc', SEARCH_SORT_OPTIONS_WEB), 1, 1)
    interpreter = SolrResponseInterpreter(
        Solr(settings.SOLR_URL).select(unicode(query)))
    return interpreter.num_found > 0
def get_solr_results(search_form,
                     page_size,
                     max_pages,
                     start_page=1,
                     valid_ids=None,
                     solr=None,
                     offset=None):
    """Collect sound ids from Solr for the given search form, page by page.

    Args:
        search_form: validated form providing 'query', 'filter' and 'sort'.
        page_size: number of results requested per Solr page.
        max_pages: maximum number of page requests to issue.
        start_page: first Solr page to request.
        valid_ids: optional iterable of ids; results are restricted to them.
        solr: optional pre-built Solr client (a new one is created if None).
        offset: optional offset forwarded to the query builder.

    Returns:
        (solr_ids, solr_count): ids collected and the total match count.

    Raises:
        ServerErrorException: on Solr errors or any unexpected failure.
    """
    if not solr:
        solr = Solr(settings.SOLR_URL)

    query_filter = search_form.cleaned_data['filter']
    if valid_ids:
        # Update solr filter to only return results in valid ids
        ids_filter = 'id:(' + ' OR '.join([str(item)
                                           for item in valid_ids]) + ')'
        if query_filter:
            query_filter += ' %s' % ids_filter
        else:
            query_filter = ids_filter

    solr_ids = []
    solr_count = None

    try:
        current_page = start_page
        n_page_requests = 1
        # Iterate over solr result pages. Test "solr_count is None" FIRST:
        # the original wrote "len(...) < solr_count or solr_count == None",
        # which relied on Python 2 tolerating "len < None" and used "== None"
        # where identity ("is None") is the correct comparison.
        while (solr_count is None
               or len(solr_ids) < solr_count) and n_page_requests <= max_pages:
            query = search_prepare_query(unquote(
                search_form.cleaned_data['query'] or ""),
                                         unquote(query_filter or ""),
                                         search_form.cleaned_data['sort'],
                                         current_page,
                                         page_size,
                                         grouping=False,
                                         include_facets=False,
                                         offset=offset)
            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids += [element['id'] for element in result.docs]
            solr_count = result.num_found

            current_page += 1
            n_page_requests += 1

    except SolrException as e:
        raise ServerErrorException(msg='Search server error: %s' % e.message)
    except Exception as e:
        raise ServerErrorException(
            msg=
            'The search server could not be reached or some unexpected error occurred.'
        )

    return solr_ids, solr_count
Пример #9
0
def perform_solr_query(q, current_page):
    """Execute *q* on the SOLR server and unpack the response for a view.

    Split out as a helper so unit tests can mock the Solr round-trip.

    Returns:
        (non_grouped_number_of_matches, facets, paginator, page, docs).
    """
    client = Solr(settings.SOLR_URL)
    response = SolrResponseInterpreter(client.select(unicode(q)))
    pages = SolrResponseInterpreterPaginator(response, settings.SOUNDS_PER_PAGE)
    selected = pages.page(current_page)
    return (response.non_grouped_number_of_matches, response.facets, pages,
            selected, response.docs)
Пример #10
0
def get_pack_tags(pack_obj):
    """Return the Solr tag facets for the sounds of *pack_obj*.

    Filters by the pack owner's username and the pack name, requesting up
    to 20 tags that occur at least once.

    Returns:
        The facets structure from the Solr response, or False on failure.
    """
    query = SolrQuery()
    query.set_dismax_query('')
    filter_query = 'username:\"%s\" pack:\"%s\"' % (pack_obj.user.username, pack_obj.name)
    query.set_query_options(field_list=["id"], filter_query=filter_query)
    query.add_facet_fields("tag")
    query.set_facet_options("tag", limit=20, mincount=1)
    try:
        solr = Solr(settings.SOLR_URL)
        results = SolrResponseInterpreter(solr.select(unicode(query)))
    except Exception:
        # Was "except (SolrException, Exception) as e": listing SolrException
        # is redundant since Exception already covers it, and the binding
        # was unused.
        #  TODO: do something here?
        return False
    return results.facets
Пример #11
0
def get_pack_tags(pack_obj):
    """Fetch tag facet counts for the sounds in *pack_obj* from Solr.

    Returns the facets on success, or False if the query fails.
    """
    filter_query = 'username:\"%s\" pack:\"%s\"' % (pack_obj.user.username,
                                                    pack_obj.name)
    query = SolrQuery()
    query.set_dismax_query('')
    query.set_query_options(field_list=["id"], filter_query=filter_query)
    query.add_facet_fields("tag")
    query.set_facet_options("tag", limit=20, mincount=1)
    try:
        client = Solr(settings.SOLR_URL)
        results = SolrResponseInterpreter(client.select(unicode(query)))
    except (SolrException, Exception):
        #  TODO: do something here?
        return False
    return results.facets
Пример #12
0
    def get_user_tags(self, use_solr=True):
        """Query Solr for this user's top-10 tag facets.

        Returns False on any search error. NOTE(review): this snippet
        appears truncated -- on a successful query it falls through and
        returns None, while other variants return the facet list; confirm
        against the full source.
        """
        if use_solr:
            query = SolrQuery()
            query.set_dismax_query('')
            filter_query = 'username:\"%s\"' % self.user.username
            query.set_query_options(field_list=["id"], filter_query=filter_query)
            query.add_facet_fields("tag")
            query.set_facet_options("tag", limit=10, mincount=1)
            solr = Solr(settings.SOLR_URL)

            try:
                results = SolrResponseInterpreter(solr.select(unicode(query)))
            except SolrException:
                # Was "except SolrException, e" -- Python 2-only syntax; the
                # bound exception was unused, so the binding is dropped.
                return False
            except Exception:
                return False
Пример #13
0
    def get_user_tags(self, use_solr=True):
        """Return this user's top-10 tag facets from Solr, or False on error.

        NOTE(review): snippet appears truncated -- no success-path return is
        visible here; confirm the full source before relying on the value.
        """
        if use_solr:
            query = SolrQuery()
            query.set_dismax_query('')
            filter_query = 'username:\"%s\"' % self.user.username
            query.set_query_options(field_list=["id"], filter_query=filter_query)
            query.add_facet_fields("tag")
            query.set_facet_options("tag", limit=10, mincount=1)
            solr = Solr(settings.SOLR_URL)

            try:
                results = SolrResponseInterpreter(solr.select(unicode(query)))
            except SolrException:
                # "except SolrException, e" is Python 2-only syntax and the
                # bound name was unused; modernized to a bare handler.
                return False
            except Exception:
                return False
Пример #14
0
def delete_sound_from_solr(sound_id):
    """Remove the sound with *sound_id* from the Solr index, logging failures."""
    search_logger.info("deleting sound with id %d" % sound_id)
    try:
        solr = Solr(settings.SOLR_URL)
        solr.delete_by_id(sound_id)
    except (SolrException, socket.error) as e:
        search_logger.error('could not delete sound with id %s (%s).' %
                            (sound_id, e))
Пример #15
0
def get_pack_tags(pack_obj):
    """Query Solr for tag facet counts over the sounds in *pack_obj*.

    Returns False on a Solr error. NOTE(review): snippet appears truncated --
    the success path has no visible return here (other variants return
    results.facets); confirm against the full source.
    """
    query = SolrQuery()
    query.set_dismax_query('')
    filter_query = 'username:\"%s\" pack:\"%s\"' % (pack_obj.user.username, pack_obj.name)
    query.set_query_options(field_list=["id"], filter_query=filter_query)
    query.add_facet_fields("tag")
    query.set_facet_options("tag", limit=20, mincount=1)
    solr = Solr(settings.SOLR_URL)

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
    except SolrException:
        # Was "except SolrException, e" (Python 2-only syntax, binding
        # unused); dead commented-out error-handling code removed.
        return False
Пример #16
0
    def items(self, obj):
        """Return up to 100 Solr sound docs matching obj's query, for an RSS feed.

        Args:
            obj: dict with 'query' (search string), 'type' ("phrase" for an
                exact phrase, "any" for OR-matching, anything else for AND),
                'limit' (capped at 100) and 'offset'.

        Returns:
            List of Solr result docs, or [] on empty query or any error.
        """
        if obj['query'] != "":
            try:
                solr = Solr(settings.SOLR_URL)
                query = SolrQuery()
                # Per-field boost weights for dismax matching.
                fields = [('id', 4),
                          ('tag', 3),
                          ('description', 3),
                          ('username', 2),
                          ('pack_tokenized', 2),
                          ('original_filename', 2), ]

                if obj['type'] == "phrase":
                    query.set_dismax_query('"' + obj['query'] + '"', query_fields=fields)  # EXACT (not 100%)
                elif obj['type'] == "any":
                    query.set_dismax_query(obj['query'], query_fields=[], minimum_match=0)  # OR
                else:
                    query.set_dismax_query(obj['query'], query_fields=[], minimum_match="100%")  # AND

                # Cap the page size at 100 results.
                lim = obj['limit']
                if lim > 100:
                    lim = 100

                query.set_query_options(start=obj['offset'], rows=lim, filter_query="", sort=['created desc'])

                try:
                    results = SolrResponseInterpreter(solr.select(unicode(query)))

                    sounds = []
                    for doc in results.docs:  # renamed from "object", which shadowed the builtin
                        try:
                            sounds.append(doc)
                        except Exception:
                            # Synchronization errors between the solr index and
                            # the database: omit the sound (num_results and
                            # per-page counts may become inaccurate).
                            pass

                    logger.info("Sound pool search RSS")
                    return sounds

                except SolrException:
                    # Was "except SolrException, e" (Python 2-only syntax);
                    # the binding was unused.
                    return []
            except Exception:
                # Was a bare "except:": narrowed so SystemExit and
                # KeyboardInterrupt are no longer swallowed.
                return []
        else:
            return []
Пример #17
0
def get_pack_tags(pack_obj):
    """Fetch up to 20 tag facets for the sounds of *pack_obj* from Solr.

    Returns False on a Solr error. NOTE(review): snippet appears truncated --
    no success-path return is visible (other variants return results.facets);
    confirm against the full source.
    """
    query = SolrQuery()
    query.set_dismax_query('')
    filter_query = 'username:\"%s\" pack:\"%s\"' % (pack_obj.user.username, pack_obj.name)
    query.set_query_options(field_list=["id"], filter_query=filter_query)
    query.add_facet_fields("tag")
    query.set_facet_options("tag", limit=20, mincount=1)
    solr = Solr(settings.SOLR_URL)

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
    except SolrException:
        # Modernized from Python 2-only "except SolrException, e" (binding
        # unused); commented-out dead code removed.
        return False
Пример #18
0
    def get_user_tags(self):
        """Return this user's top-10 tags as [{'name': ..., 'count': ...}, ...].

        Queries Solr tag facets over the user's sounds; returns False if the
        search server errors out or is unreachable.
        """
        solr_query = SolrQuery()
        solr_query.set_dismax_query('')
        solr_query.set_query_options(field_list=["id"],
                                     filter_query='username:\"%s\"' % self.user.username)
        solr_query.add_facet_fields("tag")
        solr_query.set_facet_options("tag", limit=10, mincount=1)

        try:
            results = SolrResponseInterpreter(
                Solr(settings.SOLR_URL).select(unicode(solr_query)))
        except SolrException as e:
            return False
        except Exception as e:
            return False

        return [{'name': name, 'count': n} for name, n in results.facets['tag']]
Пример #19
0
    def handle(self, *args, **options):
        """Management command: remove Solr sound docs that no longer have a
        matching, fully moderated/processed Sound row in the database.

        First pages through every id in the Solr index in slices of
        SLICE_SIZE, then deletes each id whose Sound is missing or not in
        state 'OK'/'OK'.
        """

        LIMIT = None  # optional cap on how many ids to check (None = all)
        SLICE_SIZE = 500
        solr_sound_ids = []
        solr = Solr(url=settings.SOLR_URL)
        query = SolrQuery()
        query.set_dismax_query("") # Query to get ALL sounds

        print "Retrieving ids from %i to %i"%(0,SLICE_SIZE)
        query.set_query_options(field_list=["id"], rows = SLICE_SIZE, start = 0)
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        solr_sound_ids += list_of_dicts_to_list_of_ids(results.docs)
        total_num_documents = results.num_found

        # Start iterating over other pages (slices)
        if LIMIT:
            number_of_documents = min(LIMIT,total_num_documents)
        else:
            number_of_documents = total_num_documents

        for i in range(SLICE_SIZE,number_of_documents,SLICE_SIZE):
            print "Retrieving ids from %i to %i"%(i,i+SLICE_SIZE)
            query.set_query_options(field_list=["id"], rows = SLICE_SIZE, start = i)
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_sound_ids += list_of_dicts_to_list_of_ids(results.docs)

        # De-duplicate and order the collected ids.
        solr_sound_ids = sorted(list(set(solr_sound_ids)))
        if LIMIT:
            solr_sound_ids = solr_sound_ids[0:LIMIT]
        print "%i document ids retrieved"%len(solr_sound_ids)
        n_deleted = 0
        print ""
        for count,id in enumerate(solr_sound_ids):
            # \r keeps the progress counter on a single console line.
            sys.stdout.write("\rChecking doc %i of %i"%(count,len(solr_sound_ids)))
            sys.stdout.flush()

            if Sound.objects.filter(id=id,moderation_state="OK",processing_state="OK").exists():
                pass
            else:
                # Sound does not exist in the Db or is not properly moderated and processed
                print "\n\t - Deleting sound with id %i from solr index"%id
                solr.delete_by_id(id)
                n_deleted += 1

        print "\n\nDONE! %i sounds deleted from solr index (it may take some minutes to actually see the changes in the page)"%n_deleted
Пример #20
0
def get_all_sound_ids_from_solr(limit=False):
    """Return a sorted list of all sound ids in the Solr index.

    Pages through the index 2000 ids at a time ordered by creation date.
    *limit* (falsy = unlimited) caps how many ids are collected.
    """
    logger.info("getting all sound ids from solr.")
    if not limit:
        limit = 99999999999999  # effectively unbounded sentinel
    solr = Solr(settings.SOLR_URL)
    solr_ids = []
    solr_count = None
    PAGE_SIZE = 2000
    current_page = 1
    # "solr_count is None" is tested first: the original order relied on
    # Python 2 tolerating "len(...) < None", which raises on Python 3.
    while (solr_count is None or len(solr_ids) < solr_count) and len(solr_ids) < limit:
        response = SolrResponseInterpreter(
            solr.select(unicode(search_prepare_query(
                '', '', search_prepare_sort('created asc', SEARCH_SORT_OPTIONS_WEB), current_page, PAGE_SIZE,
                include_facets=False))))
        solr_ids += [element['id'] for element in response.docs]
        solr_count = response.num_found
        current_page += 1
    return sorted(solr_ids)
Пример #21
0
def get_solr_results(search_form, page_size, max_pages, start_page=1, valid_ids=None, solr=None, offset=None):
    """Collect sound ids from Solr for the given search form, page by page.

    Args:
        search_form: validated form providing 'query', 'filter' and 'sort'.
        page_size: number of results requested per Solr page.
        max_pages: maximum number of page requests to issue.
        start_page: first Solr page to request.
        valid_ids: optional iterable of ids to restrict the results to.
        solr: optional pre-built Solr client (a new one is created if None).
        offset: optional offset forwarded to the query builder.

    Returns:
        (solr_ids, solr_count): ids collected and the total match count.

    Raises:
        ServerErrorException: on Solr errors or any unexpected failure.
    """
    if not solr:
        solr = Solr(settings.SOLR_URL)

    query_filter = search_form.cleaned_data['filter']
    if valid_ids:
        # Update solr filter to only return results in valid ids
        ids_filter = 'id:(' + ' OR '.join([str(item) for item in valid_ids]) + ')'
        if query_filter:
            query_filter += ' %s' % ids_filter
        else:
            query_filter = ids_filter

    solr_ids = []
    solr_count = None

    try:
        current_page = start_page
        n_page_requests = 1
        # Iterate over solr result pages. The None check comes first and uses
        # "is None": the original "len(...) < solr_count or solr_count == None"
        # relied on Python 2 tolerating "len < None" and compared with "==".
        while (solr_count is None or len(solr_ids) < solr_count) and n_page_requests <= max_pages:
            query = search_prepare_query(unquote(search_form.cleaned_data['query'] or ""),
                                         unquote(query_filter or ""),
                                         search_form.cleaned_data['sort'],
                                         current_page,
                                         page_size,
                                         grouping=False,
                                         include_facets=False,
                                         offset=offset)
            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids += [element['id'] for element in result.docs]
            solr_count = result.num_found

            current_page += 1
            n_page_requests += 1

    except SolrException as e:
        raise ServerErrorException(msg='Search server error: %s' % e.message)
    except Exception as e:
        raise ServerErrorException(msg='The search server could not be reached or some unexpected error occurred.')

    return solr_ids, solr_count
Пример #22
0
def get_random_sound_from_solr():
    """Pick one random, non-explicit sound document from Solr.

    Used for random sound browsing: explicit sounds are filtered out, but
    there are no other restrictions. Returns the Solr doc dict, or {} if the
    query fails or matches nothing.
    """
    query = SolrQuery()
    query.set_query("*:*")
    random_sort = 'random_%d asc' % random.randint(1, 10000000)
    query.set_query_options(start=0, rows=1, field_list=["*"],
                            filter_query='is_explicit:0', sort=[random_sort])
    try:
        response = SolrResponseInterpreter(
            Solr(settings.SOLR_URL).select(unicode(query)))
        if response.docs:
            return response.docs[0]
    except (SolrException, socket.error):
        pass
    return {}
Пример #23
0
def delete_sounds_from_solr(sound_ids):
    """Delete the given sound ids from the Solr index in chunks.

    The chunk size matches the boolean-clause limit configured in
    solrconfig.xml; each chunk is logged, and failed chunks are reported
    without aborting the remaining ones.
    """
    solr_max_boolean_clause = 1000  # This number is specified in solrconfig.xml
    total_chunks = int(math.ceil(float(len(sound_ids)) / solr_max_boolean_clause))
    for count, start in enumerate(range(0, len(sound_ids), solr_max_boolean_clause)):
        chunk = sound_ids[start:start + solr_max_boolean_clause]
        try:
            logger.info("deleting %i sounds from solr [%i of %i, %i sounds]" %
                        (len(sound_ids), count + 1, total_chunks, len(chunk)))
            delete_query = ' OR '.join(['id:{0}'.format(sid) for sid in chunk])
            Solr(settings.SOLR_URL).delete_by_query(delete_query)
        except (SolrException, socket.error) as e:
            logger.error('could not delete solr sounds chunk %i of %i' %
                         (count + 1, total_chunks))
Пример #24
0
def tags(request, multiple_tags=None):
    """Tag-browsing view: query Solr for sounds matching every given tag.

    `multiple_tags` is a '/'-separated tag string (usually from the URL).
    NOTE(review): this snippet appears truncated -- it computes `paginator`,
    `page`, `tags` and `error` but no return/render is visible here.
    """
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []

    # Drop empty segments (e.g. from doubled slashes) and canonicalise order.
    multiple_tags = sorted(filter(lambda x: x, multiple_tags))

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1

    solr = Solr(settings.SOLR_URL)

    query = SolrQuery()
    if multiple_tags:
        # AND semantics: a sound must carry every requested tag.
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE, rows=settings.SOUNDS_PER_PAGE, field_list=["id"], sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        page = paginator.page(current_page)
        error = False
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]
    except SolrException as e:  # was "except SolrException, e" (Python 2-only syntax)
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
Пример #25
0
    def handle(self, *args, **options):
        """Management command: delete Solr sound docs whose Sound row is
        missing from the database or not moderated/processed 'OK'.

        Pages through every id in the Solr index in slices of SLICE_SIZE,
        then checks each id against the Sound table and deletes orphans.
        """

        LIMIT = None  # optional cap on how many ids to check (None = all)
        SLICE_SIZE = 500
        solr_sound_ids = []
        solr = Solr(url=settings.SOLR_URL)
        query = SolrQuery()
        query.set_dismax_query("")  # Query to get ALL sounds

        print "Retrieving ids from %i to %i" % (0, SLICE_SIZE)
        query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=0)
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        solr_sound_ids += list_of_dicts_to_list_of_ids(results.docs)
        total_num_documents = results.num_found

        # Start iterating over other pages (slices)
        if LIMIT:
            number_of_documents = min(LIMIT, total_num_documents)
        else:
            number_of_documents = total_num_documents

        for i in range(SLICE_SIZE, number_of_documents, SLICE_SIZE):
            print "Retrieving ids from %i to %i" % (i, i + SLICE_SIZE)
            query.set_query_options(field_list=["id"],
                                    rows=SLICE_SIZE,
                                    start=i)
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_sound_ids += list_of_dicts_to_list_of_ids(results.docs)

        # De-duplicate and order the collected ids.
        solr_sound_ids = sorted(list(set(solr_sound_ids)))
        if LIMIT:
            solr_sound_ids = solr_sound_ids[0:LIMIT]
        print "%i document ids retrieved" % len(solr_sound_ids)
        n_deleted = 0
        print ""
        for count, id in enumerate(solr_sound_ids):
            # \r keeps the progress counter on a single console line.
            sys.stdout.write("\rChecking doc %i of %i" %
                             (count, len(solr_sound_ids)))
            sys.stdout.flush()

            if Sound.objects.filter(id=id,
                                    moderation_state="OK",
                                    processing_state="OK").exists():
                pass
            else:
                # Sound does not exist in the Db or is not properly moderated and processed
                print "\n\t - Deleting sound with id %i from solr index" % id
                solr.delete_by_id(id)
                n_deleted += 1

        print "\n\nDONE! %i sounds deleted from solr index (it may take some minutes to actually see the changes in the page)" % n_deleted
Пример #26
0
    def handle(self, *args, **options):
        """Management command: delete forum-post docs from the Solr forum
        index when the corresponding Post is missing from the database or
        not moderated 'OK'.

        Pages through every indexed id in slices of SLICE_SIZE, then checks
        each id against the Post table and deletes orphans.
        """

        LIMIT = None  # optional cap on how many ids to check (None = all)
        SLICE_SIZE = 500
        solr_post_ids = []
        solr = Solr(url=settings.SOLR_FORUM_URL)
        query = SolrQuery()
        query.set_dismax_query("")  # Query to get ALL forums

        console_logger.info("Retrieving ids from %i to %i"%(0,SLICE_SIZE))
        query.set_query_options(field_list=["id"], rows = SLICE_SIZE, start = 0)
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        solr_post_ids += list_of_dicts_to_list_of_ids(results.docs)
        total_num_documents = results.num_found

        # Start iterating over other pages (slices)
        if LIMIT:
            number_of_documents = min(LIMIT,total_num_documents)
        else:
            number_of_documents = total_num_documents

        for i in range(SLICE_SIZE, number_of_documents,SLICE_SIZE):
            console_logger.info("Retrieving ids from %i to %i"%(i,i+SLICE_SIZE-1))
            query.set_query_options(field_list=["id"], rows = SLICE_SIZE, start = i)
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_post_ids += list_of_dicts_to_list_of_ids(results.docs)

        # De-duplicate and order the collected ids.
        solr_post_ids = sorted(list(set(solr_post_ids)))
        if LIMIT:
            solr_post_ids = solr_post_ids[0:LIMIT]
        console_logger.info("%i document ids retrieved"%len(solr_post_ids))
        n_deleted = 0
        console_logger.info("")
        for count, id in enumerate(solr_post_ids):
            # Log progress every 100 documents.
            if count % 100 == 0:
                console_logger.info("\rChecking docs %i/%i"%(count,len(solr_post_ids)))

            if Post.objects.filter(id=id,moderation_state="OK").exists():
                pass
            else:
                # Post does not exist in the Db or is not properly moderated and processed
                console_logger.info("\n\t - Deleting forum with id %i from solr index" % id)
                solr.delete_by_id(id)
                n_deleted += 1

        console_logger.info("\n\nDONE! %i forums deleted from solr index (it may take some minutes to actually see "
                            "the changes in the page)" % n_deleted)
Пример #27
0
def api_search(search_form,
               target_file=None,
               extra_parameters=False,
               merging_strategy='merge_optimized',
               resource=None):
    """Dispatch an API search request to the appropriate backend.

    Routes to one of:
      * empty result if no query, filter, descriptors_filter, target or
        target_file was supplied;
      * the similarity server for pure content-based searches;
      * Solr for pure text-based searches;
      * a combined strategy from `combined_search_strategies` otherwise.

    Returns:
        A 7-tuple (ids, count, distance_to_target_data, more_from_pack_data,
        note, _, _) where slots unused by the chosen path are None.

    Raises:
        ServerErrorException / BadRequestException / NotFoundException when
        the backing search or similarity service fails.
    """

    if search_form.cleaned_data['query']  is None \
            and search_form.cleaned_data['filter'] is None \
            and not search_form.cleaned_data['descriptors_filter'] \
            and not search_form.cleaned_data['target'] \
            and not target_file:
        # No input data for search, return empty results
        return [], 0, None, None, None, None, None

    if search_form.cleaned_data['query'] is None and search_form.cleaned_data[
            'filter'] is None:
        # Standard content-based search
        try:
            results, count, note = similarity_api_search(
                target=search_form.cleaned_data['target'],
                filter=search_form.cleaned_data['descriptors_filter'],
                num_results=search_form.cleaned_data['page_size'],
                offset=(search_form.cleaned_data['page'] - 1) *
                search_form.cleaned_data['page_size'],
                target_file=target_file)

            gaia_ids = [result[0] for result in results]
            distance_to_target_data = None
            if search_form.cleaned_data['target'] or target_file:
                # Save sound distance to target into view class so it can be accessed by the serializer
                # We only do that when a target is specified (otherwise there is no meaningful distance value)
                distance_to_target_data = dict(results)

            gaia_count = count
            return gaia_ids, gaia_count, distance_to_target_data, None, note, None, None
        except SimilarityException as e:
            # Map similarity-server status codes onto API exception types.
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message, resource=resource)
            elif e.status_code == 400:
                raise BadRequestException(msg=e.message, resource=resource)
            elif e.status_code == 404:
                raise NotFoundException(msg=e.message, resource=resource)
            else:
                raise ServerErrorException(msg='Similarity server error: %s' %
                                           e.message,
                                           resource=resource)
        except Exception as e:
            raise ServerErrorException(
                msg=
                'The similarity server could not be reached or some unexpected error occurred.',
                resource=resource)

    elif not search_form.cleaned_data['descriptors_filter'] \
            and not search_form.cleaned_data['target'] \
            and not target_file:

        # Standard text-based search
        try:
            solr = Solr(settings.SOLR_URL)
            query = search_prepare_query(
                unquote(search_form.cleaned_data['query'] or ""),
                unquote(search_form.cleaned_data['filter'] or ""),
                search_form.cleaned_data['sort'],
                search_form.cleaned_data['page'],
                search_form.cleaned_data['page_size'],
                grouping=search_form.cleaned_data['group_by_pack'],
                include_facets=False)

            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids = [element['id'] for element in result.docs]
            solr_count = result.num_found

            more_from_pack_data = None
            if search_form.cleaned_data['group_by_pack']:
                # If grouping option is on, store grouping info in a dictionary that we can add when serializing sounds
                more_from_pack_data = dict([(int(element['id']), [
                    element['more_from_pack'], element['pack_id'],
                    element['pack_name']
                ]) for element in result.docs])

            return solr_ids, solr_count, None, more_from_pack_data, None, None, None

        except SolrException as e:
            # A filter was supplied: most likely a user syntax error, so
            # report it as a bad request with a hint.
            if search_form.cleaned_data['filter'] is not None:
                raise BadRequestException(
                    msg=
                    'Search server error: %s (please check that your filter syntax and field '
                    'names are correct)' % e.message,
                    resource=resource)
            raise BadRequestException(msg='Search server error: %s' %
                                      e.message,
                                      resource=resource)
        except Exception as e:
            raise ServerErrorException(
                msg=
                'The search server could not be reached or some unexpected error occurred.',
                resource=resource)

    else:
        # Combined search (there is at least one of query/filter and one of descriptors_filter/target)
        # Strategies are implemented in 'combined_search_strategies'
        strategy = getattr(combined_search_strategies, merging_strategy)
        return strategy(search_form,
                        target_file=target_file,
                        extra_parameters=extra_parameters)
Пример #28
0
def search(request):
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    filter_query_link_more_when_grouping_packs = filter_query.replace(' ','+')

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    sort = request.GET.get("s", None)
    sort_options = forms.SEARCH_SORT_OPTIONS_WEB


    grouping = request.GET.get("g", "1") # Group by default
    actual_groupnig = grouping
    # If the query is filtered by pack, do not collapse sounds of the same pack (makes no sense)
    # If the query is thourhg ajax (for sources remix editing), do not collapse
    if "pack" in filter_query or request.GET.get("ajax", "") == "1":
        actual_groupnig = ""

    # Set default values
    id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
    tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
    description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
    username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
    pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
    original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # Parse advanced search options
    advanced = request.GET.get("advanced", "")

    # if advanced search
    if advanced == "1" :
        a_tag = request.GET.get("a_tag", "")
        a_filename = request.GET.get("a_filename", "")
        a_description = request.GET.get("a_description", "")
        a_packname = request.GET.get("a_packname", "")
        a_soundid = request.GET.get("a_soundid", "")
        a_username = request.GET.get("a_username", "")

        # If none is selected use all (so other filter can be appleid)
        if a_tag or a_filename or a_description or a_packname or a_soundid or a_username != "" :

            # Initialize all weights to 0
            id_weight = 0
            tag_weight = 0
            description_weight = 0
            username_weight = 0
            pack_tokenized_weight = 0
            original_filename_weight = 0

            # Set the weights of selected checkboxes
            if a_soundid != "" :
                id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
            if a_tag != "" :
                tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
            if a_description != "" :
                description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
            if a_username != "" :
                username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
            if a_packname != "" :
                pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
            if a_filename != "" :
                original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # ALLOW "q" empty queries
    #if search_query.strip() == ""

    sort = search_prepare_sort(sort, forms.SEARCH_SORT_OPTIONS_WEB)

    query = search_prepare_query(search_query,
                                 filter_query,
                                 sort,
                                 current_page,
                                 settings.SOUNDS_PER_PAGE,
                                 id_weight,
                                 tag_weight,
                                 description_weight,
                                 username_weight,
                                 pack_tokenized_weight,
                                 original_filename_weight,
                                 grouping = actual_groupnig
                                 )
    
    solr = Solr(settings.SOLR_URL) 
        
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False
       
        # clickusage tracking           
        if settings.LOG_CLICKTHROUGH_DATA:
            request_full_path = request.get_full_path()
            # The session id of an unauthenticated user is different from the session id of the same user when
            # authenticated.
            request.session["searchtime_session_key"] = request.session.session_key
            if results.docs is not None:
                ids = []
                for item in results.docs:
                    ids.append(item["id"])
            logger_click.info("QUERY : %s : %s : %s : %s" %
                                (unicode(request_full_path).encode('utf-8'), request.session.session_key, unicode(ids).encode('utf-8'), unicode(current_page).encode('utf-8')))

    except SolrException, e:
        logger.warning("search error: query: %s error %s" % (query, e))
        error = True
        error_text = 'There was an error while searching, is your query correct?'
Пример #29
0
def get_stream_sounds(user, time_lapse):
    """Collect recent sounds for a user's "stream": sounds uploaded by the
    users this user follows, and sounds matching the tags this user follows,
    restricted to `time_lapse` (a Solr `created` date-range string).

    Returns a tuple (users_sounds, tags_sounds), one entry per followed
    user / followed tag combination that produced results.
    """

    solr = Solr(settings.SOLR_URL)

    # Newest sounds first
    sort_str = search_prepare_sort("created desc", SEARCH_SORT_OPTIONS_WEB)

    #
    # USERS FOLLOWING
    #

    users_following = get_users_following(user)

    users_sounds = []
    for user_following in users_following:

        # NOTE(review): this line was mangled by redaction ("******") in the
        # scraped source and is not valid Python as written; it presumably
        # built "username:<name> created:<time_lapse>" -- confirm upstream.
        filter_str = "username:"******" created:" + time_lapse

        # Empty text query: only the filter and sort restrict the results
        query = search_prepare_query(
            "",
            filter_str,
            sort_str,
            1,
            SOLR_QUERY_LIMIT_PARAM,
            grouping=False,
            include_facets=False
        )

        result = SolrResponseInterpreter(solr.select(unicode(query)))

        if result.num_rows != 0:

            # How many more matches exist beyond the first page of results
            more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)

            # the sorting only works if done like this!
            more_url_params = [urllib.quote(filter_str), urllib.quote(sort_str[0])]

            # this is the same link but for the email has to be "quoted"
            # (more_url is currently unused; kept for the commented email variant)
            more_url = u"?f=" + filter_str + u"&s=" + sort_str[0]
            # more_url_quoted = urllib.quote(more_url)

            sound_ids = [element['id'] for element in result.docs]
            sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids).select_related('license', 'user')
            new_count = more_count + len(sound_ids)
            users_sounds.append(((user_following, False), sound_objs, more_url_params, more_count, new_count))

    #
    # TAGS FOLLOWING
    #

    tags_following = get_tags_following(user)

    tags_sounds = []
    for tag_following in tags_following:

        # A followed "tag" may be a space-separated combination of tags;
        # require all of them via repeated tag: clauses
        tags = tag_following.split(" ")
        tag_filter_query = ""
        for tag in tags:
            tag_filter_query += "tag:" + tag + " "

        tag_filter_str = tag_filter_query + " created:" + time_lapse

        query = search_prepare_query(
            "",
            tag_filter_str,
            sort_str,
            1,
            SOLR_QUERY_LIMIT_PARAM,
            grouping=False,
            include_facets=False
        )

        result = SolrResponseInterpreter(solr.select(unicode(query)))

        if result.num_rows != 0:

            more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)

            # the sorting only works if done like this!
            more_url_params = [urllib.quote(tag_filter_str), urllib.quote(sort_str[0])]

            # this is the same link but for the email has to be "quoted"
            more_url = u"?f=" + tag_filter_str + u"&s=" + sort_str[0]
            # more_url_quoted = urllib.quote(more_url)

            sound_ids = [element['id'] for element in result.docs]
            sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids)
            new_count = more_count + len(sound_ids)
            tags_sounds.append((tags, sound_objs, more_url_params, more_count, new_count))

    return users_sounds, tags_sounds
Пример #30
0
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
#     See AUTHORS file.
#

import time
from utils.search.solr import Solr, SolrQuery, SolrResponseInterpreter

"""An example of usage, with the freesound custom schema...
"""

solr = Solr("http://localhost:8983/solr/", persistent=True)

lines = file("searches").readlines()
lines.reverse()

num_queries_total = 0
num_queries_this_loop = 0

time_solr = 0
results_solr = 0
results_before = 0

start = time.time()
start_this_loop = start

for index, line in enumerate(lines):
Пример #31
0
def search(request):
    """Sound search view (variant with filter breadcrumbs and DB hydration):
    parse GET parameters, build a weighted Solr query, run it, and attach the
    matching Sound DB objects to the returned Solr docs.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    # '+'-encoded copy of the filter, used for "more from this pack" links
    filter_query_link_more_when_grouping_packs = filter_query.replace(' ', '+')

    # Generate array with information of filters (for the removable filter
    # "breadcrumbs" shown in the UI); each entry carries a display name and
    # the filter string with that clause removed
    filter_query_split = []
    if filter_query != "":
        for filter_str in re.findall(r'[\w-]+:\"[^\"]+', filter_query):
            filter_str = filter_str + '"'
            filter_display = filter_str.replace('"', '')
            filter_name = filter_str.split(":")[0]
            if filter_name != "duration" and filter_name != "is_geotagged":
                if filter_name == "grouping_pack":
                    # grouping_pack values look like "<id>_<name>"; show only the name
                    val = filter_display.split(":")[1]
                    filter_display = "pack:" + val.split("_")[1]

                # NOTE(review): `filter` shadows the builtin of the same name
                filter = {
                    'name': filter_display,
                    'remove_url': filter_query.replace(filter_str, ''),
                }
                filter_query_split.append(filter)

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    sort = request.GET.get("s", None)
    sort_options = forms.SEARCH_SORT_OPTIONS_WEB

    grouping = request.GET.get("g", "1")  # Group by default
    # (sic: "groupnig" typo in the local name, kept as-is)
    actual_groupnig = grouping
    # If the query is filtered by pack, do not collapse sounds of the same pack (makes no sense)
    # If the query is through ajax (for sources remix editing), do not collapse
    if "pack" in filter_query or request.GET.get("ajax", "") == "1":
        actual_groupnig = ""

    # Set default per-field weights
    id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
    tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
    description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
    username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
    pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
    original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS[
        'original_filename']

    # Parse advanced search options
    advanced = request.GET.get("advanced", "")

    # if advanced search
    if advanced == "1":
        a_tag = request.GET.get("a_tag", "")
        a_filename = request.GET.get("a_filename", "")
        a_description = request.GET.get("a_description", "")
        a_packname = request.GET.get("a_packname", "")
        a_soundid = request.GET.get("a_soundid", "")
        a_username = request.GET.get("a_username", "")

        # If none is selected use all (so other filter can be appleid)
        # NOTE(review): due to precedence only `a_username` is compared to "";
        # behaviour is the same here because empty strings are falsy anyway
        if a_tag or a_filename or a_description or a_packname or a_soundid or a_username != "":

            # Initialize all weights to 0
            id_weight = 0
            tag_weight = 0
            description_weight = 0
            username_weight = 0
            pack_tokenized_weight = 0
            original_filename_weight = 0

            # Set the weights of selected checkboxes
            if a_soundid != "":
                id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
            if a_tag != "":
                tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
            if a_description != "":
                description_weight = settings.DEFAULT_SEARCH_WEIGHTS[
                    'description']
            if a_username != "":
                username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
            if a_packname != "":
                pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS[
                    'pack_tokenized']
            if a_filename != "":
                original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS[
                    'original_filename']

    # ALLOW "q" empty queries
    #if search_query.strip() == ""

    sort = search_prepare_sort(sort, forms.SEARCH_SORT_OPTIONS_WEB)

    # Structured audit log of the search request (the a_* fields are only
    # defined when advanced == "1"; the conditional below guards their use)
    logger.info(u'Search (%s)' % json.dumps({
        'ip':
        get_client_ip(request),
        'query':
        search_query,
        'filter':
        filter_query,
        'username':
        request.user.username,
        'page':
        current_page,
        'sort':
        sort[0],
        'group_by_pack':
        actual_groupnig,
        'advanced':
        json.dumps({
            'search_in_tag': a_tag,
            'search_in_filename': a_filename,
            'search_in_description': a_description,
            'search_in_packname': a_packname,
            'search_in_soundid': a_soundid,
            'search_in_username': a_username
        }) if advanced == "1" else ""
    }))

    query = search_prepare_query(search_query,
                                 filter_query,
                                 sort,
                                 current_page,
                                 settings.SOUNDS_PER_PAGE,
                                 id_weight,
                                 tag_weight,
                                 description_weight,
                                 username_weight,
                                 pack_tokenized_weight,
                                 original_filename_weight,
                                 grouping=actual_groupnig)

    solr = Solr(settings.SOLR_URL)

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results,
                                                     settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False

        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        # allsounds will contain info from all the sounds returned by bulk_query_id. This should
        # be all sounds in docs, but if solr and db are not synchronised, it might happen that there
        # are ids in docs which are not found in bulk_query_id. To avoid problems we remove elements
        # in docs that have not been loaded in allsounds.
        docs = [doc for doc in docs if doc["id"] in allsounds]
        for d in docs:
            d["sound"] = allsounds[d["id"]]

    except SolrException, e:
        logger.warning("search error: query: %s error %s" % (query, e))
        error = True
        error_text = 'There was an error while searching, is your query correct?'
Пример #32
0
    def handle(self,  *args, **options):

        # init
        solr = Solr(settings.SOLR_URL)

        # Get all solr ids
        print "Getting solr ids...",
        solr_ids = get_all_sound_ids_from_solr()
        print "done!"

        # Get ell gaia ids
        print "Getting gaia ids...",
        gaia_ids = Similarity.get_all_sound_ids()
        print "done!"

        print "Getting freesound db data..."
        # Get all moderated and processed sound ids
        queryset = Sound.objects.filter(processing_state='OK', moderation_state='OK').order_by('id').only("id")
        fs_mp = [sound.id for sound in queryset]
        # Get ell moderated, processed and analysed sounds
        queryset = Sound.objects.filter(processing_state='OK', moderation_state='OK', analysis_state='OK').order_by('id').only("id")
        fs_mpa = [sound.id for sound in queryset]
        print "done!"

        print "\nNumber of sounds per index:\n--------------------------"
        print "Solr index\t\t%i" % len(solr_ids)
        print "Gaia index\t\t%i" % len(gaia_ids)
        print "Freesound\t\t%i  (moderated and processed)" % len(fs_mp)
        print "Freesound\t\t%i  (moderated, processed and analyzed)" % len(fs_mpa)

        print "\n\n***************\nSOLR INDEX\n***************\n"
        in_solr_not_in_fs = list(set(solr_ids).intersection(set(set(solr_ids).difference(fs_mp))))
        in_fs_not_in_solr = list(set(fs_mp).intersection(set(set(fs_mp).difference(solr_ids))))
        print "Sounds in solr but not in fs:\t%i" % len(in_solr_not_in_fs)
        print "Sounds in fs but not in solr:\t%i" % len(in_fs_not_in_solr)

        if not options['no-changes']:
            # Mark fs sounds to go processing
            if in_fs_not_in_solr:
                print "Changing is_index_dirty_state of sounds that require it"
                N = len(in_fs_not_in_solr)
                for count, sid in enumerate(in_fs_not_in_solr):
                    sys.stdout.write('\r\tChanging state of sound sound %i of %i         ' % (count+1, N))
                    sys.stdout.flush()
                    sound = Sound.objects.get(id=sid)
                    sound.set_single_field('is_index_dirty', True)

            # Delete sounds from solr that are not in the db
            if in_solr_not_in_fs:
                print "\nDeleting sounds that should not be in solr"
                N = len(in_solr_not_in_fs)
                for count, sid in enumerate(in_solr_not_in_fs):
                    sys.stdout.write('\r\tDeleting sound %i of %i         ' % (count+1, N))
                    sys.stdout.flush()
                    solr.delete_by_id(sid)

        print "\n***************\nGAIA INDEX\n***************\n"
        in_gaia_not_in_fs = list(set(gaia_ids).intersection(set(set(gaia_ids).difference(fs_mpa))))
        in_fs_not_in_gaia = list(set(fs_mpa).intersection(set(set(fs_mpa).difference(gaia_ids))))
        print "Sounds in gaia but not in fs:\t%i" % len(in_gaia_not_in_fs)
        print "Sounds in fs but not in gaia:\t%i  (only considering sounds correctly analyzed)" % len(in_fs_not_in_gaia)
        #Similarity.save()

        if not options['no-changes']:
            # Mark fs sounds to go processing
            if in_fs_not_in_gaia:
                print "Changing similarity_state of sounds that require it"
                N = len(in_fs_not_in_gaia)
                for count, sid in enumerate(in_fs_not_in_gaia):
                    sys.stdout.write('\r\tChanging state of sound %i of %i         ' % (count+1, N))
                    sys.stdout.flush()
                    sound = Sound.objects.get(id=sid)
                    sound.set_similarity_state('PE')

            # Delete sounds from gaia that are not in the db
            if in_gaia_not_in_fs:
                print "\nDeleting sounds that should not be in solr"
                N = len(in_gaia_not_in_fs)
                for count, sid in enumerate(in_gaia_not_in_fs):
                    sys.stdout.write('\r\tDeleting sound %i of %i         ' % (count+1, N))
                    sys.stdout.flush()
                    Similarity.delete(sid)
Пример #33
0
def search_forum(request):
    """Forum search view: query the forum Solr core with a dismax query over
    thread/post fields, grouped by thread, with highlighting on post bodies.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    current_forum_name_slug = request.GET.get("current_forum_name_slug", "").strip()    # for context sensitive search
    current_forum_name = request.GET.get("current_forum_name", "").strip()              # used in breadcrumb
    sort = ["thread_created desc"]

    # Parse advanced search options
    advanced_search = request.GET.get("advanced_search", "")
    date_from = request.GET.get("dt_from", "")
    date_to = request.GET.get("dt_to", "")

    # TEMPORAL WORKAROUND!!! to prevent using watermark as the query for forum search...
    # It only happens in some situations.
    # NOTE(review): `invalid` is assigned but never read in this view -- the
    # workaround appears to be dead code here; confirm against the full file.
    if "search in " in search_query:
        invalid = 1

    if search_query.strip() != "" or filter_query:
        # add current forum
        if current_forum_name_slug.strip() != "":
            filter_query += "forum_name_slug:" + current_forum_name_slug

        # add date range
        # NOTE(review): precedence makes this parse as
        # (advanced_search == "1" and date_from != "") or date_to != "",
        # so a dt_to alone applies the range even without advanced_search;
        # likely intended advanced_search == "1" and (date_from or date_to).
        if advanced_search == "1" and date_from != "" or date_to != "":
            filter_query = __add_date_range(filter_query, date_from, date_to)

        # Dismax query weighting thread titles highest, then bodies/authors
        query = SolrQuery()
        query.set_dismax_query(search_query, query_fields=[("thread_title", 4),
                                                           ("post_body", 3),
                                                           ("thread_author", 3),
                                                           ("post_author", 3),
                                                           ("forum_name", 2)])
        query.set_highlighting_options_default(field_list=["post_body"],
                                               fragment_size=200,
                                               alternate_field="post_body",  # TODO: revise this param
                                               require_field_match=False,
                                               pre="<strong>",
                                               post="</strong>")
        query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                                rows=settings.SOUNDS_PER_PAGE,
                                field_list=["id",
                                            "forum_name",
                                            "forum_name_slug",
                                            "thread_id",
                                            "thread_title",
                                            "thread_author",
                                            "thread_created",
                                            "post_body",
                                            "post_author",
                                            "post_created",
                                            "num_posts"],
                                filter_query=filter_query,
                                sort=sort)

        # Collapse results by thread so each thread appears once
        query.set_group_field("thread_title_grouped")
        query.set_group_options(group_limit=30)

        solr = Solr(settings.SOLR_FORUM_URL)

        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
            num_results = paginator.count
            page = paginator.page(current_page)
            error = False
        except SolrException, e:
            logger.warning("search error: query: %s error %s" % (query, e))
            error = True
            error_text = 'There was an error while searching, is your query correct?'
        except Exception, e:
            logger.error("Could probably not connect to Solr - %s" % e)
            error = True
            error_text = 'The search server could not be reached, please try again later.'
Пример #34
0
def merge_optimized(search_form, target_file=None, extra_parameters=None):
    """Combined text+content search strategy that intersects Solr and Gaia
    results while respecting URL-length / OR-clause limits.

    If a similarity target is given, Gaia is queried first and Solr is asked
    to validate blocks of Gaia ids; otherwise Solr is paged through and each
    page is intersected with the full Gaia id set. Pagination state is carried
    across calls via the 'cs_lcvidp' / 'cs_lrsidp' extra parameters.

    Returns the 7-tuple (ids, count, distance_to_target_data, None, note,
    params_for_next_page, debug_note).
    """

    if not extra_parameters:
        extra_parameters = dict()
    # Tuning knobs, overridable per-request via extra_parameters
    solr_filter_id_block_size = extra_parameters.get(
        'cs_solr_filter_id_block_size', 350)
    solr_filter_id_max_pages = extra_parameters.get(
        'cs_solr_filter_id_max_pages', 7)
    solr_max_requests = extra_parameters.get('cs_max_solr_requests', 20)
    solr_page_size = extra_parameters.get('cs_solr_page_size', 200)
    gaia_max_pages = extra_parameters.get('cs_max_gaia_pages', 1)
    gaia_page_size = extra_parameters.get(
        'cs_gaia_page_size', 9999999)  # We can get ALL gaia results at once

    num_requested_results = search_form.cleaned_data['page_size']
    params_for_next_page = dict()

    debug_note = ''

    if search_form.cleaned_data['target'] or target_file:
        # First search into gaia and get all results that have not been checked in previous calls (indicated in request parameter 'cs_lcvidp')
        last_checked_valid_id_position = extra_parameters.get('cs_lcvidp', 0)
        if last_checked_valid_id_position < 0:
            last_checked_valid_id_position = 0
        gaia_ids, gaia_count, distance_to_target_data, note = get_gaia_results(
            search_form,
            target_file,
            page_size=gaia_page_size,
            max_pages=gaia_max_pages,
            offset=last_checked_valid_id_position)
        if len(gaia_ids):
            # Now divide gaia results in blocks of "solr_filter_id_block_size" results and iteratively query solr limiting the
            # results to those ids in the common block to obtain common results for the search.
            # Once we get as many results as "num_requested_results" or we exceed a maximum number
            # of iterations (solr_filter_id_max_pages), return what we got and update 'cs_lcvidp' parameter for further calls.
            valid_ids_pages = [
                gaia_ids[i:i + solr_filter_id_block_size]
                for i in range(0, len(gaia_ids), solr_filter_id_block_size)
            ]
            solr_ids = list()
            checked_gaia_ids = list()
            solr = Solr(settings.SOLR_URL)
            for count, valid_ids_page in enumerate(valid_ids_pages):
                page_solr_ids, solr_count = get_solr_results(
                    search_form,
                    page_size=len(valid_ids_page),
                    max_pages=1,
                    valid_ids=valid_ids_page,
                    solr=solr)
                solr_ids += page_solr_ids
                checked_gaia_ids += valid_ids_page
                if len(solr_ids) >= num_requested_results:
                    debug_note = 'Found enough results in %i solr requests' % (
                        count + 1)
                    #print 'Did %i requests to solr' % (count + 1)
                    break
                if count + 1 > solr_filter_id_max_pages:
                    debug_note = 'Did %i solr requests (still not enough results)' % (
                        count + 1)
                    #print 'Too many requests and not enough results'
                    break

            # Keep gaia ordering (similarity ranking): walk checked gaia ids
            # and keep those solr also returned, up to the requested amount.
            # checked_gaia_ids is non-empty here, so
            # new_last_checked_valid_id_position is always bound.
            combined_ids = list()
            for index, sid in enumerate(checked_gaia_ids):
                if sid in solr_ids:
                    combined_ids.append(sid)
                new_last_checked_valid_id_position = index + 1
                if len(combined_ids) == num_requested_results:
                    break

            if len(checked_gaia_ids) == len(gaia_ids):
                params_for_next_page['no_more_results'] = True
            params_for_next_page[
                'cs_lcvidp'] = last_checked_valid_id_position + new_last_checked_valid_id_position
        else:
            # No more gaia ids to check against solr, no more possible results!
            combined_ids = list()
            distance_to_target_data = dict()
            note = None
            params_for_next_page['no_more_results'] = True

    else:
        # First search into gaia to obtain a list of all sounds that match content-based query parameters
        gaia_ids, gaia_count, distance_to_target_data, note = get_gaia_results(
            search_form,
            target_file,
            page_size=gaia_page_size,
            max_pages=gaia_max_pages)
        last_retrieved_solr_id_pos = extra_parameters.get('cs_lrsidp', 0)
        if last_retrieved_solr_id_pos < 0:
            last_retrieved_solr_id_pos = 0

        if len(gaia_ids) < solr_filter_id_block_size:
            # optimization, if there are few gaia_ids, we can get all results in one query
            solr_ids, solr_count = get_solr_results(
                search_form,
                page_size=len(gaia_ids),
                max_pages=1,
                valid_ids=gaia_ids,
                offset=last_retrieved_solr_id_pos)
            combined_ids = solr_ids[:num_requested_results]
            params_for_next_page[
                'cs_lrsidp'] = last_retrieved_solr_id_pos + num_requested_results
            if len(combined_ids) < num_requested_results:
                params_for_next_page['no_more_results'] = True
        else:
            # Now query solr starting at the last retrieved solr result position (parameter 'cs_lrsidp') and iteratively combine the results of
            # each page of the query with gaia ids. Once we reach the desired  "num_requested_results", return what we got and
            # update 'cs_lrsidp' parameter for further queries. Set a maximum number of iterations (solr_max_requests) to prevent a virtually
            # infinite query if not enough results are found (num_requested_results is not reached).
            combined_ids = list()
            new_last_retrieved_solr_id_pos = last_retrieved_solr_id_pos
            stop_main_for_loop = False
            n_requests_made = 0
            for i in range(0, solr_max_requests):
                # NOTE(review): uses `continue` with a flag rather than
                # `break`; behaviourally equivalent (no further requests are
                # made) but it spins through the remaining iterations
                if stop_main_for_loop:
                    continue
                offset = last_retrieved_solr_id_pos + i * solr_page_size
                solr_ids, solr_count = get_solr_results(
                    search_form,
                    page_size=solr_page_size,
                    max_pages=1,
                    offset=offset)
                n_requests_made += 1
                common_ids = list(set(solr_ids).intersection(gaia_ids))
                for index, sid in enumerate(solr_ids):
                    new_last_retrieved_solr_id_pos += 1
                    if sid in common_ids:
                        combined_ids.append(sid)
                    if len(combined_ids) == num_requested_results:
                        stop_main_for_loop = True
                        break
                    if new_last_retrieved_solr_id_pos == solr_count:
                        params_for_next_page['no_more_results'] = True
                        stop_main_for_loop = True
                        break
            if n_requests_made == solr_max_requests and len(
                    combined_ids) < num_requested_results:
                debug_note = 'Did %i solr requests (still not enough results)' % n_requests_made
                #print 'Too many requests and not enough results'
            else:
                debug_note = 'Found enough results in %i solr requests' % n_requests_made
                #print 'Did %i requests to solr' % n_requests_made
            params_for_next_page['cs_lrsidp'] = new_last_retrieved_solr_id_pos

    # Combine results
    return combined_ids, len(
        combined_ids
    ), distance_to_target_data, None, note, params_for_next_page, debug_note
Пример #35
0
def search_forum(request):
    """Forum search view.

    Builds a dismax Solr query from GET parameters and renders the results
    page. Recognised GET params: ``q`` (query), ``f`` (filter), ``page``,
    ``forum`` (slug, for context-sensitive search), ``advanced_search``,
    ``dt_from``/``dt_to`` (date range, ``YYYY-MM-DD``).
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    current_forum_name_slug = request.GET.get(
        "forum", "").strip()  # for context sensitive search
    if current_forum_name_slug:
        current_forum = get_object_or_404(forum.models.Forum.objects,
                                          name_slug=current_forum_name_slug)
    else:
        current_forum = None
    sort = ["thread_created desc"]

    # Parse advanced search options. Invalid/absent dates fall back to an
    # empty value and a placeholder display string.
    advanced_search = request.GET.get("advanced_search", "")
    date_from = request.GET.get("dt_from", "")
    try:
        df_parsed = datetime.datetime.strptime(date_from, "%Y-%m-%d")
        date_from_display = df_parsed.strftime("%d-%m-%Y")
    except ValueError:
        date_from = ""
        date_from_display = "Choose a Date"
    date_to = request.GET.get("dt_to", "")
    try:
        dt_parsed = datetime.datetime.strptime(date_to, "%Y-%m-%d")
        date_to_display = dt_parsed.strftime("%d-%m-%Y")
    except ValueError:
        date_to = ""
        date_to_display = "Choose a Date"

    # Drop placeholder queries coming from the search-box hint text.
    if search_query.startswith("search in"):
        search_query = ""

    error = False
    error_text = ""
    paginator = None
    num_results = None
    page = None
    results = []
    if search_query.strip() != "" or filter_query:
        # add current forum
        if current_forum:
            # BUGFIX: separate the forum clause from any user-supplied filter;
            # previously the two strings were concatenated with no space,
            # fusing them into one invalid filter token.
            if filter_query:
                filter_query += " "
            filter_query += "forum_name_slug:" + current_forum.name_slug

        # add date range
        # BUGFIX: parenthesized the date test. The old expression parsed as
        # `(advanced_search == "1" and date_from != "") or date_to != ""`,
        # so a dt_to value applied the range even outside advanced search.
        if advanced_search == "1" and (date_from != "" or date_to != ""):
            filter_query = __add_date_range(filter_query, date_from, date_to)

        query = SolrQuery()
        query.set_dismax_query(search_query,
                               query_fields=[("thread_title", 4),
                                             ("post_body", 3),
                                             ("thread_author", 3),
                                             ("post_author", 3),
                                             ("forum_name", 2)])
        query.set_highlighting_options_default(
            field_list=["post_body"],
            fragment_size=200,
            alternate_field="post_body",  # TODO: revise this param
            require_field_match=False,
            pre="<strong>",
            post="</strong>")
        query.set_query_options(
            start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
            rows=settings.SOUNDS_PER_PAGE,
            field_list=[
                "id", "forum_name", "forum_name_slug", "thread_id",
                "thread_title", "thread_author", "thread_created", "post_body",
                "post_author", "post_created", "num_posts"
            ],
            filter_query=filter_query,
            sort=sort)

        # Collapse posts of the same thread into one group of up to 30 posts.
        query.set_group_field("thread_title_grouped")
        query.set_group_options(group_limit=30)

        solr = Solr(settings.SOLR_FORUM_URL)

        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(
                results, settings.SOUNDS_PER_PAGE)
            num_results = paginator.count
            page = paginator.page(current_page)
            error = False
        except SolrException as e:
            # Malformed query/filter syntax: report it as a user error.
            logger.warning("search error: query: %s error %s" % (query, e))
            error = True
            error_text = 'There was an error while searching, is your query correct?'
        except Exception as e:
            # Anything else most likely means Solr is unreachable.
            logger.error("Could probably not connect to Solr - %s" % e)
            error = True
            error_text = 'The search server could not be reached, please try again later.'

    tvars = {
        'advanced_search': advanced_search,
        'current_forum': current_forum,
        'current_page': current_page,
        'date_from': date_from,
        'date_from_display': date_from_display,
        'date_to': date_to,
        'date_to_display': date_to_display,
        'error': error,
        'error_text': error_text,
        'filter_query': filter_query,
        'num_results': num_results,
        'page': page,
        'paginator': paginator,
        'search_query': search_query,
        'sort': sort,
        'results': results,
    }

    return render(request, 'search/search_forum.html', tvars)
Пример #36
0
    def read(self, request):
        ip = get_client_ip(request)
        form = SoundSearchForm(SEARCH_SORT_OPTIONS_API, request.GET)
        if not form.is_valid():
            resp = rc.BAD_REQUEST
            resp.content = form.errors
            return resp

        cd = form.cleaned_data
        grouping = request.GET.get("g", "")
        if grouping == "0":
            grouping = ""

        solr = Solr(settings.SOLR_URL)
        sounds_per_page = min(int(request.GET.get('sounds_per_page', settings.SOUNDS_PER_API_RESPONSE)),settings.MAX_SOUNDS_PER_API_RESPONSE)
        query = search_prepare_query(cd['q'],
                                     cd['f'],
                                     search_prepare_sort(cd['s'], SEARCH_SORT_OPTIONS_API),
                                     cd['p'],
                                     sounds_per_page,
                                     grouping = grouping)

        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(results,sounds_per_page)
            page = paginator.page(form.cleaned_data['p'])
            sounds = []
            bad_results = 0
            for object in page['object_list'] :
                try:
                    sound = prepare_collection_sound(Sound.objects.select_related('user').get(id=object['id']), custom_fields = request.GET.get('fields', False))
                    if 'more_from_pack' in object.keys():
                        if object['more_from_pack'] > 0:
                            link = prepend_base(reverse('api-search')+'?q=%s&f=pack:"%s" %s&s=%s&g=%s' % (my_quote(cd['q']),object['pack_name'],my_quote(cd['f']),cd['s'],""))
                            if request.GET.get('sounds_per_page', None):
                                link += "&sounds_per_page=" +  str(request.GET.get('sounds_per_page', None))
                            if request.GET.get('fields', False):
                                link += "&fields=" + str(request.GET.get('fields', False))
                            sound['results_from_the_same_pack'] = link
                            sound['n_results_from_the_same_pack'] = object['more_from_pack']
                    sounds.append(sound)
                except: # This will happen if there are synchronization errors between solr index and the database. In that case sounds are ommited and both num_results and results per page might become inacurate
                    pass
            result = {'sounds': sounds, 'num_results': paginator.count - bad_results, 'num_pages': paginator.num_pages}

            # construct previous and next urls
            if page['has_other_pages']:
                if page['has_previous']:
                    result['previous'] = self.__construct_pagination_link(cd['q'],
                                                                          page['previous_page_number'],
                                                                          cd['f'],
                                                                          find_api_option(cd['s']),
                                                                          request.GET.get('sounds_per_page', None),
                                                                          request.GET.get('fields', False),
                                                                          grouping)
                if page['has_next']:
                    result['next'] = self.__construct_pagination_link(cd['q'],
                                                                      page['next_page_number'],
                                                                      cd['f'],
                                                                      find_api_option(cd['s']),
                                                                      request.GET.get('sounds_per_page',None),
                                                                      request.GET.get('fields', False),
                                                                      grouping)
            add_request_id(request,result)
            logger.info("Searching,q=" + cd['q'] + ",f=" + cd['f'] + ",p=" + str(cd['p']) + ",sounds_per_page=" + str(sounds_per_page) + ",api_key=" + request.GET.get("api_key", False) + ",api_key_username="******",ip=" + ip)
            return result

        except SolrException, e:
            error = "search_query %s filter_query %s sort %s error %s" \
                        % (cd['s'], cd['f'], cd['s'], e)
            raise ReturnError(500, "SearchError", {"explanation": error})
Пример #37
0
def search_forum(request):
    """Forum search view (legacy variant).

    Builds a dismax Solr query over forum posts from GET parameters:
    ``q`` (query), ``f`` (filter), ``page``, ``current_forum_name_slug``
    (context-sensitive search), ``advanced_search`` and ``dt_from``/``dt_to``
    for a date range.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    current_forum_name_slug = request.GET.get("current_forum_name_slug", "").strip()    # for context sensitive search
    current_forum_name = request.GET.get("current_forum_name", "").strip()              # used in breadcrumb
    sort = ["thread_created desc"]

    # Parse advanced search options
    advanced_search = request.GET.get("advanced_search", "")
    date_from = request.GET.get("dt_from", "")
    date_to = request.GET.get("dt_to", "")

    # TEMPORAL WORKAROUND!!! to prevent using watermark as the query for forum search...
    # It only happens in some situations.
    # NOTE(review): `invalid` is assigned but never used in the visible code;
    # presumably a later (not shown) branch checks it — confirm before removing.
    if "search in " in search_query:
        invalid = 1

    if search_query.strip() != "" or filter_query:
        # add current forum
        # NOTE(review): the slug is appended with no separating space; if the
        # user supplied a filter in `f`, the two clauses fuse into one token.
        if current_forum_name_slug.strip() != "":
            filter_query += "forum_name_slug:" + current_forum_name_slug

        # add date range
        # NOTE(review): operator precedence makes this
        # `(advanced and from) or to`, so a dt_to value applies the range even
        # when advanced_search != "1" — looks unintended, confirm.
        if advanced_search == "1" and date_from != "" or date_to != "":
            filter_query = __add_date_range(filter_query, date_from, date_to)

        # Weighted dismax query over thread/post fields.
        query = SolrQuery()
        query.set_dismax_query(search_query, query_fields=[("thread_title", 4),
                                                           ("post_body", 3),
                                                           ("thread_author", 3),
                                                           ("post_author", 3),
                                                           ("forum_name", 2)])
        query.set_highlighting_options_default(field_list=["post_body"],
                                               fragment_size=200,
                                               alternate_field="post_body",  # TODO: revise this param
                                               require_field_match=False,
                                               pre="<strong>",
                                               post="</strong>")
        query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                                rows=settings.SOUNDS_PER_PAGE,
                                field_list=["id",
                                            "forum_name",
                                            "forum_name_slug",
                                            "thread_id",
                                            "thread_title",
                                            "thread_author",
                                            "thread_created",
                                            "post_body",
                                            "post_author",
                                            "post_created",
                                            "num_posts"],
                                filter_query=filter_query,
                                sort=sort)

        # Collapse posts of the same thread into one group of up to 30 posts.
        query.set_group_field("thread_title_grouped")
        query.set_group_options(group_limit=30)

        solr = Solr(settings.SOLR_FORUM_URL)

        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
            num_results = paginator.count
            page = paginator.page(current_page)
            error = False
        except SolrException, e:
            # Malformed query/filter syntax: reported as a user error.
            logger.warning("search error: query: %s error %s" % (query, e))
            error = True
            error_text = 'There was an error while searching, is your query correct?'
        except Exception, e:
            # Anything else most likely means Solr is unreachable.
            logger.error("Could probably not connect to Solr - %s" % e)
            error = True
            error_text = 'The search server could not be reached, please try again later.'
Пример #38
0
def search(request):
    """Main sound search view.

    Builds a weighted Solr query from GET parameters (``q``, ``f``, ``page``,
    ``s`` sort, ``g`` grouping, plus ``advanced``/``a_*`` field toggles),
    runs it, attaches the matching Sound objects to each result document and
    optionally logs clickthrough data.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    filter_query_link_more_when_grouping_packs = filter_query.replace(' ','+')

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    sort = request.GET.get("s", None)
    sort_options = forms.SEARCH_SORT_OPTIONS_WEB


    grouping = request.GET.get("g", "1") # Group by default
    # NOTE(review): "groupnig" is a typo kept for consistency with the rest
    # of this function.
    actual_groupnig = grouping
    # If the query is filtered by pack, do not collapse sounds of the same pack (makes no sense)
    # If the query is thourhg ajax (for sources remix editing), do not collapse
    if "pack" in filter_query or request.GET.get("ajax", "") == "1":
        actual_groupnig = ""

    # Set default values
    id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
    tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
    description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
    username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
    pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
    original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # Parse advanced search options
    advanced = request.GET.get("advanced", "")

    # if advanced search
    if advanced == "1" :
        a_tag = request.GET.get("a_tag", "")
        a_filename = request.GET.get("a_filename", "")
        a_description = request.GET.get("a_description", "")
        a_packname = request.GET.get("a_packname", "")
        a_soundid = request.GET.get("a_soundid", "")
        a_username = request.GET.get("a_username", "")

        # If none is selected use all (so other filter can be appleid)
        # NOTE(review): only a_username is compared against ""; the others rely
        # on string truthiness, which is equivalent here since empty strings
        # are falsy.
        if a_tag or a_filename or a_description or a_packname or a_soundid or a_username != "" :

            # Initialize all weights to 0
            id_weight = 0
            tag_weight = 0
            description_weight = 0
            username_weight = 0
            pack_tokenized_weight = 0
            original_filename_weight = 0

            # Set the weights of selected checkboxes
            if a_soundid != "" :
                id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
            if a_tag != "" :
                tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
            if a_description != "" :
                description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
            if a_username != "" :
                username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
            if a_packname != "" :
                pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
            if a_filename != "" :
                original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # ALLOW "q" empty queries
    #if search_query.strip() == ""

    sort = search_prepare_sort(sort, forms.SEARCH_SORT_OPTIONS_WEB)

    query = search_prepare_query(search_query,
                                 filter_query,
                                 sort,
                                 current_page,
                                 settings.SOUNDS_PER_PAGE,
                                 id_weight,
                                 tag_weight,
                                 description_weight,
                                 username_weight,
                                 pack_tokenized_weight,
                                 original_filename_weight,
                                 grouping = actual_groupnig
                                 )

    solr = Solr(settings.SOLR_URL)

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False

        # Fetch all matching Sound rows in one query and attach them to the
        # solr documents so templates can access the full model.
        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        for d in docs:
            d["sound"] = allsounds[d["id"]]

        # clickusage tracking
        if settings.LOG_CLICKTHROUGH_DATA:
            request_full_path = request.get_full_path()
            # The session id of an unauthenticated user is different from the session id of the same user when
            # authenticated.
            request.session["searchtime_session_key"] = request.session.session_key
            # NOTE(review): if results.docs were ever None, `ids` below would
            # be unbound when logging — confirm docs is always a list here.
            if results.docs is not None:
                ids = []
                for item in results.docs:
                    ids.append(item["id"])
            logger_click.info("QUERY : %s : %s : %s : %s" %
                                (unicode(request_full_path).encode('utf-8'), request.session.session_key, unicode(ids).encode('utf-8'), unicode(current_page).encode('utf-8')))

    except SolrException, e:
        # Malformed query/filter syntax: reported as a user error.
        logger.warning("search error: query: %s error %s" % (query, e))
        error = True
        error_text = 'There was an error while searching, is your query correct?'
Пример #39
0
def api_search(
        search_form, target_file=None, extra_parameters=False, merging_strategy='merge_optimized', resource=None):
    """Dispatch an API search to the right backend.

    Depending on which of query/filter (text search) and
    descriptors_filter/target/target_file (content-based search) are present,
    runs a pure similarity search, a pure Solr text search, or a combined
    strategy from `combined_search_strategies`.

    Returns a 7-tuple:
    (ids, count, distance_to_target_data, more_from_pack_data, note,
     params_for_next_page, debug_note) — unused slots are None.
    """

    if search_form.cleaned_data['query']  is None \
            and search_form.cleaned_data['filter'] is None \
            and not search_form.cleaned_data['descriptors_filter'] \
            and not search_form.cleaned_data['target'] \
            and not target_file:
        # No input data for search, return empty results
        return [], 0, None, None, None, None, None

    if search_form.cleaned_data['query'] is None and search_form.cleaned_data['filter'] is None:
        # Standard content-based search
        try:
            results, count, note = similarity_api_search(
                target=search_form.cleaned_data['target'],
                filter=search_form.cleaned_data['descriptors_filter'],
                num_results=search_form.cleaned_data['page_size'],
                offset=(search_form.cleaned_data['page'] - 1) * search_form.cleaned_data['page_size'],
                target_file=target_file)

            gaia_ids = [result[0] for result in results]
            distance_to_target_data = None
            if search_form.cleaned_data['target'] or target_file:
                # Save sound distance to target into view class so it can be accessed by the serializer
                # We only do that when a target is specified (otherwise there is no meaningful distance value)
                distance_to_target_data = dict(results)

            gaia_count = count
            return gaia_ids, gaia_count, distance_to_target_data, None, note, None, None
        except SimilarityException as e:
            # Map the similarity server's status code onto the API exception
            # hierarchy; unknown codes are treated as server errors.
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message, resource=resource)
            elif e.status_code == 400:
                raise BadRequestException(msg=e.message, resource=resource)
            elif e.status_code == 404:
                raise NotFoundException(msg=e.message, resource=resource)
            else:
                raise ServerErrorException(msg='Similarity server error: %s' % e.message, resource=resource)
        except Exception as e:
            raise ServerErrorException(
                msg='The similarity server could not be reached or some unexpected error occurred.', resource=resource)

    elif not search_form.cleaned_data['descriptors_filter'] \
            and not search_form.cleaned_data['target'] \
            and not target_file:

        # Standard text-based search
        try:
            solr = Solr(settings.SOLR_URL)
            query = search_prepare_query(unquote(search_form.cleaned_data['query'] or ""),
                                         unquote(search_form.cleaned_data['filter'] or ""),
                                         search_form.cleaned_data['sort'],
                                         search_form.cleaned_data['page'],
                                         search_form.cleaned_data['page_size'],
                                         grouping=search_form.cleaned_data['group_by_pack'],
                                         include_facets=False)

            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids = [element['id'] for element in result.docs]
            solr_count = result.num_found

            more_from_pack_data = None
            if search_form.cleaned_data['group_by_pack']:
                # If grouping option is on, store grouping info in a dictionary that we can add when serializing sounds
                more_from_pack_data = dict([
                    (int(element['id']), [element['more_from_pack'], element['pack_id'], element['pack_name']])
                    for element in result.docs
                ])

            return solr_ids, solr_count, None, more_from_pack_data, None, None, None

        except SolrException as e:
            # A filter is the most likely cause of a Solr syntax error, so
            # give a more specific hint when one was supplied.
            if search_form.cleaned_data['filter'] is not None:
                raise BadRequestException(msg='Search server error: %s (please check that your filter syntax and field '
                                              'names are correct)' % e.message, resource=resource)
            raise BadRequestException(msg='Search server error: %s' % e.message, resource=resource)
        except Exception as e:
            raise ServerErrorException(
                msg='The search server could not be reached or some unexpected error occurred.', resource=resource)

    else:
        # Combined search (there is at least one of query/filter and one of descriptors_filter/target)
        # Strategies are implemented in 'combined_search_strategies'
        strategy = getattr(combined_search_strategies, merging_strategy)
        return strategy(search_form, target_file=target_file, extra_parameters=extra_parameters)
Пример #40
0
    def handle(self,  *args, **options):

        # init
        solr = Solr(settings.SOLR_URL)

        # Get all solr ids
        print "Getting solr ids...",
        solr_ids = get_all_sound_ids_from_solr()
        print "done!"

        # Get ell gaia ids
        print "Getting gaia ids...",
        gaia_ids = Similarity.get_all_sound_ids()
        print "done!"

        print "Getting freesound db data..."
        # Get all moderated and processed sound ids
        queryset = Sound.objects.filter(processing_state='OK', moderation_state='OK').order_by('id').only("id")
        fs_mp = [sound.id for sound in queryset]
        # Get ell moderated, processed and analysed sounds
        queryset = Sound.objects.filter(processing_state='OK', moderation_state='OK', analysis_state='OK').order_by('id').only("id")
        fs_mpa = [sound.id for sound in queryset]
        print "done!"

        print "\nNumber of sounds per index:\n--------------------------"
        print "Solr index\t\t%i" % len(solr_ids)
        print "Gaia index\t\t%i" % len(gaia_ids)
        print "Freesound\t\t%i  (moderated and processed)" % len(fs_mp)
        print "Freesound\t\t%i  (moderated, processed and analyzed)" % len(fs_mpa)

        print "\n\n***************\nSOLR INDEX\n***************\n"
        in_solr_not_in_fs = list(set(solr_ids).intersection(set(set(solr_ids).difference(fs_mp))))
        in_fs_not_in_solr = list(set(fs_mp).intersection(set(set(fs_mp).difference(solr_ids))))
        print "Sounds in solr but not in fs:\t%i" % len(in_solr_not_in_fs)
        print "Sounds in fs but not in solr:\t%i" % len(in_fs_not_in_solr)

        if not options['no-changes']:
            # Mark fs sounds to go processing
            if in_fs_not_in_solr:
                print "Changing is_index_dirty_state of sounds that require it"
                N = len(in_fs_not_in_solr)
                for count, sid in enumerate(in_fs_not_in_solr):
                    sys.stdout.write('\r\tChanging state of sound sound %i of %i         ' % (count+1, N))
                    sys.stdout.flush()
                    sound = Sound.objects.get(id=sid)
                    sound.set_single_field('is_index_dirty', True)

            # Delete sounds from solr that are not in the db
            if in_solr_not_in_fs:
                print "\nDeleting sounds that should not be in solr"
                N = len(in_solr_not_in_fs)
                for count, sid in enumerate(in_solr_not_in_fs):
                    sys.stdout.write('\r\tDeleting sound %i of %i         ' % (count+1, N))
                    sys.stdout.flush()
                    solr.delete_by_id(sid)

        print "\n***************\nGAIA INDEX\n***************\n"
        in_gaia_not_in_fs = list(set(gaia_ids).intersection(set(set(gaia_ids).difference(fs_mpa))))
        in_fs_not_in_gaia = list(set(fs_mpa).intersection(set(set(fs_mpa).difference(gaia_ids))))
        print "Sounds in gaia but not in fs:\t%i" % len(in_gaia_not_in_fs)
        print "Sounds in fs but not in gaia:\t%i  (only considering sounds correctly analyzed)" % len(in_fs_not_in_gaia)
        #Similarity.save()

        if not options['no-changes']:
            # Mark fs sounds to go processing
            if in_fs_not_in_gaia:
                print "Changing similarity_state of sounds that require it"
                N = len(in_fs_not_in_gaia)
                for count, sid in enumerate(in_fs_not_in_gaia):
                    sys.stdout.write('\r\tChanging state of sound %i of %i         ' % (count+1, N))
                    sys.stdout.flush()
                    sound = Sound.objects.get(id=sid)
                    sound.set_similarity_state('PE')

            # Delete sounds from gaia that are not in the db
            if in_gaia_not_in_fs:
                print "\nDeleting sounds that should not be in gaia"
                N = len(in_gaia_not_in_fs)
                for count, sid in enumerate(in_gaia_not_in_fs):
                    sys.stdout.write('\r\tDeleting sound %i of %i         ' % (count+1, N))
                    sys.stdout.flush()
                    Similarity.delete(sid)
Пример #41
0
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message, resource=resource)
            elif e.status_code == 400:
                raise BadRequestException(msg=e.message, resource=resource)
            elif e.status_code == 404:
                raise NotFoundException(msg=e.message, resource=resource)
            else:
                raise ServerErrorException(msg='Similarity server error: %s' % e.message, resource=resource)
        except Exception, e:
            raise ServerErrorException(msg='The similarity server could not be reached or some unexpected error occurred.', resource=resource)


    elif not search_form.cleaned_data['descriptors_filter'] and not search_form.cleaned_data['target'] and not target_file:
        # Standard text-based search
        try:
            solr = Solr(settings.SOLR_URL)
            query = search_prepare_query(unquote(search_form.cleaned_data['query'] or ""),
                                         unquote(search_form.cleaned_data['filter'] or ""),
                                         search_form.cleaned_data['sort'],
                                         search_form.cleaned_data['page'],
                                         search_form.cleaned_data['page_size'],
                                         grouping=search_form.cleaned_data['group_by_pack'],
                                         include_facets=False)

            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids = [element['id'] for element in result.docs]
            solr_count = result.num_found

            more_from_pack_data = None
            if search_form.cleaned_data['group_by_pack']:
                # If grouping option is on, store grouping info in a dictionary that we can add when serializing sounds
Пример #42
0
def search(request):
    """Render the main sound search page.

    Parses query/filter/sort/paging parameters from request.GET, optionally
    applies per-field weighting for advanced search, builds a Solr query and
    loads the matching Sound objects for the result docs.

    NOTE(review): this excerpt is truncated inside the SolrException handler;
    the response-building tail of the function is not visible here.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    # '+'-encoded copy of the filter, used when building "more from this pack" links.
    filter_query_link_more_when_grouping_packs = filter_query.replace(' ','+')

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    sort = request.GET.get("s", None)
    sort_options = forms.SEARCH_SORT_OPTIONS_WEB


    grouping = request.GET.get("g", "1") # Group by default
    actual_groupnig = grouping
    # If the query is filtered by pack, do not collapse sounds of the same pack (makes no sense)
    # If the query is through ajax (for sources remix editing), do not collapse
    if "pack" in filter_query or request.GET.get("ajax", "") == "1":
        actual_groupnig = ""

    # Set default values (all fields weighted per project settings)
    id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
    tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
    description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
    username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
    pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
    original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # Parse advanced search options
    advanced = request.GET.get("advanced", "")

    # if advanced search
    if advanced == "1" :
        a_tag = request.GET.get("a_tag", "")
        a_filename = request.GET.get("a_filename", "")
        a_description = request.GET.get("a_description", "")
        a_packname = request.GET.get("a_packname", "")
        a_soundid = request.GET.get("a_soundid", "")
        a_username = request.GET.get("a_username", "")

        # If none is selected use all (so other filter can be applied)
        # NOTE(review): due to precedence this parses as
        # (a_tag or ... or (a_username != "")); for strings the two readings
        # are equivalent, since `s != ""` has the same truthiness as `s`.
        if a_tag or a_filename or a_description or a_packname or a_soundid or a_username != "" :

            # Initialize all weights to 0
            id_weight = 0
            tag_weight = 0
            description_weight = 0
            username_weight = 0
            pack_tokenized_weight = 0
            original_filename_weight = 0

            # Set the weights of selected checkboxes
            if a_soundid != "" :
                id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
            if a_tag != "" :
                tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
            if a_description != "" :
                description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
            if a_username != "" :
                username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
            if a_packname != "" :
                pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
            if a_filename != "" :
                original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # ALLOW "q" empty queries
    #if search_query.strip() == ""

    sort = search_prepare_sort(sort, forms.SEARCH_SORT_OPTIONS_WEB)

    # Log the search; the a_* fields only exist when advanced == "1", which the
    # conditional expression below guards against.
    logger.info(u'Search (%s)' % json.dumps({
        'ip': get_client_ip(request),
        'query': search_query,
        'filter': filter_query,
        'username': request.user.username,
        'page': current_page,
        'sort': sort[0],
        'group_by_pack' : actual_groupnig,
        'advanced': json.dumps({
            'search_in_tag': a_tag,
            'search_in_filename': a_filename,
            'search_in_description': a_description,
            'search_in_packname': a_packname,
            'search_in_soundid': a_soundid,
            'search_in_username': a_username
        }) if advanced == "1" else ""
    }))

    query = search_prepare_query(search_query,
                                 filter_query,
                                 sort,
                                 current_page,
                                 settings.SOUNDS_PER_PAGE,
                                 id_weight,
                                 tag_weight,
                                 description_weight,
                                 username_weight,
                                 pack_tokenized_weight,
                                 original_filename_weight,
                                 grouping = actual_groupnig
                                 )

    solr = Solr(settings.SOLR_URL)

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False

        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        # allsounds will contain info from all the sounds returned by bulk_query_id. This should
        # be all sounds in docs, but if solr and db are not synchronised, it might happen that there
        # are ids in docs which are not found in bulk_query_id. To avoid problems we remove elements
        # in docs that have not been loaded in allsounds.
        docs = [doc for doc in docs if doc["id"] in allsounds]
        for d in docs:
            d["sound"] = allsounds[d["id"]]

        # clickusage tracking
        if settings.LOG_CLICKTHROUGH_DATA:
            request_full_path = request.get_full_path()
            # The session id of an unauthenticated user is different from the session id of the same user when
            # authenticated.
            request.session["searchtime_session_key"] = request.session.session_key
            # NOTE(review): if results.docs is ever None, `ids` is never bound and
            # the log line below would raise NameError — confirm docs is always a list.
            if results.docs is not None:
                ids = []
                for item in results.docs:
                    ids.append(item["id"])
            logger_click.info("QUERY : %s : %s : %s : %s" %
                                (unicode(request_full_path).encode('utf-8'), request.session.session_key, unicode(ids).encode('utf-8'), unicode(current_page).encode('utf-8')))

    except SolrException, e:
        logger.warning("search error: query: %s error %s" % (query, e))
        error = True
        error_text = 'There was an error while searching, is your query correct?'
Пример #43
0
def get_stream_sounds(user, time_lapse):
    """Collect recent sounds for a user's "stream" page/e-mail.

    Queries Solr once per followed user and once per followed tag-set,
    restricted to sounds created within the given time window.

    Args:
        user: the User whose stream is being built.
        time_lapse: Solr date-range expression appended to a "created:" filter
            (e.g. "[NOW-7DAY TO NOW]") — assumed; confirm against callers.

    Returns:
        Tuple (users_sounds, tags_sounds). Each users_sounds element is
        ((followed_user, False), sound_objs, more_url_params, more_count,
        new_count); each tags_sounds element is (tags, sound_objs,
        more_url_params, more_count, new_count).
    """
    solr = Solr(settings.SOLR_URL)

    sort_str = search_prepare_sort("created desc", SEARCH_SORT_OPTIONS_WEB)

    #
    # USERS FOLLOWING
    #

    users_following = get_users_following(user)

    users_sounds = []
    for user_following in users_following:

        # BUG FIX: this line was garbled in the source
        # ('filter_str = "username:"******" created:" + time_lapse', a syntax
        # error). Reconstructed as a username + date-range filter — confirm
        # against version history.
        filter_str = "username:" + user_following.username + " created:" + time_lapse

        query = search_prepare_query("",
                                     filter_str,
                                     sort_str,
                                     1,
                                     SOLR_QUERY_LIMIT_PARAM,
                                     grouping=False,
                                     include_facets=False)

        result = SolrResponseInterpreter(solr.select(unicode(query)))

        if result.num_rows != 0:

            # How many matches exist beyond the first SOLR_QUERY_LIMIT_PARAM rows.
            more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)

            # the sorting only works if done like this!
            more_url_params = [
                urllib.quote(filter_str),
                urllib.quote(sort_str[0])
            ]

            # this is the same link but for the email has to be "quoted"
            more_url = u"?f=" + filter_str + u"&s=" + sort_str[0]
            # more_url_quoted = urllib.quote(more_url)

            sound_ids = [element['id'] for element in result.docs]
            sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids)
            new_count = more_count + len(sound_ids)
            users_sounds.append(((user_following, False), sound_objs,
                                 more_url_params, more_count, new_count))

    #
    # TAGS FOLLOWING
    #

    tags_following = get_tags_following(user)

    tags_sounds = []
    for tag_following in tags_following:

        # A followed "tag" may be a space-separated set of tags; AND them together.
        tags = tag_following.split(" ")
        tag_filter_query = ""
        for tag in tags:
            tag_filter_query += "tag:" + tag + " "

        tag_filter_str = tag_filter_query + " created:" + time_lapse

        query = search_prepare_query("",
                                     tag_filter_str,
                                     sort_str,
                                     1,
                                     SOLR_QUERY_LIMIT_PARAM,
                                     grouping=False,
                                     include_facets=False)

        result = SolrResponseInterpreter(solr.select(unicode(query)))

        if result.num_rows != 0:

            more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)

            # the sorting only works if done like this!
            more_url_params = [
                urllib.quote(tag_filter_str),
                urllib.quote(sort_str[0])
            ]

            # this is the same link but for the email has to be "quoted"
            more_url = u"?f=" + tag_filter_str + u"&s=" + sort_str[0]
            # more_url_quoted = urllib.quote(more_url)

            sound_ids = [element['id'] for element in result.docs]
            sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids)
            new_count = more_count + len(sound_ids)
            tags_sounds.append(
                (tags, sound_objs, more_url_params, more_count, new_count))

    return users_sounds, tags_sounds
Пример #44
0
def search_forum(request):
    """Search forum posts in Solr, optionally scoped to one forum and a date range.

    GET params: q (query), f (filter), page, forum (slug for context-sensitive
    search), advanced_search ("1" enables the date range), dt_from / dt_to
    (dates in "%Y-%m-%d").

    Returns:
        Rendered 'search/search_forum.html' response.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    current_forum_name_slug = request.GET.get("forum", "").strip()    # for context sensitive search
    if current_forum_name_slug:
        current_forum = get_object_or_404(forum.models.Forum.objects, name_slug=current_forum_name_slug)
    else:
        current_forum = None
    sort = ["thread_created desc"]

    # Parse advanced search options; invalid/missing dates fall back to a
    # placeholder label and an empty (inactive) value.
    advanced_search = request.GET.get("advanced_search", "")
    date_from = request.GET.get("dt_from", "")
    try:
        df_parsed = datetime.datetime.strptime(date_from, "%Y-%m-%d")
        date_from_display = df_parsed.strftime("%d-%m-%Y")
    except ValueError:
        date_from = ""
        date_from_display = "Choose a Date"
    date_to = request.GET.get("dt_to", "")
    try:
        dt_parsed = datetime.datetime.strptime(date_to, "%Y-%m-%d")
        date_to_display = dt_parsed.strftime("%d-%m-%Y")
    except ValueError:
        date_to = ""
        date_to_display = "Choose a Date"

    # Drop the search-box placeholder text if it was submitted verbatim.
    if search_query.startswith("search in"):
        search_query = ""

    error = False
    error_text = ""
    paginator = None
    num_results = None
    page = None
    results = []
    if search_query.strip() != "" or filter_query:
        # add current forum
        # NOTE(review): appended with no separator; if filter_query is already
        # non-empty this concatenates onto the previous clause — confirm intended.
        if current_forum:
            filter_query += "forum_name_slug:" + current_forum.name_slug

        # add date range
        # BUG FIX: was `advanced_search == "1" and date_from != "" or date_to != ""`,
        # which (by precedence) applied the date filter whenever dt_to was set,
        # even when advanced search was off.
        if advanced_search == "1" and (date_from != "" or date_to != ""):
            filter_query = __add_date_range(filter_query, date_from, date_to)

        query = SolrQuery()
        query.set_dismax_query(search_query, query_fields=[("thread_title", 4),
                                                           ("post_body", 3),
                                                           ("thread_author", 3),
                                                           ("post_author", 3),
                                                           ("forum_name", 2)])
        query.set_highlighting_options_default(field_list=["post_body"],
                                               fragment_size=200,
                                               alternate_field="post_body",  # TODO: revise this param
                                               require_field_match=False,
                                               pre="<strong>",
                                               post="</strong>")
        query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                                rows=settings.SOUNDS_PER_PAGE,
                                field_list=["id",
                                            "forum_name",
                                            "forum_name_slug",
                                            "thread_id",
                                            "thread_title",
                                            "thread_author",
                                            "thread_created",
                                            "post_body",
                                            "post_author",
                                            "post_created",
                                            "num_posts"],
                                filter_query=filter_query,
                                sort=sort)

        # Collapse posts of the same thread into one group of up to 30 posts.
        query.set_group_field("thread_title_grouped")
        query.set_group_options(group_limit=30)

        solr = Solr(settings.SOLR_FORUM_URL)

        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
            num_results = paginator.count
            page = paginator.page(current_page)
            error = False
        except SolrException as e:
            logger.warning("search error: query: %s error %s" % (query, e))
            error = True
            error_text = 'There was an error while searching, is your query correct?'
        except Exception as e:
            logger.error("Could probably not connect to Solr - %s" % e)
            error = True
            error_text = 'The search server could not be reached, please try again later.'


    tvars = {
        'advanced_search': advanced_search,
        'current_forum': current_forum,
        'current_page': current_page,
        'date_from': date_from,
        'date_from_display': date_from_display,
        'date_to': date_to,
        'date_to_display': date_to_display,
        'error': error,
        'error_text': error_text,
        'filter_query': filter_query,
        'num_results': num_results,
        'page': page,
        'paginator': paginator,
        'search_query': search_query,
        'sort': sort,
        'results': results,
    }

    return render(request, 'search/search_forum.html', tvars)
Пример #45
0
def check_if_sound_exists_in_solr(sound):
    """Return True when a Solr document with this sound's id exists in the index."""
    id_filter = 'id:%i' % sound.id
    sort_option = search_prepare_sort('created asc', SEARCH_SORT_OPTIONS_WEB)
    query = search_prepare_query('', id_filter, sort_option, 1, 1)
    solr = Solr(settings.SOLR_URL)
    interpreted = SolrResponseInterpreter(solr.select(unicode(query)))
    return interpreted.num_found > 0
Пример #46
0
def filter_both(search_form, target_file=None, extra_parameters=None):
    """
    Filter both strategy will first get either some results from solr and then check if returned results are also
    valid results in a gaia query, or the other way around.
    In gaia and solr we can restrict the query to a particular set of results, but there are limitations both in the
    length of the resulting url and in the number of OR clauses that solr can support.

    Args:
        search_form: validated combined-search form; cleaned_data supplies
            'target', 'page' and 'page_size' here.
        target_file: optional uploaded analysis file used as similarity target.
        extra_parameters: optional dict of 'cs_*' tuning knobs (block sizes,
            page sizes and page limits for the solr/gaia sub-queries).

    Returns:
        Tuple (page_of_combined_ids, combined_count, distance_to_target_data,
        None, note, None, None) — the None slots presumably keep the shape
        aligned with the other filter strategies; confirm against callers.
    """

    if not extra_parameters:
        extra_parameters = dict()
    solr_filter_id_block_size = extra_parameters.get(
        'cs_solr_filter_id_block_size', 350)
    solr_filter_id_max_pages = extra_parameters.get(
        'cs_solr_filter_id_max_pages', 7)
    solr_max_pages = extra_parameters.get('cs_max_solr_pages', 7)
    solr_page_size = extra_parameters.get('cs_solr_page_size', 1000)
    # NOTE: the two gaia_filter_id_* knobs are only referenced by the
    # commented-out non-optimized code further down.
    gaia_filter_id_block_size = extra_parameters.get(
        'cs_gaia_filter_id_block_size', 350)
    gaia_filter_id_max_pages = extra_parameters.get(
        'cs_gaia_filter_id_max_pages', 7)
    gaia_max_pages = extra_parameters.get('cs_max_gaia_pages', 1)
    gaia_page_size = extra_parameters.get(
        'cs_gaia_page_size', 9999999)  # We can get ALL gaia results at once

    if search_form.cleaned_data['target'] or target_file:
        # First search into gaia and then into solr (get all gaia results)
        gaia_ids, gaia_count, distance_to_target_data, note = get_gaia_results(
            search_form,
            target_file,
            page_size=gaia_page_size,
            max_pages=gaia_max_pages)
        # Split the gaia ids into blocks small enough to be used as a solr
        # id-restriction filter, capped at solr_filter_id_max_pages blocks.
        # NOTE(review): `i / solr_filter_id_block_size` relies on Python 2
        # integer division (this file uses py2 syntax elsewhere).
        valid_ids_pages = [
            gaia_ids[i:i + solr_filter_id_block_size]
            for i in range(0, len(gaia_ids), solr_filter_id_block_size)
            if (i / solr_filter_id_block_size) < solr_filter_id_max_pages
        ]
        solr_ids = list()
        solr = Solr(settings.SOLR_URL)
        for valid_ids_page in valid_ids_pages:
            page_solr_ids, solr_count = get_solr_results(
                search_form,
                page_size=len(valid_ids_page),
                max_pages=1,
                valid_ids=valid_ids_page,
                solr=solr)
            solr_ids += page_solr_ids

        if gaia_count <= solr_filter_id_block_size * solr_filter_id_max_pages:
            # Got complete results, maybe we should log that?
            #print 'COMPLETE results (starting with gaia)'
            pass
    else:
        # First search into solr and then into gaia
        # These queries are SLOW because we need to get many pages from solr
        solr_ids, solr_count = get_solr_results(search_form,
                                                page_size=solr_page_size,
                                                max_pages=solr_max_pages)

        # Now we should split solr ids in blocks and iteratively query gaia restricting the results to those ids
        # present in the current block. However given that gaia results can be retrieved
        # all at once very quickly, we optimize this bit by retrieving them all at once and avoiding many requests
        # to similarity server.
        gaia_ids, gaia_count, distance_to_target_data, note = get_gaia_results(
            search_form,
            target_file,
            page_size=gaia_page_size,
            max_pages=gaia_max_pages)
        '''
        # That would be the code without the optimization:
        valid_ids_pages = [solr_ids[i:i+gaia_filter_id_block_size] for i in range(0, len(solr_ids), gaia_filter_id_block_size) if (i/gaia_filter_id_block_size) < gaia_filter_id_max_pages]
        gaia_ids = list()
        distance_to_target_data = None
        note = None
        for valid_ids_page in valid_ids_pages:
            page_gaia_ids, page_gaia_count, page_distance_to_target_data, note = get_gaia_results(search_form, target_file, page_size=len(valid_ids_page), max_pages=1, valid_ids=valid_ids_page)
            gaia_ids += page_gaia_ids
        '''
        if solr_count <= solr_page_size * solr_max_pages and gaia_count < gaia_page_size * gaia_max_pages:
            # Got complete results, maybe we should log that?
            #print 'COMPLETE results (starting with solr)'
            pass

    # With a similarity target, keep gaia's (distance-based) ordering;
    # otherwise keep solr's (relevance-based) ordering.
    if search_form.cleaned_data['target'] or target_file:
        # Combined search, sort by gaia_ids
        results_a = gaia_ids
        results_b = solr_ids
    else:
        # Combined search, sort by solr ids
        results_a = solr_ids
        results_b = gaia_ids

    # Combine results: intersection of both id lists, preserving results_a order.
    results_b_set = set(results_b)
    combined_ids = [id for id in results_a if id in results_b_set]
    combined_count = len(combined_ids)
    # Slice out only the page requested by the form.
    return combined_ids[(search_form.cleaned_data['page'] - 1) * search_form.cleaned_data['page_size']:search_form.cleaned_data['page'] * search_form.cleaned_data['page_size']], \
           combined_count, distance_to_target_data, None, note, None, None
Пример #47
0
def tags(request, multiple_tags=None):
    """Display sounds matching a set of tags, plus a tag facet cloud.

    Args:
        request: Django HttpRequest.
        multiple_tags: optional slash-separated tag string ("tag1/tag2/...").

    Returns:
        Rendered 'sounds/tags.html' response.
    """
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []

    # Drop empty segments (e.g. from "tag1//tag2") and canonicalise the order.
    multiple_tags = sorted(filter(lambda x: x, multiple_tags))

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1

    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    if multiple_tags:
        # One "tag:" clause per requested tag.
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE, rows=settings.SOUNDS_PER_PAGE, field_list=["id"], sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)
    # Collapse sounds of the same pack into a single group in the results.
    query.set_group_field(group_field="grouping_pack")
    query.set_group_options(group_func=None,
                            group_query=None,
                            group_rows=10,
                            group_start=0,
                            group_limit=1,
                            group_offset=0,
                            group_sort=None,
                            group_sort_ingroup=None,
                            group_format='grouped',
                            group_main=False,
                            group_num_groups=True,
                            group_cache_percent=0,
                            group_truncate=True)  # Sets how many results from the same group are taken into account for computing the facets

    page = None
    num_results = 0
    tags = []
    error = False
    docs = {}
    non_grouped_number_of_results = 0
    paginator = None
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]

        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        for d in docs:
            d["sound"] = allsounds[d["id"]]

    except SolrException as e:
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
    except Exception as e:
        # BUG FIX: was a bare "except:" that silently swallowed every error
        # (including SystemExit/KeyboardInterrupt); narrowed and logged.
        error = True
        search_logger.error("Unexpected error searching tags - %s" % e)

    slash_tag = "/".join(multiple_tags)

    follow_tags_url = ''
    unfollow_tags_url = ''
    show_unfollow_button = False
    if slash_tag:
        follow_tags_url = reverse('follow-tags', args=[slash_tag])
        unfollow_tags_url = reverse('unfollow-tags', args=[slash_tag])
        show_unfollow_button = False

        if request.user.is_authenticated:
            show_unfollow_button = follow_utils.is_user_following_tag(request.user, slash_tag)

    tvars = {'show_unfollow_button': show_unfollow_button,
             'multiple_tags': multiple_tags,
             'follow_tags_url': follow_tags_url,
             'unfollow_tags_url': unfollow_tags_url,
             'error': error,
             'tags': tags,
             'slash_tag': slash_tag,
             'num_results': num_results,
             'non_grouped_number_of_results': non_grouped_number_of_results,
             'docs': docs,
             'paginator': paginator,
             'page': page,
             'current_page': current_page
             }
    return render(request, 'sounds/tags.html', tvars)
Пример #48
0
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message, resource=resource)
            elif e.status_code == 400:
                raise BadRequestException(msg=e.message, resource=resource)
            elif e.status_code == 404:
                raise NotFoundException(msg=e.message, resource=resource)
            else:
                raise ServerErrorException(msg='Similarity server error: %s' % e.message, resource=resource)
        except Exception, e:
            raise ServerErrorException(msg='The similarity server could not be reached or some unexpected error occurred.', resource=resource)


    elif not search_form.cleaned_data['descriptors_filter'] and not search_form.cleaned_data['target'] and not target_file:
        # Standard text-based search
        try:
            solr = Solr(settings.SOLR_URL)
            query = search_prepare_query(unquote(search_form.cleaned_data['query'] or ""),
                                         unquote(search_form.cleaned_data['filter'] or ""),
                                         search_form.cleaned_data['sort'],
                                         search_form.cleaned_data['page'],
                                         search_form.cleaned_data['page_size'],
                                         grouping=search_form.cleaned_data['group_by_pack'],
                                         include_facets=False)

            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids = [element['id'] for element in result.docs]
            solr_count = result.num_found

            more_from_pack_data = None
            if search_form.cleaned_data['group_by_pack']:
                # If grouping option is on, store grouping info in a dictionary that we can add when serializing sounds
Пример #49
0
def add_sounds_to_solr(sounds):
    solr = Solr(settings.SOLR_URL)
    documents = [convert_to_solr_document(s) for s in sounds]
    console_logger.info("Adding %d sounds to solr index" % len(documents))
    logger.info("Adding %d sounds to solr index" % len(documents))
    solr.add(documents)
Пример #50
0
def tags(request, multiple_tags=None):
    """Display sounds matching a set of tags, plus a tag facet cloud.

    Args:
        request: Django HttpRequest.
        multiple_tags: optional slash-separated tag string ("tag1/tag2/...").

    Returns:
        Rendered 'sounds/tags.html' response (context is locals()).
    """
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []

    multiple_tags = sorted(filter(lambda x: x, multiple_tags))

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1

    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    if multiple_tags:
        query.set_query(" ".join("tag:\"" + tag + "\""
                                 for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) *
                            settings.SOUNDS_PER_PAGE,
                            rows=settings.SOUNDS_PER_PAGE,
                            field_list=["id"],
                            sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100,
                                    sort=True,
                                    mincount=1,
                                    count_missing=False)
    query.set_group_field(group_field="grouping_pack")
    query.set_group_options(
        group_func=None,
        group_query=None,
        group_rows=10,
        group_start=0,
        group_limit=1,
        group_offset=0,
        group_sort=None,
        group_sort_ingroup=None,
        group_format='grouped',
        group_main=False,
        group_num_groups=True,
        group_cache_percent=0,
        group_truncate=True
    )  # Sets how many results from the same group are taken into account for computing the facets

    # BUG FIX: defaults so all names rendered via locals() below exist even
    # when the Solr query fails (previously page/num_results/tags/docs/... were
    # undefined on the error path).
    page = None
    num_results = 0
    tags = []
    docs = {}
    non_grouped_number_of_results = 0
    paginator = None
    error = False
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results,
                                                     settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]

        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        for d in docs:
            d["sound"] = allsounds[d["id"]]

    except SolrException as e:
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
    except Exception as e:
        # BUG FIX: was a bare "except:" that silently swallowed every error
        # (including SystemExit/KeyboardInterrupt); narrowed and logged.
        error = True
        search_logger.error("Unexpected error searching tags - %s" % e)

    slash_tag = "/".join(multiple_tags)
    space_tag = " ".join(multiple_tags)

    # BUG FIX: defaults so these names exist in locals() even when no tags
    # were requested (previously undefined when slash_tag was empty).
    follow_tags_url = ''
    unfollow_tags_url = ''
    show_unfollow_button = False
    if slash_tag:
        follow_tags_url = reverse('follow-tags', args=[slash_tag])
        unfollow_tags_url = reverse('unfollow-tags', args=[slash_tag])
        show_unfollow_button = False
        if request.user.is_authenticated:
            show_unfollow_button = follow_utils.is_user_following_tag(
                request.user, slash_tag)

    return render(request, 'sounds/tags.html', locals())
Пример #51
0
def add_sounds_to_solr(sounds):
    """Convert the given sounds to Solr documents and post them to the index.

    Args:
        sounds: iterable of Sound objects to index.
    """
    solr = Solr(settings.SOLR_URL)
    documents = [convert_to_solr_document(s) for s in sounds]
    # Lazy %-style logger args: the message is only formatted if the record is emitted.
    console_logger.info("Adding %d sounds to solr index", len(documents))
    logger.info("Adding %d sounds to solr index", len(documents))
    solr.add(documents)