Exemplo n.º 1
0
    def handle(self, *args, **options):
        """Delete from the Solr index the sounds that should not be in it.

        All document ids are read from Solr in slices of ``SLICE_SIZE``. Each id
        is then looked up in the database and, unless a ``Sound`` with that id
        exists with both ``moderation_state`` and ``processing_state`` equal to
        "OK", the document is deleted from the index. Progress is written to
        stdout. ``args`` and ``options`` are accepted but not used.
        """
        LIMIT = None  # Set to an integer to check at most that many ids
        SLICE_SIZE = 500
        solr = Solr(url=settings.SOLR_URL)
        query = SolrQuery()
        query.set_dismax_query("")  # Query to get ALL sounds

        # The first slice also reports how many documents the index holds
        print("Retrieving ids from %i to %i" % (0, SLICE_SIZE))
        query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=0)
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        solr_sound_ids = list(list_of_dicts_to_list_of_ids(results.docs))
        total_num_documents = results.num_found

        # Start iterating over other pages (slices)
        if LIMIT:
            number_of_documents = min(LIMIT, total_num_documents)
        else:
            number_of_documents = total_num_documents

        for start in range(SLICE_SIZE, number_of_documents, SLICE_SIZE):
            print("Retrieving ids from %i to %i" % (start, start + SLICE_SIZE))
            query.set_query_options(field_list=["id"],
                                    rows=SLICE_SIZE,
                                    start=start)
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_sound_ids.extend(list_of_dicts_to_list_of_ids(results.docs))

        # Remove duplicated ids and sort them
        solr_sound_ids = sorted(set(solr_sound_ids))
        if LIMIT:
            solr_sound_ids = solr_sound_ids[0:LIMIT]
        total_ids = len(solr_sound_ids)
        print("%i document ids retrieved" % total_ids)
        n_deleted = 0
        print("")
        # Count from 1 so that the progress line ends at "N of N"
        for position, sound_id in enumerate(solr_sound_ids, 1):
            sys.stdout.write("\rChecking doc %i of %i" % (position, total_ids))
            sys.stdout.flush()

            if Sound.objects.filter(id=sound_id,
                                    moderation_state="OK",
                                    processing_state="OK").exists():
                continue

            # Sound does not exist in the Db or is not properly moderated and processed
            print("\n\t - Deleting sound with id %i from solr index" % sound_id)
            solr.delete_by_id(sound_id)
            n_deleted += 1

        print("\n\nDONE! %i sounds deleted from solr index (it may take some minutes to actually see the changes in the page)" % n_deleted)
Exemplo n.º 2
0
    def get_user_tags(self, use_solr=True):
        """Return the tags most used by this profile's user.

        With ``use_solr`` true, Solr is asked for the "tag" facet (at most 10
        values, each with a count of at least 1) of the documents whose
        username matches ``self.user.username``. The result is a list of
        ``{'name': ..., 'count': ...}`` dicts, or ``False`` when the Solr
        request raises.

        With ``use_solr`` false, a ``DelayedQueryExecuter`` is returned for an
        SQL query that selects the 10 most used tags of ``self.user_id`` among
        sounds whose moderation and processing states are 'OK', ordered by
        tag name.
        """
        if not use_solr:
            return DelayedQueryExecuter("""
                   SELECT tags_tag.name AS name, X.c AS count
                     FROM ( SELECT tag_id, count(*) as c
                              FROM tags_taggeditem
                         LEFT JOIN sounds_sound ON object_id=sounds_sound.id
                             WHERE tags_taggeditem.user_id=%d AND
                                   sounds_sound.moderation_state='OK' AND
                                   sounds_sound.processing_state='OK'
                          GROUP BY tag_id
                          ORDER BY c
                        DESC LIMIT 10) AS X
                LEFT JOIN tags_tag ON tags_tag.id=X.tag_id
                 ORDER BY tags_tag.name;""" % self.user_id)

        tag_query = SolrQuery()
        tag_query.set_dismax_query('')
        tag_query.set_query_options(
            field_list=["id"],
            filter_query='username:\"%s\"' % self.user.username)
        tag_query.add_facet_fields("tag")
        tag_query.set_facet_options("tag", limit=10, mincount=1)
        solr = Solr(settings.SOLR_URL)

        try:
            response = SolrResponseInterpreter(solr.select(unicode(tag_query)))
        except (SolrException, Exception):
            # Any failure while querying Solr is reported as False
            return False

        user_tags = []
        for tag, count in response.facets['tag']:
            user_tags.append({'name': tag, 'count': count})
        return user_tags
Exemplo n.º 3
0
def get_all_sound_ids_from_solr(limit=False):
    """Return the sorted ids of the documents found in the Solr index.

    Pages of ``PAGE_SIZE`` results, sorted by 'created asc', are requested
    until as many ids as Solr reports in ``num_found`` have been collected,
    ``limit`` has been reached, or a page comes back empty.

    :param limit: stop requesting pages once at least this many ids have been
        collected; a falsy value means no practical limit. Whole pages are
        appended, so more than ``limit`` ids can be returned.
    :return: sorted list with the collected ids.
    """
    logger.info("getting all sound ids from solr.")
    if not limit:
        limit = 99999999999999
    solr = Solr(settings.SOLR_URL)
    # The sort does not change between pages, so it is prepared only once
    sort = search_prepare_sort('created asc', SEARCH_SORT_OPTIONS_WEB)
    solr_ids = []
    solr_count = None  # Unknown until the first response arrives
    PAGE_SIZE = 2000
    current_page = 1
    # ``solr_count is None`` is tested first: comparing an int with None
    # raises TypeError on Python 3
    while (solr_count is None
           or len(solr_ids) < solr_count) and len(solr_ids) < limit:
        query = search_prepare_query('',
                                     '',
                                     sort,
                                     current_page,
                                     PAGE_SIZE,
                                     include_facets=False)
        response = SolrResponseInterpreter(solr.select(unicode(query)))
        page_ids = [element['id'] for element in response.docs]
        if not page_ids:
            # An empty page adds nothing; without this exit the loop would
            # never end when fewer documents than ``num_found`` are returned
            break
        solr_ids += page_ids
        solr_count = response.num_found
        current_page += 1
    return sorted(solr_ids)
    def handle(self, *args, **options):
        """Delete from the forum Solr index the posts that should not be in it.

        Document ids are read from the index at ``settings.SOLR_FORUM_URL`` in
        slices of ``SLICE_SIZE``. Every id without a matching ``Post`` whose
        ``moderation_state`` is "OK" is deleted from the index. Progress is
        reported through ``console_logger``. ``args`` and ``options`` are
        accepted but not used.
        """
        LIMIT = None
        SLICE_SIZE = 500
        forum_solr = Solr(url=settings.SOLR_FORUM_URL)
        all_posts_query = SolrQuery()
        all_posts_query.set_dismax_query("")  # Empty query: get ALL forum documents

        def fetch_slice(offset):
            # Ask Solr for SLICE_SIZE ids starting at the given offset
            all_posts_query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=offset)
            return SolrResponseInterpreter(forum_solr.select(unicode(all_posts_query)))

        collected_ids = []
        console_logger.info("Retrieving ids from %i to %i" % (0, SLICE_SIZE))
        first_slice = fetch_slice(0)
        collected_ids += list_of_dicts_to_list_of_ids(first_slice.docs)
        total_num_documents = first_slice.num_found

        # Work out how many documents the remaining slices have to cover
        number_of_documents = min(LIMIT, total_num_documents) if LIMIT else total_num_documents

        offset = SLICE_SIZE
        while offset < number_of_documents:
            console_logger.info("Retrieving ids from %i to %i" % (offset, offset + SLICE_SIZE - 1))
            collected_ids += list_of_dicts_to_list_of_ids(fetch_slice(offset).docs)
            offset += SLICE_SIZE

        # Drop duplicated ids and sort what is left
        post_ids = list(set(collected_ids))
        post_ids.sort()
        if LIMIT:
            post_ids = post_ids[0:LIMIT]
        console_logger.info("%i document ids retrieved" % len(post_ids))
        n_deleted = 0
        console_logger.info("")
        for index, post_id in enumerate(post_ids):
            if index % 100 == 0:
                console_logger.info("\rChecking docs %i/%i" % (index, len(post_ids)))

            if Post.objects.filter(id=post_id, moderation_state="OK").exists():
                continue

            # Post does not exist in the Db or is not properly moderated
            console_logger.info("\n\t - Deleting forum with id %i from solr index" % post_id)
            forum_solr.delete_by_id(post_id)
            n_deleted += 1

        console_logger.info("\n\nDONE! %i forums deleted from solr index (it may take some minutes to actually see the changes in the page)" % n_deleted)
Exemplo n.º 5
0
    def handle(self, *args, **options):
        """Delete from the Solr index the sounds that should not be in it.

        Document ids are read from Solr in slices of ``SLICE_SIZE``. Every id
        without a matching ``Sound`` whose ``moderation_state`` and
        ``processing_state`` are both "OK" is deleted from the index. Progress
        is written to stdout. ``args`` and ``options`` are accepted but not
        used.
        """
        LIMIT = None  # Set to an integer to check at most that many ids
        SLICE_SIZE = 500
        solr_sound_ids = []
        solr = Solr(url=settings.SOLR_URL)
        query = SolrQuery()
        query.set_dismax_query("")  # Query to get ALL sounds

        # One loop handles every slice. The number of documents to cover is
        # only known after the first response, so it starts as None.
        start = 0
        number_of_documents = None
        while number_of_documents is None or start < number_of_documents:
            print("Retrieving ids from %i to %i" % (start, start + SLICE_SIZE))
            query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=start)
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_sound_ids.extend(list_of_dicts_to_list_of_ids(results.docs))
            if number_of_documents is None:
                total_num_documents = results.num_found
                if LIMIT:
                    number_of_documents = min(LIMIT, total_num_documents)
                else:
                    number_of_documents = total_num_documents
            start += SLICE_SIZE

        # Remove duplicated ids and sort them
        solr_sound_ids = sorted(set(solr_sound_ids))
        if LIMIT:
            solr_sound_ids = solr_sound_ids[0:LIMIT]
        n_ids = len(solr_sound_ids)
        print("%i document ids retrieved" % n_ids)
        n_deleted = 0
        print("")
        # Positions are 1-based so that the last progress line reads "N of N"
        for position, sound_id in enumerate(solr_sound_ids, 1):
            sys.stdout.write("\rChecking doc %i of %i" % (position, n_ids))
            sys.stdout.flush()

            is_valid = Sound.objects.filter(id=sound_id,
                                            moderation_state="OK",
                                            processing_state="OK").exists()
            if not is_valid:
                # Sound does not exist in the Db or is not properly moderated and processed
                print("\n\t - Deleting sound with id %i from solr index" % sound_id)
                solr.delete_by_id(sound_id)
                n_deleted += 1

        print("\n\nDONE! %i sounds deleted from solr index (it may take some minutes to actually see the changes in the page)" % n_deleted)
Exemplo n.º 6
0
def tags(request, multiple_tags=None):
    """Query Solr for the sounds matching a set of tags and for the tag facet.

    ``multiple_tags`` is a '/'-separated string of tags (or None); the page
    number is read from the "page" GET parameter and falls back to 1 when it
    is not an integer. Results are sorted by number of downloads and grouped
    by the "grouping_pack" field.

    NOTE(review): this snippet ends after the ``except`` clause and shows no
    return statement; the locals built here are presumably used by code that
    is not visible - confirm before renaming any of them.
    """
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []
    
    # Drop empty strings (e.g. produced by a trailing slash) and sort the tags
    multiple_tags = sorted(filter(lambda x: x, multiple_tags))
    
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1

    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    if multiple_tags:
        # NOTE(review): tag text is placed inside the quotes unescaped, so a tag
        # containing a double quote would change the query - confirm that the
        # URL pattern cannot deliver one
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        # No tags given: match every document
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE, rows=settings.SOUNDS_PER_PAGE, field_list=["id"], sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)
    query.set_group_field(group_field="grouping_pack")
    query.set_group_options(group_func=None,
        group_query=None,
        group_rows=10,
        group_start=0,
        group_limit=1,
        group_offset=0,
        group_sort=None,
        group_sort_ingroup=None,
        group_format='grouped',
        group_main=False,
        group_num_groups=True,
        group_cache_percent=0,
        group_truncate=True)  # Sets how many results from the same group are taken into account for computing the facets

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]

        # Attach to every Solr document the Sound object with the same id
        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        for d in docs:
            # Raises KeyError (not caught below) if no sound was returned for this id
            d["sound"] = allsounds[d["id"]]

    except SolrException, e:
        # Only Solr errors are handled here: they are flagged and logged
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
Exemplo n.º 7
0
def perform_solr_query(q, current_page):
    """
    Send query ``q`` to Solr and return what the view needs to continue.

    Having the Solr round trip in one small function makes it easy to mock
    in the unit tests of the view.

    Returns a 5-tuple: non grouped number of matches, facets, paginator,
    the requested page of the paginator and the result docs.
    """
    response = SolrResponseInterpreter(Solr(settings.SOLR_URL).select(unicode(q)))
    pager = SolrResponseInterpreterPaginator(response, settings.SOUNDS_PER_PAGE)
    requested_page = pager.page(current_page)
    return (response.non_grouped_number_of_matches,
            response.facets,
            pager,
            requested_page,
            response.docs)
Exemplo n.º 8
0
def check_if_sound_exists_in_solr(sound):
    """Return True when Solr finds at least one document with the id of ``sound``."""
    solr = Solr(settings.SOLR_URL)
    # Empty text query, filtered by the sound id; first page with one result
    id_filter = 'id:%i' % sound.id
    sort = search_prepare_sort('created asc', SEARCH_SORT_OPTIONS_WEB)
    query = search_prepare_query('', id_filter, sort, 1, 1)
    response = SolrResponseInterpreter(solr.select(unicode(query)))
    return response.num_found > 0
def get_solr_results(search_form,
                     page_size,
                     max_pages,
                     start_page=1,
                     valid_ids=None,
                     solr=None,
                     offset=None):
    """Collect result ids from Solr over several consecutive pages.

    :param search_form: form whose ``cleaned_data`` provides 'query',
        'filter' and 'sort'.
    :param page_size: number of results requested per page.
    :param max_pages: maximum number of page requests to make.
    :param start_page: first page to request.
    :param valid_ids: optional ids; when given, an ``id:(a OR b ...)`` clause
        is appended to the filter.
    :param solr: ``Solr`` instance to use; one is created from
        ``settings.SOLR_URL`` when omitted.
    :param offset: passed through to ``search_prepare_query``.
    :return: tuple ``(solr_ids, solr_count)`` with the collected ids and the
        ``num_found`` value of the last response (None if no request was made).
    :raises ServerErrorException: if the request to Solr fails in any way.
    """
    if not solr:
        solr = Solr(settings.SOLR_URL)

    query_filter = search_form.cleaned_data['filter']
    if valid_ids:
        # Update solr filter to only return results in valid ids
        ids_filter = 'id:(' + ' OR '.join(str(item) for item in valid_ids) + ')'
        if query_filter:
            query_filter += ' %s' % ids_filter
        else:
            query_filter = ids_filter

    solr_ids = []
    solr_count = None  # Unknown until the first response arrives

    try:
        current_page = start_page
        n_page_requests = 1
        # Iterate over solr result pages. ``solr_count is None`` is tested
        # first: comparing an int with None raises TypeError on Python 3.
        while (solr_count is None
               or len(solr_ids) < solr_count) and n_page_requests <= max_pages:
            query = search_prepare_query(
                unquote(search_form.cleaned_data['query'] or ""),
                unquote(query_filter or ""),
                search_form.cleaned_data['sort'],
                current_page,
                page_size,
                grouping=False,
                include_facets=False,
                offset=offset)
            result = SolrResponseInterpreter(solr.select(unicode(query)))
            page_ids = [element['id'] for element in result.docs]
            solr_ids += page_ids
            solr_count = result.num_found
            if not page_ids:
                # An empty page means later pages are empty too, so the
                # remaining requests up to max_pages would be wasted
                break

            current_page += 1
            n_page_requests += 1

    except SolrException as e:
        raise ServerErrorException(msg='Search server error: %s' % e.message)
    except Exception:
        raise ServerErrorException(
            msg=
            'The search server could not be reached or some unexpected error occurred.'
        )

    return solr_ids, solr_count
Exemplo n.º 10
0
def perform_solr_query(q, current_page):
    """
    Run ``q`` against the Solr server at ``settings.SOLR_URL``.

    The Solr access of the view is isolated in this helper so that unit
    tests can mock it.

    :return: tuple of (non grouped number of matches, facets, paginator,
        page ``current_page`` of the paginator, docs).
    """
    search_server = Solr(settings.SOLR_URL)
    raw_response = search_server.select(unicode(q))
    interpreted = SolrResponseInterpreter(raw_response)
    paginator = SolrResponseInterpreterPaginator(interpreted,
                                                 settings.SOUNDS_PER_PAGE)
    page = paginator.page(current_page)
    n_matches = interpreted.non_grouped_number_of_matches
    return n_matches, interpreted.facets, paginator, page, interpreted.docs
Exemplo n.º 11
0
def get_pack_tags(pack_obj):
    """Return the Solr facets for the sounds of a pack.

    The query is filtered by the username of the pack owner and by the pack
    name, and asks for the "tag" facet (at most 20 values, each with a count
    of at least 1). Returns ``False`` if creating the Solr client or running
    the query raises.
    """
    tag_query = SolrQuery()
    tag_query.set_dismax_query('')
    owner_and_pack = 'username:\"%s\" pack:\"%s\"' % (pack_obj.user.username, pack_obj.name)
    tag_query.set_query_options(field_list=["id"], filter_query=owner_and_pack)
    tag_query.add_facet_fields("tag")
    tag_query.set_facet_options("tag", limit=20, mincount=1)
    try:
        response = SolrResponseInterpreter(Solr(settings.SOLR_URL).select(unicode(tag_query)))
    except (SolrException, Exception):
        # Every failure is reported to the caller as False
        return False
    else:
        return response.facets
Exemplo n.º 12
0
def get_pack_tags(pack_obj):
    """Ask Solr for the tag facet of the sounds that belong to ``pack_obj``.

    :param pack_obj: object providing ``user.username`` and ``name``, both
        used to build the filter of the query.
    :return: the ``facets`` of the interpreted Solr response, or ``False``
        when the request raises.
    """
    query = SolrQuery()
    query.set_dismax_query('')
    query_options = {
        'field_list': ["id"],
        'filter_query': 'username:\"%s\" pack:\"%s\"' % (pack_obj.user.username,
                                                        pack_obj.name),
    }
    query.set_query_options(**query_options)
    query.add_facet_fields("tag")
    query.set_facet_options("tag", limit=20, mincount=1)
    facets = False  # Value handed back when Solr cannot be queried
    try:
        solr = Solr(settings.SOLR_URL)
        facets = SolrResponseInterpreter(solr.select(unicode(query))).facets
    except (SolrException, Exception):
        return False
    return facets
Exemplo n.º 13
0
    def get_user_tags(self, use_solr=True):
        """Query Solr for the tag facet of the sounds of ``self.user``.

        Returns ``False`` when the Solr request raises.

        NOTE(review): this snippet shows no return after a successful request
        and no branch for ``use_solr=False``; in both cases the visible code
        returns None.
        """
        if use_solr:
            tag_query = SolrQuery()
            tag_query.set_dismax_query('')
            tag_query.set_query_options(
                field_list=["id"],
                filter_query='username:\"%s\"' % self.user.username)
            tag_query.add_facet_fields("tag")
            tag_query.set_facet_options("tag", limit=10, mincount=1)
            solr = Solr(settings.SOLR_URL)

            try:
                results = SolrResponseInterpreter(solr.select(unicode(tag_query)))
            except (SolrException, Exception):
                # Solr errors and any other failure give the same answer
                return False
Exemplo n.º 14
0
    def get_user_tags(self, use_solr=True):
        """Request from Solr the "tag" facet for the sounds of this user.

        The query is filtered by ``self.user.username`` and limited to 10 facet
        values with a minimum count of 1. ``False`` is returned if the request
        fails.

        NOTE(review): nothing is returned after a successful request in the
        part of the method that is visible here.
        """
        if use_solr:
            username_filter = 'username:\"%s\"' % self.user.username

            query = SolrQuery()
            query.set_dismax_query('')
            query.set_query_options(field_list=["id"], filter_query=username_filter)
            query.add_facet_fields("tag")
            query.set_facet_options("tag", limit=10, mincount=1)

            solr = Solr(settings.SOLR_URL)
            serialized_query = unicode(query)
            try:
                results = SolrResponseInterpreter(solr.select(serialized_query))
            except (SolrException, Exception):
                return False
Exemplo n.º 15
0
def get_pack_tags(pack_obj):
    """Query Solr for the tag facet of the sounds of a pack.

    The filter combines the username of the pack owner with the pack name.
    ``False`` is returned when Solr raises a ``SolrException``.

    NOTE(review): no return is visible after the try block in this snippet,
    so a successful request yields None here.
    """
    pack_query = SolrQuery()
    pack_query.set_dismax_query('')
    pack_filter = 'username:\"%s\" pack:\"%s\"' % (pack_obj.user.username, pack_obj.name)
    pack_query.set_query_options(field_list=["id"], filter_query=pack_filter)
    pack_query.add_facet_fields("tag")
    pack_query.set_facet_options("tag", limit=20, mincount=1)
    solr = Solr(settings.SOLR_URL)

    try:
        results = SolrResponseInterpreter(solr.select(unicode(pack_query)))
    except SolrException:
        # Only Solr errors are turned into False; anything else propagates
        return False
Exemplo n.º 16
0
    def items(self, obj):
        """Return the Solr documents that match the search described by ``obj``.

        ``obj`` is read as a mapping with these keys:

        * 'query': text to search for; an empty string gives an empty list.
        * 'type': "phrase" searches the quoted query over weighted fields,
          "any" uses a minimum match of 0 (OR) and any other value uses a
          minimum match of "100%" (AND).
        * 'limit': number of rows requested, capped at 100.
        * 'offset': index of the first row requested.

        Results are sorted by 'created desc'. Any error raised while building
        or running the query results in an empty list.
        """
        if obj['query'] == "":
            return []

        try:
            solr = Solr(settings.SOLR_URL)
            query = SolrQuery()

            if obj['type'] == "phrase":
                # (field name, weight) pairs used only by the phrase search
                fields = [('id', 4),
                          ('tag', 3),
                          ('description', 3),
                          ('username', 2),
                          ('pack_tokenized', 2),
                          ('original_filename', 2)]
                query.set_dismax_query('"' + obj['query'] + '"', query_fields=fields)  # EXACT (not 100%)
            elif obj['type'] == "any":
                query.set_dismax_query(obj['query'], query_fields=[], minimum_match=0)  # OR
            else:
                query.set_dismax_query(obj['query'], query_fields=[], minimum_match="100%")  # AND

            # Never ask for more than 100 rows
            rows = min(obj['limit'], 100)
            query.set_query_options(start=obj['offset'], rows=rows, filter_query="", sort=['created desc'])

            results = SolrResponseInterpreter(solr.select(unicode(query)))
            sounds = list(results.docs)

            logger.info("Sound pool search RSS")
            return sounds
        except (SolrException, Exception):
            # Best effort: a failed search yields no items. Unlike a bare
            # except, this lets KeyboardInterrupt and SystemExit through.
            return []
Exemplo n.º 17
0
    def get_user_tags(self):
        """Return the tags most used by ``self.user`` according to Solr.

        Solr is asked for the "tag" facet (at most 10 values, minimum count 1)
        of the documents whose username matches ``self.user.username``.

        :return: list of ``{'name': ..., 'count': ...}`` dicts, or ``False``
            when the Solr request raises.
        """
        facet_query = SolrQuery()
        facet_query.set_dismax_query('')
        facet_query.set_query_options(
            field_list=["id"],
            filter_query='username:\"%s\"' % self.user.username)
        facet_query.add_facet_fields("tag")
        facet_query.set_facet_options("tag", limit=10, mincount=1)
        solr = Solr(settings.SOLR_URL)

        try:
            response = SolrResponseInterpreter(solr.select(unicode(facet_query)))
        except (SolrException, Exception):
            # Solr errors and unexpected failures are reported the same way
            return False

        return [dict(name=tag, count=count) for tag, count in response.facets['tag']]
Exemplo n.º 18
0
def get_pack_tags(pack_obj):
    """Request from Solr the "tag" facet for the sounds in ``pack_obj``.

    The documents are filtered by ``pack_obj.user.username`` and
    ``pack_obj.name``; at most 20 facet values with a count of at least 1 are
    requested. Returns ``False`` if Solr raises a ``SolrException``.

    NOTE(review): the visible snippet returns nothing (None) when the request
    succeeds.
    """
    query = SolrQuery()
    query.set_dismax_query('')
    query.set_query_options(
        field_list=["id"],
        filter_query='username:\"%s\" pack:\"%s\"' % (pack_obj.user.username, pack_obj.name))
    query.add_facet_fields("tag")
    query.set_facet_options("tag", limit=20, mincount=1)
    solr = Solr(settings.SOLR_URL)
    serialized_query = unicode(query)

    try:
        results = SolrResponseInterpreter(solr.select(serialized_query))
    except SolrException:
        return False
Exemplo n.º 19
0
def get_all_sound_ids_from_solr(limit=False):
    """Return, sorted, the ids of the documents stored in the Solr index.

    Results are requested in pages of ``PAGE_SIZE`` sorted by 'created asc'.
    Paging stops when as many ids as Solr reports in ``num_found`` have been
    collected, when at least ``limit`` ids have been collected, or when a
    page comes back empty.

    :param limit: number of ids after which no more pages are requested; a
        falsy value means no practical limit. Pages are appended whole, so
        the result can hold more than ``limit`` ids.
    :return: sorted list of ids.
    """
    logger.info("getting all sound ids from solr.")
    if not limit:
        limit = 99999999999999
    solr = Solr(settings.SOLR_URL)
    solr_ids = []
    PAGE_SIZE = 2000
    current_page = 1
    while len(solr_ids) < limit:
        response = SolrResponseInterpreter(
            solr.select(unicode(search_prepare_query(
                '', '', search_prepare_sort('created asc', SEARCH_SORT_OPTIONS_WEB), current_page, PAGE_SIZE,
                include_facets=False))))
        page_ids = [element['id'] for element in response.docs]
        solr_ids += page_ids
        # Stop once everything Solr reported has been read. An empty page also
        # ends the loop, which otherwise would never finish if fewer documents
        # than ``num_found`` are actually returned.
        if not page_ids or len(solr_ids) >= response.num_found:
            break
        current_page += 1
    return sorted(solr_ids)
Exemplo n.º 20
0
def get_solr_results(search_form, page_size, max_pages, start_page=1, valid_ids=None, solr=None, offset=None):
    """Gather result ids from Solr, requesting up to ``max_pages`` pages.

    The text query, filter and sort come from ``search_form.cleaned_data``
    ('query', 'filter' and 'sort'). When ``valid_ids`` is given, an
    ``id:(a OR b ...)`` clause is appended to the filter. Pages of
    ``page_size`` results are requested starting at ``start_page``;
    ``offset`` is passed through to ``search_prepare_query``. A ``Solr``
    instance is created from ``settings.SOLR_URL`` unless one is passed in.

    Returns ``(solr_ids, solr_count)``: the collected ids and the
    ``num_found`` value of the last response (None if no request was made).
    Raises ``ServerErrorException`` if the request to Solr fails in any way.
    """
    if not solr:
        solr = Solr(settings.SOLR_URL)

    query_filter = search_form.cleaned_data['filter']
    if valid_ids:
        # Update solr filter to only return results in valid ids
        ids_filter = 'id:(%s)' % ' OR '.join(str(item) for item in valid_ids)
        query_filter = '%s %s' % (query_filter, ids_filter) if query_filter else ids_filter

    solr_ids = []
    solr_count = None  # Not known before the first response

    try:
        current_page = start_page
        n_page_requests = 1
        # Iterate over solr result pages
        while n_page_requests <= max_pages:
            # Test for None explicitly; an int cannot be compared with None on Python 3
            if solr_count is not None and len(solr_ids) >= solr_count:
                break
            query = search_prepare_query(unquote(search_form.cleaned_data['query'] or ""),
                                         unquote(query_filter or ""),
                                         search_form.cleaned_data['sort'],
                                         current_page,
                                         page_size,
                                         grouping=False,
                                         include_facets=False,
                                         offset=offset)
            result = SolrResponseInterpreter(solr.select(unicode(query)))
            page_ids = [element['id'] for element in result.docs]
            solr_ids += page_ids
            solr_count = result.num_found
            if not page_ids:
                # Nothing came back, so later pages cannot hold results either
                break

            current_page += 1
            n_page_requests += 1

    except SolrException as e:
        raise ServerErrorException(msg='Search server error: %s' % e.message)
    except Exception:
        raise ServerErrorException(msg='The search server could not be reached or some unexpected error occurred.')

    return solr_ids, solr_count
Exemplo n.º 21
0
def get_random_sound_from_solr():
    """ Pick one random document from solr.
    Used for random sound browsing. Explicit sounds are filtered out; no
    other restriction is applied. Returns the document, or an empty dict
    when nothing is found or the request to solr fails.
    """
    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    # Sorting by a field named 'random_<key>' with a fresh key on every call
    random_sort = 'random_%d asc' % random.randint(1, 10000000)
    query.set_query("*:*")
    query.set_query_options(start=0,
                            rows=1,
                            field_list=["*"],
                            filter_query='is_explicit:0',
                            sort=[random_sort])
    try:
        found = SolrResponseInterpreter(solr.select(unicode(query))).docs
        if found:
            return found[0]
    except (SolrException, socket.error):
        # Fall through to the empty result below
        pass
    return {}
Exemplo n.º 22
0
def tags(request, multiple_tags=None):
    """Query Solr for the sounds matching a set of tags and for the tag facet.

    ``multiple_tags`` is a '/'-separated string of tags (or None); the page
    number is read from the "page" GET parameter and falls back to 1 when it
    is not an integer. Results are sorted by number of downloads.

    NOTE(review): this snippet ends after the ``except`` clause and shows no
    return statement; the locals built here are presumably used by code that
    is not visible - confirm before renaming any of them.
    """
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []
    
    # Drop empty strings (e.g. produced by a trailing slash) and sort the tags
    multiple_tags = sorted(filter(lambda x:x, multiple_tags))
    
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1

    solr = Solr(settings.SOLR_URL)
    
    query = SolrQuery()
    if multiple_tags:
        # NOTE(review): tag text is placed inside the quotes unescaped, so a tag
        # containing a double quote would change the query - confirm that the
        # URL pattern cannot deliver one
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        # No tags given: match every document
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE, rows=settings.SOUNDS_PER_PAGE, field_list=["id"], sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)
    
    try:

        results = SolrResponseInterpreter(solr.select(unicode(query)))


        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        page = paginator.page(current_page)
        error = False
        # Turn every (name, count) facet entry into a dict
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]
    except SolrException, e:
        # Only Solr errors are handled here: they are flagged and logged
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
Exemplo n.º 23
0
            raise ServerErrorException(msg='The similarity server could not be reached or some unexpected error occurred.', resource=resource)


    elif not search_form.cleaned_data['descriptors_filter'] and not search_form.cleaned_data['target'] and not target_file:
        # Standard text-based search
        try:
            solr = Solr(settings.SOLR_URL)
            query = search_prepare_query(unquote(search_form.cleaned_data['query'] or ""),
                                         unquote(search_form.cleaned_data['filter'] or ""),
                                         search_form.cleaned_data['sort'],
                                         search_form.cleaned_data['page'],
                                         search_form.cleaned_data['page_size'],
                                         grouping=search_form.cleaned_data['group_by_pack'],
                                         include_facets=False)

            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids = [element['id'] for element in result.docs]
            solr_count = result.num_found

            more_from_pack_data = None
            if search_form.cleaned_data['group_by_pack']:
                # If grouping option is on, store grouping info in a dictionary that we can add when serializing sounds
                more_from_pack_data = dict([(int(element['id']), [element['more_from_pack'], element['pack_id'], element['pack_name']]) for element in result.docs])

            return solr_ids, solr_count, None, more_from_pack_data, None, None, None

        except SolrException, e:
            if search_form.cleaned_data['filter'] != None:
                raise BadRequestException(msg='Search server error: %s (please check that your filter syntax and field names are correct)' % e.message, resource=resource)
            raise BadRequestException(msg='Search server error: %s' % e.message, resource=resource)
        except Exception, e:
Exemplo n.º 24
0
def api_search(search_form,
               target_file=None,
               extra_parameters=False,
               merging_strategy='merge_optimized',
               resource=None):
    """Run the search described by ``search_form`` with the right backend.

    The kind of search depends on which inputs are present:

    * no query/filter and no descriptors_filter/target/target_file: empty
      results;
    * no query/filter: content-based search through
      ``similarity_api_search``;
    * no descriptors_filter/target/target_file: text-based search in Solr;
    * otherwise: combined search, delegated to the function of
      ``combined_search_strategies`` named by ``merging_strategy``.

    The first three cases return a 7-tuple ``(ids, count,
    distance_to_target_data, more_from_pack_data, note, None, None)`` where
    the entries that do not apply are None. Failures of the backends are
    raised as ``ServerErrorException``, ``BadRequestException`` or
    ``NotFoundException``, built with ``resource``.
    """
    data = search_form.cleaned_data
    no_text_input = data['query'] is None and data['filter'] is None
    no_content_input = not data['descriptors_filter'] \
        and not data['target'] \
        and not target_file

    if no_text_input and no_content_input:
        # No input data for search, return empty results
        return [], 0, None, None, None, None, None

    if no_text_input:
        # Standard content-based search
        try:
            results, count, note = similarity_api_search(
                target=data['target'],
                filter=data['descriptors_filter'],
                num_results=data['page_size'],
                offset=(data['page'] - 1) * data['page_size'],
                target_file=target_file)

            gaia_ids = [result[0] for result in results]
            # The distance to the target is only kept when a target was given
            # (otherwise there is no meaningful distance value); it is stored
            # so that the serializer can access it
            if data['target'] or target_file:
                distance_to_target_data = dict(results)
            else:
                distance_to_target_data = None

            return gaia_ids, count, distance_to_target_data, None, note, None, None
        except SimilarityException as e:
            # Status codes with a dedicated exception class keep the message
            # as it is; any other code becomes a generic server error
            exception_by_status = {
                500: ServerErrorException,
                400: BadRequestException,
                404: NotFoundException,
            }
            exception_class = exception_by_status.get(e.status_code)
            if exception_class is None:
                raise ServerErrorException(msg='Similarity server error: %s' % e.message,
                                           resource=resource)
            raise exception_class(msg=e.message, resource=resource)
        except Exception as e:
            raise ServerErrorException(
                msg='The similarity server could not be reached or some unexpected error occurred.',
                resource=resource)

    if no_content_input:
        # Standard text-based search
        try:
            solr = Solr(settings.SOLR_URL)
            query = search_prepare_query(
                unquote(data['query'] or ""),
                unquote(data['filter'] or ""),
                data['sort'],
                data['page'],
                data['page_size'],
                grouping=data['group_by_pack'],
                include_facets=False)

            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids = [element['id'] for element in result.docs]
            solr_count = result.num_found

            more_from_pack_data = None
            if data['group_by_pack']:
                # If grouping option is on, store grouping info in a dictionary that we can add when serializing sounds
                more_from_pack_data = {}
                for element in result.docs:
                    more_from_pack_data[int(element['id'])] = [
                        element['more_from_pack'],
                        element['pack_id'],
                        element['pack_name'],
                    ]

            return solr_ids, solr_count, None, more_from_pack_data, None, None, None

        except SolrException as e:
            if data['filter'] is None:
                raise BadRequestException(msg='Search server error: %s' % e.message,
                                          resource=resource)
            raise BadRequestException(
                msg='Search server error: %s (please check that your filter syntax and field names are correct)' % e.message,
                resource=resource)
        except Exception as e:
            raise ServerErrorException(
                msg='The search server could not be reached or some unexpected error occurred.',
                resource=resource)

    # Combined search (there is at least one of query/filter and one of descriptors_filter/target)
    # Strategies are implemented in 'combined_search_strategies'
    combine = getattr(combined_search_strategies, merging_strategy)
    return combine(search_form,
                   target_file=target_file,
                   extra_parameters=extra_parameters)
Exemplo n.º 25
0
def search(request):
    """Search sounds in Solr using the GET parameters of ``request``.

    Parameters read from ``request.GET``:
        q: free-text query (an empty query is allowed).
        f: filter query.
        page: page number; non-integer values fall back to 1.
        s: sort option, normalised through ``search_prepare_sort``.
        g: grouping flag ("1" by default). Grouping is disabled when the
            filter contains "pack" or when ``ajax`` is "1".
        advanced, a_*: when ``advanced`` is "1" and at least one ``a_*``
            field is set, only the selected fields keep their default
            weight; all other field weights become 0.

    On ``SolrException`` the problem is logged and ``error`` /
    ``error_text`` are set.

    NOTE(review): local names (including the ``actual_groupnig`` spelling)
    are intentionally left unchanged in case code that follows this excerpt
    depends on them -- confirm before renaming.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    filter_query_link_more_when_grouping_packs = filter_query.replace(' ', '+')

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    sort = request.GET.get("s", None)
    sort_options = forms.SEARCH_SORT_OPTIONS_WEB

    grouping = request.GET.get("g", "1")  # Group by default
    actual_groupnig = grouping
    # If the query is filtered by pack, do not collapse sounds of the same pack (makes no sense)
    # If the query is through ajax (for sources remix editing), do not collapse
    if "pack" in filter_query or request.GET.get("ajax", "") == "1":
        actual_groupnig = ""

    # Set default values
    id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
    tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
    description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
    username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
    pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
    original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # Parse advanced search options
    advanced = request.GET.get("advanced", "")

    if advanced == "1":
        a_tag = request.GET.get("a_tag", "")
        a_filename = request.GET.get("a_filename", "")
        a_description = request.GET.get("a_description", "")
        a_packname = request.GET.get("a_packname", "")
        a_soundid = request.GET.get("a_soundid", "")
        a_username = request.GET.get("a_username", "")

        # If none is selected use all (so other filters can be applied)
        if a_tag or a_filename or a_description or a_packname or a_soundid or a_username:
            # Only the selected checkboxes keep their default weight; the rest are zeroed
            id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id'] if a_soundid else 0
            tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag'] if a_tag else 0
            description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description'] if a_description else 0
            username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username'] if a_username else 0
            pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized'] if a_packname else 0
            original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename'] if a_filename else 0

    sort = search_prepare_sort(sort, forms.SEARCH_SORT_OPTIONS_WEB)

    query = search_prepare_query(search_query,
                                 filter_query,
                                 sort,
                                 current_page,
                                 settings.SOUNDS_PER_PAGE,
                                 id_weight,
                                 tag_weight,
                                 description_weight,
                                 username_weight,
                                 pack_tokenized_weight,
                                 original_filename_weight,
                                 grouping=actual_groupnig)

    solr = Solr(settings.SOLR_URL)

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False

        # clickusage tracking
        if settings.LOG_CLICKTHROUGH_DATA:
            request_full_path = request.get_full_path()
            # The session id of an unauthenticated user is different from the session id of the same user when
            # authenticated.
            request.session["searchtime_session_key"] = request.session.session_key
            # Start from an empty list so the log call below never hits an
            # unbound name when the response carries no docs.
            ids = []
            if results.docs is not None:
                ids = [item["id"] for item in results.docs]
            logger_click.info("QUERY : %s : %s : %s : %s" %
                              (unicode(request_full_path).encode('utf-8'),
                               request.session.session_key,
                               unicode(ids).encode('utf-8'),
                               unicode(current_page).encode('utf-8')))

    except SolrException as e:
        logger.warning("search error: query: %s error %s" % (query, e))
        error = True
        error_text = 'There was an error while searching, is your query correct?'
Exemplo n.º 26
0
def get_stream_sounds(user, time_lapse):
    """Collect sounds from the users and tags that ``user`` follows.

    For each followed user and each followed tag string, Solr is queried
    (first page, ``SOLR_QUERY_LIMIT_PARAM`` rows, "created desc" sort option,
    no grouping, no facets) with a filter that includes ``created:`` followed
    by ``time_lapse``. Queries that return no rows are skipped.

    Returns:
        A ``(users_sounds, tags_sounds)`` tuple of lists. Entries are
        ``((user_following, False), sound_objs, more_url_params, more_count, new_count)``
        for users and
        ``(tags, sound_objs, more_url_params, more_count, new_count)``
        for tags, where ``more_count`` is the number of matches beyond
        ``SOLR_QUERY_LIMIT_PARAM`` and ``new_count`` is ``more_count`` plus
        the number of ids returned.
    """
    solr = Solr(settings.SOLR_URL)

    sort_str = search_prepare_sort("created desc", SEARCH_SORT_OPTIONS_WEB)

    #
    # USERS FOLLOWING
    #

    users_sounds = []
    for user_following in get_users_following(user):

        # NOTE(review): this expression was garbled in the source copy
        # ("username:"******" created:"). It is reconstructed here assuming
        # each followed user exposes ``.username`` -- confirm against
        # get_users_following().
        filter_str = "username:" + user_following.username + " created:" + time_lapse

        query = search_prepare_query(
            "",
            filter_str,
            sort_str,
            1,
            SOLR_QUERY_LIMIT_PARAM,
            grouping=False,
            include_facets=False
        )

        result = SolrResponseInterpreter(solr.select(unicode(query)))
        if result.num_rows == 0:
            continue

        more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)

        # the sorting only works if done like this!
        more_url_params = [urllib.quote(filter_str), urllib.quote(sort_str[0])]

        sound_ids = [element['id'] for element in result.docs]
        sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids).select_related('license', 'user')
        new_count = more_count + len(sound_ids)
        users_sounds.append(((user_following, False), sound_objs, more_url_params, more_count, new_count))

    #
    # TAGS FOLLOWING
    #

    tags_sounds = []
    for tag_following in get_tags_following(user):

        tags = tag_following.split(" ")
        # Each tag contributes "tag:<name> " (trailing space kept so the
        # resulting filter string is unchanged).
        tag_filter_query = "".join("tag:" + tag + " " for tag in tags)
        tag_filter_str = tag_filter_query + " created:" + time_lapse

        query = search_prepare_query(
            "",
            tag_filter_str,
            sort_str,
            1,
            SOLR_QUERY_LIMIT_PARAM,
            grouping=False,
            include_facets=False
        )

        result = SolrResponseInterpreter(solr.select(unicode(query)))
        if result.num_rows == 0:
            continue

        more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)

        # the sorting only works if done like this!
        more_url_params = [urllib.quote(tag_filter_str), urllib.quote(sort_str[0])]

        sound_ids = [element['id'] for element in result.docs]
        sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids)
        new_count = more_count + len(sound_ids)
        tags_sounds.append((tags, sound_objs, more_url_params, more_count, new_count))

    return users_sounds, tags_sounds
Exemplo n.º 27
0
def get_stream_sounds(user, time_lapse):
    """Collect sounds from the users and tags that ``user`` follows.

    For each followed user and each followed tag string, Solr is queried
    (first page, ``SOLR_QUERY_LIMIT_PARAM`` rows, "created desc" sort option,
    no grouping, no facets) with a filter that includes ``created:`` followed
    by ``time_lapse``. Queries that return no rows are skipped.

    Returns:
        A ``(users_sounds, tags_sounds)`` tuple of lists. Entries are
        ``((user_following, False), sound_objs, more_url_params, more_count, new_count)``
        for users and
        ``(tags, sound_objs, more_url_params, more_count, new_count)``
        for tags, where ``more_count`` is the number of matches beyond
        ``SOLR_QUERY_LIMIT_PARAM`` and ``new_count`` is ``more_count`` plus
        the number of ids returned.
    """
    solr = Solr(settings.SOLR_URL)

    sort_str = search_prepare_sort("created desc", SEARCH_SORT_OPTIONS_WEB)

    #
    # USERS FOLLOWING
    #

    users_sounds = []
    for user_following in get_users_following(user):

        # NOTE(review): this expression was garbled in the source copy
        # ("username:"******" created:"). It is reconstructed here assuming
        # each followed user exposes ``.username`` -- confirm against
        # get_users_following().
        filter_str = "username:" + user_following.username + " created:" + time_lapse

        query = search_prepare_query("",
                                     filter_str,
                                     sort_str,
                                     1,
                                     SOLR_QUERY_LIMIT_PARAM,
                                     grouping=False,
                                     include_facets=False)

        result = SolrResponseInterpreter(solr.select(unicode(query)))
        if result.num_rows == 0:
            continue

        more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)

        # the sorting only works if done like this!
        more_url_params = [
            urllib.quote(filter_str),
            urllib.quote(sort_str[0])
        ]

        sound_ids = [element['id'] for element in result.docs]
        sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids)
        new_count = more_count + len(sound_ids)
        users_sounds.append(((user_following, False), sound_objs,
                             more_url_params, more_count, new_count))

    #
    # TAGS FOLLOWING
    #

    tags_sounds = []
    for tag_following in get_tags_following(user):

        tags = tag_following.split(" ")
        # Each tag contributes "tag:<name> " (trailing space kept so the
        # resulting filter string is unchanged).
        tag_filter_query = "".join("tag:" + tag + " " for tag in tags)
        tag_filter_str = tag_filter_query + " created:" + time_lapse

        query = search_prepare_query("",
                                     tag_filter_str,
                                     sort_str,
                                     1,
                                     SOLR_QUERY_LIMIT_PARAM,
                                     grouping=False,
                                     include_facets=False)

        result = SolrResponseInterpreter(solr.select(unicode(query)))
        if result.num_rows == 0:
            continue

        more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)

        # the sorting only works if done like this!
        more_url_params = [
            urllib.quote(tag_filter_str),
            urllib.quote(sort_str[0])
        ]

        sound_ids = [element['id'] for element in result.docs]
        sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids)
        new_count = more_count + len(sound_ids)
        tags_sounds.append(
            (tags, sound_objs, more_url_params, more_count, new_count))

    return users_sounds, tags_sounds
Exemplo n.º 28
0
def _parse_forum_search_date(raw_date):
    """Return ``(raw_date, "dd-mm-YYYY")`` for a valid ``YYYY-MM-DD`` string, else ``("", "Choose a Date")``."""
    try:
        parsed = datetime.datetime.strptime(raw_date, "%Y-%m-%d")
        return raw_date, parsed.strftime("%d-%m-%Y")
    except ValueError:
        return "", "Choose a Date"


def search_forum(request):
    """Search forum posts in Solr and render ``search/search_forum.html``.

    Parameters read from ``request.GET``:
        q: free-text query; a value starting with "search in" is treated as empty.
        f: filter query.
        page: page number; non-integer values fall back to 1.
        forum: optional forum ``name_slug`` restricting the search; an
            unknown slug results in a 404 via ``get_object_or_404``.
        advanced_search, dt_from, dt_to: when ``advanced_search`` is "1",
            valid ``YYYY-MM-DD`` dates are added to the filter through
            ``__add_date_range``.

    Solr is only queried when there is a non-blank query or a filter. Search
    failures are logged and reported to the template through ``error`` and
    ``error_text`` rather than raised.

    Returns:
        The rendered response produced by ``render``.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    current_forum_name_slug = request.GET.get("forum", "").strip()    # for context sensitive search
    if current_forum_name_slug:
        current_forum = get_object_or_404(forum.models.Forum.objects, name_slug=current_forum_name_slug)
    else:
        current_forum = None
    sort = ["thread_created desc"]

    # Parse advanced search options
    advanced_search = request.GET.get("advanced_search", "")
    date_from, date_from_display = _parse_forum_search_date(request.GET.get("dt_from", ""))
    date_to, date_to_display = _parse_forum_search_date(request.GET.get("dt_to", ""))

    if search_query.startswith("search in"):
        search_query = ""

    error = False
    error_text = ""
    paginator = None
    num_results = None
    page = None
    results = []
    if search_query.strip() != "" or filter_query:
        # add current forum; separate it from any existing filter so the two
        # clauses are not glued into a single malformed token
        if current_forum:
            forum_filter = "forum_name_slug:" + current_forum.name_slug
            filter_query = (filter_query + " " + forum_filter) if filter_query else forum_filter

        # add date range (only for advanced searches; the parentheses matter
        # because ``and`` binds tighter than ``or``)
        if advanced_search == "1" and (date_from != "" or date_to != ""):
            filter_query = __add_date_range(filter_query, date_from, date_to)

        query = SolrQuery()
        query.set_dismax_query(search_query, query_fields=[("thread_title", 4),
                                                           ("post_body", 3),
                                                           ("thread_author", 3),
                                                           ("post_author", 3),
                                                           ("forum_name", 2)])
        query.set_highlighting_options_default(field_list=["post_body"],
                                               fragment_size=200,
                                               alternate_field="post_body",  # TODO: revise this param
                                               require_field_match=False,
                                               pre="<strong>",
                                               post="</strong>")
        query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                                rows=settings.SOUNDS_PER_PAGE,
                                field_list=["id",
                                            "forum_name",
                                            "forum_name_slug",
                                            "thread_id",
                                            "thread_title",
                                            "thread_author",
                                            "thread_created",
                                            "post_body",
                                            "post_author",
                                            "post_created",
                                            "num_posts"],
                                filter_query=filter_query,
                                sort=sort)

        query.set_group_field("thread_title_grouped")
        query.set_group_options(group_limit=30)

        solr = Solr(settings.SOLR_FORUM_URL)

        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
            num_results = paginator.count
            page = paginator.page(current_page)
            error = False
        except SolrException as e:
            logger.warning("search error: query: %s error %s" % (query, e))
            error = True
            error_text = 'There was an error while searching, is your query correct?'
        except Exception as e:
            # Top-level view boundary: report any other failure to the user instead of a 500
            logger.error("Could probably not connect to Solr - %s" % e)
            error = True
            error_text = 'The search server could not be reached, please try again later.'

    tvars = {
        'advanced_search': advanced_search,
        'current_forum': current_forum,
        'current_page': current_page,
        'date_from': date_from,
        'date_from_display': date_from_display,
        'date_to': date_to,
        'date_to_display': date_to_display,
        'error': error,
        'error_text': error_text,
        'filter_query': filter_query,
        'num_results': num_results,
        'page': page,
        'paginator': paginator,
        'search_query': search_query,
        'sort': sort,
        'results': results,
    }

    return render(request, 'search/search_forum.html', tvars)
Exemplo n.º 29
0
def check_if_sound_exists_in_solr(sound):
    """Return True if Solr reports at least one document matching ``id:<sound.id>``."""
    solr = Solr(settings.SOLR_URL)
    id_filter = 'id:%i' % sound.id
    sort = search_prepare_sort('created asc', SEARCH_SORT_OPTIONS_WEB)
    # Empty text query, first page, a single row: only the match count matters.
    query = search_prepare_query('', id_filter, sort, 1, 1)
    raw_response = solr.select(unicode(query))
    return SolrResponseInterpreter(raw_response).num_found > 0
Exemplo n.º 30
0
def search(request):
    """Search sounds in Solr using the GET parameters of ``request``.

    Parameters read from ``request.GET``:
        q: free-text query (an empty query is allowed).
        f: filter query.
        page: page number; non-integer values fall back to 1.
        s: sort option, normalised through ``search_prepare_sort``.
        g: grouping flag ("1" by default). Grouping is disabled when the
            filter contains "pack" or when ``ajax`` is "1".
        advanced, a_*: when ``advanced`` is "1" and at least one ``a_*``
            field is set, only the selected fields keep their default
            weight; all other field weights become 0.

    The search parameters are logged as JSON before querying. Each returned
    doc gets a ``"sound"`` entry with the matching ``Sound`` loaded through
    ``bulk_query_id``; docs whose id was not loaded are dropped from ``docs``.
    On ``SolrException`` the problem is logged and ``error`` /
    ``error_text`` are set.

    NOTE(review): local names (including the ``actual_groupnig`` spelling)
    are intentionally left unchanged in case code that follows this excerpt
    depends on them -- confirm before renaming.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    filter_query_link_more_when_grouping_packs = filter_query.replace(' ', '+')

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    sort = request.GET.get("s", None)
    sort_options = forms.SEARCH_SORT_OPTIONS_WEB

    grouping = request.GET.get("g", "1")  # Group by default
    actual_groupnig = grouping
    # If the query is filtered by pack, do not collapse sounds of the same pack (makes no sense)
    # If the query is through ajax (for sources remix editing), do not collapse
    if "pack" in filter_query or request.GET.get("ajax", "") == "1":
        actual_groupnig = ""

    # Set default values
    id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
    tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
    description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
    username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
    pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
    original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # Parse advanced search options
    advanced = request.GET.get("advanced", "")

    if advanced == "1":
        a_tag = request.GET.get("a_tag", "")
        a_filename = request.GET.get("a_filename", "")
        a_description = request.GET.get("a_description", "")
        a_packname = request.GET.get("a_packname", "")
        a_soundid = request.GET.get("a_soundid", "")
        a_username = request.GET.get("a_username", "")

        # If none is selected use all (so other filters can be applied)
        if a_tag or a_filename or a_description or a_packname or a_soundid or a_username:
            # Only the selected checkboxes keep their default weight; the rest are zeroed
            id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id'] if a_soundid else 0
            tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag'] if a_tag else 0
            description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description'] if a_description else 0
            username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username'] if a_username else 0
            pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized'] if a_packname else 0
            original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename'] if a_filename else 0

    sort = search_prepare_sort(sort, forms.SEARCH_SORT_OPTIONS_WEB)

    # The a_* names only exist for advanced searches; the conditional
    # expression below evaluates them only in that case.
    logger.info(u'Search (%s)' % json.dumps({
        'ip': get_client_ip(request),
        'query': search_query,
        'filter': filter_query,
        'username': request.user.username,
        'page': current_page,
        'sort': sort[0],
        'group_by_pack' : actual_groupnig,
        'advanced': json.dumps({
            'search_in_tag': a_tag,
            'search_in_filename': a_filename,
            'search_in_description': a_description,
            'search_in_packname': a_packname,
            'search_in_soundid': a_soundid,
            'search_in_username': a_username
        }) if advanced == "1" else ""
    }))

    query = search_prepare_query(search_query,
                                 filter_query,
                                 sort,
                                 current_page,
                                 settings.SOUNDS_PER_PAGE,
                                 id_weight,
                                 tag_weight,
                                 description_weight,
                                 username_weight,
                                 pack_tokenized_weight,
                                 original_filename_weight,
                                 grouping=actual_groupnig)

    solr = Solr(settings.SOLR_URL)

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False

        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        # allsounds will contain info from all the sounds returned by bulk_query_id. This should
        # be all sounds in docs, but if solr and db are not synchronised, it might happen that there
        # are ids in docs which are not found in bulk_query_id. To avoid problems we remove elements
        # in docs that have not been loaded in allsounds.
        docs = [doc for doc in docs if doc["id"] in allsounds]
        for d in docs:
            d["sound"] = allsounds[d["id"]]

        # clickusage tracking
        if settings.LOG_CLICKTHROUGH_DATA:
            request_full_path = request.get_full_path()
            # The session id of an unauthenticated user is different from the session id of the same user when
            # authenticated.
            request.session["searchtime_session_key"] = request.session.session_key
            # Start from an empty list so the log call below never hits an
            # unbound name when the response carries no docs.
            ids = []
            if results.docs is not None:
                ids = [item["id"] for item in results.docs]
            logger_click.info("QUERY : %s : %s : %s : %s" %
                              (unicode(request_full_path).encode('utf-8'),
                               request.session.session_key,
                               unicode(ids).encode('utf-8'),
                               unicode(current_page).encode('utf-8')))

    except SolrException as e:
        logger.warning("search error: query: %s error %s" % (query, e))
        error = True
        error_text = 'There was an error while searching, is your query correct?'
Exemplo n.º 31
0
def search_forum(request):
    """Search forum posts in Solr using the GET parameters of ``request``.

    Parameters read from ``request.GET``:
        q: free-text query.
        f: filter query.
        page: page number; non-integer values fall back to 1.
        current_forum_name_slug: optional forum slug added to the filter.
        current_forum_name: forum name (used in breadcrumb).
        advanced_search, dt_from, dt_to: when ``advanced_search`` is "1" and
            at least one date is given, the dates are added to the filter
            through ``__add_date_range``.

    Solr is only queried when there is a non-blank query or a filter. Search
    failures are logged and reported through ``error`` / ``error_text``.

    NOTE(review): local names (e.g. ``invalid``, ``current_forum_name``) are
    left unchanged in case code that follows this excerpt depends on them --
    confirm before removing.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    current_forum_name_slug = request.GET.get("current_forum_name_slug", "").strip()    # for context sensitive search
    current_forum_name = request.GET.get("current_forum_name", "").strip()              # used in breadcrumb
    sort = ["thread_created desc"]

    # Parse advanced search options
    advanced_search = request.GET.get("advanced_search", "")
    date_from = request.GET.get("dt_from", "")
    date_to = request.GET.get("dt_to", "")

    # TEMPORARY WORKAROUND!!! to prevent using watermark as the query for forum search...
    # It only happens in some situations.
    if "search in " in search_query:
        invalid = 1

    if search_query.strip() != "" or filter_query:
        # add current forum; separate it from any existing filter so the two
        # clauses are not glued into a single malformed token
        if current_forum_name_slug != "":
            forum_filter = "forum_name_slug:" + current_forum_name_slug
            filter_query = (filter_query + " " + forum_filter) if filter_query else forum_filter

        # add date range (only for advanced searches; the parentheses matter
        # because ``and`` binds tighter than ``or``)
        if advanced_search == "1" and (date_from != "" or date_to != ""):
            filter_query = __add_date_range(filter_query, date_from, date_to)

        query = SolrQuery()
        query.set_dismax_query(search_query, query_fields=[("thread_title", 4),
                                                           ("post_body", 3),
                                                           ("thread_author", 3),
                                                           ("post_author", 3),
                                                           ("forum_name", 2)])
        query.set_highlighting_options_default(field_list=["post_body"],
                                               fragment_size=200,
                                               alternate_field="post_body",  # TODO: revise this param
                                               require_field_match=False,
                                               pre="<strong>",
                                               post="</strong>")
        query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                                rows=settings.SOUNDS_PER_PAGE,
                                field_list=["id",
                                            "forum_name",
                                            "forum_name_slug",
                                            "thread_id",
                                            "thread_title",
                                            "thread_author",
                                            "thread_created",
                                            "post_body",
                                            "post_author",
                                            "post_created",
                                            "num_posts"],
                                filter_query=filter_query,
                                sort=sort)

        query.set_group_field("thread_title_grouped")
        query.set_group_options(group_limit=30)

        solr = Solr(settings.SOLR_FORUM_URL)

        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
            num_results = paginator.count
            page = paginator.page(current_page)
            error = False
        except SolrException as e:
            logger.warning("search error: query: %s error %s" % (query, e))
            error = True
            error_text = 'There was an error while searching, is your query correct?'
        except Exception as e:
            # Top-level view boundary: report any other failure to the user instead of a 500
            logger.error("Could probably not connect to Solr - %s" % e)
            error = True
            error_text = 'The search server could not be reached, please try again later.'
Exemplo n.º 32
0
def search(request):
    """Search sounds in Solr using the GET parameters of ``request``.

    Parameters read from ``request.GET``:
        q: free-text query (an empty query is allowed).
        f: filter query.
        page: page number; non-integer values fall back to 1.
        s: sort option, normalised through ``search_prepare_sort``.
        g: grouping flag ("1" by default). Grouping is disabled when the
            filter contains "pack" or when ``ajax`` is "1".
        advanced, a_*: when ``advanced`` is "1" and at least one ``a_*``
            field is set, only the selected fields keep their default
            weight; all other field weights become 0.

    Each returned doc gets a ``"sound"`` entry with the matching ``Sound``
    loaded through ``bulk_query_id``; docs whose id was not loaded are
    dropped from ``docs``. On ``SolrException`` the problem is logged and
    ``error`` / ``error_text`` are set.

    NOTE(review): local names (including the ``actual_groupnig`` spelling)
    are intentionally left unchanged in case code that follows this excerpt
    depends on them -- confirm before renaming.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    filter_query_link_more_when_grouping_packs = filter_query.replace(' ', '+')

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    sort = request.GET.get("s", None)
    sort_options = forms.SEARCH_SORT_OPTIONS_WEB

    grouping = request.GET.get("g", "1")  # Group by default
    actual_groupnig = grouping
    # If the query is filtered by pack, do not collapse sounds of the same pack (makes no sense)
    # If the query is through ajax (for sources remix editing), do not collapse
    if "pack" in filter_query or request.GET.get("ajax", "") == "1":
        actual_groupnig = ""

    # Set default values
    id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
    tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
    description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
    username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
    pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
    original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # Parse advanced search options
    advanced = request.GET.get("advanced", "")

    if advanced == "1":
        a_tag = request.GET.get("a_tag", "")
        a_filename = request.GET.get("a_filename", "")
        a_description = request.GET.get("a_description", "")
        a_packname = request.GET.get("a_packname", "")
        a_soundid = request.GET.get("a_soundid", "")
        a_username = request.GET.get("a_username", "")

        # If none is selected use all (so other filters can be applied)
        if a_tag or a_filename or a_description or a_packname or a_soundid or a_username:
            # Only the selected checkboxes keep their default weight; the rest are zeroed
            id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id'] if a_soundid else 0
            tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag'] if a_tag else 0
            description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description'] if a_description else 0
            username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username'] if a_username else 0
            pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized'] if a_packname else 0
            original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename'] if a_filename else 0

    sort = search_prepare_sort(sort, forms.SEARCH_SORT_OPTIONS_WEB)

    query = search_prepare_query(search_query,
                                 filter_query,
                                 sort,
                                 current_page,
                                 settings.SOUNDS_PER_PAGE,
                                 id_weight,
                                 tag_weight,
                                 description_weight,
                                 username_weight,
                                 pack_tokenized_weight,
                                 original_filename_weight,
                                 grouping=actual_groupnig)

    solr = Solr(settings.SOLR_URL)

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False

        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        # If solr and the db are not synchronised, docs may contain ids that
        # bulk_query_id did not return. Drop those docs instead of failing
        # with a KeyError on the lookup below.
        docs = [doc for doc in docs if doc["id"] in allsounds]
        for d in docs:
            d["sound"] = allsounds[d["id"]]

        # clickusage tracking
        if settings.LOG_CLICKTHROUGH_DATA:
            request_full_path = request.get_full_path()
            # The session id of an unauthenticated user is different from the session id of the same user when
            # authenticated.
            request.session["searchtime_session_key"] = request.session.session_key
            # Start from an empty list so the log call below never hits an
            # unbound name when the response carries no docs.
            ids = []
            if results.docs is not None:
                ids = [item["id"] for item in results.docs]
            logger_click.info("QUERY : %s : %s : %s : %s" %
                              (unicode(request_full_path).encode('utf-8'),
                               request.session.session_key,
                               unicode(ids).encode('utf-8'),
                               unicode(current_page).encode('utf-8')))

    except SolrException as e:
        logger.warning("search error: query: %s error %s" % (query, e))
        error = True
        error_text = 'There was an error while searching, is your query correct?'
Exemplo n.º 33
0
        # NOTE(review): this is the body of a loop/try whose header is not part
        # of this excerpt; ``count``, ``search``, ``solr`` and the running
        # totals are defined before it.
        count = int(count)

        # Accumulate the previously recorded result count for this query
        results_before += count

        # clean the only few things DisMax doesn't like... :)
        search = search.strip("+-").replace("--", "").replace("+-", "").replace("-+", "").replace("++", "")
        # A query consisting only of one or two double quotes is treated as empty
        if search == "\"" or search == "\"\"":
            search = ""

        # Build a dismax query over the weighted fields, asking for 10 ids plus facets
        query = SolrQuery()
        query.set_dismax_query(search, query_fields=[("id", 4), ("tag",3), ("description",3), ("username",2), ("pack_original",2), ("filename",2), "comment"])
        query.set_query_options(start=0, rows=10, field_list=["id"])
        query.add_facet_fields("samplerate", "pack_original", "username", "tag", "bitrate", "bitdepth")
        query.set_facet_options_default(limit=5, sort=True, mincount=1, count_missing=True)
        # The tag and username facets get a higher limit than the default of 5
        query.set_facet_options("tag", limit=30)
        query.set_facet_options("username", limit=30)
        
        response = solr.select(unicode(query))
        interpreted = SolrResponseInterpreter(response)

        # Bookkeeping: number of queries issued overall and in the current loop
        num_queries_total += 1
        num_queries_this_loop += 1
        
        # Accumulate the query time and hit count reported by Solr
        time_solr += interpreted.q_time
        results_solr += interpreted.num_found

    except KeyboardInterrupt:
        # Ctrl-C stops the enclosing loop
        break
    except UnicodeDecodeError:
        # Queries that cannot be decoded are skipped
        pass
Exemplo n.º 34
0
def tags(request, multiple_tags=None):
    """Render the tag browsing page for an optional set of tags.

    ``multiple_tags`` is a '/'-separated string of tag names (or ``None``).
    The Solr query contains one ``tag:"..."`` clause per tag (or ``*:*`` when
    no tag is given), is sorted by ``num_downloads desc``, faceted on ``tag``
    and grouped by ``grouping_pack``. The ``Sound`` objects matching the
    returned docs are attached to each doc under the ``"sound"`` key and the
    ``sounds/tags.html`` template is rendered.

    Failures while querying Solr or loading the sounds are not raised; they
    are logged and reported to the template through the ``error`` variable.
    """
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []

    # Discard empty fragments (produced by leading, trailing or doubled
    # slashes) and keep the remaining tags sorted.
    multiple_tags = sorted(tag for tag in multiple_tags if tag)

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1

    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    if multiple_tags:
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE, rows=settings.SOUNDS_PER_PAGE, field_list=["id"], sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)
    query.set_group_field(group_field="grouping_pack")
    query.set_group_options(group_func=None,
                            group_query=None,
                            group_rows=10,
                            group_start=0,
                            group_limit=1,
                            group_offset=0,
                            group_sort=None,
                            group_sort_ingroup=None,
                            group_format='grouped',
                            group_main=False,
                            group_num_groups=True,
                            group_cache_percent=0,
                            group_truncate=True)  # Sets how many results from the same group are taken into account for computing the facets

    # Defaults used by the template when the search fails part-way through.
    page = None
    num_results = 0
    tags = []
    error = False
    docs = {}
    non_grouped_number_of_results = 0
    paginator = None
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]

        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        # If Solr and the database are out of sync, Solr can return ids that
        # bulk_query_id does not load. Drop those docs (as the search view
        # does) instead of failing the whole page with a KeyError.
        docs = [doc for doc in docs if doc["id"] in allsounds]
        for d in docs:
            d["sound"] = allsounds[d["id"]]

    except SolrException as e:
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
    except Exception as e:
        # Still best-effort (the page renders with error=True), but the
        # failure is logged and SystemExit/KeyboardInterrupt are no longer
        # swallowed.
        error = True
        search_logger.error("Unexpected error in tags view - %s" % e)

    slash_tag = "/".join(multiple_tags)

    follow_tags_url = ''
    unfollow_tags_url = ''
    show_unfollow_button = False
    if slash_tag:
        follow_tags_url = reverse('follow-tags', args=[slash_tag])
        unfollow_tags_url = reverse('unfollow-tags', args=[slash_tag])

        if request.user.is_authenticated:
            show_unfollow_button = follow_utils.is_user_following_tag(request.user, slash_tag)

    tvars = {'show_unfollow_button': show_unfollow_button,
             'multiple_tags': multiple_tags,
             'follow_tags_url': follow_tags_url,
             'unfollow_tags_url': unfollow_tags_url,
             'error': error,
             'tags': tags,
             'slash_tag': slash_tag,
             'num_results': num_results,
             'non_grouped_number_of_results': non_grouped_number_of_results,
             'docs': docs,
             'paginator': paginator,
             'page': page,
             'current_page': current_page
             }
    return render(request, 'sounds/tags.html', tvars)
Exemplo n.º 35
0
            raise ServerErrorException(msg='The similarity server could not be reached or some unexpected error occurred.', resource=resource)


    elif not search_form.cleaned_data['descriptors_filter'] and not search_form.cleaned_data['target'] and not target_file:
        # Standard text-based search
        try:
            solr = Solr(settings.SOLR_URL)
            query = search_prepare_query(unquote(search_form.cleaned_data['query'] or ""),
                                         unquote(search_form.cleaned_data['filter'] or ""),
                                         search_form.cleaned_data['sort'],
                                         search_form.cleaned_data['page'],
                                         search_form.cleaned_data['page_size'],
                                         grouping=search_form.cleaned_data['group_by_pack'],
                                         include_facets=False)

            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids = [element['id'] for element in result.docs]
            solr_count = result.num_found

            more_from_pack_data = None
            if search_form.cleaned_data['group_by_pack']:
                # If grouping option is on, store grouping info in a dictionary that we can add when serializing sounds
                more_from_pack_data = dict([(int(element['id']), [element['more_from_pack'], element['pack_id'], element['pack_name']]) for element in result.docs])

            return solr_ids, solr_count, None, more_from_pack_data, None, None, None

        except SolrException, e:
            if search_form.cleaned_data['filter'] != None:
                raise BadRequestException(msg='Search server error: %s (please check that your filter syntax and field names are correct)' % e.message, resource=resource)
            raise BadRequestException(msg='Search server error: %s' % e.message, resource=resource)
        except Exception, e:
Exemplo n.º 36
0
def search_forum(request):
    """Run a forum search against the forum Solr index.

    Reads the following GET parameters: ``q`` (search text), ``f`` (Solr
    filter query), ``page``, ``current_forum_name_slug``,
    ``current_forum_name``, ``advanced_search``, ``dt_from`` and ``dt_to``.
    When a query or a filter is present, a dismax query over thread/post
    fields is built, grouped by ``thread_title_grouped`` and sent to
    ``settings.SOLR_FORUM_URL``. Search failures are logged and reported via
    the ``error`` / ``error_text`` locals instead of being raised.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    current_forum_name_slug = request.GET.get("current_forum_name_slug", "").strip()    # for context sensitive search
    current_forum_name = request.GET.get("current_forum_name", "").strip()              # used in breadcrumb
    sort = ["thread_created desc"]

    # Parse advanced search options
    advanced_search = request.GET.get("advanced_search", "")
    date_from = request.GET.get("dt_from", "")
    date_to = request.GET.get("dt_to", "")

    # TEMPORARY WORKAROUND!!! to prevent using watermark as the query for forum search...
    # It only happens in some situations.
    # NOTE(review): `invalid` is not read anywhere in the code below, so this
    # check currently has no effect here - confirm whether it is still needed.
    if "search in " in search_query:
        invalid = 1

    if search_query.strip() != "" or filter_query:
        # add current forum
        # NOTE(review): the clause is appended without any separator, which
        # looks wrong when the "f" parameter is not empty - confirm.
        if current_forum_name_slug.strip() != "":
            filter_query += "forum_name_slug:" + current_forum_name_slug

        # add date range
        # `and` binds tighter than `or`: without the parentheses a lone dt_to
        # applied the date range even when advanced search was off, while a
        # lone dt_from did not.
        if advanced_search == "1" and (date_from != "" or date_to != ""):
            filter_query = __add_date_range(filter_query, date_from, date_to)

        query = SolrQuery()
        query.set_dismax_query(search_query, query_fields=[("thread_title", 4),
                                                           ("post_body", 3),
                                                           ("thread_author", 3),
                                                           ("post_author", 3),
                                                           ("forum_name", 2)])
        query.set_highlighting_options_default(field_list=["post_body"],
                                               fragment_size=200,
                                               alternate_field="post_body",  # TODO: revise this param
                                               require_field_match=False,
                                               pre="<strong>",
                                               post="</strong>")
        query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                                rows=settings.SOUNDS_PER_PAGE,
                                field_list=["id",
                                            "forum_name",
                                            "forum_name_slug",
                                            "thread_id",
                                            "thread_title",
                                            "thread_author",
                                            "thread_created",
                                            "post_body",
                                            "post_author",
                                            "post_created",
                                            "num_posts"],
                                filter_query=filter_query,
                                sort=sort)

        query.set_group_field("thread_title_grouped")
        query.set_group_options(group_limit=30)

        solr = Solr(settings.SOLR_FORUM_URL)

        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
            num_results = paginator.count
            page = paginator.page(current_page)
            error = False
        except SolrException as e:
            # Solr answered but rejected the request.
            logger.warning("search error: query: %s error %s" % (query, e))
            error = True
            error_text = 'There was an error while searching, is your query correct?'
        except Exception as e:
            # Anything else is reported to the user as a connectivity problem.
            logger.error("Could probably not connect to Solr - %s" % e)
            error = True
            error_text = 'The search server could not be reached, please try again later.'
Exemplo n.º 37
0
    def read(self, request):
        """Handle a sound search request and return one page of results.

        The GET parameters are validated with ``SoundSearchForm``; when the
        form is invalid an ``rc.BAD_REQUEST`` response carrying the form
        errors is returned. Otherwise a Solr query is built from the cleaned
        ``q``/``f``/``s``/``p`` values and the result is a dict with the keys
        ``sounds``, ``num_results`` and ``num_pages``, plus ``previous`` /
        ``next`` links when the page has neighbours.

        Raises ``ReturnError(500, "SearchError", ...)`` when Solr raises a
        ``SolrException``.
        """
        ip = get_client_ip(request)
        form = SoundSearchForm(SEARCH_SORT_OPTIONS_API, request.GET)
        if not form.is_valid():
            resp = rc.BAD_REQUEST
            resp.content = form.errors
            return resp

        cd = form.cleaned_data
        # "g" enables grouping of results; "0" is normalised to "" (off).
        grouping = request.GET.get("g", "")
        if grouping == "0":
            grouping = ""

        solr = Solr(settings.SOLR_URL)
        # Page size requested by the client, capped at the configured maximum.
        # NOTE(review): a non-numeric 'sounds_per_page' makes int() raise
        # ValueError here, outside any handler - confirm this is intended.
        sounds_per_page = min(int(request.GET.get('sounds_per_page', settings.SOUNDS_PER_API_RESPONSE)),settings.MAX_SOUNDS_PER_API_RESPONSE)
        query = search_prepare_query(cd['q'],
                                     cd['f'],
                                     search_prepare_sort(cd['s'], SEARCH_SORT_OPTIONS_API),
                                     cd['p'],
                                     sounds_per_page,
                                     grouping = grouping)

        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(results,sounds_per_page)
            page = paginator.page(form.cleaned_data['p'])
            sounds = []
            # NOTE(review): bad_results is never incremented below, so
            # 'num_results' is always paginator.count - confirm.
            bad_results = 0
            for object in page['object_list'] :
                try:
                    sound = prepare_collection_sound(Sound.objects.select_related('user').get(id=object['id']), custom_fields = request.GET.get('fields', False))
                    if 'more_from_pack' in object.keys():
                        if object['more_from_pack'] > 0:
                            # Link to a search restricted to the pack of this
                            # sound, keeping the page-size and fields options.
                            link = prepend_base(reverse('api-search')+'?q=%s&f=pack:"%s" %s&s=%s&g=%s' % (my_quote(cd['q']),object['pack_name'],my_quote(cd['f']),cd['s'],""))
                            if request.GET.get('sounds_per_page', None):
                                link += "&sounds_per_page=" +  str(request.GET.get('sounds_per_page', None))
                            if request.GET.get('fields', False):
                                link += "&fields=" + str(request.GET.get('fields', False))
                            sound['results_from_the_same_pack'] = link
                            sound['n_results_from_the_same_pack'] = object['more_from_pack']
                    sounds.append(sound)
                except: # This will happen if there are synchronization errors between the solr index and the database. In that case sounds are omitted and both num_results and results per page might become inaccurate
                    pass
            result = {'sounds': sounds, 'num_results': paginator.count - bad_results, 'num_pages': paginator.num_pages}

            # construct previous and next urls
            if page['has_other_pages']:
                if page['has_previous']:
                    result['previous'] = self.__construct_pagination_link(cd['q'],
                                                                          page['previous_page_number'],
                                                                          cd['f'],
                                                                          find_api_option(cd['s']),
                                                                          request.GET.get('sounds_per_page', None),
                                                                          request.GET.get('fields', False),
                                                                          grouping)
                if page['has_next']:
                    result['next'] = self.__construct_pagination_link(cd['q'],
                                                                      page['next_page_number'],
                                                                      cd['f'],
                                                                      find_api_option(cd['s']),
                                                                      request.GET.get('sounds_per_page',None),
                                                                      request.GET.get('fields', False),
                                                                      grouping)
            add_request_id(request,result)
            # NOTE(review): the `"******"` part of the next line looks like
            # text redacted by an external tool; as written the expression is
            # not valid Python and must be restored from the real source.
            # NOTE(review): the "api_key" default is False, which cannot be
            # concatenated to a string - confirm the key is always present.
            logger.info("Searching,q=" + cd['q'] + ",f=" + cd['f'] + ",p=" + str(cd['p']) + ",sounds_per_page=" + str(sounds_per_page) + ",api_key=" + request.GET.get("api_key", False) + ",api_key_username="******",ip=" + ip)
            return result

        except SolrException, e:
            # NOTE(review): the first value formatted as "search_query" is
            # cd['s'] (the sort option); presumably cd['q'] was meant - confirm.
            error = "search_query %s filter_query %s sort %s error %s" \
                        % (cd['s'], cd['f'], cd['s'], e)
            raise ReturnError(500, "SearchError", {"explanation": error})
Exemplo n.º 38
0
def search(request):
    """Run the main sound search against Solr.

    Reads the GET parameters ``q`` (search text), ``f`` (Solr filter query),
    ``page``, ``s`` (sort), ``g`` (group by pack, on by default), ``ajax``
    and the advanced-search options (``advanced`` and the ``a_*`` field
    selectors). It builds the list of active filters for display, logs the
    search, queries ``settings.SOLR_URL`` and attaches the matching ``Sound``
    objects to the returned docs under the ``"sound"`` key. A
    ``SolrException`` is logged and reported through the ``error`` /
    ``error_text`` locals instead of being raised.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    filter_query_link_more_when_grouping_packs = filter_query.replace(' ', '+')

    # Generate array with information of filters
    filter_query_split = []
    if filter_query != "":
        for filter_str in re.findall(r'[\w-]+:\"[^\"]+', filter_query):
            # The pattern stops right before the closing quote; add it back
            # so that filter_str matches the text in filter_query exactly.
            filter_str = filter_str + '"'
            filter_display = filter_str.replace('"', '')
            filter_name = filter_str.split(":")[0]
            if filter_name not in ("duration", "is_geotagged"):
                if filter_name == "grouping_pack":
                    # Show "pack:<text after the first underscore>". Splitting
                    # only once keeps values containing ':' or '_' intact,
                    # and [-1] avoids an IndexError when the value has no
                    # underscore at all.
                    val = filter_display.split(":", 1)[1]
                    filter_display = "pack:" + val.split("_", 1)[-1]

                filter_query_split.append({
                    'name': filter_display,
                    'remove_url': filter_query.replace(filter_str, ''),
                })

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    sort = request.GET.get("s", None)
    sort_options = forms.SEARCH_SORT_OPTIONS_WEB

    grouping = request.GET.get("g", "1")  # Group by default
    actual_groupnig = grouping
    # If the query is filtered by pack, do not collapse sounds of the same pack (makes no sense)
    # If the query is through ajax (for sources remix editing), do not collapse
    if "pack" in filter_query or request.GET.get("ajax", "") == "1":
        actual_groupnig = ""

    # Set default values
    id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
    tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
    description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
    username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
    pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
    original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS[
        'original_filename']

    # Parse advanced search options
    advanced = request.GET.get("advanced", "")

    # if advanced search
    if advanced == "1":
        a_tag = request.GET.get("a_tag", "")
        a_filename = request.GET.get("a_filename", "")
        a_description = request.GET.get("a_description", "")
        a_packname = request.GET.get("a_packname", "")
        a_soundid = request.GET.get("a_soundid", "")
        a_username = request.GET.get("a_username", "")

        # If none is selected use all (so other filters can be applied)
        if a_tag or a_filename or a_description or a_packname or a_soundid or a_username != "":

            # Initialize all weights to 0
            id_weight = 0
            tag_weight = 0
            description_weight = 0
            username_weight = 0
            pack_tokenized_weight = 0
            original_filename_weight = 0

            # Set the weights of selected checkboxes
            if a_soundid != "":
                id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
            if a_tag != "":
                tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
            if a_description != "":
                description_weight = settings.DEFAULT_SEARCH_WEIGHTS[
                    'description']
            if a_username != "":
                username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
            if a_packname != "":
                pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS[
                    'pack_tokenized']
            if a_filename != "":
                original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS[
                    'original_filename']

    # ALLOW "q" empty queries
    #if search_query.strip() == ""

    sort = search_prepare_sort(sort, forms.SEARCH_SORT_OPTIONS_WEB)

    # The a_* names only exist when advanced == "1"; the conditional
    # expression below guards their use.
    logger.info(u'Search (%s)' % json.dumps({
        'ip':
        get_client_ip(request),
        'query':
        search_query,
        'filter':
        filter_query,
        'username':
        request.user.username,
        'page':
        current_page,
        'sort':
        sort[0],
        'group_by_pack':
        actual_groupnig,
        'advanced':
        json.dumps({
            'search_in_tag': a_tag,
            'search_in_filename': a_filename,
            'search_in_description': a_description,
            'search_in_packname': a_packname,
            'search_in_soundid': a_soundid,
            'search_in_username': a_username
        }) if advanced == "1" else ""
    }))

    query = search_prepare_query(search_query,
                                 filter_query,
                                 sort,
                                 current_page,
                                 settings.SOUNDS_PER_PAGE,
                                 id_weight,
                                 tag_weight,
                                 description_weight,
                                 username_weight,
                                 pack_tokenized_weight,
                                 original_filename_weight,
                                 grouping=actual_groupnig)

    solr = Solr(settings.SOLR_URL)

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results,
                                                     settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False

        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        # allsounds will contain info from all the sounds returned by bulk_query_id. This should
        # be all sounds in docs, but if solr and db are not synchronised, it might happen that there
        # are ids in docs which are not found in bulk_query_id. To avoid problems we remove elements
        # in docs that have not been loaded in allsounds.
        docs = [doc for doc in docs if doc["id"] in allsounds]
        for d in docs:
            d["sound"] = allsounds[d["id"]]

    except SolrException as e:
        logger.warning("search error: query: %s error %s" % (query, e))
        error = True
        error_text = 'There was an error while searching, is your query correct?'
Exemplo n.º 39
0
def _parse_forum_search_date(raw_date):
    """Validate a ``YYYY-MM-DD`` date taken from the forum search form.

    Returns a ``(value, display)`` tuple. For a valid date, ``value`` is the
    input string unchanged and ``display`` is the same date formatted as
    ``DD-MM-YYYY``. Otherwise ``value`` is ``""`` and ``display`` is the
    placeholder ``"Choose a Date"``.
    """
    try:
        # strftime stays inside the try block on purpose: a ValueError raised
        # while formatting is treated like an unparsable date.
        display = datetime.datetime.strptime(
            raw_date, "%Y-%m-%d").strftime("%d-%m-%Y")
    except ValueError:
        return "", "Choose a Date"
    return raw_date, display


def search_forum(request):
    """Render the forum search page.

    Reads the GET parameters ``q`` (search text), ``f`` (Solr filter query),
    ``page``, ``forum`` (slug of the forum to search in; an unknown slug
    results in a 404), ``advanced_search``, ``dt_from`` and ``dt_to``
    (``YYYY-MM-DD``). When a query or a filter is present, a dismax query
    over thread/post fields is built, grouped by ``thread_title_grouped`` and
    sent to ``settings.SOLR_FORUM_URL``.

    Search failures are logged and reported to the template through
    ``error`` / ``error_text`` instead of being raised. The
    ``search/search_forum.html`` template is rendered in every case.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    current_forum_name_slug = request.GET.get(
        "forum", "").strip()  # for context sensitive search
    if current_forum_name_slug:
        current_forum = get_object_or_404(forum.models.Forum.objects,
                                          name_slug=current_forum_name_slug)
    else:
        current_forum = None
    sort = ["thread_created desc"]

    # Parse advanced search options
    advanced_search = request.GET.get("advanced_search", "")
    date_from, date_from_display = _parse_forum_search_date(
        request.GET.get("dt_from", ""))
    date_to, date_to_display = _parse_forum_search_date(
        request.GET.get("dt_to", ""))

    # Ignore queries that start with "search in": they are not real searches.
    if search_query.startswith("search in"):
        search_query = ""

    error = False
    error_text = ""
    paginator = None
    num_results = None
    page = None
    results = []
    if search_query.strip() != "" or filter_query:
        # add current forum
        # NOTE(review): the clause is appended without any separator, which
        # looks wrong when the "f" parameter is not empty - confirm.
        if current_forum:
            filter_query += "forum_name_slug:" + current_forum.name_slug

        # add date range
        # `and` binds tighter than `or`: without the parentheses a lone dt_to
        # applied the date range even when advanced search was off, while a
        # lone dt_from did not.
        if advanced_search == "1" and (date_from != "" or date_to != ""):
            filter_query = __add_date_range(filter_query, date_from, date_to)

        query = SolrQuery()
        query.set_dismax_query(search_query,
                               query_fields=[("thread_title", 4),
                                             ("post_body", 3),
                                             ("thread_author", 3),
                                             ("post_author", 3),
                                             ("forum_name", 2)])
        query.set_highlighting_options_default(
            field_list=["post_body"],
            fragment_size=200,
            alternate_field="post_body",  # TODO: revise this param
            require_field_match=False,
            pre="<strong>",
            post="</strong>")
        query.set_query_options(
            start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
            rows=settings.SOUNDS_PER_PAGE,
            field_list=[
                "id", "forum_name", "forum_name_slug", "thread_id",
                "thread_title", "thread_author", "thread_created", "post_body",
                "post_author", "post_created", "num_posts"
            ],
            filter_query=filter_query,
            sort=sort)

        query.set_group_field("thread_title_grouped")
        query.set_group_options(group_limit=30)

        solr = Solr(settings.SOLR_FORUM_URL)

        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(
                results, settings.SOUNDS_PER_PAGE)
            num_results = paginator.count
            page = paginator.page(current_page)
        except SolrException as e:
            # Solr answered but rejected the request.
            logger.warning("search error: query: %s error %s" % (query, e))
            error = True
            error_text = 'There was an error while searching, is your query correct?'
        except Exception as e:
            # Anything else is reported to the user as a connectivity problem.
            logger.error("Could probably not connect to Solr - %s" % e)
            error = True
            error_text = 'The search server could not be reached, please try again later.'

    tvars = {
        'advanced_search': advanced_search,
        'current_forum': current_forum,
        'current_page': current_page,
        'date_from': date_from,
        'date_from_display': date_from_display,
        'date_to': date_to,
        'date_to_display': date_to_display,
        'error': error,
        'error_text': error_text,
        'filter_query': filter_query,
        'num_results': num_results,
        'page': page,
        'paginator': paginator,
        'search_query': search_query,
        'sort': sort,
        'results': results,
    }

    return render(request, 'search/search_forum.html', tvars)
Exemplo n.º 40
0
def tags(request, multiple_tags=None):
    """Render the tag browsing page for an optional set of tags.

    ``multiple_tags`` is a '/'-separated string of tag names (or ``None``).
    The Solr query contains one ``tag:"..."`` clause per tag (or ``*:*`` when
    no tag is given), is sorted by ``num_downloads desc``, faceted on ``tag``
    and grouped by ``grouping_pack``. The ``Sound`` objects matching the
    returned docs are attached to each doc under the ``"sound"`` key.

    The template context is ``locals()``: every local variable of this
    function is visible to ``sounds/tags.html``, so local names must not be
    renamed or removed without checking the template.

    Failures while querying Solr or loading the sounds are not raised; they
    are logged and reported to the template through ``error``.
    """
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []

    # Discard empty fragments (produced by leading, trailing or doubled
    # slashes) and keep the remaining tags sorted.
    multiple_tags = sorted(tag for tag in multiple_tags if tag)

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1

    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    if multiple_tags:
        query.set_query(" ".join("tag:\"" + tag + "\""
                                 for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) *
                            settings.SOUNDS_PER_PAGE,
                            rows=settings.SOUNDS_PER_PAGE,
                            field_list=["id"],
                            sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100,
                                    sort=True,
                                    mincount=1,
                                    count_missing=False)
    query.set_group_field(group_field="grouping_pack")
    query.set_group_options(
        group_func=None,
        group_query=None,
        group_rows=10,
        group_start=0,
        group_limit=1,
        group_offset=0,
        group_sort=None,
        group_sort_ingroup=None,
        group_format='grouped',
        group_main=False,
        group_num_groups=True,
        group_cache_percent=0,
        group_truncate=True
    )  # Sets how many results from the same group are taken into account for computing the facets

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results,
                                                     settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]

        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        # If Solr and the database are out of sync, Solr can return ids that
        # bulk_query_id does not load. Drop those docs (as the search view
        # does) instead of failing the whole page with a KeyError.
        docs = [d for d in docs if d["id"] in allsounds]
        for d in docs:
            d["sound"] = allsounds[d["id"]]

    except SolrException as e:
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
    except Exception as e:
        # Still best-effort (the page renders with error=True), but the
        # failure is logged and SystemExit/KeyboardInterrupt are no longer
        # swallowed.
        error = True
        search_logger.error("Unexpected error in tags view - %s" % e)

    slash_tag = "/".join(multiple_tags)
    space_tag = " ".join(multiple_tags)

    # The follow/unfollow names are only defined when at least one tag is
    # selected; otherwise they are simply absent from the template context.
    if slash_tag:
        follow_tags_url = reverse('follow-tags', args=[slash_tag])
        unfollow_tags_url = reverse('unfollow-tags', args=[slash_tag])
        show_unfollow_button = False
        if request.user.is_authenticated:
            show_unfollow_button = follow_utils.is_user_following_tag(
                request.user, slash_tag)

    return render(request, 'sounds/tags.html', locals())
Exemplo n.º 41
0
def api_search(
        search_form, target_file=None, extra_parameters=False, merging_strategy='merge_optimized', resource=None):
    """Dispatch an API search to the content-based, text-based or combined backend.

    The kind of search is chosen from the fields of ``search_form.cleaned_data``:

    * no ``query``/``filter`` and no ``descriptors_filter``/``target``/``target_file``:
      nothing to search for, an empty result is returned;
    * no ``query``/``filter`` (but some content input): content-based search
      through ``similarity_api_search``;
    * no ``descriptors_filter``/``target``/``target_file`` (but some text input):
      text-based search through Solr;
    * otherwise: combined search, delegated to the function named
      ``merging_strategy`` in ``combined_search_strategies``.

    Args:
        search_form: form object exposing a ``cleaned_data`` mapping with the keys
            read below ('query', 'filter', 'descriptors_filter', 'target', 'page',
            'page_size', 'sort', 'group_by_pack').
        target_file: optional target file for content-based search.
        extra_parameters: forwarded untouched to the combined search strategy.
        merging_strategy: attribute name looked up on ``combined_search_strategies``.
        resource: passed to the API exceptions raised on failure.

    Returns:
        For the empty, content-based and text-based cases, a 7-tuple
        ``(ids, count, distance_to_target_data, more_from_pack_data, note, None, None)``
        where the entries that do not apply to the search kind are ``None``.
        For combined search, whatever the selected strategy returns.

    Raises:
        ServerErrorException, BadRequestException, NotFoundException: when the
        similarity or search backend fails (see the handlers below for the mapping).
    """
    data = search_form.cleaned_data

    # The two predicates that decide which backend(s) the request needs.
    no_text_input = data['query'] is None and data['filter'] is None
    no_content_input = not data['descriptors_filter'] and not data['target'] and not target_file

    if no_text_input and no_content_input:
        # No input data for search, return empty results
        return [], 0, None, None, None, None, None

    if no_text_input:
        # Standard content-based search
        try:
            page_size = data['page_size']
            results, count, note = similarity_api_search(
                target=data['target'],
                filter=data['descriptors_filter'],
                num_results=page_size,
                offset=(data['page'] - 1) * page_size,
                target_file=target_file)

            gaia_ids = [hit[0] for hit in results]
            # Distances are only meaningful (and only exposed to the serializer) when a
            # target was given; otherwise there is nothing to measure the distance to.
            distance_to_target_data = dict(results) if (data['target'] or target_file) else None
            return gaia_ids, count, distance_to_target_data, None, note, None, None
        except SimilarityException as e:
            # Translate the similarity server status code into the matching API exception.
            status = e.status_code
            if status == 400:
                raise BadRequestException(msg=e.message, resource=resource)
            if status == 404:
                raise NotFoundException(msg=e.message, resource=resource)
            # 500 keeps the original message; any other code gets a generic prefix.
            detail = e.message if status == 500 else 'Similarity server error: %s' % e.message
            raise ServerErrorException(msg=detail, resource=resource)
        except Exception:
            raise ServerErrorException(
                msg='The similarity server could not be reached or some unexpected error occurred.', resource=resource)

    if no_content_input:
        # Standard text-based search
        try:
            group_by_pack = data['group_by_pack']
            solr = Solr(settings.SOLR_URL)
            query = search_prepare_query(unquote(data['query'] or ""),
                                         unquote(data['filter'] or ""),
                                         data['sort'],
                                         data['page'],
                                         data['page_size'],
                                         grouping=group_by_pack,
                                         include_facets=False)

            result = SolrResponseInterpreter(solr.select(unicode(query)))
            docs = result.docs
            solr_ids = [doc['id'] for doc in docs]
            solr_count = result.num_found

            more_from_pack_data = None
            if group_by_pack:
                # If grouping option is on, store grouping info in a dictionary that we can add when serializing sounds
                more_from_pack_data = {
                    int(doc['id']): [doc['more_from_pack'], doc['pack_id'], doc['pack_name']]
                    for doc in docs
                }

            return solr_ids, solr_count, None, more_from_pack_data, None, None, None

        except SolrException as e:
            # When a filter was supplied, hint that its syntax is the likely culprit.
            if data['filter'] is not None:
                template = ('Search server error: %s (please check that your filter syntax and field '
                            'names are correct)')
            else:
                template = 'Search server error: %s'
            raise BadRequestException(msg=template % e.message, resource=resource)
        except Exception:
            raise ServerErrorException(
                msg='The search server could not be reached or some unexpected error occurred.', resource=resource)

    # Combined search (there is at least one of query/filter and one of descriptors_filter/target)
    # Strategies are implemented in 'combined_search_strategies'
    strategy = getattr(combined_search_strategies, merging_strategy)
    return strategy(search_form, target_file=target_file, extra_parameters=extra_parameters)