def get_user_tags(self, use_solr=True):
    """Return the 10 most used tags of this profile's user.

    With use_solr=True the tags come from a Solr tag-facet query and a list of
    {'name': tag, 'count': n} dicts is returned (or False if the Solr request
    fails). With use_solr=False a DelayedQueryExecuter over the database is
    returned instead, counting tags only on moderated and processed sounds.
    """
    if use_solr:
        query = SolrQuery()
        query.set_dismax_query('')
        # Restrict the facet count to this user's sounds only.
        filter_query = 'username:\"%s\"' % self.user.username
        query.set_query_options(field_list=["id"], filter_query=filter_query)
        query.add_facet_fields("tag")
        query.set_facet_options("tag", limit=10, mincount=1)
        solr = Solr(settings.SOLR_URL)
        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
        except Exception:
            # The original had two identical handlers (SolrException and
            # Exception) both returning False; Exception subsumes both.
            return False
        return [{'name': tag, 'count': count} for tag, count in results.facets['tag']]
    else:
        return DelayedQueryExecuter("""
            SELECT tags_tag.name AS name, X.c AS count
            FROM (
                SELECT tag_id, count(*) as c
                FROM tags_taggeditem
                LEFT JOIN sounds_sound ON object_id=sounds_sound.id
                WHERE tags_taggeditem.user_id=%d
                    AND sounds_sound.moderation_state='OK'
                    AND sounds_sound.processing_state='OK'
                GROUP BY tag_id
                ORDER BY c
                DESC LIMIT 10) AS X
            LEFT JOIN tags_tag ON tags_tag.id=X.tag_id
            ORDER BY tags_tag.name;""" % self.user_id)
def search_prepare_query(search_query, filter_query, sort, current_page, sounds_per_page,
                         id_weight = DEFAULT_SEARCH_WEIGHTS['id'],
                         tag_weight = DEFAULT_SEARCH_WEIGHTS['tag'],
                         description_weight = DEFAULT_SEARCH_WEIGHTS['description'],
                         username_weight = DEFAULT_SEARCH_WEIGHTS['username'],
                         pack_tokenized_weight = DEFAULT_SEARCH_WEIGHTS['pack_tokenized'],
                         original_filename_weight = DEFAULT_SEARCH_WEIGHTS['original_filename'],
                         grouping = False, include_facets = True, grouping_pack_limit = 1):
    """Build and return a SolrQuery for a paginated sound search.

    Fields take part in the dismax scoring only when their weight is non-zero.
    When include_facets is True the standard sound facets are attached; when
    grouping is True results are grouped by pack ("grouping_pack").
    """
    query = SolrQuery()
    # Keep only the fields with a non-zero weight, preserving their order.
    candidate_weights = [
        ("id", id_weight),
        ("tag", tag_weight),
        ("description", description_weight),
        ("username", username_weight),
        ("pack_tokenized", pack_tokenized_weight),
        ("original_filename", original_filename_weight),
    ]
    field_weights = [(field, weight) for field, weight in candidate_weights if weight != 0]
    query.set_dismax_query(search_query, query_fields=field_weights,)
    query.set_query_options(start=(current_page - 1) * sounds_per_page,
                            rows=sounds_per_page,
                            field_list=["id"],
                            filter_query=filter_query,
                            sort=sort)
    if include_facets:
        query.add_facet_fields("samplerate", "grouping_pack", "username", "tag",
                               "bitrate", "bitdepth", "type", "channels", "license")
        query.set_facet_options_default(limit=5, sort=True, mincount=1, count_missing=False)
        # A few facets show more values than the default limit of 5.
        query.set_facet_options("tag", limit=30)
        query.set_facet_options("username", limit=30)
        query.set_facet_options("grouping_pack", limit=10)
        query.set_facet_options("license", limit=10)
    if grouping:
        query.set_group_field(group_field="grouping_pack")
        query.set_group_options(
            group_func=None,
            group_query=None,
            group_rows=10,
            group_start=0,
            # Number of documents returned for each group (Solr default is 1).
            group_limit=grouping_pack_limit,
            group_offset=0,
            group_sort=None,
            group_sort_ingroup=None,
            group_format='grouped',
            group_main=False,
            group_num_groups=True,
            group_cache_percent=0)
    return query
def tags(request, multiple_tags=None):
    """Browse sounds by one or more tags (Python 2 variant).

    `multiple_tags` is a '/'-separated string of tag names. Results are
    grouped by pack and faceted by tag. NOTE(review): the function body
    visibly ends inside the except clause; any return/render presumably
    follows outside this view of the file.
    """
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []
    # Drop empty segments (e.g. from doubled slashes) and canonicalise order.
    multiple_tags = sorted(filter(lambda x: x, multiple_tags))
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    if multiple_tags:
        # AND of all requested tags; each tag quoted so it may contain spaces.
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                            rows=settings.SOUNDS_PER_PAGE,
                            field_list=["id"],
                            sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)
    query.set_group_field(group_field="grouping_pack")
    query.set_group_options(group_func=None, group_query=None, group_rows=10,
                            group_start=0, group_limit=1, group_offset=0,
                            group_sort=None, group_sort_ingroup=None,
                            group_format='grouped', group_main=False,
                            group_num_groups=True, group_cache_percent=0,
                            # Sets how many results from the same group are taken
                            # into account for computing the facets
                            group_truncate=True)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]
        docs = results.docs
        # Attach the DB Sound object to each Solr doc for template rendering.
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        for d in docs:
            d["sound"] = allsounds[d["id"]]
    except SolrException, e:  # Python 2 except syntax
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
def handle(self, *args, **options):
    """Management command: delete from the Solr sound index every document
    whose id no longer belongs to an existing, moderated ('OK') and
    processed ('OK') Sound in the database.
    """
    LIMIT = None        # optional cap on how many ids to check (None = all)
    SLICE_SIZE = 500    # Solr pagination page size
    solr_sound_ids = []
    solr = Solr(url=settings.SOLR_URL)
    query = SolrQuery()
    query.set_dismax_query("")  # Query to get ALL sounds
    # First slice also tells us the total number of documents in the index.
    print "Retrieving ids from %i to %i" % (0, SLICE_SIZE)
    query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=0)
    results = SolrResponseInterpreter(solr.select(unicode(query)))
    solr_sound_ids += list_of_dicts_to_list_of_ids(results.docs)
    total_num_documents = results.num_found
    # Start iterating over other pages (slices)
    if LIMIT:
        number_of_documents = min(LIMIT, total_num_documents)
    else:
        number_of_documents = total_num_documents
    for i in range(SLICE_SIZE, number_of_documents, SLICE_SIZE):
        print "Retrieving ids from %i to %i" % (i, i + SLICE_SIZE)
        query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=i)
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        solr_sound_ids += list_of_dicts_to_list_of_ids(results.docs)
    # De-duplicate (slices may overlap if the index changed while paging).
    solr_sound_ids = sorted(list(set(solr_sound_ids)))
    if LIMIT:
        solr_sound_ids = solr_sound_ids[0:LIMIT]
    print "%i document ids retrieved" % len(solr_sound_ids)
    n_deleted = 0
    print ""
    for count, id in enumerate(solr_sound_ids):
        # \r keeps the progress counter on a single console line.
        sys.stdout.write("\rChecking doc %i of %i" % (count, len(solr_sound_ids)))
        sys.stdout.flush()
        if Sound.objects.filter(id=id, moderation_state="OK", processing_state="OK").exists():
            pass
        else:
            # Sound does not exist in the Db or is not properly moderated and processed
            print "\n\t - Deleting sound with id %i from solr index" % id
            solr.delete_by_id(id)
            n_deleted += 1
    print "\n\nDONE! %i sounds deleted from solr index (it may take some minutes to actually see the changes in the page)" % n_deleted
def get_pack_tags(pack_obj):
    """Return the tag facets (up to 20 tags, mincount 1) for the sounds of a
    pack, or False if the Solr query fails.
    """
    query = SolrQuery()
    query.set_dismax_query('')
    # Restrict to this user's sounds belonging to this pack.
    filter_query = 'username:\"%s\" pack:\"%s\"' % (pack_obj.user.username, pack_obj.name)
    query.set_query_options(field_list=["id"], filter_query=filter_query)
    query.add_facet_fields("tag")
    query.set_facet_options("tag", limit=20, mincount=1)
    try:
        solr = Solr(settings.SOLR_URL)
        results = SolrResponseInterpreter(solr.select(unicode(query)))
    except Exception:
        # The original caught `(SolrException, Exception)`, which is redundant:
        # Exception already covers SolrException. TODO: do something here?
        return False
    return results.facets
def get_user_tags(self, use_solr=True):
    """Query Solr for this user's top-10 tag facets (Python 2 variant).

    Returns False on any Solr/other error. NOTE(review): no success return
    is visible in this view of the file, so on success the function as shown
    falls through and returns None; the remainder presumably follows.
    """
    if use_solr:
        query = SolrQuery()
        query.set_dismax_query('')
        # Facet only over this user's own sounds.
        filter_query = 'username:\"%s\"' % self.user.username
        query.set_query_options(field_list=["id"], filter_query=filter_query)
        query.add_facet_fields("tag")
        query.set_facet_options("tag", limit=10, mincount=1)
        solr = Solr(settings.SOLR_URL)
        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
        except SolrException, e:  # Python 2 except syntax
            return False
        except Exception, e:
            return False
def handle(self, *args, **options):
    """Management command: remove from the forum Solr index every post whose
    id does not belong to a moderated ('OK') Post in the database.
    """
    LIMIT = None        # optional cap on how many ids to check (None = all)
    SLICE_SIZE = 500    # Solr pagination page size
    solr = Solr(url=settings.SOLR_FORUM_URL)
    query = SolrQuery()
    query.set_dismax_query("")  # Query to get ALL forums
    # The first slice also reports the total number of documents.
    console_logger.info("Retrieving ids from %i to %i"%(0,SLICE_SIZE))
    query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=0)
    interpreted = SolrResponseInterpreter(solr.select(unicode(query)))
    solr_post_ids = list_of_dicts_to_list_of_ids(interpreted.docs)
    total_num_documents = interpreted.num_found
    number_of_documents = min(LIMIT, total_num_documents) if LIMIT else total_num_documents
    # Page through the remaining slices.
    for offset in range(SLICE_SIZE, number_of_documents, SLICE_SIZE):
        console_logger.info("Retrieving ids from %i to %i"%(offset,offset+SLICE_SIZE-1))
        query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=offset)
        interpreted = SolrResponseInterpreter(solr.select(unicode(query)))
        solr_post_ids += list_of_dicts_to_list_of_ids(interpreted.docs)
    # De-duplicate and order the collected ids.
    solr_post_ids = sorted(set(solr_post_ids))
    if LIMIT:
        solr_post_ids = solr_post_ids[0:LIMIT]
    console_logger.info("%i document ids retrieved"%len(solr_post_ids))
    n_deleted = 0
    console_logger.info("")
    for count, post_id in enumerate(solr_post_ids):
        if count % 100 == 0:
            console_logger.info("\rChecking docs %i/%i"%(count,len(solr_post_ids)))
        if not Post.objects.filter(id=post_id, moderation_state="OK").exists():
            # Post does not exist in the Db or is not properly moderated and processed
            console_logger.info("\n\t - Deleting forum with id %i from solr index" % post_id)
            solr.delete_by_id(post_id)
            n_deleted += 1
    console_logger.info("\n\nDONE! %i forums deleted from solr index (it may take some minutes to actually see "
                        "the changes in the page)" % n_deleted)
def get_user_tags(self):
    """Return up to 10 {'name': tag, 'count': n} dicts for this profile's
    user, computed from a Solr tag-facet query, or False on any error.
    """
    query = SolrQuery()
    query.set_dismax_query('')
    # Facet only over this user's own sounds.
    filter_query = 'username:\"%s\"' % self.user.username
    query.set_query_options(field_list=["id"], filter_query=filter_query)
    query.add_facet_fields("tag")
    query.set_facet_options("tag", limit=10, mincount=1)
    solr = Solr(settings.SOLR_URL)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
    except Exception:
        # A single handler suffices: the original had two identical clauses
        # (SolrException, then Exception) both returning False.
        return False
    return [{'name': tag, 'count': count} for tag, count in results.facets['tag']]
def items(self, obj):
    """RSS feed items: run the stored search and return the matching Solr docs.

    obj['type'] selects the matching mode: "phrase" (quoted, near-exact),
    "any" (OR of terms), anything else AND (100% of terms must match).
    obj['limit'] is capped at 100. Returns [] for an empty query or on any
    search error.
    """
    if obj['query'] != "":
        try:
            solr = Solr(settings.SOLR_URL)
            query = SolrQuery()
            fields = [('id', 4),
                      ('tag', 3),
                      ('description', 3),
                      ('username', 2),
                      ('pack_tokenized', 2),
                      ('original_filename', 2), ]
            if obj['type'] == "phrase":
                query.set_dismax_query('"' + obj['query'] + '"', query_fields=fields)  # EXACT (not 100%)
            elif obj['type'] == "any":
                query.set_dismax_query(obj['query'], query_fields=[], minimum_match=0)  # OR
            else:
                query.set_dismax_query(obj['query'], query_fields=[], minimum_match="100%")  # AND
            lim = obj['limit']
            if lim > 100:
                lim = 100
            query.set_query_options(start=obj['offset'], rows=lim, filter_query="",
                                    sort=['created desc'])
            try:
                results = SolrResponseInterpreter(solr.select(unicode(query)))
                # `doc` renamed from `object`, which shadowed the builtin.
                # The original wrapped each append in try/except for
                # solr/database synchronization errors, but a plain append
                # cannot raise, so the handler was dead code and is removed.
                sounds = [doc for doc in results.docs]
                logger.info("Sound pool search RSS")
                return sounds
            except SolrException:
                return []
        except Exception:
            # Was a bare `except:`; narrowed so SystemExit/KeyboardInterrupt
            # propagate instead of being swallowed.
            return []
    else:
        return []
def get_pack_tags(pack_obj):
    """Query Solr for a pack's tag facets (Python 2 variant).

    Returns False if the Solr query fails. NOTE(review): no success return is
    visible in this view of the file; the remainder presumably follows.
    """
    query = SolrQuery()
    query.set_dismax_query('')
    # Restrict to this user's sounds belonging to this pack.
    filter_query = 'username:\"%s\" pack:\"%s\"' % (pack_obj.user.username, pack_obj.name)
    #filter_query = 'pack:\"%s\"' % (pack_obj.name,)
    query.set_query_options(field_list=["id"], filter_query=filter_query)
    query.add_facet_fields("tag")
    query.set_facet_options("tag", limit=20, mincount=1)
    solr = Solr(settings.SOLR_URL)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
    except SolrException, e:  # Python 2 except syntax
        #logger.warning("search error: query: %s error %s" % (query, e))
        #error = True
        #error_text = 'There was an error while searching, is your query correct?'
        return False
def handle(self, *args, **options):
    """Management command: delete from the Solr sound index every document
    whose id no longer belongs to an existing, moderated ('OK') and
    processed ('OK') Sound in the database.
    """
    LIMIT = None        # optional cap on how many ids to check (None = all)
    SLICE_SIZE = 500    # Solr pagination page size
    solr_sound_ids = []
    solr = Solr(url=settings.SOLR_URL)
    query = SolrQuery()
    query.set_dismax_query("")  # Query to get ALL sounds
    # The first slice also reports the total number of documents.
    print "Retrieving ids from %i to %i"%(0,SLICE_SIZE)
    query.set_query_options(field_list=["id"], rows = SLICE_SIZE, start = 0)
    results = SolrResponseInterpreter(solr.select(unicode(query)))
    solr_sound_ids += list_of_dicts_to_list_of_ids(results.docs)
    total_num_documents = results.num_found
    # Start iterating over other pages (slices)
    if LIMIT:
        number_of_documents = min(LIMIT,total_num_documents)
    else:
        number_of_documents = total_num_documents
    for i in range(SLICE_SIZE,number_of_documents,SLICE_SIZE):
        print "Retrieving ids from %i to %i"%(i,i+SLICE_SIZE)
        query.set_query_options(field_list=["id"], rows = SLICE_SIZE, start = i)
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        solr_sound_ids += list_of_dicts_to_list_of_ids(results.docs)
    # De-duplicate (slices may overlap if the index changed while paging).
    solr_sound_ids = sorted(list(set(solr_sound_ids)))
    if LIMIT:
        solr_sound_ids = solr_sound_ids[0:LIMIT]
    print "%i document ids retrieved"%len(solr_sound_ids)
    n_deleted = 0
    print ""
    for count,id in enumerate(solr_sound_ids):
        # \r keeps the progress counter on a single console line.
        sys.stdout.write("\rChecking doc %i of %i"%(count,len(solr_sound_ids)))
        sys.stdout.flush()
        if Sound.objects.filter(id=id,moderation_state="OK",processing_state="OK").exists():
            pass
        else:
            # Sound does not exist in the Db or is not properly moderated and processed
            print "\n\t - Deleting sound with id %i from solr index"%id
            solr.delete_by_id(id)
            n_deleted += 1
    print "\n\nDONE! %i sounds deleted from solr index (it may take some minutes to actually see the changes in the page)"%n_deleted
def get_random_sound_from_solr():
    """ Get a random sound from solr.
    This is used for random sound browsing. We filter explicit sounds, but
    otherwise don't have any other restrictions on sound attributes.
    Returns a single Solr document dict, or {} if none found / on error.
    """
    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    # Sorting on a randomly-named dynamic field gives a random document order.
    sort = ['random_%d asc' % random.randint(1, 10000000)]
    query.set_query("*:*")
    query.set_query_options(start=0, rows=1, field_list=["*"],
                            filter_query='is_explicit:0', sort=sort)
    try:
        interpreted = SolrResponseInterpreter(solr.select(unicode(query)))
        if interpreted.docs:
            return interpreted.docs[0]
    except (SolrException, socket.error):
        # Best-effort: fall through and return an empty dict on any Solr or
        # network failure.
        pass
    return {}
def tags(request, multiple_tags=None):
    """Browse sounds by tags, without pack grouping (Python 2 variant).

    `multiple_tags` is a '/'-separated string of tag names. NOTE(review):
    the function body visibly ends inside the except clause; any
    return/render presumably follows outside this view of the file.
    """
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []
    # Drop empty segments and canonicalise order.
    multiple_tags = sorted(filter(lambda x:x, multiple_tags))
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    if multiple_tags:
        # AND of all requested tags; each quoted so it may contain spaces.
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                            rows=settings.SOUNDS_PER_PAGE,
                            field_list=["id"],
                            sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        page = paginator.page(current_page)
        error = False
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]
    except SolrException, e:  # Python 2 except syntax
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
def tags(request, multiple_tags=None):
    """Browse sounds by one or more tags, grouped by pack.

    `multiple_tags` is a '/'-separated string of tag names. Renders
    'sounds/tags.html' with locals(), so local variable names are part of the
    template contract and must not be renamed.
    """
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []
    # Drop empty segments and canonicalise order.
    multiple_tags = sorted(filter(lambda x: x, multiple_tags))
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    if multiple_tags:
        # AND of all requested tags; each quoted so it may contain spaces.
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                            rows=settings.SOUNDS_PER_PAGE,
                            field_list=["id"],
                            sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)
    query.set_group_field(group_field="grouping_pack")
    query.set_group_options(
        group_func=None,
        group_query=None,
        group_rows=10,
        group_start=0,
        group_limit=1,
        group_offset=0,
        group_sort=None,
        group_sort_ingroup=None,
        group_format='grouped',
        group_main=False,
        group_num_groups=True,
        group_cache_percent=0,
        group_truncate=True
    )  # Sets how many results from the same group are taken into account for computing the facets
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]
        docs = results.docs
        # Attach the DB Sound object to each Solr doc for template rendering.
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        for d in docs:
            d["sound"] = allsounds[d["id"]]
    except SolrException as e:
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
    except Exception:
        # Was a bare `except:`; narrowed so SystemExit/KeyboardInterrupt
        # propagate instead of silently flagging an error.
        error = True
    slash_tag = "/".join(multiple_tags)
    space_tag = " ".join(multiple_tags)
    if slash_tag:
        follow_tags_url = reverse('follow-tags', args=[slash_tag])
        unfollow_tags_url = reverse('unfollow-tags', args=[slash_tag])
        show_unfollow_button = False
        if request.user.is_authenticated:
            show_unfollow_button = follow_utils.is_user_following_tag(
                request.user, slash_tag)
    return render(request, 'sounds/tags.html', locals())
def tags(request, multiple_tags=None):
    """Browse sounds by one or more tags, grouped by pack.

    `multiple_tags` is a '/'-separated string of tag names. Renders
    'sounds/tags.html' with an explicit tvars dict.
    """
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []
    # Drop empty segments and canonicalise order.
    multiple_tags = sorted(filter(lambda x: x, multiple_tags))
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    query = SolrQuery()
    if multiple_tags:
        # AND of all requested tags; each quoted so it may contain spaces.
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                            rows=settings.SOUNDS_PER_PAGE,
                            field_list=["id"],
                            sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)
    query.set_group_field(group_field="grouping_pack")
    query.set_group_options(
        group_func=None,
        group_query=None,
        group_rows=10,
        group_start=0,
        group_limit=1,
        group_offset=0,
        group_sort=None,
        group_sort_ingroup=None,
        group_format='grouped',
        group_main=False,
        group_num_groups=True,
        group_cache_percent=0,
        group_truncate=True
    )  # Sets how many results from the same group are taken into account for computing the facets
    page = None
    tags = []
    error = False
    # On success `docs` is the list of Solr documents; default to an empty
    # list (was `{}`, an inconsistent type for the template to iterate).
    docs = []
    non_grouped_number_of_results = 0
    paginator = None
    try:
        non_grouped_number_of_results, facets, paginator, page, docs = perform_solr_query(
            query, current_page)
        tags = [dict(name=f[0], count=f[1]) for f in facets["tag"]]
        # Attach the DB Sound object to each Solr doc for template rendering.
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        for d in docs:
            d["sound"] = allsounds[d["id"]]
    except SolrException as e:
        error = True
        search_logger.warning('Search error: query: %s error %s' % (query, e))
    except Exception as e:
        error = True
        search_logger.error('Could probably not connect to Solr - %s' % e)
    slash_tag = "/".join(multiple_tags)
    follow_tags_url = ''
    unfollow_tags_url = ''
    show_unfollow_button = False
    if slash_tag:
        follow_tags_url = reverse('follow-tags', args=[slash_tag])
        unfollow_tags_url = reverse('unfollow-tags', args=[slash_tag])
        show_unfollow_button = False
        if request.user.is_authenticated:
            show_unfollow_button = follow_utils.is_user_following_tag(
                request.user, slash_tag)
    tvars = {
        'show_unfollow_button': show_unfollow_button,
        'multiple_tags': multiple_tags,
        'follow_tags_url': follow_tags_url,
        'unfollow_tags_url': unfollow_tags_url,
        'error': error,
        'tags': tags,
        'slash_tag': slash_tag,
        'non_grouped_number_of_results': non_grouped_number_of_results,
        'docs': docs,
        'paginator': paginator,
        'page': page,
        'current_page': current_page
    }
    return render(request, 'sounds/tags.html', tvars)
def search_forum(request):
    """Forum post search view (Python 2 variant).

    Builds a dismax query over thread/post fields with highlighting, grouped
    by thread title. NOTE(review): the function visibly ends after setting
    error_text; the render/return presumably follows outside this view.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    current_forum_name_slug = request.GET.get("current_forum_name_slug", "").strip()  # for context sensitive search
    current_forum_name = request.GET.get("current_forum_name", "").strip()  # used in breadcrumb
    sort = ["thread_created desc"]
    # Parse advanced search options
    advanced_search = request.GET.get("advanced_search", "")
    date_from = request.GET.get("dt_from", "")
    date_to = request.GET.get("dt_to", "")
    # TEMPORAL WORKAROUND!!! to prevent using watermark as the query for forum search...
    # It only happens in some situations.
    if "search in " in search_query:
        invalid = 1
    if search_query.strip() != "" or filter_query:
        # add current forum
        if current_forum_name_slug.strip() != "":
            filter_query += "forum_name_slug:" + current_forum_name_slug
        # add date range
        # NOTE(review): `and` binds tighter than `or`, so this reads
        # (advanced AND date_from) OR date_to — the date filter applies even
        # when advanced_search is off, as long as date_to is set. Likely
        # intended: advanced AND (date_from OR date_to). Confirm before fixing.
        if advanced_search == "1" and date_from != "" or date_to != "":
            filter_query = __add_date_range(filter_query, date_from, date_to)
        query = SolrQuery()
        query.set_dismax_query(search_query,
                               query_fields=[("thread_title", 4),
                                             ("post_body", 3),
                                             ("thread_author", 3),
                                             ("post_author", 3),
                                             ("forum_name", 2)])
        query.set_highlighting_options_default(field_list=["post_body"],
                                               fragment_size=200,
                                               alternate_field="post_body",  # TODO: revise this param
                                               require_field_match=False,
                                               pre="<strong>",
                                               post="</strong>")
        query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                                rows=settings.SOUNDS_PER_PAGE,
                                field_list=["id", "forum_name", "forum_name_slug",
                                            "thread_id", "thread_title",
                                            "thread_author", "thread_created",
                                            "post_body", "post_author",
                                            "post_created", "num_posts"],
                                filter_query=filter_query,
                                sort=sort)
        query.set_group_field("thread_title_grouped")
        query.set_group_options(group_limit=30)
        solr = Solr(settings.SOLR_FORUM_URL)
        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
            num_results = paginator.count
            page = paginator.page(current_page)
            error = False
        except SolrException, e:  # Python 2 except syntax
            logger.warning("search error: query: %s error %s" % (query, e))
            error = True
            error_text = 'There was an error while searching, is your query correct?'
        except Exception, e:
            logger.error("Could probably not connect to Solr - %s" % e)
            error = True
            error_text = 'The search server could not be reached, please try again later.'
def search_prepare_query(
        search_query,
        filter_query,
        sort,
        current_page,
        sounds_per_page,
        id_weight=settings.DEFAULT_SEARCH_WEIGHTS['id'],
        tag_weight=settings.DEFAULT_SEARCH_WEIGHTS['tag'],
        description_weight=settings.DEFAULT_SEARCH_WEIGHTS['description'],
        username_weight=settings.DEFAULT_SEARCH_WEIGHTS['username'],
        pack_tokenized_weight=settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized'],
        original_filename_weight=settings.DEFAULT_SEARCH_WEIGHTS['original_filename'],
        grouping=False,
        include_facets=True,
        grouping_pack_limit=1,
        offset=None,
        in_ids=None):
    """Build and return a SolrQuery for a paginated sound search.

    Fields participate in dismax scoring only when their weight is non-zero.
    `offset`, when truthy, overrides the page-based start. `in_ids`, when
    given, restricts results to those ids (replacing all other filters).
    """
    # `in_ids` used to default to the mutable `[]`; use None and normalise so
    # the default can never be shared/mutated across calls. Behaviour is
    # unchanged for all callers.
    if in_ids is None:
        in_ids = []
    query = SolrQuery()

    # Set field weights and scoring function
    field_weights = []
    if id_weight != 0:
        field_weights.append(("id", id_weight))
    if tag_weight != 0:
        field_weights.append(("tag", tag_weight))
    if description_weight != 0:
        field_weights.append(("description", description_weight))
    if username_weight != 0:
        field_weights.append(("username", username_weight))
    if pack_tokenized_weight != 0:
        field_weights.append(("pack_tokenized", pack_tokenized_weight))
    if original_filename_weight != 0:
        field_weights.append(("original_filename", original_filename_weight))
    query.set_dismax_query(
        search_query,
        query_fields=field_weights,
    )

    # Set start and rows parameters (offset and size)
    if not offset:
        start = (current_page - 1) * sounds_per_page
    else:
        start = offset

    # Process filter
    filter_query = search_process_filter(filter_query)

    # Process filter for clustering (maybe consider only applying this filter in this case...)
    if in_ids:
        filter_query = ''  # for now we remove all the other filters
        # NOTE(review): joining assumes the ids are strings — confirm callers.
        if len(in_ids) == 1:
            filter_query += ' id:{}'.format(in_ids[0])
        else:
            filter_query += ' id:'
            filter_query += ' OR id:'.join(in_ids)

    # Set all options
    query.set_query_options(start=start,
                            rows=sounds_per_page,
                            field_list=["id"],
                            filter_query=filter_query,
                            sort=sort)

    # Specify query facets
    if include_facets:
        query.add_facet_fields("samplerate", "grouping_pack", "username", "tag",
                               "bitrate", "bitdepth", "type", "channels", "license")
        query.set_facet_options_default(limit=5, sort=True, mincount=1, count_missing=False)
        query.set_facet_options("type", limit=len(
            sounds.models.Sound.SOUND_TYPE_CHOICES))
        query.set_facet_options("tag", limit=30)
        query.set_facet_options("username", limit=30)
        query.set_facet_options("grouping_pack", limit=10)
        query.set_facet_options("license", limit=10)

    # Add groups
    if grouping:
        query.set_group_field(group_field="grouping_pack")
        query.set_group_options(
            group_func=None,
            group_query=None,
            group_rows=10,
            group_start=0,
            # Number of documents returned for each group (Solr default is 1).
            group_limit=grouping_pack_limit,
            group_offset=0,
            group_sort=None,
            group_sort_ingroup=None,
            group_format='grouped',
            group_main=False,
            group_num_groups=True,
            group_cache_percent=0)
    return query
def search_prepare_query(
        search_query,
        filter_query,
        sort,
        current_page,
        sounds_per_page,
        id_weight=settings.DEFAULT_SEARCH_WEIGHTS['id'],
        tag_weight=settings.DEFAULT_SEARCH_WEIGHTS['tag'],
        description_weight=settings.DEFAULT_SEARCH_WEIGHTS['description'],
        username_weight=settings.DEFAULT_SEARCH_WEIGHTS['username'],
        pack_tokenized_weight=settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized'],
        original_filename_weight=settings.DEFAULT_SEARCH_WEIGHTS['original_filename'],
        grouping=False,
        include_facets=True,
        grouping_pack_limit=1,
        offset=None):
    """Build and return a SolrQuery for a paginated sound search.

    Fields participate in the dismax scoring only when their weight is
    non-zero. A truthy `offset` overrides the page-based start position.
    """
    query = SolrQuery()
    # Keep only the fields with a non-zero weight, preserving their order.
    candidate_weights = [
        ("id", id_weight),
        ("tag", tag_weight),
        ("description", description_weight),
        ("username", username_weight),
        ("pack_tokenized", pack_tokenized_weight),
        ("original_filename", original_filename_weight),
    ]
    field_weights = [(field, weight) for field, weight in candidate_weights if weight != 0]
    query.set_dismax_query(
        search_query,
        query_fields=field_weights,
    )
    # A truthy offset wins over the page computation.
    start = offset if offset else (current_page - 1) * sounds_per_page
    query.set_query_options(start=start,
                            rows=sounds_per_page,
                            field_list=["id"],
                            filter_query=filter_query,
                            sort=sort)
    if include_facets:
        query.add_facet_fields("samplerate", "grouping_pack", "username", "tag",
                               "bitrate", "bitdepth", "type", "channels", "license")
        query.set_facet_options_default(limit=5, sort=True, mincount=1, count_missing=False)
        # Show every sound type; other facets get slightly raised limits.
        query.set_facet_options("type", limit=len(sounds.models.Sound.SOUND_TYPE_CHOICES))
        query.set_facet_options("tag", limit=30)
        query.set_facet_options("username", limit=30)
        query.set_facet_options("grouping_pack", limit=10)
        query.set_facet_options("license", limit=10)
    if grouping:
        query.set_group_field(group_field="grouping_pack")
        query.set_group_options(
            group_func=None,
            group_query=None,
            group_rows=10,
            group_start=0,
            # Number of documents returned for each group (Solr default is 1).
            group_limit=grouping_pack_limit,
            group_offset=0,
            group_sort=None,
            group_sort_ingroup=None,
            group_format='grouped',
            group_main=False,
            group_num_groups=True,
            group_cache_percent=0)
    return query
def search_forum(request):
    """Forum post search view.

    Builds a dismax query over thread/post fields with highlighting, grouped
    by thread title, optionally restricted to one forum and a date range.
    Renders 'search/search_forum.html'.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    current_forum_name_slug = request.GET.get("forum", "").strip()  # for context sensitive search
    if current_forum_name_slug:
        current_forum = get_object_or_404(forum.models.Forum.objects, name_slug=current_forum_name_slug)
    else:
        current_forum = None
    sort = ["thread_created desc"]

    # Parse advanced search options
    advanced_search = request.GET.get("advanced_search", "")
    date_from = request.GET.get("dt_from", "")
    try:
        df_parsed = datetime.datetime.strptime(date_from, "%Y-%m-%d")
        date_from_display = df_parsed.strftime("%d-%m-%Y")
    except ValueError:
        date_from = ""
        date_from_display = "Choose a Date"
    date_to = request.GET.get("dt_to", "")
    try:
        dt_parsed = datetime.datetime.strptime(date_to, "%Y-%m-%d")
        date_to_display = dt_parsed.strftime("%d-%m-%Y")
    except ValueError:
        date_to = ""
        date_to_display = "Choose a Date"

    if search_query.startswith("search in"):
        search_query = ""

    error = False
    error_text = ""
    paginator = None
    num_results = None
    page = None
    results = []
    if search_query.strip() != "" or filter_query:
        # add current forum
        if current_forum:
            filter_query += "forum_name_slug:" + current_forum.name_slug

        # add date range
        # FIX: `and` binds tighter than `or`, so the original condition read
        # (advanced AND date_from) OR date_to, applying the date filter even
        # when advanced search was off. Parenthesised to the intended meaning.
        if advanced_search == "1" and (date_from != "" or date_to != ""):
            filter_query = __add_date_range(filter_query, date_from, date_to)

        query = SolrQuery()
        query.set_dismax_query(search_query,
                               query_fields=[("thread_title", 4),
                                             ("post_body", 3),
                                             ("thread_author", 3),
                                             ("post_author", 3),
                                             ("forum_name", 2)])
        query.set_highlighting_options_default(field_list=["post_body"],
                                               fragment_size=200,
                                               alternate_field="post_body",  # TODO: revise this param
                                               require_field_match=False,
                                               pre="<strong>",
                                               post="</strong>")
        query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                                rows=settings.SOUNDS_PER_PAGE,
                                field_list=["id", "forum_name", "forum_name_slug", "thread_id",
                                            "thread_title", "thread_author", "thread_created",
                                            "post_body", "post_author", "post_created", "num_posts"],
                                filter_query=filter_query,
                                sort=sort)
        query.set_group_field("thread_title_grouped")
        query.set_group_options(group_limit=30)

        solr = Solr(settings.SOLR_FORUM_URL)
        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
            num_results = paginator.count
            page = paginator.page(current_page)
            error = False
        except SolrException as e:
            logger.warning("search error: query: %s error %s" % (query, e))
            error = True
            error_text = 'There was an error while searching, is your query correct?'
        except Exception as e:
            logger.error("Could probably not connect to Solr - %s" % e)
            error = True
            error_text = 'The search server could not be reached, please try again later.'

    tvars = {
        'advanced_search': advanced_search,
        'current_forum': current_forum,
        'current_page': current_page,
        'date_from': date_from,
        'date_from_display': date_from_display,
        'date_to': date_to,
        'date_to_display': date_to_display,
        'error': error,
        'error_text': error_text,
        'filter_query': filter_query,
        'num_results': num_results,
        'page': page,
        'paginator': paginator,
        'search_query': search_query,
        'sort': sort,
        'results': results,
    }

    return render(request, 'search/search_forum.html', tvars)
def search_forum(request):
    """Forum search view: query the forum Solr index and render results.

    Reads from request.GET: q (query), f (filter), page, forum (context
    slug), advanced_search, dt_from / dt_to (YYYY-MM-DD date range).
    Renders 'search/search_forum.html'.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    current_forum_name_slug = request.GET.get(
        "forum", "").strip()  # for context sensitive search
    if current_forum_name_slug:
        current_forum = get_object_or_404(forum.models.Forum.objects,
                                          name_slug=current_forum_name_slug)
    else:
        current_forum = None
    sort = ["thread_created desc"]

    # Parse advanced search options
    advanced_search = request.GET.get("advanced_search", "")
    date_from = request.GET.get("dt_from", "")
    try:
        df_parsed = datetime.datetime.strptime(date_from, "%Y-%m-%d")
        date_from_display = df_parsed.strftime("%d-%m-%Y")
    except ValueError:
        date_from = ""
        date_from_display = "Choose a Date"
    date_to = request.GET.get("dt_to", "")
    try:
        dt_parsed = datetime.datetime.strptime(date_to, "%Y-%m-%d")
        date_to_display = dt_parsed.strftime("%d-%m-%Y")
    except ValueError:
        date_to = ""
        date_to_display = "Choose a Date"

    if search_query.startswith("search in"):
        search_query = ""

    error = False
    error_text = ""
    paginator = None
    num_results = None
    page = None
    results = []
    if search_query.strip() != "" or filter_query:
        # Add the current forum as a filter clause. A separator is needed
        # when the user already supplied a filter via "f", otherwise the two
        # clauses would be glued together into one malformed term.
        if current_forum:
            if filter_query:
                filter_query += " "
            filter_query += "forum_name_slug:" + current_forum.name_slug

        # Add date range. Parentheses matter here: without them the old
        # condition parsed as (advanced and from) or to, which applied the
        # range whenever dt_to was set even outside advanced search.
        if advanced_search == "1" and (date_from != "" or date_to != ""):
            filter_query = __add_date_range(filter_query, date_from, date_to)

        query = SolrQuery()
        query.set_dismax_query(search_query,
                               query_fields=[("thread_title", 4),
                                             ("post_body", 3),
                                             ("thread_author", 3),
                                             ("post_author", 3),
                                             ("forum_name", 2)])
        query.set_highlighting_options_default(
            field_list=["post_body"],
            fragment_size=200,
            alternate_field="post_body",  # TODO: revise this param
            require_field_match=False,
            pre="<strong>",
            post="</strong>")
        query.set_query_options(
            start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
            rows=settings.SOUNDS_PER_PAGE,
            field_list=[
                "id",
                "forum_name",
                "forum_name_slug",
                "thread_id",
                "thread_title",
                "thread_author",
                "thread_created",
                "post_body",
                "post_author",
                "post_created",
                "num_posts"
            ],
            filter_query=filter_query,
            sort=sort)
        query.set_group_field("thread_title_grouped")
        query.set_group_options(group_limit=30)

        solr = Solr(settings.SOLR_FORUM_URL)
        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(
                results, settings.SOUNDS_PER_PAGE)
            num_results = paginator.count
            page = paginator.page(current_page)
            error = False
        except SolrException as e:
            logger.warning("search error: query: %s error %s" % (query, e))
            error = True
            error_text = 'There was an error while searching, is your query correct?'
        except Exception as e:
            logger.error("Could probably not connect to Solr - %s" % e)
            error = True
            error_text = 'The search server could not be reached, please try again later.'

    tvars = {
        'advanced_search': advanced_search,
        'current_forum': current_forum,
        'current_page': current_page,
        'date_from': date_from,
        'date_from_display': date_from_display,
        'date_to': date_to,
        'date_to_display': date_to_display,
        'error': error,
        'error_text': error_text,
        'filter_query': filter_query,
        'num_results': num_results,
        'page': page,
        'paginator': paginator,
        'search_query': search_query,
        'sort': sort,
        'results': results,
    }
    return render(request, 'search/search_forum.html', tvars)
def search_prepare_query(search_query,
                         filter_query,
                         sort,
                         current_page,
                         sounds_per_page,
                         id_weight=settings.DEFAULT_SEARCH_WEIGHTS['id'],
                         tag_weight=settings.DEFAULT_SEARCH_WEIGHTS['tag'],
                         description_weight=settings.DEFAULT_SEARCH_WEIGHTS['description'],
                         username_weight=settings.DEFAULT_SEARCH_WEIGHTS['username'],
                         pack_tokenized_weight=settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized'],
                         original_filename_weight=settings.DEFAULT_SEARCH_WEIGHTS['original_filename'],
                         grouping=False,
                         include_facets=True,
                         grouping_pack_limit=1,
                         offset=None):
    """Build and return a SolrQuery object for a sound search.

    The *_weight parameters set the dismax field boosts (a weight of 0
    drops the field from the query entirely). Pagination comes from
    current_page/sounds_per_page unless an explicit result offset is given.
    When grouping is enabled, results are grouped by pack
    ('grouping_pack' field), returning grouping_pack_limit documents per
    group.
    """
    query = SolrQuery()

    # Set field weights and scoring function; fields weighted 0 are omitted.
    field_weights = []
    if id_weight != 0:
        field_weights.append(("id", id_weight))
    if tag_weight != 0:
        field_weights.append(("tag", tag_weight))
    if description_weight != 0:
        field_weights.append(("description", description_weight))
    if username_weight != 0:
        field_weights.append(("username", username_weight))
    if pack_tokenized_weight != 0:
        field_weights.append(("pack_tokenized", pack_tokenized_weight))
    if original_filename_weight != 0:
        field_weights.append(("original_filename", original_filename_weight))
    query.set_dismax_query(search_query, query_fields=field_weights)

    # Set start and rows parameters (offset and size).
    # Compare against None explicitly: the previous "if not offset" check
    # also matched offset == 0 and silently fell back to page arithmetic,
    # so an explicit zero offset could not be requested.
    if offset is None:
        start = (current_page - 1) * sounds_per_page
    else:
        start = offset

    # Process filter
    filter_query = search_process_filter(filter_query)

    # Set all options
    query.set_query_options(start=start,
                            rows=sounds_per_page,
                            field_list=["id"],
                            filter_query=filter_query,
                            sort=sort)

    # Specify query facets
    if include_facets:
        query.add_facet_fields("samplerate", "grouping_pack", "username", "tag",
                               "bitrate", "bitdepth", "type", "channels", "license")
        query.set_facet_options_default(limit=5, sort=True, mincount=1, count_missing=False)
        query.set_facet_options("type", limit=len(sounds.models.Sound.SOUND_TYPE_CHOICES))
        query.set_facet_options("tag", limit=30)
        query.set_facet_options("username", limit=30)
        query.set_facet_options("grouping_pack", limit=10)
        query.set_facet_options("license", limit=10)

    # Add groups
    if grouping:
        query.set_group_field(group_field="grouping_pack")
        query.set_group_options(
            group_func=None,
            group_query=None,
            group_rows=10,
            group_start=0,
            group_limit=grouping_pack_limit,  # This is the number of documents that will be returned for each group. By default only 1 is returned.
            group_offset=0,
            group_sort=None,
            group_sort_ingroup=None,
            group_format='grouped',
            group_main=False,
            group_num_groups=True,
            group_cache_percent=0)
    return query
(search, count) = line.strip().split("\t") except ValueError: continue count = int(count) results_before += count # clean the only few things DisMax doesn't like... :) search = search.strip("+-").replace("--", "").replace("+-", "").replace("-+", "").replace("++", "") if search == "\"" or search == "\"\"": search = "" query = SolrQuery() query.set_dismax_query(search, query_fields=[("id", 4), ("tag",3), ("description",3), ("username",2), ("pack_original",2), ("filename",2), "comment"]) query.set_query_options(start=0, rows=10, field_list=["id"]) query.add_facet_fields("samplerate", "pack_original", "username", "tag", "bitrate", "bitdepth") query.set_facet_options_default(limit=5, sort=True, mincount=1, count_missing=True) query.set_facet_options("tag", limit=30) query.set_facet_options("username", limit=30) response = solr.select(unicode(query)) interpreted = SolrResponseInterpreter(response) num_queries_total += 1 num_queries_this_loop += 1 time_solr += interpreted.q_time results_solr += interpreted.num_found except KeyboardInterrupt:
def tags(request, multiple_tags=None):
    """Tag-browsing view: list sounds matching one or more tags.

    multiple_tags is a slash-separated tag string from the URL (or None for
    the tag cloud of everything). Queries Solr grouped by pack, attaches the
    corresponding Sound objects to each result document and renders
    'sounds/tags.html'.
    """
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []
    # Drop empty segments (e.g. from trailing slashes) and normalise order.
    multiple_tags = sorted(filter(lambda x: x, multiple_tags))

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1

    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    if multiple_tags:
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                            rows=settings.SOUNDS_PER_PAGE,
                            field_list=["id"],
                            sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)
    query.set_group_field(group_field="grouping_pack")
    query.set_group_options(group_func=None,
                            group_query=None,
                            group_rows=10,
                            group_start=0,
                            group_limit=1,
                            group_offset=0,
                            group_sort=None,
                            group_sort_ingroup=None,
                            group_format='grouped',
                            group_main=False,
                            group_num_groups=True,
                            group_cache_percent=0,
                            group_truncate=True)  # Sets how many results from the same group are taken into account for computing the facets

    page = None
    num_results = 0
    tags = []
    error = False
    docs = {}
    non_grouped_number_of_results = 0
    paginator = None
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]
        docs = results.docs
        # Fetch the Sound objects for the returned ids in one bulk query and
        # attach each to its Solr document for the template.
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        for d in docs:
            d["sound"] = allsounds[d["id"]]
    except SolrException as e:
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
    except Exception as e:
        # Previously a bare "except:" that swallowed everything (including
        # KeyboardInterrupt/SystemExit) without a trace; keep the best-effort
        # error page but narrow the catch and log the cause.
        error = True
        search_logger.error("Error while searching tags - %s" % e)

    slash_tag = "/".join(multiple_tags)

    follow_tags_url = ''
    unfollow_tags_url = ''
    show_unfollow_button = False
    if slash_tag:
        follow_tags_url = reverse('follow-tags', args=[slash_tag])
        unfollow_tags_url = reverse('unfollow-tags', args=[slash_tag])
        show_unfollow_button = False
        if request.user.is_authenticated:
            show_unfollow_button = follow_utils.is_user_following_tag(request.user, slash_tag)

    tvars = {'show_unfollow_button': show_unfollow_button,
             'multiple_tags': multiple_tags,
             'follow_tags_url': follow_tags_url,
             'unfollow_tags_url': unfollow_tags_url,
             'error': error,
             'tags': tags,
             'slash_tag': slash_tag,
             'num_results': num_results,
             'non_grouped_number_of_results': non_grouped_number_of_results,
             'docs': docs,
             'paginator': paginator,
             'page': page,
             'current_page': current_page}
    return render(request, 'sounds/tags.html', tvars)