def add_sounds_to_solr(sounds):
    solr = Solr(settings.SOLR_URL)
    console_logger.info("creating XML")
    documents = [convert_to_solr_document(s) for s in sounds]
    console_logger.info("adding %d sounds to solr index" % len(documents))
    console_logger.info("posting to Solr")
    solr.add(documents)
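# A minimal sketch, not part of the original module, of how add_sounds_to_solr()
# might be driven in fixed-size batches. The Sound model, its moderation/processing
# states and the is_index_dirty flag are assumed from the other snippets in this file.
def reindex_dirty_sounds_sketch(batch_size=1000):
    qs = Sound.objects.filter(is_index_dirty=True, moderation_state='OK', processing_state='OK')
    sound_ids = list(qs.values_list('id', flat=True))
    for i in range(0, len(sound_ids), batch_size):
        batch = Sound.objects.filter(id__in=sound_ids[i:i + batch_size])
        add_sounds_to_solr(batch)  # posts one batch of documents to Solr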
def get_all_sound_ids_from_solr(limit=False):
    logger.info("getting all sound ids from solr.")
    if not limit:
        limit = 99999999999999
    solr = Solr(settings.SOLR_URL)
    solr_ids = []
    solr_count = None
    PAGE_SIZE = 2000
    current_page = 1
    while (len(solr_ids) < solr_count or solr_count is None) and len(solr_ids) < limit:
        response = SolrResponseInterpreter(
            solr.select(unicode(search_prepare_query(
                '', '', search_prepare_sort('created asc', SEARCH_SORT_OPTIONS_WEB),
                current_page, PAGE_SIZE, include_facets=False))))
        solr_ids += [element['id'] for element in response.docs]
        solr_count = response.num_found
        current_page += 1
    return sorted(solr_ids)
def get_user_tags(self, use_solr=True):
    if use_solr:
        query = SolrQuery()
        query.set_dismax_query('')
        filter_query = 'username:"%s"' % self.user.username
        query.set_query_options(field_list=["id"], filter_query=filter_query)
        query.add_facet_fields("tag")
        query.set_facet_options("tag", limit=10, mincount=1)
        solr = Solr(settings.SOLR_URL)
        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
        except SolrException as e:
            return False
        except Exception as e:
            return False
        return [{'name': tag, 'count': count} for tag, count in results.facets['tag']]
    else:
        return DelayedQueryExecuter("""
            SELECT tags_tag.name AS name, X.c AS count
            FROM (
                SELECT tag_id, count(*) AS c
                FROM tags_taggeditem
                LEFT JOIN sounds_sound ON object_id=sounds_sound.id
                WHERE tags_taggeditem.user_id=%d
                    AND sounds_sound.moderation_state='OK'
                    AND sounds_sound.processing_state='OK'
                GROUP BY tag_id
                ORDER BY c DESC
                LIMIT 10) AS X
            LEFT JOIN tags_tag ON tags_tag.id=X.tag_id
            ORDER BY tags_tag.name;""" % self.user_id)
def tags(request, multiple_tags=None):
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []
    multiple_tags = sorted(filter(lambda x: x, multiple_tags))
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    if multiple_tags:
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                            rows=settings.SOUNDS_PER_PAGE,
                            field_list=["id"],
                            sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)
    query.set_group_field(group_field="grouping_pack")
    query.set_group_options(group_func=None,
                            group_query=None,
                            group_rows=10,
                            group_start=0,
                            group_limit=1,
                            group_offset=0,
                            group_sort=None,
                            group_sort_ingroup=None,
                            group_format='grouped',
                            group_main=False,
                            group_num_groups=True,
                            group_cache_percent=0,
                            # Sets how many results from the same group are taken into
                            # account for computing the facets
                            group_truncate=True)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]
        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        for d in docs:
            d["sound"] = allsounds[d["id"]]
    except SolrException as e:
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
def perform_solr_query(q, current_page):
    """
    This util function performs the query to Solr and returns needed parameters to continue with the view.
    The main reason to have this util function is to facilitate mocking in unit tests for this view.
    """
    solr = Solr(settings.SOLR_URL)
    results = SolrResponseInterpreter(solr.select(unicode(q)))
    paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
    page = paginator.page(current_page)
    return results.non_grouped_number_of_matches, results.facets, paginator, page, results.docs
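# A sketch of the mocking pattern the docstring above alludes to. The module path
# 'search.views' and the '/search/' URL are assumptions for illustration, not taken
# from the original code; 'mock' is the Python 2 backport of unittest.mock.
from mock import patch
from django.test import TestCase

class SearchViewTestSketch(TestCase):
    @patch('search.views.perform_solr_query')
    def test_search_view_without_solr(self, fake_query):
        # Return the same 5-tuple the real helper returns, with canned values,
        # so the view can be exercised without a live Solr instance.
        fake_query.return_value = (0, {'tag': []}, None, None, [])
        resp = self.client.get('/search/?q=piano')
        self.assertEqual(resp.status_code, 200)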
def check_if_sound_exists_in_solr(sound):
    solr = Solr(settings.SOLR_URL)
    response = SolrResponseInterpreter(
        solr.select(unicode(search_prepare_query(
            '', 'id:%i' % sound.id,
            search_prepare_sort('created asc', SEARCH_SORT_OPTIONS_WEB), 1, 1))))
    return response.num_found > 0
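# Hedged usage sketch (not in the original sources): re-add a sound to the index
# only when it is missing, combining two helpers defined in this file.
def ensure_sound_indexed_sketch(sound):
    if not check_if_sound_exists_in_solr(sound):
        add_sounds_to_solr([sound])  # add_sounds_to_solr() expects an iterable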
def get_solr_results(search_form, page_size, max_pages, start_page=1, valid_ids=None, solr=None, offset=None):
    if not solr:
        solr = Solr(settings.SOLR_URL)

    query_filter = search_form.cleaned_data['filter']
    if valid_ids:
        # Update solr filter to only return results in valid ids
        ids_filter = 'id:(' + ' OR '.join([str(item) for item in valid_ids]) + ')'
        if query_filter:
            query_filter += ' %s' % ids_filter
        else:
            query_filter = ids_filter

    solr_ids = []
    solr_count = None
    try:
        current_page = start_page
        n_page_requests = 1
        # Iterate over solr result pages
        while (len(solr_ids) < solr_count or solr_count is None) and n_page_requests <= max_pages:
            query = search_prepare_query(unquote(search_form.cleaned_data['query'] or ""),
                                         unquote(query_filter or ""),
                                         search_form.cleaned_data['sort'],
                                         current_page,
                                         page_size,
                                         grouping=False,
                                         include_facets=False,
                                         offset=offset)
            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids += [element['id'] for element in result.docs]
            solr_count = result.num_found
            #print 'Solr page %i (total %i sounds)' % (current_page, solr_count)
            current_page += 1
            n_page_requests += 1
    except SolrException as e:
        raise ServerErrorException(msg='Search server error: %s' % e.message)
    except Exception as e:
        raise ServerErrorException(msg='The search server could not be reached or some unexpected error occurred.')

    return solr_ids, solr_count
def get_pack_tags(pack_obj):
    query = SolrQuery()
    query.set_dismax_query('')
    filter_query = 'username:"%s" pack:"%s"' % (pack_obj.user.username, pack_obj.name)
    query.set_query_options(field_list=["id"], filter_query=filter_query)
    query.add_facet_fields("tag")
    query.set_facet_options("tag", limit=20, mincount=1)
    try:
        solr = Solr(settings.SOLR_URL)
        results = SolrResponseInterpreter(solr.select(unicode(query)))
    except (SolrException, Exception) as e:
        # TODO: do something here?
        return False
    return results.facets
def get_user_tags(self, use_solr=True):
    if use_solr:
        query = SolrQuery()
        query.set_dismax_query('')
        filter_query = 'username:"%s"' % self.user.username
        query.set_query_options(field_list=["id"], filter_query=filter_query)
        query.add_facet_fields("tag")
        query.set_facet_options("tag", limit=10, mincount=1)
        solr = Solr(settings.SOLR_URL)
        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
        except SolrException as e:
            return False
        except Exception as e:
            return False
        return [{'name': tag, 'count': count} for tag, count in results.facets['tag']]
def delete_sound_from_solr(sound_id):
    search_logger.info("deleting sound with id %d" % sound_id)
    try:
        Solr(settings.SOLR_URL).delete_by_id(sound_id)
    except (SolrException, socket.error) as e:
        search_logger.error('could not delete sound with id %s (%s).' % (sound_id, e))
def get_pack_tags(pack_obj):
    query = SolrQuery()
    query.set_dismax_query('')
    filter_query = 'username:"%s" pack:"%s"' % (pack_obj.user.username, pack_obj.name)
    #filter_query = 'pack:"%s"' % (pack_obj.name,)
    query.set_query_options(field_list=["id"], filter_query=filter_query)
    query.add_facet_fields("tag")
    query.set_facet_options("tag", limit=20, mincount=1)
    solr = Solr(settings.SOLR_URL)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
    except SolrException as e:
        #logger.warning("search error: query: %s error %s" % (query, e))
        #error = True
        #error_text = 'There was an error while searching, is your query correct?'
        return False
    return results.facets
def items(self, obj):
    if obj['query'] != "":
        try:
            solr = Solr(settings.SOLR_URL)
            query = SolrQuery()
            fields = [('id', 4),
                      ('tag', 3),
                      ('description', 3),
                      ('username', 2),
                      ('pack_tokenized', 2),
                      ('original_filename', 2)]
            if obj['type'] == "phrase":
                query.set_dismax_query('"' + obj['query'] + '"', query_fields=fields)  # EXACT (not 100%)
            elif obj['type'] == "any":
                query.set_dismax_query(obj['query'], query_fields=[], minimum_match=0)  # OR
            else:
                query.set_dismax_query(obj['query'], query_fields=[], minimum_match="100%")  # AND
            lim = obj['limit']
            if lim > 100:
                lim = 100
            query.set_query_options(start=obj['offset'], rows=lim, filter_query="", sort=['created desc'])
            try:
                results = SolrResponseInterpreter(solr.select(unicode(query)))
                sounds = []
                for doc in results.docs:
                    try:
                        sounds.append(doc)
                    except:
                        # This will happen if there are synchronization errors between the solr
                        # index and the database. In that case sounds are omitted, and both
                        # num_results and results per page might become inaccurate.
                        pass
                logger.info("Sound pool search RSS")
                return sounds
            except SolrException as e:
                return []
        except:
            return []
    else:
        return []
def get_user_tags(self):
    query = SolrQuery()
    query.set_dismax_query('')
    filter_query = 'username:"%s"' % self.user.username
    query.set_query_options(field_list=["id"], filter_query=filter_query)
    query.add_facet_fields("tag")
    query.set_facet_options("tag", limit=10, mincount=1)
    solr = Solr(settings.SOLR_URL)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
    except SolrException as e:
        return False
    except Exception as e:
        return False
    return [{'name': tag, 'count': count} for tag, count in results.facets['tag']]
def handle(self, *args, **options):
    LIMIT = None
    SLICE_SIZE = 500
    solr_sound_ids = []

    solr = Solr(url=settings.SOLR_URL)
    query = SolrQuery()
    query.set_dismax_query("")  # Query to get ALL sounds
    print "Retrieving ids from %i to %i" % (0, SLICE_SIZE)
    query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=0)
    results = SolrResponseInterpreter(solr.select(unicode(query)))
    solr_sound_ids += list_of_dicts_to_list_of_ids(results.docs)
    total_num_documents = results.num_found

    # Start iterating over other pages (slices)
    if LIMIT:
        number_of_documents = min(LIMIT, total_num_documents)
    else:
        number_of_documents = total_num_documents

    for i in range(SLICE_SIZE, number_of_documents, SLICE_SIZE):
        print "Retrieving ids from %i to %i" % (i, i + SLICE_SIZE)
        query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=i)
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        solr_sound_ids += list_of_dicts_to_list_of_ids(results.docs)

    solr_sound_ids = sorted(list(set(solr_sound_ids)))
    if LIMIT:
        solr_sound_ids = solr_sound_ids[0:LIMIT]
    print "%i document ids retrieved" % len(solr_sound_ids)
    n_deleted = 0
    print ""
    for count, id in enumerate(solr_sound_ids):
        sys.stdout.write("\rChecking doc %i of %i" % (count, len(solr_sound_ids)))
        sys.stdout.flush()
        if Sound.objects.filter(id=id, moderation_state="OK", processing_state="OK").exists():
            pass
        else:
            # Sound does not exist in the Db or is not properly moderated and processed
            print "\n\t - Deleting sound with id %i from solr index" % id
            solr.delete_by_id(id)
            n_deleted += 1

    print "\n\nDONE! %i sounds deleted from solr index (it may take some minutes to actually see the changes in the page)" % n_deleted
def get_random_sound_from_solr():
    """ Get a random sound from solr.
    This is used for random sound browsing. We filter explicit sounds,
    but otherwise don't have any other restrictions on sound attributes.
    """
    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    rand_key = random.randint(1, 10000000)
    sort = ['random_%d asc' % rand_key]
    filter_query = 'is_explicit:0'
    query.set_query("*:*")
    query.set_query_options(start=0, rows=1, field_list=["*"], filter_query=filter_query, sort=sort)
    try:
        response = SolrResponseInterpreter(solr.select(unicode(query)))
        docs = response.docs
        if docs:
            return docs[0]
    except (SolrException, socket.error):
        pass
    return {}
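# Hedged usage sketch (not from the original sources): fall back to a database
# pick when Solr is unreachable and get_random_sound_from_solr() returns {}.
# An is_explicit model field mirroring the solr filter above is assumed here.
def get_random_sound_id_sketch():
    doc = get_random_sound_from_solr()
    if doc:
        return doc['id']
    qs = Sound.objects.filter(moderation_state='OK', processing_state='OK', is_explicit=False)
    sound = qs.order_by('?').first()  # order_by('?') can be slow on large tables
    return sound.id if sound else None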
def delete_sounds_from_solr(sound_ids):
    solr_max_boolean_clause = 1000  # This number is specified in solrconfig.xml
    for count, i in enumerate(range(0, len(sound_ids), solr_max_boolean_clause)):
        range_ids = sound_ids[i:i + solr_max_boolean_clause]
        try:
            logger.info("deleting %i sounds from solr [%i of %i, %i sounds]"
                        % (len(sound_ids), count + 1,
                           int(math.ceil(float(len(sound_ids)) / solr_max_boolean_clause)),
                           len(range_ids)))
            sound_ids_query = ' OR '.join(['id:{0}'.format(sid) for sid in range_ids])
            Solr(settings.SOLR_URL).delete_by_query(sound_ids_query)
        except (SolrException, socket.error) as e:
            logger.error('could not delete solr sounds chunk %i of %i'
                         % (count + 1, int(math.ceil(float(len(sound_ids)) / solr_max_boolean_clause))))
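# Why the chunking above: Solr rejects boolean queries with more clauses than
# maxBooleanClauses (1000 here, per the comment), so e.g. 2500 ids are deleted as
# ceil(2500 / 1000) = 3 queries of 1000, 1000 and 500 'id:N' clauses. A quick
# self-contained check of that arithmetic:
import math

def expected_chunks(n_ids, max_clauses=1000):
    return int(math.ceil(float(n_ids) / max_clauses))

assert expected_chunks(2500) == 3
assert [len(range(0, 2500)[i:i + 1000]) for i in range(0, 2500, 1000)] == [1000, 1000, 500]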
def tags(request, multiple_tags=None):
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []
    multiple_tags = sorted(filter(lambda x: x, multiple_tags))
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    if multiple_tags:
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                            rows=settings.SOUNDS_PER_PAGE,
                            field_list=["id"],
                            sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        page = paginator.page(current_page)
        error = False
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]
    except SolrException as e:
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
def handle(self, *args, **options):
    LIMIT = None
    SLICE_SIZE = 500
    solr_post_ids = []

    solr = Solr(url=settings.SOLR_FORUM_URL)
    query = SolrQuery()
    query.set_dismax_query("")  # Query to get ALL posts
    console_logger.info("Retrieving ids from %i to %i" % (0, SLICE_SIZE))
    query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=0)
    results = SolrResponseInterpreter(solr.select(unicode(query)))
    solr_post_ids += list_of_dicts_to_list_of_ids(results.docs)
    total_num_documents = results.num_found

    # Start iterating over other pages (slices)
    if LIMIT:
        number_of_documents = min(LIMIT, total_num_documents)
    else:
        number_of_documents = total_num_documents

    for i in range(SLICE_SIZE, number_of_documents, SLICE_SIZE):
        console_logger.info("Retrieving ids from %i to %i" % (i, i + SLICE_SIZE - 1))
        query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=i)
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        solr_post_ids += list_of_dicts_to_list_of_ids(results.docs)

    solr_post_ids = sorted(list(set(solr_post_ids)))
    if LIMIT:
        solr_post_ids = solr_post_ids[0:LIMIT]
    console_logger.info("%i document ids retrieved" % len(solr_post_ids))
    n_deleted = 0
    console_logger.info("")
    for count, id in enumerate(solr_post_ids):
        if count % 100 == 0:
            console_logger.info("\rChecking docs %i/%i" % (count, len(solr_post_ids)))
        if Post.objects.filter(id=id, moderation_state="OK").exists():
            pass
        else:
            # Post does not exist in the Db or is not properly moderated
            console_logger.info("\n\t - Deleting post with id %i from solr index" % id)
            solr.delete_by_id(id)
            n_deleted += 1

    console_logger.info("\n\nDONE! %i posts deleted from solr index (it may take some minutes to actually see "
                        "the changes in the page)" % n_deleted)
def api_search(search_form, target_file=None, extra_parameters=False, merging_strategy='merge_optimized',
               resource=None):
    if search_form.cleaned_data['query'] is None \
            and search_form.cleaned_data['filter'] is None \
            and not search_form.cleaned_data['descriptors_filter'] \
            and not search_form.cleaned_data['target'] \
            and not target_file:
        # No input data for search, return empty results
        return [], 0, None, None, None, None, None

    if search_form.cleaned_data['query'] is None and search_form.cleaned_data['filter'] is None:
        # Standard content-based search
        try:
            results, count, note = similarity_api_search(
                target=search_form.cleaned_data['target'],
                filter=search_form.cleaned_data['descriptors_filter'],
                num_results=search_form.cleaned_data['page_size'],
                offset=(search_form.cleaned_data['page'] - 1) * search_form.cleaned_data['page_size'],
                target_file=target_file)

            gaia_ids = [result[0] for result in results]
            distance_to_target_data = None
            if search_form.cleaned_data['target'] or target_file:
                # Save sound distance to target into view class so it can be accessed by the serializer
                # We only do that when a target is specified (otherwise there is no meaningful distance value)
                distance_to_target_data = dict(results)

            gaia_count = count
            return gaia_ids, gaia_count, distance_to_target_data, None, note, None, None

        except SimilarityException as e:
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message, resource=resource)
            elif e.status_code == 400:
                raise BadRequestException(msg=e.message, resource=resource)
            elif e.status_code == 404:
                raise NotFoundException(msg=e.message, resource=resource)
            else:
                raise ServerErrorException(msg='Similarity server error: %s' % e.message, resource=resource)
        except Exception as e:
            raise ServerErrorException(
                msg='The similarity server could not be reached or some unexpected error occurred.',
                resource=resource)

    elif not search_form.cleaned_data['descriptors_filter'] \
            and not search_form.cleaned_data['target'] \
            and not target_file:
        # Standard text-based search
        try:
            solr = Solr(settings.SOLR_URL)
            query = search_prepare_query(unquote(search_form.cleaned_data['query'] or ""),
                                         unquote(search_form.cleaned_data['filter'] or ""),
                                         search_form.cleaned_data['sort'],
                                         search_form.cleaned_data['page'],
                                         search_form.cleaned_data['page_size'],
                                         grouping=search_form.cleaned_data['group_by_pack'],
                                         include_facets=False)
            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids = [element['id'] for element in result.docs]
            solr_count = result.num_found

            more_from_pack_data = None
            if search_form.cleaned_data['group_by_pack']:
                # If grouping option is on, store grouping info in a dictionary that we can add when serializing sounds
                more_from_pack_data = dict([
                    (int(element['id']), [element['more_from_pack'], element['pack_id'], element['pack_name']])
                    for element in result.docs
                ])

            return solr_ids, solr_count, None, more_from_pack_data, None, None, None

        except SolrException as e:
            if search_form.cleaned_data['filter'] is not None:
                raise BadRequestException(msg='Search server error: %s (please check that your filter syntax and '
                                              'field names are correct)' % e.message, resource=resource)
            raise BadRequestException(msg='Search server error: %s' % e.message, resource=resource)
        except Exception as e:
            raise ServerErrorException(
                msg='The search server could not be reached or some unexpected error occurred.',
                resource=resource)

    else:
        # Combined search (there is at least one of query/filter and one of descriptors_filter/target)
        # Strategies are implemented in 'combined_search_strategies'
        strategy = getattr(combined_search_strategies, merging_strategy)
        return strategy(search_form, target_file=target_file, extra_parameters=extra_parameters)
def search(request):
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    filter_query_link_more_when_grouping_packs = filter_query.replace(' ', '+')

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    sort = request.GET.get("s", None)
    sort_options = forms.SEARCH_SORT_OPTIONS_WEB

    grouping = request.GET.get("g", "1")  # Group by default
    actual_grouping = grouping

    # If the query is filtered by pack, do not collapse sounds of the same pack (makes no sense)
    # If the query is through ajax (for sources remix editing), do not collapse
    if "pack" in filter_query or request.GET.get("ajax", "") == "1":
        actual_grouping = ""

    # Set default values
    id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
    tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
    description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
    username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
    pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
    original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # Parse advanced search options
    advanced = request.GET.get("advanced", "")

    # if advanced search
    if advanced == "1":
        a_tag = request.GET.get("a_tag", "")
        a_filename = request.GET.get("a_filename", "")
        a_description = request.GET.get("a_description", "")
        a_packname = request.GET.get("a_packname", "")
        a_soundid = request.GET.get("a_soundid", "")
        a_username = request.GET.get("a_username", "")

        # If none is selected use all (so other filters can be applied)
        if a_tag or a_filename or a_description or a_packname or a_soundid or a_username:
            # Initialize all weights to 0
            id_weight = 0
            tag_weight = 0
            description_weight = 0
            username_weight = 0
            pack_tokenized_weight = 0
            original_filename_weight = 0

            # Set the weights of selected checkboxes
            if a_soundid != "":
                id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
            if a_tag != "":
                tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
            if a_description != "":
                description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
            if a_username != "":
                username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
            if a_packname != "":
                pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
            if a_filename != "":
                original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # ALLOW "q" empty queries
    #if search_query.strip() == ""

    sort = search_prepare_sort(sort, forms.SEARCH_SORT_OPTIONS_WEB)

    query = search_prepare_query(search_query,
                                 filter_query,
                                 sort,
                                 current_page,
                                 settings.SOUNDS_PER_PAGE,
                                 id_weight,
                                 tag_weight,
                                 description_weight,
                                 username_weight,
                                 pack_tokenized_weight,
                                 original_filename_weight,
                                 grouping=actual_grouping)

    solr = Solr(settings.SOLR_URL)

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False

        # clickusage tracking
        if settings.LOG_CLICKTHROUGH_DATA:
            request_full_path = request.get_full_path()
            # The session id of an unauthenticated user is different from the session id
            # of the same user when authenticated.
            request.session["searchtime_session_key"] = request.session.session_key
            if results.docs is not None:
                ids = [item["id"] for item in results.docs]
                logger_click.info("QUERY : %s : %s : %s : %s" %
                                  (unicode(request_full_path).encode('utf-8'),
                                   request.session.session_key,
                                   unicode(ids).encode('utf-8'),
                                   unicode(current_page).encode('utf-8')))
    except SolrException as e:
        logger.warning("search error: query: %s error %s" % (query, e))
        error = True
        error_text = 'There was an error while searching, is your query correct?'
def get_stream_sounds(user, time_lapse):
    solr = Solr(settings.SOLR_URL)
    sort_str = search_prepare_sort("created desc", SEARCH_SORT_OPTIONS_WEB)

    #
    # USERS FOLLOWING
    #
    users_following = get_users_following(user)
    users_sounds = []
    for user_following in users_following:
        # The username expression was redacted ("******") in this snippet and is
        # kept as a placeholder string here.
        filter_str = "username:" + "******" + " created:" + time_lapse
        query = search_prepare_query("", filter_str, sort_str, 1, SOLR_QUERY_LIMIT_PARAM,
                                     grouping=False, include_facets=False)
        result = SolrResponseInterpreter(solr.select(unicode(query)))
        if result.num_rows != 0:
            more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)
            # the sorting only works if done like this!
            more_url_params = [urllib.quote(filter_str), urllib.quote(sort_str[0])]
            # this is the same link but for the email has to be "quoted"
            more_url = u"?f=" + filter_str + u"&s=" + sort_str[0]
            # more_url_quoted = urllib.quote(more_url)
            sound_ids = [element['id'] for element in result.docs]
            sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids).select_related('license', 'user')
            new_count = more_count + len(sound_ids)
            users_sounds.append(((user_following, False), sound_objs, more_url_params, more_count, new_count))

    #
    # TAGS FOLLOWING
    #
    tags_following = get_tags_following(user)
    tags_sounds = []
    for tag_following in tags_following:
        tags = tag_following.split(" ")
        tag_filter_query = ""
        for tag in tags:
            tag_filter_query += "tag:" + tag + " "
        tag_filter_str = tag_filter_query + " created:" + time_lapse
        query = search_prepare_query("", tag_filter_str, sort_str, 1, SOLR_QUERY_LIMIT_PARAM,
                                     grouping=False, include_facets=False)
        result = SolrResponseInterpreter(solr.select(unicode(query)))
        if result.num_rows != 0:
            more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)
            # the sorting only works if done like this!
            more_url_params = [urllib.quote(tag_filter_str), urllib.quote(sort_str[0])]
            # this is the same link but for the email has to be "quoted"
            more_url = u"?f=" + tag_filter_str + u"&s=" + sort_str[0]
            # more_url_quoted = urllib.quote(more_url)
            sound_ids = [element['id'] for element in result.docs]
            sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids)
            new_count = more_count + len(sound_ids)
            tags_sounds.append((tags, sound_objs, more_url_params, more_count, new_count))

    return users_sounds, tags_sounds
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
#     See AUTHORS file.
#

import time

from utils.search.solr import Solr, SolrQuery, SolrResponseInterpreter

"""An example of usage, with the freesound custom schema... """

solr = Solr("http://localhost:8983/solr/", persistent=True)

lines = open("searches").readlines()
lines.reverse()

num_queries_total = 0
num_queries_this_loop = 0
time_solr = 0
results_solr = 0
results_before = 0
start = time.time()
start_this_loop = start

for index, line in enumerate(lines):
    # The snippet ends here in the original; the body below is an assumed,
    # minimal reconstruction that replays each logged search against Solr
    # and feeds the counters defined above.
    query = SolrQuery()
    query.set_dismax_query(line.strip())
    query_start = time.time()
    results = SolrResponseInterpreter(solr.select(unicode(query)))
    time_solr += time.time() - query_start
    results_solr += results.num_found
    num_queries_total += 1
    num_queries_this_loop += 1
def search(request):
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    filter_query_link_more_when_grouping_packs = filter_query.replace(' ', '+')

    # Generate array with information of filters
    filter_query_split = []
    if filter_query != "":
        for filter_str in re.findall(r'[\w-]+:\"[^\"]+', filter_query):
            filter_str = filter_str + '"'
            filter_display = filter_str.replace('"', '')
            filter_name = filter_str.split(":")[0]
            if filter_name != "duration" and filter_name != "is_geotagged":
                if filter_name == "grouping_pack":
                    val = filter_display.split(":")[1]
                    filter_display = "pack:" + val.split("_")[1]
                filter = {
                    'name': filter_display,
                    'remove_url': filter_query.replace(filter_str, ''),
                }
                filter_query_split.append(filter)

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    sort = request.GET.get("s", None)
    sort_options = forms.SEARCH_SORT_OPTIONS_WEB

    grouping = request.GET.get("g", "1")  # Group by default
    actual_grouping = grouping

    # If the query is filtered by pack, do not collapse sounds of the same pack (makes no sense)
    # If the query is through ajax (for sources remix editing), do not collapse
    if "pack" in filter_query or request.GET.get("ajax", "") == "1":
        actual_grouping = ""

    # Set default values
    id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
    tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
    description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
    username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
    pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
    original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # Parse advanced search options
    advanced = request.GET.get("advanced", "")

    # if advanced search
    if advanced == "1":
        a_tag = request.GET.get("a_tag", "")
        a_filename = request.GET.get("a_filename", "")
        a_description = request.GET.get("a_description", "")
        a_packname = request.GET.get("a_packname", "")
        a_soundid = request.GET.get("a_soundid", "")
        a_username = request.GET.get("a_username", "")

        # If none is selected use all (so other filters can be applied)
        if a_tag or a_filename or a_description or a_packname or a_soundid or a_username:
            # Initialize all weights to 0
            id_weight = 0
            tag_weight = 0
            description_weight = 0
            username_weight = 0
            pack_tokenized_weight = 0
            original_filename_weight = 0

            # Set the weights of selected checkboxes
            if a_soundid != "":
                id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
            if a_tag != "":
                tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
            if a_description != "":
                description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
            if a_username != "":
                username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
            if a_packname != "":
                pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
            if a_filename != "":
                original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # ALLOW "q" empty queries
    #if search_query.strip() == ""

    sort = search_prepare_sort(sort, forms.SEARCH_SORT_OPTIONS_WEB)

    logger.info(u'Search (%s)' % json.dumps({
        'ip': get_client_ip(request),
        'query': search_query,
        'filter': filter_query,
        'username': request.user.username,
        'page': current_page,
        'sort': sort[0],
        'group_by_pack': actual_grouping,
        'advanced': json.dumps({
            'search_in_tag': a_tag,
            'search_in_filename': a_filename,
            'search_in_description': a_description,
            'search_in_packname': a_packname,
            'search_in_soundid': a_soundid,
            'search_in_username': a_username
        }) if advanced == "1" else ""
    }))

    query = search_prepare_query(search_query,
                                 filter_query,
                                 sort,
                                 current_page,
                                 settings.SOUNDS_PER_PAGE,
                                 id_weight,
                                 tag_weight,
                                 description_weight,
                                 username_weight,
                                 pack_tokenized_weight,
                                 original_filename_weight,
                                 grouping=actual_grouping)

    solr = Solr(settings.SOLR_URL)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False
        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        # allsounds will contain info from all the sounds returned by bulk_query_id. This should
        # be all sounds in docs, but if solr and db are not synchronised, it might happen that there
        # are ids in docs which are not found in bulk_query_id. To avoid problems we remove elements
        # in docs that have not been loaded in allsounds.
        docs = [doc for doc in docs if doc["id"] in allsounds]
        for d in docs:
            d["sound"] = allsounds[d["id"]]
    except SolrException as e:
        logger.warning("search error: query: %s error %s" % (query, e))
        error = True
        error_text = 'There was an error while searching, is your query correct?'
def handle(self, *args, **options):
    # init
    solr = Solr(settings.SOLR_URL)

    # Get all solr ids
    print "Getting solr ids...",
    solr_ids = get_all_sound_ids_from_solr()
    print "done!"

    # Get all gaia ids
    print "Getting gaia ids...",
    gaia_ids = Similarity.get_all_sound_ids()
    print "done!"

    print "Getting freesound db data..."
    # Get all moderated and processed sound ids
    queryset = Sound.objects.filter(processing_state='OK', moderation_state='OK').order_by('id').only("id")
    fs_mp = [sound.id for sound in queryset]
    # Get all moderated, processed and analysed sounds
    queryset = Sound.objects.filter(processing_state='OK', moderation_state='OK',
                                    analysis_state='OK').order_by('id').only("id")
    fs_mpa = [sound.id for sound in queryset]
    print "done!"

    print "\nNumber of sounds per index:\n--------------------------"
    print "Solr index\t\t%i" % len(solr_ids)
    print "Gaia index\t\t%i" % len(gaia_ids)
    print "Freesound\t\t%i (moderated and processed)" % len(fs_mp)
    print "Freesound\t\t%i (moderated, processed and analyzed)" % len(fs_mpa)

    print "\n\n***************\nSOLR INDEX\n***************\n"
    # Sounds present in one index but not in the other (plain set differences)
    in_solr_not_in_fs = list(set(solr_ids).difference(fs_mp))
    in_fs_not_in_solr = list(set(fs_mp).difference(solr_ids))
    print "Sounds in solr but not in fs:\t%i" % len(in_solr_not_in_fs)
    print "Sounds in fs but not in solr:\t%i" % len(in_fs_not_in_solr)

    if not options['no-changes']:
        # Mark fs sounds as index-dirty so they get reindexed
        if in_fs_not_in_solr:
            print "Changing is_index_dirty_state of sounds that require it"
            N = len(in_fs_not_in_solr)
            for count, sid in enumerate(in_fs_not_in_solr):
                sys.stdout.write('\r\tChanging state of sound %i of %i ' % (count + 1, N))
                sys.stdout.flush()
                sound = Sound.objects.get(id=sid)
                sound.set_single_field('is_index_dirty', True)

        # Delete sounds from solr that are not in the db
        if in_solr_not_in_fs:
            print "\nDeleting sounds that should not be in solr"
            N = len(in_solr_not_in_fs)
            for count, sid in enumerate(in_solr_not_in_fs):
                sys.stdout.write('\r\tDeleting sound %i of %i ' % (count + 1, N))
                sys.stdout.flush()
                solr.delete_by_id(sid)

    print "\n***************\nGAIA INDEX\n***************\n"
    in_gaia_not_in_fs = list(set(gaia_ids).difference(fs_mpa))
    in_fs_not_in_gaia = list(set(fs_mpa).difference(gaia_ids))
    print "Sounds in gaia but not in fs:\t%i" % len(in_gaia_not_in_fs)
    print "Sounds in fs but not in gaia:\t%i (only considering sounds correctly analyzed)" % len(in_fs_not_in_gaia)
    #Similarity.save()

    if not options['no-changes']:
        # Mark fs sounds as pending so they are sent to the similarity index again
        if in_fs_not_in_gaia:
            print "Changing similarity_state of sounds that require it"
            N = len(in_fs_not_in_gaia)
            for count, sid in enumerate(in_fs_not_in_gaia):
                sys.stdout.write('\r\tChanging state of sound %i of %i ' % (count + 1, N))
                sys.stdout.flush()
                sound = Sound.objects.get(id=sid)
                sound.set_similarity_state('PE')

        # Delete sounds from gaia that are not in the db
        if in_gaia_not_in_fs:
            print "\nDeleting sounds that should not be in gaia"
            N = len(in_gaia_not_in_fs)
            for count, sid in enumerate(in_gaia_not_in_fs):
                sys.stdout.write('\r\tDeleting sound %i of %i ' % (count + 1, N))
                sys.stdout.flush()
                Similarity.delete(sid)
def search_forum(request):
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    current_forum_name_slug = request.GET.get("current_forum_name_slug", "").strip()  # for context sensitive search
    current_forum_name = request.GET.get("current_forum_name", "").strip()  # used in breadcrumb
    sort = ["thread_created desc"]

    # Parse advanced search options
    advanced_search = request.GET.get("advanced_search", "")
    date_from = request.GET.get("dt_from", "")
    date_to = request.GET.get("dt_to", "")

    # TEMPORAL WORKAROUND!!! to prevent using watermark as the query for forum search...
    # It only happens in some situations.
    if "search in " in search_query:
        invalid = 1

    if search_query.strip() != "" or filter_query:
        # add current forum
        if current_forum_name_slug.strip() != "":
            filter_query += "forum_name_slug:" + current_forum_name_slug

        # add date range
        if advanced_search == "1" and (date_from != "" or date_to != ""):
            filter_query = __add_date_range(filter_query, date_from, date_to)

        query = SolrQuery()
        query.set_dismax_query(search_query, query_fields=[("thread_title", 4),
                                                           ("post_body", 3),
                                                           ("thread_author", 3),
                                                           ("post_author", 3),
                                                           ("forum_name", 2)])
        query.set_highlighting_options_default(field_list=["post_body"],
                                               fragment_size=200,
                                               alternate_field="post_body",  # TODO: revise this param
                                               require_field_match=False,
                                               pre="<strong>",
                                               post="</strong>")
        query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                                rows=settings.SOUNDS_PER_PAGE,
                                field_list=["id", "forum_name", "forum_name_slug", "thread_id", "thread_title",
                                            "thread_author", "thread_created", "post_body", "post_author",
                                            "post_created", "num_posts"],
                                filter_query=filter_query,
                                sort=sort)
        query.set_group_field("thread_title_grouped")
        query.set_group_options(group_limit=30)

        solr = Solr(settings.SOLR_FORUM_URL)
        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
            num_results = paginator.count
            page = paginator.page(current_page)
            error = False
        except SolrException as e:
            logger.warning("search error: query: %s error %s" % (query, e))
            error = True
            error_text = 'There was an error while searching, is your query correct?'
        except Exception as e:
            logger.error("Could probably not connect to Solr - %s" % e)
            error = True
            error_text = 'The search server could not be reached, please try again later.'
def merge_optimized(search_form, target_file=None, extra_parameters=None):
    """
    This strategy first gets some results from one engine and then checks whether they are also
    valid results for the other: either solr results validated against a gaia query, or the other
    way around. Both gaia and solr queries can be restricted to a particular set of ids, but there
    are limits on the length of the resulting url and on the number of OR clauses that solr
    supports, so the restriction is applied in blocks.
    """
    if not extra_parameters:
        extra_parameters = dict()
    solr_filter_id_block_size = extra_parameters.get('cs_solr_filter_id_block_size', 350)
    solr_filter_id_max_pages = extra_parameters.get('cs_solr_filter_id_max_pages', 7)
    solr_max_requests = extra_parameters.get('cs_max_solr_requests', 20)
    solr_page_size = extra_parameters.get('cs_solr_page_size', 200)
    gaia_max_pages = extra_parameters.get('cs_max_gaia_pages', 1)
    gaia_page_size = extra_parameters.get('cs_gaia_page_size', 9999999)  # We can get ALL gaia results at once

    num_requested_results = search_form.cleaned_data['page_size']
    params_for_next_page = dict()
    debug_note = ''

    if search_form.cleaned_data['target'] or target_file:
        # First search into gaia and get all results that have not been checked in previous calls
        # (indicated in request parameter 'cs_lcvidp')
        last_checked_valid_id_position = extra_parameters.get('cs_lcvidp', 0)
        if last_checked_valid_id_position < 0:
            last_checked_valid_id_position = 0
        gaia_ids, gaia_count, distance_to_target_data, note = get_gaia_results(
            search_form, target_file, page_size=gaia_page_size, max_pages=gaia_max_pages,
            offset=last_checked_valid_id_position)
        if len(gaia_ids):
            # Now divide gaia results in blocks of "solr_filter_id_block_size" results and iteratively
            # query solr limiting the results to the ids in the current block to obtain common results
            # for the search. Once we get as many results as "num_requested_results" or we exceed a
            # maximum number of iterations (solr_filter_id_max_pages), return what we got and update
            # the 'cs_lcvidp' parameter for further calls.
            valid_ids_pages = [gaia_ids[i:i + solr_filter_id_block_size]
                               for i in range(0, len(gaia_ids), solr_filter_id_block_size)]
            solr_ids = list()
            checked_gaia_ids = list()
            solr = Solr(settings.SOLR_URL)
            for count, valid_ids_page in enumerate(valid_ids_pages):
                page_solr_ids, solr_count = get_solr_results(search_form, page_size=len(valid_ids_page),
                                                             max_pages=1, valid_ids=valid_ids_page, solr=solr)
                solr_ids += page_solr_ids
                checked_gaia_ids += valid_ids_page
                if len(solr_ids) >= num_requested_results:
                    debug_note = 'Found enough results in %i solr requests' % (count + 1)
                    #print 'Did %i requests to solr' % (count + 1)
                    break
                if count + 1 > solr_filter_id_max_pages:
                    debug_note = 'Did %i solr requests (still not enough results)' % (count + 1)
                    #print 'Too many requests and not enough results'
                    break

            combined_ids = list()
            for index, sid in enumerate(checked_gaia_ids):
                if sid in solr_ids:
                    combined_ids.append(sid)
                new_last_checked_valid_id_position = index + 1
                if len(combined_ids) == num_requested_results:
                    break
            if len(checked_gaia_ids) == len(gaia_ids):
                params_for_next_page['no_more_results'] = True
            params_for_next_page['cs_lcvidp'] = last_checked_valid_id_position + new_last_checked_valid_id_position
        else:
            # No more gaia ids to check against solr, no more possible results!
            combined_ids = list()
            distance_to_target_data = dict()
            note = None
            params_for_next_page['no_more_results'] = True
    else:
        # First search into gaia to obtain a list of all sounds that match content-based query parameters
        gaia_ids, gaia_count, distance_to_target_data, note = get_gaia_results(
            search_form, target_file, page_size=gaia_page_size, max_pages=gaia_max_pages)
        last_retrieved_solr_id_pos = extra_parameters.get('cs_lrsidp', 0)
        if last_retrieved_solr_id_pos < 0:
            last_retrieved_solr_id_pos = 0
        if len(gaia_ids) < solr_filter_id_block_size:
            # Optimization: if there are few gaia_ids, we can get all results in one query
            solr_ids, solr_count = get_solr_results(search_form, page_size=len(gaia_ids), max_pages=1,
                                                    valid_ids=gaia_ids, offset=last_retrieved_solr_id_pos)
            combined_ids = solr_ids[:num_requested_results]
            params_for_next_page['cs_lrsidp'] = last_retrieved_solr_id_pos + num_requested_results
            if len(combined_ids) < num_requested_results:
                params_for_next_page['no_more_results'] = True
        else:
            # Now query solr starting at the last retrieved solr result position (parameter 'cs_lrsidp')
            # and iteratively combine the results of each page of the query with gaia ids. Once we reach
            # the desired "num_requested_results", return what we got and update the 'cs_lrsidp' parameter
            # for further queries. Set a maximum number of iterations (solr_max_requests) to prevent a
            # virtually infinite query if not enough results are found.
            combined_ids = list()
            new_last_retrieved_solr_id_pos = last_retrieved_solr_id_pos
            stop_main_for_loop = False
            n_requests_made = 0
            for i in range(0, solr_max_requests):
                if stop_main_for_loop:
                    continue
                offset = last_retrieved_solr_id_pos + i * solr_page_size
                solr_ids, solr_count = get_solr_results(search_form, page_size=solr_page_size, max_pages=1,
                                                        offset=offset)
                n_requests_made += 1
                common_ids = list(set(solr_ids).intersection(gaia_ids))
                for index, sid in enumerate(solr_ids):
                    new_last_retrieved_solr_id_pos += 1
                    if sid in common_ids:
                        combined_ids.append(sid)
                        if len(combined_ids) == num_requested_results:
                            stop_main_for_loop = True
                            break
                if new_last_retrieved_solr_id_pos == solr_count:
                    params_for_next_page['no_more_results'] = True
                    stop_main_for_loop = True
                    break
            if n_requests_made == solr_max_requests and len(combined_ids) < num_requested_results:
                debug_note = 'Did %i solr requests (still not enough results)' % n_requests_made
                #print 'Too many requests and not enough results'
            else:
                debug_note = 'Found enough results in %i solr requests' % n_requests_made
                #print 'Did %i requests to solr' % n_requests_made
            params_for_next_page['cs_lrsidp'] = new_last_retrieved_solr_id_pos

    # Combine results
    return combined_ids, len(combined_ids), distance_to_target_data, None, note, params_for_next_page, debug_note
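# The core of merge_optimized, distilled: intersect a ranked list of content-similarity
# ids with full-text ids in bounded blocks, preserving the ranking order and stopping
# as soon as enough matches are found. Self-contained sketch; fetch_text_ids stands in
# for get_solr_results and is hypothetical, not part of the original code.
def merge_ranked_ids_sketch(ranked_ids, fetch_text_ids, wanted, block_size=350, max_blocks=7):
    merged = []
    for block_no in range(max_blocks):
        block = ranked_ids[block_no * block_size:(block_no + 1) * block_size]
        if not block:
            break  # ranked list exhausted
        matching = set(fetch_text_ids(valid_ids=block))  # one bounded OR-clause query
        merged += [sid for sid in block if sid in matching]
        if len(merged) >= wanted:
            break
    return merged[:wanted]

# e.g. merge_ranked_ids_sketch([5, 9, 2, 7], lambda valid_ids: [9, 7], wanted=2) == [9, 7]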
def search_forum(request):
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    current_forum_name_slug = request.GET.get("forum", "").strip()  # for context sensitive search
    if current_forum_name_slug:
        current_forum = get_object_or_404(forum.models.Forum.objects, name_slug=current_forum_name_slug)
    else:
        current_forum = None
    sort = ["thread_created desc"]

    # Parse advanced search options
    advanced_search = request.GET.get("advanced_search", "")
    date_from = request.GET.get("dt_from", "")
    try:
        df_parsed = datetime.datetime.strptime(date_from, "%Y-%m-%d")
        date_from_display = df_parsed.strftime("%d-%m-%Y")
    except ValueError:
        date_from = ""
        date_from_display = "Choose a Date"
    date_to = request.GET.get("dt_to", "")
    try:
        dt_parsed = datetime.datetime.strptime(date_to, "%Y-%m-%d")
        date_to_display = dt_parsed.strftime("%d-%m-%Y")
    except ValueError:
        date_to = ""
        date_to_display = "Choose a Date"

    if search_query.startswith("search in"):
        search_query = ""

    error = False
    error_text = ""
    paginator = None
    num_results = None
    page = None
    results = []
    if search_query.strip() != "" or filter_query:
        # add current forum
        if current_forum:
            filter_query += "forum_name_slug:" + current_forum.name_slug

        # add date range
        if advanced_search == "1" and (date_from != "" or date_to != ""):
            filter_query = __add_date_range(filter_query, date_from, date_to)

        query = SolrQuery()
        query.set_dismax_query(search_query, query_fields=[("thread_title", 4),
                                                           ("post_body", 3),
                                                           ("thread_author", 3),
                                                           ("post_author", 3),
                                                           ("forum_name", 2)])
        query.set_highlighting_options_default(field_list=["post_body"],
                                               fragment_size=200,
                                               alternate_field="post_body",  # TODO: revise this param
                                               require_field_match=False,
                                               pre="<strong>",
                                               post="</strong>")
        query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                                rows=settings.SOUNDS_PER_PAGE,
                                field_list=["id", "forum_name", "forum_name_slug", "thread_id", "thread_title",
                                            "thread_author", "thread_created", "post_body", "post_author",
                                            "post_created", "num_posts"],
                                filter_query=filter_query,
                                sort=sort)
        query.set_group_field("thread_title_grouped")
        query.set_group_options(group_limit=30)

        solr = Solr(settings.SOLR_FORUM_URL)
        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
            num_results = paginator.count
            page = paginator.page(current_page)
            error = False
        except SolrException as e:
            logger.warning("search error: query: %s error %s" % (query, e))
            error = True
            error_text = 'There was an error while searching, is your query correct?'
        except Exception as e:
            logger.error("Could probably not connect to Solr - %s" % e)
            error = True
            error_text = 'The search server could not be reached, please try again later.'

    tvars = {
        'advanced_search': advanced_search,
        'current_forum': current_forum,
        'current_page': current_page,
        'date_from': date_from,
        'date_from_display': date_from_display,
        'date_to': date_to,
        'date_to_display': date_to_display,
        'error': error,
        'error_text': error_text,
        'filter_query': filter_query,
        'num_results': num_results,
        'page': page,
        'paginator': paginator,
        'search_query': search_query,
        'sort': sort,
        'results': results,
    }
    return render(request, 'search/search_forum.html', tvars)
def read(self, request):
    ip = get_client_ip(request)
    form = SoundSearchForm(SEARCH_SORT_OPTIONS_API, request.GET)
    if not form.is_valid():
        resp = rc.BAD_REQUEST
        resp.content = form.errors
        return resp

    cd = form.cleaned_data
    grouping = request.GET.get("g", "")
    if grouping == "0":
        grouping = ""

    solr = Solr(settings.SOLR_URL)
    sounds_per_page = min(int(request.GET.get('sounds_per_page', settings.SOUNDS_PER_API_RESPONSE)),
                          settings.MAX_SOUNDS_PER_API_RESPONSE)
    query = search_prepare_query(cd['q'], cd['f'],
                                 search_prepare_sort(cd['s'], SEARCH_SORT_OPTIONS_API),
                                 cd['p'], sounds_per_page, grouping=grouping)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, sounds_per_page)
        page = paginator.page(form.cleaned_data['p'])
        sounds = []
        bad_results = 0
        for doc in page['object_list']:
            try:
                sound = prepare_collection_sound(Sound.objects.select_related('user').get(id=doc['id']),
                                                 custom_fields=request.GET.get('fields', False))
                if 'more_from_pack' in doc.keys():
                    if doc['more_from_pack'] > 0:
                        link = prepend_base(reverse('api-search')
                                            + '?q=%s&f=pack:"%s" %s&s=%s&g=%s'
                                            % (my_quote(cd['q']), doc['pack_name'], my_quote(cd['f']), cd['s'], ""))
                        if request.GET.get('sounds_per_page', None):
                            link += "&sounds_per_page=" + str(request.GET.get('sounds_per_page', None))
                        if request.GET.get('fields', False):
                            link += "&fields=" + str(request.GET.get('fields', False))
                        sound['results_from_the_same_pack'] = link
                        sound['n_results_from_the_same_pack'] = doc['more_from_pack']
                sounds.append(sound)
            except:
                # This will happen if there are synchronization errors between the solr index
                # and the database. In that case sounds are omitted, and both num_results and
                # results per page might become inaccurate.
                pass

        result = {'sounds': sounds, 'num_results': paginator.count - bad_results, 'num_pages': paginator.num_pages}

        # construct previous and next urls
        if page['has_other_pages']:
            if page['has_previous']:
                result['previous'] = self.__construct_pagination_link(cd['q'], page['previous_page_number'], cd['f'],
                                                                      find_api_option(cd['s']),
                                                                      request.GET.get('sounds_per_page', None),
                                                                      request.GET.get('fields', False), grouping)
            if page['has_next']:
                result['next'] = self.__construct_pagination_link(cd['q'], page['next_page_number'], cd['f'],
                                                                  find_api_option(cd['s']),
                                                                  request.GET.get('sounds_per_page', None),
                                                                  request.GET.get('fields', False), grouping)
        add_request_id(request, result)
        # NOTE: the api_key_username expression was redacted ("******") in this snippet
        # and is kept as a placeholder string here.
        logger.info("Searching,q=" + cd['q'] + ",f=" + cd['f'] + ",p=" + str(cd['p'])
                    + ",sounds_per_page=" + str(sounds_per_page)
                    + ",api_key=" + request.GET.get("api_key", "")
                    + ",api_key_username=" + "******"
                    + ",ip=" + ip)
        return result
    except SolrException as e:
        error = "search_query %s filter_query %s sort %s error %s" % (cd['s'], cd['f'], cd['s'], e)
        raise ReturnError(500, "SearchError", {"explanation": error})
def search(request):
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    filter_query_link_more_when_grouping_packs = filter_query.replace(' ', '+')

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    sort = request.GET.get("s", None)
    sort_options = forms.SEARCH_SORT_OPTIONS_WEB

    grouping = request.GET.get("g", "1")  # Group by default
    actual_grouping = grouping

    # If the query is filtered by pack, do not collapse sounds of the same pack (makes no sense)
    # If the query is through ajax (for sources remix editing), do not collapse
    if "pack" in filter_query or request.GET.get("ajax", "") == "1":
        actual_grouping = ""

    # Set default values
    id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
    tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
    description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
    username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
    pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
    original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # Parse advanced search options
    advanced = request.GET.get("advanced", "")

    # if advanced search
    if advanced == "1":
        a_tag = request.GET.get("a_tag", "")
        a_filename = request.GET.get("a_filename", "")
        a_description = request.GET.get("a_description", "")
        a_packname = request.GET.get("a_packname", "")
        a_soundid = request.GET.get("a_soundid", "")
        a_username = request.GET.get("a_username", "")

        # If none is selected use all (so other filters can be applied)
        if a_tag or a_filename or a_description or a_packname or a_soundid or a_username:
            # Initialize all weights to 0
            id_weight = 0
            tag_weight = 0
            description_weight = 0
            username_weight = 0
            pack_tokenized_weight = 0
            original_filename_weight = 0

            # Set the weights of selected checkboxes
            if a_soundid != "":
                id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
            if a_tag != "":
                tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
            if a_description != "":
                description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
            if a_username != "":
                username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
            if a_packname != "":
                pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
            if a_filename != "":
                original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # ALLOW "q" empty queries
    #if search_query.strip() == ""

    sort = search_prepare_sort(sort, forms.SEARCH_SORT_OPTIONS_WEB)

    query = search_prepare_query(search_query,
                                 filter_query,
                                 sort,
                                 current_page,
                                 settings.SOUNDS_PER_PAGE,
                                 id_weight,
                                 tag_weight,
                                 description_weight,
                                 username_weight,
                                 pack_tokenized_weight,
                                 original_filename_weight,
                                 grouping=actual_grouping)

    solr = Solr(settings.SOLR_URL)

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False
        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        for d in docs:
            d["sound"] = allsounds[d["id"]]

        # clickusage tracking
        if settings.LOG_CLICKTHROUGH_DATA:
            request_full_path = request.get_full_path()
            # The session id of an unauthenticated user is different from the session id
            # of the same user when authenticated.
            request.session["searchtime_session_key"] = request.session.session_key
            if results.docs is not None:
                ids = [item["id"] for item in results.docs]
                logger_click.info("QUERY : %s : %s : %s : %s" %
                                  (unicode(request_full_path).encode('utf-8'),
                                   request.session.session_key,
                                   unicode(ids).encode('utf-8'),
                                   unicode(current_page).encode('utf-8')))
    except SolrException as e:
        logger.warning("search error: query: %s error %s" % (query, e))
        error = True
        error_text = 'There was an error while searching, is your query correct?'
def api_search(search_form, target_file=None, extra_parameters=False, merging_strategy='merge_optimized',
               resource=None):
    if search_form.cleaned_data['query'] is None \
            and search_form.cleaned_data['filter'] is None \
            and not search_form.cleaned_data['descriptors_filter'] \
            and not search_form.cleaned_data['target'] \
            and not target_file:
        # No input data for search, return empty results
        return [], 0, None, None, None, None, None

    if search_form.cleaned_data['query'] is None and search_form.cleaned_data['filter'] is None:
        # Standard content-based search
        try:
            results, count, note = similarity_api_search(
                target=search_form.cleaned_data['target'],
                filter=search_form.cleaned_data['descriptors_filter'],
                num_results=search_form.cleaned_data['page_size'],
                offset=(search_form.cleaned_data['page'] - 1) * search_form.cleaned_data['page_size'],
                target_file=target_file)

            gaia_ids = [result[0] for result in results]
            distance_to_target_data = None
            if search_form.cleaned_data['target'] or target_file:
                # Save sound distance to target into view class so it can be accessed by the serializer.
                # We only do that when a target is specified (otherwise there is no meaningful distance value)
                distance_to_target_data = dict(results)

            gaia_count = count
            return gaia_ids, gaia_count, distance_to_target_data, None, note, None, None

        except SimilarityException as e:
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message, resource=resource)
            elif e.status_code == 400:
                raise BadRequestException(msg=e.message, resource=resource)
            elif e.status_code == 404:
                raise NotFoundException(msg=e.message, resource=resource)
            else:
                raise ServerErrorException(msg='Similarity server error: %s' % e.message, resource=resource)
        except Exception:
            raise ServerErrorException(
                msg='The similarity server could not be reached or some unexpected error occurred.',
                resource=resource)

    elif not search_form.cleaned_data['descriptors_filter'] \
            and not search_form.cleaned_data['target'] \
            and not target_file:
        # Standard text-based search
        try:
            solr = Solr(settings.SOLR_URL)
            query = search_prepare_query(unquote(search_form.cleaned_data['query'] or ""),
                                         unquote(search_form.cleaned_data['filter'] or ""),
                                         search_form.cleaned_data['sort'],
                                         search_form.cleaned_data['page'],
                                         search_form.cleaned_data['page_size'],
                                         grouping=search_form.cleaned_data['group_by_pack'],
                                         include_facets=False)
            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids = [element['id'] for element in result.docs]
            solr_count = result.num_found

            more_from_pack_data = None
            if search_form.cleaned_data['group_by_pack']:
                # If grouping option is on, store grouping info in a dictionary that we can add when
                # serializing sounds
                more_from_pack_data = dict([
                    (int(element['id']), [element['more_from_pack'], element['pack_id'], element['pack_name']])
                    for element in result.docs
                ])
            return solr_ids, solr_count, None, more_from_pack_data, None, None, None

        except SolrException as e:
            if search_form.cleaned_data['filter'] is not None:
                raise BadRequestException(msg='Search server error: %s (please check that your filter syntax '
                                              'and field names are correct)' % e.message, resource=resource)
            raise BadRequestException(msg='Search server error: %s' % e.message, resource=resource)
        except Exception:
            raise ServerErrorException(
                msg='The search server could not be reached or some unexpected error occurred.',
                resource=resource)

    else:
        # Combined search (there is at least one of query/filter and one of descriptors_filter/target).
        # Strategies are implemented in 'combined_search_strategies'
        strategy = getattr(combined_search_strategies, merging_strategy)
        return strategy(search_form, target_file=target_file, extra_parameters=extra_parameters)
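The final branch above resolves the merging strategy by name with getattr. A minimal sketch of that dispatch pattern, using a hypothetical stand-in for the real combined_search_strategies module (all names below are illustrative, not from the Freesound codebase):

class combined_search_strategies_stub(object):
    # Hypothetical stand-in for the real 'combined_search_strategies' module.
    @staticmethod
    def merge_optimized(search_form, target_file=None, extra_parameters=None):
        return []  # a real strategy returns the 7-tuple api_search expects

def dispatch_strategy(name, search_form, target_file=None, extra_parameters=None):
    # getattr raises AttributeError for unknown strategy names, the same
    # failure mode as the dispatch in api_search above.
    strategy = getattr(combined_search_strategies_stub, name)
    return strategy(search_form, target_file=target_file, extra_parameters=extra_parameters)

print dispatch_strategy('merge_optimized', None)  # -> []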
def handle(self, *args, **options):
    # init
    solr = Solr(settings.SOLR_URL)

    # Get all solr ids
    print "Getting solr ids...",
    solr_ids = get_all_sound_ids_from_solr()
    print "done!"

    # Get all gaia ids
    print "Getting gaia ids...",
    gaia_ids = Similarity.get_all_sound_ids()
    print "done!"

    print "Getting freesound db data..."
    # Get all moderated and processed sound ids
    queryset = Sound.objects.filter(processing_state='OK', moderation_state='OK').order_by('id').only("id")
    fs_mp = [sound.id for sound in queryset]
    # Get all moderated, processed and analysed sounds
    queryset = Sound.objects.filter(processing_state='OK', moderation_state='OK',
                                    analysis_state='OK').order_by('id').only("id")
    fs_mpa = [sound.id for sound in queryset]
    print "done!"

    print "\nNumber of sounds per index:\n--------------------------"
    print "Solr index\t\t%i" % len(solr_ids)
    print "Gaia index\t\t%i" % len(gaia_ids)
    print "Freesound\t\t%i (moderated and processed)" % len(fs_mp)
    print "Freesound\t\t%i (moderated, processed and analyzed)" % len(fs_mpa)

    print "\n\n***************\nSOLR INDEX\n***************\n"
    # The nested intersection/difference expressions in the original reduce to plain set differences
    in_solr_not_in_fs = list(set(solr_ids).difference(fs_mp))
    in_fs_not_in_solr = list(set(fs_mp).difference(solr_ids))
    print "Sounds in solr but not in fs:\t%i" % len(in_solr_not_in_fs)
    print "Sounds in fs but not in solr:\t%i" % len(in_fs_not_in_solr)

    if not options['no-changes']:
        # Mark fs sounds to go processing
        if in_fs_not_in_solr:
            print "Changing is_index_dirty_state of sounds that require it"
            N = len(in_fs_not_in_solr)
            for count, sid in enumerate(in_fs_not_in_solr):
                sys.stdout.write('\r\tChanging state of sound %i of %i ' % (count + 1, N))
                sys.stdout.flush()
                sound = Sound.objects.get(id=sid)
                sound.set_single_field('is_index_dirty', True)

        # Delete sounds from solr that are not in the db
        if in_solr_not_in_fs:
            print "\nDeleting sounds that should not be in solr"
            N = len(in_solr_not_in_fs)
            for count, sid in enumerate(in_solr_not_in_fs):
                sys.stdout.write('\r\tDeleting sound %i of %i ' % (count + 1, N))
                sys.stdout.flush()
                solr.delete_by_id(sid)

    print "\n***************\nGAIA INDEX\n***************\n"
    in_gaia_not_in_fs = list(set(gaia_ids).difference(fs_mpa))
    in_fs_not_in_gaia = list(set(fs_mpa).difference(gaia_ids))
    print "Sounds in gaia but not in fs:\t%i" % len(in_gaia_not_in_fs)
    print "Sounds in fs but not in gaia:\t%i (only considering sounds correctly analyzed)" % len(in_fs_not_in_gaia)
    #Similarity.save()

    if not options['no-changes']:
        # Mark fs sounds to go processing
        if in_fs_not_in_gaia:
            print "Changing similarity_state of sounds that require it"
            N = len(in_fs_not_in_gaia)
            for count, sid in enumerate(in_fs_not_in_gaia):
                sys.stdout.write('\r\tChanging state of sound %i of %i ' % (count + 1, N))
                sys.stdout.flush()
                sound = Sound.objects.get(id=sid)
                sound.set_similarity_state('PE')

        # Delete sounds from gaia that are not in the db
        if in_gaia_not_in_fs:
            print "\nDeleting sounds that should not be in gaia"
            N = len(in_gaia_not_in_fs)
            for count, sid in enumerate(in_gaia_not_in_fs):
                sys.stdout.write('\r\tDeleting sound %i of %i ' % (count + 1, N))
                sys.stdout.flush()
                Similarity.delete(sid)
def search(request):
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    filter_query_link_more_when_grouping_packs = filter_query.replace(' ', '+')

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    sort = request.GET.get("s", None)
    sort_options = forms.SEARCH_SORT_OPTIONS_WEB

    grouping = request.GET.get("g", "1")  # Group by default
    actual_grouping = grouping

    # If the query is filtered by pack, do not collapse sounds of the same pack (makes no sense)
    # If the query is through ajax (for sources remix editing), do not collapse
    if "pack" in filter_query or request.GET.get("ajax", "") == "1":
        actual_grouping = ""

    # Set default values
    id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
    tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
    description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
    username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
    pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
    original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # Parse advanced search options
    advanced = request.GET.get("advanced", "")

    # if advanced search
    if advanced == "1":
        a_tag = request.GET.get("a_tag", "")
        a_filename = request.GET.get("a_filename", "")
        a_description = request.GET.get("a_description", "")
        a_packname = request.GET.get("a_packname", "")
        a_soundid = request.GET.get("a_soundid", "")
        a_username = request.GET.get("a_username", "")

        # If none is selected use all (so other filters can be applied)
        if a_tag or a_filename or a_description or a_packname or a_soundid or a_username:
            # Initialize all weights to 0
            id_weight = 0
            tag_weight = 0
            description_weight = 0
            username_weight = 0
            pack_tokenized_weight = 0
            original_filename_weight = 0

            # Set the weights of selected checkboxes
            if a_soundid != "":
                id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
            if a_tag != "":
                tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
            if a_description != "":
                description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
            if a_username != "":
                username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
            if a_packname != "":
                pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
            if a_filename != "":
                original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # ALLOW "q" empty queries
    #if search_query.strip() == ""

    sort = search_prepare_sort(sort, forms.SEARCH_SORT_OPTIONS_WEB)

    logger.info(u'Search (%s)' % json.dumps({
        'ip': get_client_ip(request),
        'query': search_query,
        'filter': filter_query,
        'username': request.user.username,
        'page': current_page,
        'sort': sort[0],
        'group_by_pack': actual_grouping,
        'advanced': json.dumps({
            'search_in_tag': a_tag,
            'search_in_filename': a_filename,
            'search_in_description': a_description,
            'search_in_packname': a_packname,
            'search_in_soundid': a_soundid,
            'search_in_username': a_username
        }) if advanced == "1" else ""
    }))

    query = search_prepare_query(search_query,
                                 filter_query,
                                 sort,
                                 current_page,
                                 settings.SOUNDS_PER_PAGE,
                                 id_weight,
                                 tag_weight,
                                 description_weight,
                                 username_weight,
                                 pack_tokenized_weight,
                                 original_filename_weight,
                                 grouping=actual_grouping)

    solr = Solr(settings.SOLR_URL)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False

        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        # allsounds will contain info from all the sounds returned by bulk_query_id. This should
        # be all sounds in docs, but if solr and db are not synchronised, it might happen that there
        # are ids in docs which are not found in bulk_query_id. To avoid problems we remove elements
        # in docs that have not been loaded in allsounds.
        docs = [doc for doc in docs if doc["id"] in allsounds]
        for d in docs:
            d["sound"] = allsounds[d["id"]]

        # clickusage tracking
        if settings.LOG_CLICKTHROUGH_DATA:
            request_full_path = request.get_full_path()
            # The session id of an unauthenticated user is different from the session id of the same
            # user when authenticated.
            request.session["searchtime_session_key"] = request.session.session_key
            if results.docs is not None:
                ids = [item["id"] for item in results.docs]
                logger_click.info("QUERY : %s : %s : %s : %s" %
                                  (unicode(request_full_path).encode('utf-8'),
                                   request.session.session_key,
                                   unicode(ids).encode('utf-8'),
                                   unicode(current_page).encode('utf-8')))
    except SolrException as e:
        logger.warning("search error: query: %s error %s" % (query, e))
        error = True
        error_text = 'There was an error while searching, is your query correct?'
def get_stream_sounds(user, time_lapse):
    solr = Solr(settings.SOLR_URL)
    sort_str = search_prepare_sort("created desc", SEARCH_SORT_OPTIONS_WEB)

    #
    # USERS FOLLOWING
    #
    users_following = get_users_following(user)
    users_sounds = []
    for user_following in users_following:
        # NOTE: the username expression was redacted ('******') in the source;
        # 'user_following.username' is an assumption about what it contained.
        filter_str = "username:" + user_following.username + " created:" + time_lapse
        query = search_prepare_query("", filter_str, sort_str, 1, SOLR_QUERY_LIMIT_PARAM,
                                     grouping=False, include_facets=False)
        result = SolrResponseInterpreter(solr.select(unicode(query)))
        if result.num_rows != 0:
            more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)
            # the sorting only works if done like this!
            more_url_params = [urllib.quote(filter_str), urllib.quote(sort_str[0])]
            # this is the same link but for the email has to be "quoted"
            more_url = u"?f=" + filter_str + u"&s=" + sort_str[0]
            # more_url_quoted = urllib.quote(more_url)
            sound_ids = [element['id'] for element in result.docs]
            sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids)
            new_count = more_count + len(sound_ids)
            users_sounds.append(((user_following, False), sound_objs, more_url_params, more_count, new_count))

    #
    # TAGS FOLLOWING
    #
    tags_following = get_tags_following(user)
    tags_sounds = []
    for tag_following in tags_following:
        tags = tag_following.split(" ")
        tag_filter_query = ""
        for tag in tags:
            tag_filter_query += "tag:" + tag + " "
        tag_filter_str = tag_filter_query + " created:" + time_lapse
        query = search_prepare_query("", tag_filter_str, sort_str, 1, SOLR_QUERY_LIMIT_PARAM,
                                     grouping=False, include_facets=False)
        result = SolrResponseInterpreter(solr.select(unicode(query)))
        if result.num_rows != 0:
            more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)
            # the sorting only works if done like this!
            more_url_params = [urllib.quote(tag_filter_str), urllib.quote(sort_str[0])]
            # this is the same link but for the email has to be "quoted"
            more_url = u"?f=" + tag_filter_str + u"&s=" + sort_str[0]
            # more_url_quoted = urllib.quote(more_url)
            sound_ids = [element['id'] for element in result.docs]
            sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids)
            new_count = more_count + len(sound_ids)
            tags_sounds.append((tags, sound_objs, more_url_params, more_count, new_count))

    return users_sounds, tags_sounds
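For reference, a small illustration (with assumed example values, since time_lapse comes from the caller) of the filter string the loop above builds for a followed tag combination:

# Illustrative only: 'time_lapse' is assumed to be a Solr date-range literal.
time_lapse = "[NOW-7DAY TO NOW]"
tags = ["piano", "loop"]
tag_filter_query = ""
for tag in tags:
    tag_filter_query += "tag:" + tag + " "
tag_filter_str = tag_filter_query + " created:" + time_lapse
print tag_filter_str  # -> tag:piano tag:loop  created:[NOW-7DAY TO NOW]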
def search_forum(request):
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    current_forum_name_slug = request.GET.get("forum", "").strip()  # for context sensitive search
    if current_forum_name_slug:
        current_forum = get_object_or_404(forum.models.Forum.objects, name_slug=current_forum_name_slug)
    else:
        current_forum = None
    sort = ["thread_created desc"]

    # Parse advanced search options
    advanced_search = request.GET.get("advanced_search", "")
    date_from = request.GET.get("dt_from", "")
    try:
        df_parsed = datetime.datetime.strptime(date_from, "%Y-%m-%d")
        date_from_display = df_parsed.strftime("%d-%m-%Y")
    except ValueError:
        date_from = ""
        date_from_display = "Choose a Date"
    date_to = request.GET.get("dt_to", "")
    try:
        dt_parsed = datetime.datetime.strptime(date_to, "%Y-%m-%d")
        date_to_display = dt_parsed.strftime("%d-%m-%Y")
    except ValueError:
        date_to = ""
        date_to_display = "Choose a Date"

    if search_query.startswith("search in"):
        search_query = ""

    error = False
    error_text = ""
    paginator = None
    num_results = None
    page = None
    results = []
    if search_query.strip() != "" or filter_query:
        # add current forum
        if current_forum:
            filter_query += "forum_name_slug:" + current_forum.name_slug

        # add date range (parenthesised: the original condition relied on 'and'
        # binding tighter than 'or', which applied the range outside advanced search)
        if advanced_search == "1" and (date_from != "" or date_to != ""):
            filter_query = __add_date_range(filter_query, date_from, date_to)

        query = SolrQuery()
        query.set_dismax_query(search_query,
                               query_fields=[("thread_title", 4),
                                             ("post_body", 3),
                                             ("thread_author", 3),
                                             ("post_author", 3),
                                             ("forum_name", 2)])
        query.set_highlighting_options_default(field_list=["post_body"],
                                               fragment_size=200,
                                               alternate_field="post_body",  # TODO: revise this param
                                               require_field_match=False,
                                               pre="<strong>",
                                               post="</strong>")
        query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                                rows=settings.SOUNDS_PER_PAGE,
                                field_list=["id",
                                            "forum_name",
                                            "forum_name_slug",
                                            "thread_id",
                                            "thread_title",
                                            "thread_author",
                                            "thread_created",
                                            "post_body",
                                            "post_author",
                                            "post_created",
                                            "num_posts"],
                                filter_query=filter_query,
                                sort=sort)

        query.set_group_field("thread_title_grouped")
        query.set_group_options(group_limit=30)

        solr = Solr(settings.SOLR_FORUM_URL)

        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
            num_results = paginator.count
            page = paginator.page(current_page)
            error = False
        except SolrException as e:
            logger.warning("search error: query: %s error %s" % (query, e))
            error = True
            error_text = 'There was an error while searching, is your query correct?'
        except Exception as e:
            logger.error("Could probably not connect to Solr - %s" % e)
            error = True
            error_text = 'The search server could not be reached, please try again later.'

    tvars = {
        'advanced_search': advanced_search,
        'current_forum': current_forum,
        'current_page': current_page,
        'date_from': date_from,
        'date_from_display': date_from_display,
        'date_to': date_to,
        'date_to_display': date_to_display,
        'error': error,
        'error_text': error_text,
        'filter_query': filter_query,
        'num_results': num_results,
        'page': page,
        'paginator': paginator,
        'search_query': search_query,
        'sort': sort,
        'results': results,
    }
    return render(request, 'search/search_forum.html', tvars)
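The __add_date_range helper used above is not shown in this document. A plausible sketch of what it might do, assuming it appends a Solr date-range clause on thread_created (the field name and boundary handling are assumptions, not the real implementation):

def __add_date_range(filter_query, date_from, date_to):
    # Hypothetical reconstruction; the real helper is defined elsewhere.
    if filter_query:
        filter_query += " "
    start = date_from + "T00:00:00Z" if date_from else "*"
    end = date_to + "T23:59:59Z" if date_to else "*"
    return filter_query + "thread_created:[" + start + " TO " + end + "]"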
def check_if_sound_exists_in_solr(sound):
    solr = Solr(settings.SOLR_URL)
    response = SolrResponseInterpreter(
        solr.select(unicode(search_prepare_query(
            '', 'id:%i' % sound.id,
            search_prepare_sort('created asc', SEARCH_SORT_OPTIONS_WEB),
            1, 1))))
    return response.num_found > 0
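A hedged usage sketch: this check can guard re-indexing, for example together with add_sounds_to_solr from this same listing (the surrounding scheduling logic is hypothetical):

# Hypothetical caller: only push the sound to Solr if it is not already indexed.
if not check_if_sound_exists_in_solr(sound):
    add_sounds_to_solr([sound])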
def filter_both(search_form, target_file=None, extra_parameters=None):
    """
    The 'filter both' strategy first gets some results from solr and then checks whether they are also
    valid results for a gaia query, or the other way around. Both gaia and solr can restrict a query to
    a particular set of results, but there are limitations in the length of the resulting url and in the
    number of OR clauses that solr can support.
    """
    if not extra_parameters:
        extra_parameters = dict()
    solr_filter_id_block_size = extra_parameters.get('cs_solr_filter_id_block_size', 350)
    solr_filter_id_max_pages = extra_parameters.get('cs_solr_filter_id_max_pages', 7)
    solr_max_pages = extra_parameters.get('cs_max_solr_pages', 7)
    solr_page_size = extra_parameters.get('cs_solr_page_size', 1000)
    gaia_filter_id_block_size = extra_parameters.get('cs_gaia_filter_id_block_size', 350)
    gaia_filter_id_max_pages = extra_parameters.get('cs_gaia_filter_id_max_pages', 7)
    gaia_max_pages = extra_parameters.get('cs_max_gaia_pages', 1)
    gaia_page_size = extra_parameters.get('cs_gaia_page_size', 9999999)  # We can get ALL gaia results at once

    if search_form.cleaned_data['target'] or target_file:
        # First search into gaia and then into solr (get all gaia results)
        gaia_ids, gaia_count, distance_to_target_data, note = get_gaia_results(
            search_form, target_file, page_size=gaia_page_size, max_pages=gaia_max_pages)
        valid_ids_pages = [
            gaia_ids[i:i + solr_filter_id_block_size]
            for i in range(0, len(gaia_ids), solr_filter_id_block_size)
            if (i / solr_filter_id_block_size) < solr_filter_id_max_pages
        ]
        solr_ids = list()
        solr = Solr(settings.SOLR_URL)
        for valid_ids_page in valid_ids_pages:
            page_solr_ids, solr_count = get_solr_results(
                search_form, page_size=len(valid_ids_page), max_pages=1,
                valid_ids=valid_ids_page, solr=solr)
            solr_ids += page_solr_ids
        if gaia_count <= solr_filter_id_block_size * solr_filter_id_max_pages:
            # Got complete results, maybe we should log that?
            #print 'COMPLETE results (starting with gaia)'
            pass
    else:
        # First search into solr and then into gaia.
        # These queries are SLOW because we need to get many pages from solr.
        solr_ids, solr_count = get_solr_results(search_form, page_size=solr_page_size,
                                                max_pages=solr_max_pages)
        # Now we should split solr ids in blocks and iteratively query gaia restricting the results to
        # those ids present in the current block. However, given that gaia results can be retrieved all
        # at once very quickly, we optimize this bit by retrieving them all at once and avoiding many
        # requests to the similarity server.
        gaia_ids, gaia_count, distance_to_target_data, note = get_gaia_results(
            search_form, target_file, page_size=gaia_page_size, max_pages=gaia_max_pages)
        '''
        # That would be the code without the optimization:
        valid_ids_pages = [solr_ids[i:i+gaia_filter_id_block_size]
                           for i in range(0, len(solr_ids), gaia_filter_id_block_size)
                           if (i/gaia_filter_id_block_size) < gaia_filter_id_max_pages]
        gaia_ids = list()
        distance_to_target_data = None
        note = None
        for valid_ids_page in valid_ids_pages:
            page_gaia_ids, page_gaia_count, page_distance_to_target_data, note = \
                get_gaia_results(search_form, target_file, page_size=len(valid_ids_page),
                                 max_pages=1, valid_ids=valid_ids_page)
            gaia_ids += page_gaia_ids
        '''
        if solr_count <= solr_page_size * solr_max_pages and gaia_count < gaia_page_size * gaia_max_pages:
            # Got complete results, maybe we should log that?
            #print 'COMPLETE results (starting with solr)'
            pass

    if search_form.cleaned_data['target'] or target_file:
        # Combined search, sort by gaia ids
        results_a = gaia_ids
        results_b = solr_ids
    else:
        # Combined search, sort by solr ids
        results_a = solr_ids
        results_b = gaia_ids

    # Combine results
    results_b_set = set(results_b)
    combined_ids = [id for id in results_a if id in results_b_set]
    combined_count = len(combined_ids)
    return (combined_ids[(search_form.cleaned_data['page'] - 1) * search_form.cleaned_data['page_size']:
                         search_form.cleaned_data['page'] * search_form.cleaned_data['page_size']],
            combined_count, distance_to_target_data, None, note, None, None)
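A minimal sketch (with made-up ids) of the combination step at the end of filter_both: the two id lists are intersected, and the ordering of results_a (the relevance ordering of whichever engine ran first) is preserved:

# Illustrative values only.
results_a = [10, 7, 3, 5]         # e.g. gaia ids, best match first
results_b_set = set([3, 10, 99])  # e.g. ids also returned by solr
combined_ids = [id for id in results_a if id in results_b_set]
assert combined_ids == [10, 3]    # the order of results_a wins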
def tags(request, multiple_tags=None):
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []
    multiple_tags = sorted(filter(lambda x: x, multiple_tags))

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1

    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    if multiple_tags:
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                            rows=settings.SOUNDS_PER_PAGE,
                            field_list=["id"],
                            sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)
    query.set_group_field(group_field="grouping_pack")
    # group_rows sets how many results from the same group are taken into account for computing the facets
    query.set_group_options(group_func=None,
                            group_query=None,
                            group_rows=10,
                            group_start=0,
                            group_limit=1,
                            group_offset=0,
                            group_sort=None,
                            group_sort_ingroup=None,
                            group_format='grouped',
                            group_main=False,
                            group_num_groups=True,
                            group_cache_percent=0,
                            group_truncate=True)

    page = None
    num_results = 0
    tags = []
    error = False
    docs = {}
    non_grouped_number_of_results = 0
    paginator = None
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]
        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        for d in docs:
            d["sound"] = allsounds[d["id"]]
    except SolrException as e:
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
    except Exception:
        error = True

    slash_tag = "/".join(multiple_tags)

    follow_tags_url = ''
    unfollow_tags_url = ''
    show_unfollow_button = False
    if slash_tag:
        follow_tags_url = reverse('follow-tags', args=[slash_tag])
        unfollow_tags_url = reverse('unfollow-tags', args=[slash_tag])
        if request.user.is_authenticated:
            show_unfollow_button = follow_utils.is_user_following_tag(request.user, slash_tag)

    tvars = {'show_unfollow_button': show_unfollow_button,
             'multiple_tags': multiple_tags,
             'follow_tags_url': follow_tags_url,
             'unfollow_tags_url': unfollow_tags_url,
             'error': error,
             'tags': tags,
             'slash_tag': slash_tag,
             'num_results': num_results,
             'non_grouped_number_of_results': non_grouped_number_of_results,
             'docs': docs,
             'paginator': paginator,
             'page': page,
             'current_page': current_page}
    return render(request, 'sounds/tags.html', tvars)
def add_sounds_to_solr(sounds):
    solr = Solr(settings.SOLR_URL)
    documents = [convert_to_solr_document(s) for s in sounds]
    console_logger.info("Adding %d sounds to solr index" % len(documents))
    logger.info("Adding %d sounds to solr index" % len(documents))
    solr.add(documents)
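A hypothetical usage sketch, reusing the is_index_dirty flag set by the consistency-check command above (the exact re-indexing loop in the real codebase may differ):

# Assumption: sounds marked dirty are re-sent to Solr in one batch.
sounds_to_index = Sound.objects.filter(processing_state='OK',
                                       moderation_state='OK',
                                       is_index_dirty=True)
add_sounds_to_solr(list(sounds_to_index))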