def build_search_kwargs(user_q, request, types, staff, orderby):
    """Build the final query and the keyword arguments for a Whoosh search.

    For non-staff callers the query is narrowed to documents that are
    public (``public == 't'``), listed for the requesting user's username,
    or listed for one of the groups returned by
    ``request.user.groups.all()``.  When ``types`` is non-empty the query is
    additionally restricted to those resource types.

    ``orderby`` names the sort field: every ``-`` is removed to obtain the
    field name, and the presence of a ``-`` anywhere in the string selects
    descending order.

    Returns a ``(query, search_kwargs)`` tuple.  ``search_kwargs`` always
    collapses hits on ``vendor_name`` with a limit of one, ordered by
    ``order_by_version``.
    """
    if not staff:
        user = request.user
        # Any one of these visibility terms is enough to see a document.
        visible_to = [Term('public', 't'), Term('users', user.username)]
        visible_to += [
            Term('groups', group.name) for group in user.groups.all()
        ]
        user_q = And([user_q, Or(visible_to)])

    type_count = len(types) if types else 0
    if type_count > 0:
        type_terms = [Term('type', resource_type) for resource_type in types]
        user_q = And([user_q, Or(type_terms)])

    sort_field = orderby.replace('-', '')
    descending = '-' in orderby
    search_kwargs = {
        'sortedby': [FieldFacet(sort_field, reverse=descending)],
        'collapse': FieldFacet('vendor_name'),
        'collapse_limit': 1,
        'collapse_order': FunctionFacet(order_by_version),
    }
    return (user_q, search_kwargs)
def query(q='', fields=['content'], **kwargs):
    """
    Query the index, looking for a match in the specified fields.

    The words of ``q`` are combined with OR (``'foo bar'`` is searched as
    ``'foo OR bar'``).  Hits are sorted by the ``type`` field, then by match
    score, then by ``content_length`` in descending order.

    :param q: the raw query string.
    :param fields: names of the schema fields to search.  The default list
        is only read, never mutated.
    :param kwargs: extra keyword arguments forwarded to ``searcher.search``.
    :returns: the Whoosh results object, or an empty list when the index
        does not exist.  The searcher that produced the results is not
        closed here.
    """
    # Do not perform any queries if the index does not exist.
    if not index_exists():
        return []

    ix = init_index()
    searcher = ix.searcher()

    # Sort by: post type, match score, content length (longest first).
    sort_by = [
        FieldFacet("type"),
        ScoreFacet(),
        FieldFacet("content_length", reverse=True),
    ]

    parsed = MultifieldParser(fieldnames=fields,
                              schema=ix.schema,
                              group=OrGroup).parse(q)
    results = searcher.search(parsed,
                              sortedby=sort_by,
                              limit=settings.SEARCH_LIMIT,
                              terms=True,
                              **kwargs)

    # Fragment size, and the amount of context kept before and after a match.
    results.fragmenter.maxchars = 100
    results.fragmenter.surround = 100
    return results
def _processSearch(self, data):
    """Run one search request and send the outcome through ``self.index_p``.

    ``data`` is a 5-tuple ``(threadident, source, query, n, gb)``:

    * ``threadident`` is echoed back with the reply.
    * ``source`` is added as a ``source`` term filter (channel prefixes
      stripped, lower-cased) unless ``query`` matches ``SOURCE_REGEX``.
    * ``query`` is parsed with ``self.qp``.
    * ``n`` is the hit limit for an ungrouped search.
    * ``gb`` is the field to group by; when falsy a plain search is run.

    On success ``(threadident, results)`` is sent, where ``results`` is a
    list of ``(timestamp, nick, source, content)`` tuples for a plain search
    or ``(count, value)`` tuples for a grouped one.  If the search raises,
    ``(threadident, None)`` is sent so the caller can report the error, and
    the traceback is printed.
    """
    # Unpack outside the try block: the error handler needs ``threadident``,
    # so it must be bound before anything that is allowed to fail.
    threadident, source, query, n, gb = data
    try:
        qp = self.qp.parse(query)
        if not SOURCE_REGEX.match(query):
            qp = qp & Term(u"source",
                           source.lstrip(CHANNEL_PREFIXES).lower())
        if not gb:
            hits = self.searcher.search(qp, limit=n, groupedby=gb)
            results = [(item["timestamp"], item["nick"], item["source"],
                        item["content"]) for item in hits]
        else:
            counts = self.searcher.search(
                qp, groupedby=FieldFacet(gb, maptype=Count)).groups()
            # ``items()`` works on both Python 2 and Python 3 dictionaries.
            results = [(count, user) for user, count in counts.items()]
    except Exception:
        # Pass None back to the caller so a user error can be displayed.
        self.index_p.send((threadident, None))
        print_exc()
        prnt("EXCEPTION IN SEARCH")
    else:
        self.index_p.send((threadident, results))
def query_page(self, term, page_num, page_len, sort_type):
    """Return one page of search results for ``term``.

    :param term: query string, parsed with ``self.qp``.
    :param page_num: page number passed to ``search_page`` as ``pagenum``.
    :param page_len: page length passed to ``search_page`` as ``pagelen``.
    :param sort_type: selects the ordering:

        * ``1`` - ``ScoreFacet()``
        * ``2`` - the ``publish_time`` field, descending
        * ``3`` - ``ScoreAndTimeFacet()``

    :returns: ``(self._results_todata(page), page.results.runtime)``.
    :raises ValueError: if ``sort_type`` is not 1, 2 or 3.
    """
    # Reject unknown sort types up front; without this check the search
    # result would never be assigned and the return line would fail with
    # an UnboundLocalError.
    if sort_type not in (1, 2, 3):
        raise ValueError(
            "unsupported sort_type %r (expected 1, 2 or 3)" % (sort_type,))

    with self.ix.searcher() as searcher:
        # NOTE(review): the previous inline comments labelled type 2 as
        # "custom hot value" and type 3 as "time", which looks swapped
        # relative to the facets actually used - confirm the intent.
        if sort_type == 1:
            facet = ScoreFacet()
        elif sort_type == 2:
            facet = FieldFacet("publish_time", reverse=True)
        else:
            facet = ScoreAndTimeFacet()

        results = searcher.search_page(self.qp.parse(term),
                                       pagenum=page_num,
                                       pagelen=page_len,
                                       sortedby=facet)
        # Convert while the searcher is still open.
        return self._results_todata(results), results.results.runtime
def preform_search(query, fields=['content'], **kwargs):
    """
    Query the index, looking for a match in the specified fields.

    The words of ``query`` are combined with OR (``'foo bar'`` is searched
    as ``'foo OR bar'``).  No explicit sort order is passed to the search.

    :param query: the raw query string.
    :param fields: names of the schema fields to search.  The default list
        is only read, never mutated.
    :param kwargs: extra keyword arguments forwarded to ``searcher.search``.
    :returns: the Whoosh results object, or an empty list when the index
        does not exist or ``query`` is shorter than
        ``settings.SEARCH_CHAR_MIN``.  The searcher that produced the
        results is not closed here.
    """
    # Do not perform any queries if the index does not exist or the query
    # is too short.
    if not index_exists() or len(query) < settings.SEARCH_CHAR_MIN:
        return []

    ix = init_index()
    searcher = ix.searcher()

    parsed = MultifieldParser(fieldnames=fields,
                              schema=ix.schema,
                              group=OrGroup).parse(query)
    results = searcher.search(parsed,
                              limit=settings.SEARCH_LIMIT,
                              terms=True,
                              **kwargs)

    # Fragment size, and the amount of context kept before and after a match.
    results.fragmenter.maxchars = 100
    results.fragmenter.surround = 100

    logger.info("Preformed index search")
    return results
def search1(name):
    """Search the ``comment`` index for phones whose name matches ``name``.

    ``name`` is parsed as a query against the ``phone_name`` field.  All
    matches are returned (no limit), sorted by ``price`` in descending
    order.

    :returns: a list with one ``dict`` of stored fields per hit.
    """
    # Raw string: the backslashes are literal path separators, not escape
    # sequences (a plain literal triggers invalid-escape warnings).
    ix = open_dir(r"F:\PythonFile\Recommend\spider\search\index",
                  indexname='comment')
    with ix.searcher() as searcher:
        myquery = QueryParser("phone_name", ix.schema).parse(name)
        by_price = FieldFacet("price", reverse=True)
        results = searcher.search(myquery, limit=None, sortedby=by_price)
        # Copy the hits into plain dicts while the searcher is still open.
        return [dict(hit) for hit in results]
def get_results(self):
    """Return every hit for ``self.query``, one per ``verb_form``.

    ``self.search()`` is called first when no searcher has been set yet.
    Hits are sorted by the ``verb_form`` field passed through
    ``self.sort_key``, and collapsed so that at most one hit per
    ``verb_form`` value is kept.  The number of hits is stored in
    ``self.num_results``.
    """
    if self.searcher is None:
        self.search()

    ordering = TranslateFacet(self.sort_key, FieldFacet("verb_form"))
    hits = self.searcher.search(
        self.query,
        limit=None,
        sortedby=ordering,
        collapse='verb_form',
        collapse_limit=1,
    )
    self.num_results = len(hits)
    return hits
def _get_sorting( self, sort: Optional[List[str]] = None, ) -> Optional[List[FieldFacet]]: """Get the appropriate field facets for sorting.""" if not sort: return None facets = [] allowed_sorters = {"type", "change"} for sorter in sort: _field = sorter.lstrip("+-") if _field not in allowed_sorters: continue reverse = sorter.startswith("-") facets.append(FieldFacet(_field, reverse=reverse)) return facets
def build_sort_args(sort_spec):
    """Convert ``sort_spec`` into keyword arguments for a Whoosh search.

    Returns an empty dict when ``sort_spec.fields`` is empty.  When
    ``sort_spec.reverse`` is iterable, each field key is paired with its own
    reverse flag and wrapped in a ``FieldFacet``.  Otherwise the field keys
    are passed through unchanged together with the single ``reverse`` value.
    """
    if not sort_spec.fields:
        return {}

    reverse = sort_spec.reverse
    if not isinstance(reverse, Iterable):
        # A single reverse value lets us name the fields directly.
        return {
            'sortedby': sort_spec.get_field_keys(),
            'reverse': reverse,
        }

    # Per-field reverse values require FieldFacet objects.
    facets = []
    for field_key, field_reverse in zip(sort_spec.get_field_keys(), reverse):
        facets.append(FieldFacet(field_key, reverse=field_reverse))
    return {'sortedby': facets}
def _find_latest_revids(self, index, query=None):
    """
    find the latest revids using the all-revs index

    :param index: an up-to-date and open ALL_REVS index
    :param query: query to search only specific revisions (optional, default: all items/revisions)
    :returns: a list of the latest revids
    """
    if query is None:
        query = Every()
    with index.searcher() as searcher:
        result = searcher.search(query, groupedby=ITEMID,
                                 sortedby=FieldFacet(MTIME, reverse=True))
        by_item = result.groups(ITEMID)

        def mtime_of(docid):
            return searcher.stored_fields(docid)[MTIME]

        # Pick the newest document of every item directly.  max() returns
        # the first maximal element, which is the same document a stable
        # descending sort would have placed first.
        return [
            searcher.stored_fields(max(docids, key=mtime_of))[REVID]
            for docids in by_item.values()
        ]
def search(self, query, dimension=None, locale=None, limit=None):
    """Perform a search using Whoosh.

    ``query`` is parsed against the ``value`` field.  If ``dimension`` is
    set then only that dimension is searched; if ``locale`` is set the hits
    are restricted to that locale as well.  Hits are sorted by ``value`` and
    capped at ``limit``, falling back to ``self.default_limit`` when
    ``limit`` is falsy.

    Returns a ``WhooshSearchResult`` wrapping the Whoosh results.
    """
    parsed = QueryParser("value", schema=self.index.schema).parse(query)

    # FIXME: set locale filter
    for field_name, wanted in (('dimension', dimension), ('locale', locale)):
        if wanted:
            parsed = parsed & Term(field_name, str(wanted))

    max_hits = limit or self.default_limit
    hits = self.searcher.search(parsed,
                                limit=max_hits,
                                sortedby=FieldFacet("value"))
    return WhooshSearchResult(self.browser, hits)
def search(
    self,
    query_string,
    sort_by=None,
    start_offset=0,
    end_offset=None,
    fields="",
    highlight=False,
    facets=None,
    date_facets=None,
    query_facets=None,
    narrow_queries=None,
    spelling_query=None,
    within=None,
    dwithin=None,
    distance_point=None,
    models=None,
    limit_to_registered_models=None,
    result_class=None,
    **kwargs
):
    """Run ``query_string`` against the Whoosh index and return one page.

    :param query_string: the query; empty and one-character (non ``*``)
        queries return no results.
    :param sort_by: field names, each optionally prefixed with ``-``; all
        of them must use the same direction.
    :param start_offset: passed with ``end_offset`` to ``calculate_page``.
    :param facets: field names, each turned into an overlapping
        ``FieldFacet`` and passed to Whoosh as ``groupedby``.
    :param date_facets: unsupported; a warning is issued when given.
    :param query_facets: unsupported; a warning is issued when given.
    :param narrow_queries: set of query strings that all have to match; a
        model restriction is added to it when models are limited.
    :param models: models to restrict the search to.
    :param limit_to_registered_models: defaults to the
        ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` setting (``True``).
    :returns: the dict built by ``_process_results``, or a dict with empty
        ``results`` and ``hits == 0`` when nothing can match.
    :raises SearchBackendError: if ``sort_by`` mixes sort directions.
    :raises ValueError: from ``search_page`` unless ``silently_fail`` is set.
    """
    if not self.setup_complete:
        self.setup()

    # A zero length query should return no results.
    if len(query_string) == 0:
        return {"results": [], "hits": 0}

    query_string = force_text(query_string)

    # A one-character query (non-wildcard) gets nabbed by a stopwords
    # filter and should yield zero results.
    if len(query_string) <= 1 and query_string != "*":
        return {"results": [], "hits": 0}

    reverse = False

    if sort_by is not None:
        # Determine if we need to reverse the results and if Whoosh can
        # handle what it's being asked to sort by. Reversing is an
        # all-or-nothing action, unfortunately.
        reverse_counter = sum(
            1 for order_by in sort_by if order_by.startswith("-")
        )

        if reverse_counter and reverse_counter != len(sort_by):
            raise SearchBackendError(
                "Whoosh requires all order_by fields"
                " to use the same sort direction"
            )

        # Every field shares one direction here, so a single flag suffices.
        reverse = reverse_counter > 0
        sort_by = [
            order_by[1:] if order_by.startswith("-") else order_by
            for order_by in sort_by
        ]

    if facets is not None:
        facets = [FieldFacet(facet, allow_overlap=True) for facet in facets]

    if date_facets is not None:
        warnings.warn(
            "Whoosh does not handle date faceting.", Warning, stacklevel=2
        )

    if query_facets is not None:
        warnings.warn(
            "Whoosh does not handle query faceting.", Warning, stacklevel=2
        )

    narrowed_results = None
    self.index = self.index.refresh()

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(
            settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True
        )

    if models and len(models):
        model_choices = sorted(get_model_ct(model) for model in models)
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if len(model_choices) > 0:
        if narrow_queries is None:
            narrow_queries = set()

        narrow_queries.add(
            " OR ".join(["%s:%s" % (DJANGO_CT, rm) for rm in model_choices])
        )

    narrow_searcher = None
    searcher = None

    # Both searchers are released in the ``finally`` clause so that the
    # early returns and error paths below cannot leak them.
    try:
        if narrow_queries is not None:
            # Potentially expensive? I don't see another way to do it in
            # Whoosh...
            narrow_searcher = self.index.searcher()

            for nq in narrow_queries:
                recent_narrowed_results = narrow_searcher.search(
                    self.parser.parse(force_text(nq)), limit=None
                )

                if len(recent_narrowed_results) <= 0:
                    return {"results": [], "hits": 0}

                if narrowed_results is not None:
                    narrowed_results.filter(recent_narrowed_results)
                else:
                    narrowed_results = recent_narrowed_results

        self.index = self.index.refresh()

        if not self.index.doc_count():
            # Empty index: nothing to search, only offer a suggestion.
            if self.include_spelling:
                if spelling_query:
                    spelling_suggestion = self.create_spelling_suggestion(
                        spelling_query
                    )
                else:
                    spelling_suggestion = self.create_spelling_suggestion(
                        query_string
                    )
            else:
                spelling_suggestion = None

            return {
                "results": [],
                "hits": 0,
                "spelling_suggestion": spelling_suggestion,
            }

        searcher = self.index.searcher()
        parsed_query = self.parser.parse(query_string)

        # In the event of an invalid/stopworded query, recover gracefully.
        if parsed_query is None:
            return {"results": [], "hits": 0}

        page_num, page_length = self.calculate_page(start_offset, end_offset)

        search_kwargs = {
            "pagelen": page_length,
            "sortedby": sort_by,
            "reverse": reverse,
            'groupedby': facets,
        }

        # Handle the case where the results have been narrowed.
        if narrowed_results is not None:
            search_kwargs["filter"] = narrowed_results

        try:
            raw_page = searcher.search_page(
                parsed_query, page_num, **search_kwargs
            )
        except ValueError:
            if not self.silently_fail:
                raise

            return {"results": [], "hits": 0, "spelling_suggestion": None}

        # Because as of Whoosh 2.5.1, it will return the wrong page of
        # results if you request something too high. :(
        if raw_page.pagenum < page_num:
            return {"results": [], "hits": 0, "spelling_suggestion": None}

        return self._process_results(
            raw_page,
            highlight=highlight,
            query_string=query_string,
            spelling_query=spelling_query,
            result_class=result_class,
        )
    finally:
        if searcher is not None:
            searcher.close()

        if narrow_searcher is not None:
            narrow_searcher.close()
def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
           fields='', highlight=False, facets=None, date_facets=None,
           query_facets=None, narrow_queries=None, spelling_query=None,
           within=None, dwithin=None, distance_point=None, models=None,
           limit_to_registered_models=None, result_class=None, **kwargs):
    """Run ``query_string`` against the Whoosh index and return one page.

    :param query_string: the query; empty and one-character (non ``*``)
        queries return no results.
    :param sort_by: field names, each optionally prefixed with ``-``; all
        of them must use the same direction.
    :param start_offset: passed with ``end_offset`` to ``calculate_page``.
    :param facets: mapping whose keys are field names; each key becomes an
        overlapping ``FieldFacet`` passed to Whoosh as ``groupedby``.  The
        mapping is also forwarded to ``_process_results``.
    :param date_facets: unsupported; a warning is issued when given.
    :param query_facets: unsupported; a warning is issued when given.
    :param narrow_queries: set of query strings that all have to match; a
        model restriction is added to it when models are limited.
    :param models: models to restrict the search to.
    :param limit_to_registered_models: defaults to the
        ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` setting (``True``).
    :returns: the dict built by ``_process_results``, or a dict with empty
        ``results`` and ``hits == 0`` when nothing can match.
    :raises SearchBackendError: if ``sort_by`` mixes sort directions.
    :raises ValueError: from ``search_page`` unless ``silently_fail`` is set.
    """
    if not self.setup_complete:
        self.setup()

    # A zero length query should return no results.
    if len(query_string) == 0:
        return {
            'results': [],
            'hits': 0,
        }

    try:
        from django.utils.encoding import force_text
    except ImportError:
        from django.utils.encoding import force_unicode as force_text

    query_string = force_text(query_string)

    # A one-character query (non-wildcard) gets nabbed by a stopwords
    # filter and should yield zero results.
    if len(query_string) <= 1 and query_string != u'*':
        return {
            'results': [],
            'hits': 0,
        }

    reverse = False

    if sort_by is not None:
        # Determine if we need to reverse the results and if Whoosh can
        # handle what it's being asked to sort by. Reversing is an
        # all-or-nothing action, unfortunately.
        reverse_counter = sum(
            1 for order_by in sort_by if order_by.startswith('-'))

        if reverse_counter != 0 and reverse_counter != len(sort_by):
            raise SearchBackendError(
                "Whoosh does not handle reverse sorting "
                "by some fields and not others.")

        sort_by = [
            order_by[1:] if order_by.startswith('-') else order_by
            for order_by in sort_by
        ]
        reverse = (reverse_counter > 0)

    if date_facets is not None:
        warnings.warn("Whoosh does not handle date faceting.",
                      Warning,
                      stacklevel=2)

    if query_facets is not None:
        warnings.warn("Whoosh does not handle query faceting.",
                      Warning,
                      stacklevel=2)

    narrowed_results = None
    self.index = self.index.refresh()

    if limit_to_registered_models is None:
        from django.conf import settings
        limit_to_registered_models = getattr(
            settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

    if models and len(models):
        # Newer Django exposes ``_meta.model_name``; older releases only
        # have ``_meta.module_name``.  Prefer the former, fall back to the
        # latter so both keep working.
        model_choices = sorted([
            '%s.%s' % (model._meta.app_label,
                       getattr(model._meta, 'model_name', None)
                       or model._meta.module_name)
            for model in models
        ])
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if len(model_choices) > 0:
        if narrow_queries is None:
            narrow_queries = set()

        from haystack.constants import DJANGO_CT
        narrow_queries.add(' OR '.join(
            ['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))

    narrow_searcher = None
    searcher = None

    # Both searchers are released in the ``finally`` clause so that the
    # early returns and error paths below cannot leak them.
    try:
        if narrow_queries is not None:
            # Potentially expensive? I don't see another way to do it in
            # Whoosh...
            narrow_searcher = self.index.searcher()

            for nq in narrow_queries:
                # workaround for Haystack issue 575
                recent_narrowed_results = narrow_searcher.search(
                    self.parser.parse(force_text(nq)), limit=None)

                if len(recent_narrowed_results) <= 0:
                    return {
                        'results': [],
                        'hits': 0,
                    }

                if narrowed_results:
                    narrowed_results.filter(recent_narrowed_results)
                else:
                    narrowed_results = recent_narrowed_results

        self.index = self.index.refresh()

        if not self.index.doc_count():
            # Empty index: nothing to search, only offer a suggestion.
            if self.include_spelling:
                if spelling_query:
                    spelling_suggestion = self.create_spelling_suggestion(
                        spelling_query)
                else:
                    spelling_suggestion = self.create_spelling_suggestion(
                        query_string)
            else:
                spelling_suggestion = None

            return {
                'results': [],
                'hits': 0,
                'spelling_suggestion': spelling_suggestion,
            }

        searcher = self.index.searcher()
        parsed_query = self.parser.parse(query_string)

        # In the event of an invalid/stopworded query, recover gracefully.
        if parsed_query is None:
            return {
                'results': [],
                'hits': 0,
            }

        page_num, page_length = self.calculate_page(
            start_offset, end_offset)

        search_kwargs = {
            'pagelen': page_length,
            'sortedby': sort_by,
            'reverse': reverse,
        }

        if facets is not None:
            from whoosh.sorting import FieldFacet
            search_kwargs['groupedby'] = [
                FieldFacet(facet_fieldname, allow_overlap=True)
                for facet_fieldname in facets
            ]

        # Handle the case where the results have been narrowed.
        if narrowed_results is not None:
            search_kwargs['filter'] = narrowed_results

        try:
            raw_page = searcher.search_page(parsed_query, page_num,
                                            **search_kwargs)
        except ValueError:
            if not self.silently_fail:
                raise

            return {
                'results': [],
                'hits': 0,
                'spelling_suggestion': None,
            }

        # Because as of Whoosh 2.5.1, it will return the wrong page of
        # results if you request something too high. :(
        if raw_page.pagenum < page_num:
            return {
                'results': [],
                'hits': 0,
                'spelling_suggestion': None,
            }

        return self._process_results(raw_page,
                                     highlight=highlight,
                                     query_string=query_string,
                                     spelling_query=spelling_query,
                                     result_class=result_class,
                                     facets=facets)
    finally:
        if searcher is not None:
            searcher.close()

        if narrow_searcher is not None:
            narrow_searcher.close()
def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
           fields='', highlight=False, facets=None, date_facets=None,
           query_facets=None, narrow_queries=None, spelling_query=None,
           within=None, dwithin=None, distance_point=None, models=None,
           limit_to_registered_models=None, result_class=None, **kwargs):
    """Run ``query_string`` against the Whoosh index and return one page.

    Supports collapsing hits on a field through these extra keyword
    arguments:

    * ``collapse`` - name of the field to collapse on.  The page search
      always keeps one hit per value of that field.
    * ``collapse_limit`` - when greater than one, every hit on the page is
      followed by a second search that fetches up to that many hits
      sharing its collapse value; these are handed to ``_process_results``
      as ``grouped_results``.
    * ``collapse_order`` - field name used to order collapsed hits; a ``-``
      anywhere in the name selects descending order.

    :param query_string: the query; empty and one-character (non ``*``)
        queries return no results.
    :param sort_by: field names, each optionally prefixed with ``-``; all
        of them must use the same direction.  Only the first field is
        passed to Whoosh.
    :param facets: unsupported; a warning is issued when given.
    :param date_facets: unsupported; a warning is issued when given.
    :param query_facets: unsupported; a warning is issued when given.
    :param narrow_queries: query strings that all have to match.
    :param models: models to restrict the search to.
    :param limit_to_registered_models: defaults to the
        ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` setting (``True``).
    :returns: the dict built by ``_process_results``, or a dict with empty
        ``results`` and ``hits == 0`` when nothing can match.
    :raises SearchBackendError: if ``sort_by`` mixes sort directions.
    :raises ValueError: from ``search_page`` unless ``silently_fail`` is set.
    """
    if not self.setup_complete:
        self.setup()

    # A zero length query should return no results.
    if len(query_string) == 0:
        return {
            'results': [],
            'hits': 0,
        }

    query_string = force_text(query_string)

    # A one-character query (non-wildcard) gets nabbed by a stopwords
    # filter and should yield zero results.
    if len(query_string) <= 1 and query_string != u'*':
        return {
            'results': [],
            'hits': 0,
        }

    reverse = False

    if sort_by is not None:
        # Determine if we need to reverse the results and if Whoosh can
        # handle what it's being asked to sort by. Reversing is an
        # all-or-nothing action, unfortunately.
        reverse_counter = sum(
            1 for order_by in sort_by if order_by.startswith('-'))

        if reverse_counter and reverse_counter != len(sort_by):
            raise SearchBackendError("Whoosh requires all order_by fields"
                                     " to use the same sort direction")

        reverse = reverse_counter > 0
        sort_by_list = [
            order_by[1:] if order_by.startswith('-') else order_by
            for order_by in sort_by
        ]
        # Only the first field is used; an empty list means "no sorting"
        # instead of failing with an IndexError.
        sort_by = sort_by_list[0] if sort_by_list else None

    if facets is not None:
        warnings.warn("Whoosh does not handle faceting.", Warning,
                      stacklevel=2)

    if date_facets is not None:
        warnings.warn("Whoosh does not handle date faceting.", Warning,
                      stacklevel=2)

    if query_facets is not None:
        warnings.warn("Whoosh does not handle query faceting.", Warning,
                      stacklevel=2)

    narrowed_results = None
    self.index = self.index.refresh()

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(
            settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

    if models and len(models):
        model_choices = sorted(get_model_ct(model) for model in models)
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    narrow_searcher = None
    searcher = None

    # Both searchers are released in the ``finally`` clause so that the
    # early returns and error paths below cannot leak them.
    try:
        if narrow_queries is not None:
            # Potentially expensive? I don't see another way to do it in
            # Whoosh...
            narrow_searcher = self.index.searcher()

            for nq in narrow_queries:
                recent_narrowed_results = narrow_searcher.search(
                    self.parser.parse(force_text(nq)), limit=None)

                if len(recent_narrowed_results) <= 0:
                    return {
                        'results': [],
                        'hits': 0,
                    }

                if narrowed_results:
                    narrowed_results.filter(recent_narrowed_results)
                else:
                    narrowed_results = recent_narrowed_results

        self.index = self.index.refresh()

        if not self.index.doc_count():
            # Empty index: nothing to search, only offer a suggestion.
            if self.include_spelling:
                if spelling_query:
                    spelling_suggestion = self.create_spelling_suggestion(
                        spelling_query)
                else:
                    spelling_suggestion = self.create_spelling_suggestion(
                        query_string)
            else:
                spelling_suggestion = None

            return {
                'results': [],
                'hits': 0,
                'spelling_suggestion': spelling_suggestion,
            }

        parsed_query = self.parser.parse(query_string)

        # In the event of an invalid/stopworded query, recover gracefully.
        # This has to happen before the model restriction wraps the query,
        # otherwise the wrapped query is never None.
        if parsed_query is None:
            return {
                'results': [],
                'hits': 0,
            }

        if len(model_choices) > 0:
            narrow_model = [Term(DJANGO_CT, rm) for rm in model_choices]
            parsed_query = And([Or(narrow_model), parsed_query])

        searcher = self.index.searcher()

        page_num, page_length = self.calculate_page(start_offset, end_offset)

        collapse_field = kwargs.get("collapse")
        collapse_limit = kwargs.get("collapse_limit")
        # Stays None when no collapse order was requested.
        collapse_order = None

        search_kwargs = {
            'pagelen': page_length,
            'sortedby': sort_by,
            'reverse': reverse
        }

        if collapse_field is not None:
            search_kwargs['collapse'] = FieldFacet(collapse_field)
            search_kwargs['collapse_limit'] = 1

            order = kwargs.get("collapse_order")
            if order is not None:
                collapse_order = FieldFacet(order.replace('-', ''),
                                            reverse=order.find('-') > -1)
                search_kwargs['collapse_order'] = collapse_order

        # Handle the case where the results have been narrowed.
        if narrowed_results is not None:
            search_kwargs['filter'] = narrowed_results

        try:
            raw_page = searcher.search_page(parsed_query, page_num,
                                            **search_kwargs)
        except ValueError:
            if not self.silently_fail:
                raise

            return {
                'results': [],
                'hits': 0,
                'spelling_suggestion': None,
            }

        # Because as of Whoosh 2.5.1, it will return the wrong page of
        # results if you request something too high. :(
        if raw_page.pagenum < page_num:
            return {
                'results': [],
                'hits': 0,
                'spelling_suggestion': None,
            }

        grouped_results = None
        wants_groups = (collapse_field is not None
                        and collapse_limit is not None
                        and collapse_limit > 1)
        if wants_groups:
            # Fetch the other members of each collapsed group.
            grouped_results = []
            for result in raw_page:
                group_query = And([
                    Term(collapse_field, result[collapse_field]),
                    parsed_query,
                ])
                grouped_results.append(
                    searcher.search(group_query,
                                    limit=collapse_limit,
                                    sortedby=collapse_order))

        return self._process_results(raw_page,
                                     result_class=result_class,
                                     collapse_field=collapse_field,
                                     grouped_results=grouped_results)
    finally:
        if searcher is not None:
            searcher.close()

        if narrow_searcher is not None:
            narrow_searcher.close()
import whoosh.index as index
from whoosh import columns, fields, index, sorting
from whoosh.qparser import QueryParser

# Disabled earlier variant (kept as a note only): it opened the index in
# "./", parsed a query against the "gtitle" field, sorted the hits by "id"
# in descending order and printed each hit.

from whoosh.qparser import QueryParser
from whoosh.index import open_dir
from whoosh.sorting import FieldFacet

# Script: search the "goods" index and collect every hit as a plain dict.
# NOTE: the assignment to ``index`` below rebinds the name imported above.
new_list = []
index = open_dir("./index/", indexname='goods')  # open the already-built index
with index.searcher() as searcher:
    parser = QueryParser("gtitle", index.schema)  # field to search, e.g. "phone_name"
    myquery = parser.parse("鸭蛋")
    facet = FieldFacet("id", reverse=True)  # sort the hits by "id", descending
    results = searcher.search(
        myquery, limit=None, sortedby=facet)  # limit caps the number of hits (default 10); None returns all
    for result1 in results:
        # Print each hit's stored fields and keep a copy in new_list.
        print(dict(result1))
        new_list.append(dict(result1))
# coding=utf-8 from whoosh.qparser import QueryParser from whoosh import qparser, sorting from whoosh.index import open_dir from whoosh.sorting import FieldFacet index_filepath = "./index/" # source_filepath=index_filepath+"0407_songs_dr2.csv" default_index = open_dir(index_filepath, indexname='book') # 读取建立好的索引 # 默认排序为得分+album+song default_facet = [] default_facet.append(sorting.ScoreFacet()) # default_facet.append(FieldFacet("album_title", reverse=True)) # 按序排列搜索结果 default_facet.append(FieldFacet("book_tittle", reverse=True)) # 默认查询为and模式,默认范围为全选 default_group = qparser.syntax.AndGroup default_range = ['book_tittle', 'book_author', 'year', 'publisher', 'ISBN'] # 基本的单曲查询 def basic_search(query, query_parse, group=default_group, facet=default_facet, index=default_index): searcher = index.searcher() parser = QueryParser(query_parse, index.schema, group=group) myquery = parser.parse(query) parser.remove_plugin_class(qparser.PhrasePlugin)