def search(q, **search_args):
    """Run a free-text search against the index exposed as `whoosh.index`.

    The query string is parsed with a `DisMaxParser` over the `name` and
    `text` fields, `name` being boosted (1.5) relative to `text` (1.0).

    :param q: unparsed search string.
    :param search_args: any valid parameter for
        :meth:`whoosh.searching.Search.search`. This includes `limit`,
        `groupedby` and `sortedby`. They are forwarded unchanged.
    :return: whatever the searcher's `search()` call returns.
    """
    ix = whoosh.index
    boosts = {"name": 1.5, "text": 1.0}
    parsed_query = DisMaxParser(boosts, ix.schema).parse(q)

    # Security access filter.
    profile = get_current_profile()
    if not is_membre_dri(profile):
        # TODO: restrict the query to what this profile may see. No filter
        # is applied yet; the intended logic was sketched as:
        #
        #   roles = {f"user:{user.id}", "all"}
        #   for role in user.get_roles():
        #       if role.type in [RoleType.DIRECTION.value, RoleType.GDL.value]:
        #           structure = role.context
        #           structures = [structure] + structure.descendants()
        #           roles |= {f"org:{s.id}" for s in structures}
        #
        #   terms = [wq.Term("allowed_roles_and_users", role) for role in roles]
        #   query &= wq.Or(terms)
        pass

    # closereader=False is required: otherwise the results could not be
    # used once the 'with' statement has exited.
    with ix.searcher(closereader=False) as open_searcher:
        return open_searcher.search(parsed_query, **search_args)
def search_naics_disjoint(string, dirpath, limit=5):
    """Search the index found at `dirpath` using a DisMax query.

    The query is parsed over the `name` (boost 5) and `description`
    (boost 2) fields.

    :param string: raw query string; it is passed through `prepare_query`
        before being parsed.
    :param dirpath: location handed to `get_index` to obtain the index.
    :param limit: maximum number of hits requested from the searcher.
    :return: list with one `add_score(hit)` result per hit.
    """
    indx = get_index(dirpath)
    string = prepare_query(string)
    boosts = {"name": 5, "description": 2}
    qp = DisMaxParser(boosts, indx.schema)
    with indx.searcher() as searcher:
        # Forward `limit`: it used to be accepted but ignored, so the
        # searcher's own default was applied instead of the caller's value.
        hits = searcher.search(qp.parse(string), limit=limit)
        # Hits are converted while the searcher is still open.
        return [add_score(obj) for obj in hits]
def search_gs1_disjoint(string, dirpath, limit=5):
    """Search the index found at `dirpath` using a DisMax query.

    The query is parsed over the `brick` (boost 5), `klass` (boost 3),
    `family` (boost 2), `definition` (boost 2) and `segment` (boost 1)
    fields.

    :param string: raw query string; it is passed through `prepare_query`
        before being parsed.
    :param dirpath: location handed to `get_index` to obtain the index.
    :param limit: maximum number of hits requested from the searcher.
    :return: list with one `add_score(hit)` result per hit.
    """
    indx = get_index(dirpath)
    string = prepare_query(string)
    boosts = {"brick": 5, "klass": 3, "family": 2, "segment": 1, "definition": 2}
    qp = DisMaxParser(boosts, indx.schema)
    with indx.searcher() as searcher:
        # Forward `limit`: it used to be accepted but ignored, so the
        # searcher's own default was applied instead of the caller's value.
        hits = searcher.search(qp.parse(string), limit=limit)
        # Hits are converted while the searcher is still open.
        return [add_score(obj) for obj in hits]
def search(
    self,
    q,
    index="default",
    fields=None,
    Models=(),
    object_types=(),
    prefix=True,
    facet_by_type=None,
    **search_args
):
    """Interface to search indexes.

    :param q: unparsed search string.
    :param index: name of index to use for search.
    :param fields: optional mapping of field names -> boost factor. Falls
        back to `self.default_search_fields` when empty. The mapping is
        never modified; unknown fields are ignored.
    :param Models: list of Model classes to limit search on.
    :param object_types: same as `Models`, but directly the model string.
    :param prefix: enable or disable search by prefix. When false, fields
        whose name ends with `_prefix` are left out of the query.
    :param facet_by_type: if set, returns a dict of object_type: results
        with at most 5 matches (the internal collapse limit) for each
        type. In this mode the `groupedby`, `collapse`, `collapse_limit`
        and `limit` entries of `search_args` are overwritten.
    :param search_args: any valid parameter for
        :meth:`whoosh.searching.Search.search`. This includes `limit`,
        `groupedby` and `sortedby`. A `filter` entry is not forwarded: it
        is and-ed into the query together with the other filters.
    :return: the searcher's results, or a dict mapping each object type to
        a list of hits when `facet_by_type` is set.
    """
    index = self.app_state.indexes[index]
    if not fields:
        fields = self.default_search_fields

    valid_fields = {
        f
        for f in index.schema.names(check_names=fields)
        if prefix or not f.endswith("_prefix")
    }
    # Build a filtered copy instead of deleting keys in place: `fields` may
    # be the shared `self.default_search_fields` or a mapping owned by the
    # caller, and stripping entries from it would leak into later searches.
    fields = {name: boost for name, boost in fields.items() if name in valid_fields}

    parser = DisMaxParser(fields, index.schema)
    query = parser.parse(q)

    # A caller-supplied filter is taken out of search_args and combined
    # with the filters computed below.
    extra_filter = search_args.pop("filter", None)
    filters = [extra_filter] if extra_filter is not None else []

    if not getattr(g, "is_manager", False):
        # security access filter
        user = current_user
        roles = {indexable_role(user)}
        if not user.is_anonymous:
            roles.add(indexable_role(Anonymous))
            roles.add(indexable_role(Authenticated))
            roles |= {indexable_role(r) for r in security.get_roles(user)}

        filters.append(
            wq.Or([wq.Term("allowed_roles_and_users", role) for role in roles])
        )

    object_types = set(object_types)
    for m in Models:
        object_type = m.entity_type
        if object_type:
            object_types.add(object_type)

    if object_types:
        object_types &= self.app_state.indexed_fqcn
    else:
        # ensure we don't show content types previously indexed but not yet
        # cleaned from index
        object_types = self.app_state.indexed_fqcn

    # limit object_type
    filters.append(wq.Or([wq.Term("object_type", t) for t in object_types]))

    for func in self.app_state.search_filter_funcs:
        filter_q = func()
        if filter_q is not None:
            filters.append(filter_q)

    if filters:
        # Filters are and-ed into the query itself rather than being passed
        # through the searcher's `filter` argument.
        filter_q = wq.And(filters) if len(filters) > 1 else filters[0]
        query = filter_q & query

    # Only used when faceting by type.
    collapse_limit = 5
    if facet_by_type:
        if not object_types:
            object_types = [t[0] for t in self.searchable_object_types()]

        # limit number of documents to score, per object type
        search_args["groupedby"] = "object_type"
        search_args["collapse"] = "object_type"
        search_args["collapse_limit"] = collapse_limit
        search_args["limit"] = collapse_limit * max(len(object_types), 1)

    with index.searcher(closereader=False) as searcher:
        # 'closereader' is needed, else results cannot be used outside 'with'
        # statement
        results = searcher.search(query, **search_args)

        if facet_by_type:
            # Map each document id to its rank in the scored results so
            # that grouped document ids can be turned back into hits.
            positions = {
                doc_id: pos
                for pos, doc_id in enumerate(i[1] for i in results.top_n)
            }
            sr = results
            results = {
                typename: [sr[positions[oid]] for oid in doc_ids[:collapse_limit]]
                for typename, doc_ids in sr.groups("object_type").items()
            }

        return results
def search(
    self,
    q,
    index="default",
    fields=None,
    Models=(),
    object_types=(),
    prefix=True,
    facet_by_type=None,
    **search_args,
):
    """Interface to search indexes.

    :param q: unparsed search string.
    :param index: name of index to use for search.
    :param fields: optional mapping of field names -> boost factor. Falls
        back to `self.default_search_fields` when empty. The mapping is
        never modified; unknown fields are ignored.
    :param Models: list of Model classes to limit search on.
    :param object_types: same as `Models`, but directly the model string.
    :param prefix: enable or disable search by prefix. When false, fields
        whose name ends with `_prefix` are left out of the query.
    :param facet_by_type: if set, returns a dict of object_type: results
        with at most 5 matches (the internal collapse limit) for each
        type. In this mode the `groupedby`, `collapse`, `collapse_limit`
        and `limit` entries of `search_args` are overwritten.
    :param search_args: any valid parameter for
        :meth:`whoosh.searching.Search.search`. This includes `limit`,
        `groupedby` and `sortedby`. A `filter` entry is not forwarded: it
        is and-ed into the query together with the other filters.
    :return: the searcher's results, or a dict mapping each object type to
        a list of hits when `facet_by_type` is set.
    """
    index = self.app_state.indexes[index]
    if not fields:
        fields = self.default_search_fields

    valid_fields = {
        f
        for f in index.schema.names(check_names=fields)
        if prefix or not f.endswith("_prefix")
    }
    # Build a filtered copy instead of deleting keys in place: `fields` may
    # be the shared `self.default_search_fields` or a mapping owned by the
    # caller, and stripping entries from it would leak into later searches.
    fields = {name: boost for name, boost in fields.items() if name in valid_fields}

    parser = DisMaxParser(fields, index.schema)
    query = parser.parse(q)

    # A caller-supplied filter is taken out of search_args and combined
    # with the filters computed below.
    extra_filter = search_args.pop("filter", None)
    filters = [extra_filter] if extra_filter is not None else []

    if not getattr(g, "is_manager", False):
        # security access filter
        user = current_user
        roles = {indexable_role(user)}
        if not user.is_anonymous:
            roles.add(indexable_role(Anonymous))
            roles.add(indexable_role(Authenticated))
            roles |= {indexable_role(r) for r in security.get_roles(user)}

        filters.append(
            wq.Or([wq.Term("allowed_roles_and_users", role) for role in roles])
        )

    object_types = set(object_types)
    for m in Models:
        object_type = m.entity_type
        if object_type:
            object_types.add(object_type)

    if object_types:
        object_types &= self.app_state.indexed_fqcn
    else:
        # ensure we don't show content types previously indexed but not yet
        # cleaned from index
        object_types = self.app_state.indexed_fqcn

    # limit object_type
    filters.append(wq.Or([wq.Term("object_type", t) for t in object_types]))

    for func in self.app_state.search_filter_funcs:
        filter_q = func()
        if filter_q is not None:
            filters.append(filter_q)

    if filters:
        # Filters are and-ed into the query itself rather than being passed
        # through the searcher's `filter` argument.
        filter_q = wq.And(filters) if len(filters) > 1 else filters[0]
        query = filter_q & query

    # Only used when faceting by type.
    collapse_limit = 5
    if facet_by_type:
        if not object_types:
            object_types = [t[0] for t in self.searchable_object_types()]

        # limit number of documents to score, per object type
        search_args["groupedby"] = "object_type"
        search_args["collapse"] = "object_type"
        search_args["collapse_limit"] = collapse_limit
        search_args["limit"] = collapse_limit * max(len(object_types), 1)

    with index.searcher(closereader=False) as searcher:
        # 'closereader' is needed, else results cannot be used outside 'with'
        # statement
        results = searcher.search(query, **search_args)

        if facet_by_type:
            # Map each document id to its rank in the scored results so
            # that grouped document ids can be turned back into hits.
            positions = {
                doc_id: pos
                for pos, doc_id in enumerate(i[1] for i in results.top_n)
            }
            sr = results
            results = {
                typename: [sr[positions[oid]] for oid in doc_ids[:collapse_limit]]
                for typename, doc_ids in sr.groups("object_type").items()
            }

        return results