Exemplo n.º 1
0
def search(q, **search_args):
    """Parse ``q`` as a DisMax query and run it against the search index.

    The query is matched against the ``name`` and ``text`` fields, with
    ``name`` boosted over ``text``.

    NOTE: the per-user access filter is not implemented yet; the query is
    currently executed unfiltered for every user (see the TODO below).

    :param q: unparsed search string.
    :param search_args: keyword arguments forwarded unchanged to
        :meth:`whoosh.searching.Searcher.search` (e.g. `limit`,
        `groupedby`, `sortedby`).
    :return: whatever ``searcher.search`` returns for the parsed query.
    """
    index = whoosh.index

    boosts = {"name": 1.5, "text": 1.0}
    query = DisMaxParser(boosts, index.schema).parse(q)

    # Security access filter (placeholder).
    profile = get_current_profile()
    if not is_membre_dri(profile):
        # TODO: restrict the query to what this profile may see. Sketch:
        #
        # roles = {f"user:{user.id}", "all"}
        # for role in user.get_roles():
        #     if role.type in [RoleType.DIRECTION.value, RoleType.GDL.value]:
        #         structure = role.context
        #         structures = [structure] + structure.descendants()
        #         roles |= {f"org:{s.id}" for s in structures}
        #
        # terms = [wq.Term("allowed_roles_and_users", role) for role in roles]
        # query &= wq.Or(terms)
        pass

    # closereader=False is required, otherwise the results cannot be used
    # once the 'with' statement has exited.
    with index.searcher(closereader=False) as searcher:
        hits = searcher.search(query, **search_args)
    return hits
Exemplo n.º 2
0
def search_naics_disjoint(string, dirpath, limit=5):
    """Run a DisMax search over the index found at ``dirpath``.

    Presumably this targets a NAICS classification index (inferred from the
    function name) -- confirm against ``get_index`` callers.

    :param string: raw query text; passed through ``prepare_query`` before
        being parsed.
    :param dirpath: location handed to ``get_index`` to obtain the index.
    :param limit: maximum number of hits to return.
    :return: list with ``add_score(hit)`` for each hit.
    """
    indx = get_index(dirpath)
    query_text = prepare_query(string)

    # Field name -> boost factor used by the DisMax parser.
    boosts = {"name": 5, "description": 2}
    parser = DisMaxParser(boosts, indx.schema)

    with indx.searcher() as searcher:
        # Honour the caller's ``limit``; it was previously accepted but never
        # forwarded, so the library default silently applied instead.
        hits = searcher.search(parser.parse(query_text), limit=limit)
        # The list is built while the searcher is still open.
        return [add_score(hit) for hit in hits]
Exemplo n.º 3
0
Arquivo: gs1.py Projeto: cmutel/perdu
def search_gs1_disjoint(string, dirpath, limit=5):
    """Run a DisMax search over the index found at ``dirpath``.

    Presumably this targets a GS1 classification index (inferred from the
    function name and the field names) -- confirm against ``get_index``
    callers.

    :param string: raw query text; passed through ``prepare_query`` before
        being parsed.
    :param dirpath: location handed to ``get_index`` to obtain the index.
    :param limit: maximum number of hits to return.
    :return: list with ``add_score(hit)`` for each hit.
    """
    indx = get_index(dirpath)
    query_text = prepare_query(string)

    # Field name -> boost factor used by the DisMax parser.
    boosts = {
        "brick": 5,
        "klass": 3,
        "family": 2,
        "segment": 1,
        "definition": 2,
    }
    parser = DisMaxParser(boosts, indx.schema)

    with indx.searcher() as searcher:
        # Honour the caller's ``limit``; it was previously accepted but never
        # forwarded, so the library default silently applied instead.
        hits = searcher.search(parser.parse(query_text), limit=limit)
        # The list is built while the searcher is still open.
        return [add_score(hit) for hit in hits]
Exemplo n.º 4
0
    def search(
        self,
        q,
        index="default",
        fields=None,
        Models=(),
        object_types=(),
        prefix=True,
        facet_by_type=None,
        **search_args
    ):
        """Interface to search indexes.

        :param q: unparsed search string.
        :param index: name of index to use for search.
        :param fields: optional mapping of field names -> boost factor.
            Defaults to ``self.default_search_fields``. The mapping passed in
            is never modified.
        :param Models: list of Model classes to limit search on.
        :param object_types: same as `Models`, but directly the model string.
        :param prefix: enable or disable search by prefix (when false, fields
            whose name ends with ``_prefix`` are not searched).
        :param facet_by_type: if set, returns a dict of object_type: results
            with a capped number of matches for each type.
        :param search_args: any valid parameter for
            :meth:`whoosh.searching.Searcher.search`. This includes `limit`,
            `groupedby` and `sortedby`. A `filter` entry, if present, is
            combined with the filters built here.
        :return: the search results object, or, when `facet_by_type` is set,
            a dict mapping each object type to a list of hits.
        """
        index = self.app_state.indexes[index]
        if not fields:
            fields = self.default_search_fields

        valid_fields = {
            f
            for f in index.schema.names(check_names=fields)
            if prefix or not f.endswith("_prefix")
        }

        # Build a filtered copy instead of deleting keys in place: `fields`
        # may be the caller's mapping or the shared
        # `self.default_search_fields`, and deleting from it would drop those
        # fields for every later search as well.
        fields = {
            name: boost for name, boost in fields.items() if name in valid_fields
        }

        parser = DisMaxParser(fields, index.schema)
        query = parser.parse(q)

        # Take the caller-supplied filter out of search_args; it is merged
        # into the query below rather than passed on to the searcher.
        extra_filter = search_args.pop("filter", None)
        filters = [extra_filter] if extra_filter is not None else []

        if not hasattr(g, "is_manager") or not g.is_manager:
            # security access filter: only match documents whose
            # "allowed_roles_and_users" contains one of the user's roles
            user = current_user
            roles = {indexable_role(user)}
            if not user.is_anonymous:
                roles.add(indexable_role(Anonymous))
                roles.add(indexable_role(Authenticated))
                roles |= {indexable_role(r) for r in security.get_roles(user)}

            filter_q = wq.Or(
                [wq.Term("allowed_roles_and_users", role) for role in roles]
            )
            filters.append(filter_q)

        # Merge explicitly named object types with those of the Model classes.
        object_types = set(object_types)
        for m in Models:
            object_type = m.entity_type
            if not object_type:
                continue
            object_types.add(object_type)

        if object_types:
            object_types &= self.app_state.indexed_fqcn
        else:
            # ensure we don't show content types previously indexed but not yet
            # cleaned from index
            object_types = self.app_state.indexed_fqcn

        # limit object_type
        filter_q = wq.Or([wq.Term("object_type", t) for t in object_types])
        filters.append(filter_q)

        # Registered filter functions may each contribute one more filter.
        for func in self.app_state.search_filter_funcs:
            filter_q = func()
            if filter_q is not None:
                filters.append(filter_q)

        if filters:
            filter_q = wq.And(filters) if len(filters) > 1 else filters[0]
            # search_args['filter'] = filter_q
            query = filter_q & query

        if facet_by_type:
            if not object_types:
                object_types = [t[0] for t in self.searchable_object_types()]

            # limit number of documents to score, per object type
            collapse_limit = 5
            search_args["groupedby"] = "object_type"
            search_args["collapse"] = "object_type"
            search_args["collapse_limit"] = collapse_limit
            # Overall limit: room for `collapse_limit` hits of every type.
            search_args["limit"] = search_args["collapse_limit"] * max(
                len(object_types), 1
            )

        with index.searcher(closereader=False) as searcher:
            # closereader=False is needed, else results cannot be used outside
            # the 'with' statement
            results = searcher.search(query, **search_args)

            if facet_by_type:
                # Map each document id in the scored hits to its position in
                # the results, so grouped ids can be turned back into hits.
                positions = {
                    doc_id: pos
                    for pos, doc_id in enumerate(i[1] for i in results.top_n)
                }
                sr = results
                results = {}
                for typename, doc_ids in sr.groups("object_type").items():
                    results[typename] = [
                        sr[positions[oid]] for oid in doc_ids[:collapse_limit]
                    ]

            return results
Exemplo n.º 5
0
    def search(
        self,
        q,
        index="default",
        fields=None,
        Models=(),
        object_types=(),
        prefix=True,
        facet_by_type=None,
        **search_args,
    ):
        """Interface to search indexes.

        :param q: unparsed search string.
        :param index: name of index to use for search.
        :param fields: optional mapping of field names -> boost factor.
            Defaults to ``self.default_search_fields``. The mapping passed in
            is never modified.
        :param Models: list of Model classes to limit search on.
        :param object_types: same as `Models`, but directly the model string.
        :param prefix: enable or disable search by prefix (when false, fields
            whose name ends with ``_prefix`` are not searched).
        :param facet_by_type: if set, returns a dict of object_type: results
            with a capped number of matches for each type.
        :param search_args: any valid parameter for
            :meth:`whoosh.searching.Searcher.search`. This includes `limit`,
            `groupedby` and `sortedby`. A `filter` entry, if present, is
            combined with the filters built here.
        :return: the search results object, or, when `facet_by_type` is set,
            a dict mapping each object type to a list of hits.
        """
        index = self.app_state.indexes[index]
        if not fields:
            fields = self.default_search_fields

        valid_fields = {
            f
            for f in index.schema.names(check_names=fields)
            if prefix or not f.endswith("_prefix")
        }

        # Build a filtered copy instead of deleting keys in place: `fields`
        # may be the caller's mapping or the shared
        # `self.default_search_fields`, and deleting from it would drop those
        # fields for every later search as well.
        fields = {
            name: boost for name, boost in fields.items() if name in valid_fields
        }

        parser = DisMaxParser(fields, index.schema)
        query = parser.parse(q)

        # Take the caller-supplied filter out of search_args; it is merged
        # into the query below rather than passed on to the searcher.
        extra_filter = search_args.pop("filter", None)
        filters = [extra_filter] if extra_filter is not None else []

        if not hasattr(g, "is_manager") or not g.is_manager:
            # security access filter: only match documents whose
            # "allowed_roles_and_users" contains one of the user's roles
            user = current_user
            roles = {indexable_role(user)}
            if not user.is_anonymous:
                roles.add(indexable_role(Anonymous))
                roles.add(indexable_role(Authenticated))
                roles |= {indexable_role(r) for r in security.get_roles(user)}

            filter_q = wq.Or(
                [wq.Term("allowed_roles_and_users", role) for role in roles]
            )
            filters.append(filter_q)

        # Merge explicitly named object types with those of the Model classes.
        object_types = set(object_types)
        for m in Models:
            object_type = m.entity_type
            if not object_type:
                continue
            object_types.add(object_type)

        if object_types:
            object_types &= self.app_state.indexed_fqcn
        else:
            # ensure we don't show content types previously indexed but not yet
            # cleaned from index
            object_types = self.app_state.indexed_fqcn

        # limit object_type
        filter_q = wq.Or([wq.Term("object_type", t) for t in object_types])
        filters.append(filter_q)

        # Registered filter functions may each contribute one more filter.
        for func in self.app_state.search_filter_funcs:
            filter_q = func()
            if filter_q is not None:
                filters.append(filter_q)

        if filters:
            filter_q = wq.And(filters) if len(filters) > 1 else filters[0]
            # search_args['filter'] = filter_q
            query = filter_q & query

        if facet_by_type:
            if not object_types:
                object_types = [t[0] for t in self.searchable_object_types()]

            # limit number of documents to score, per object type
            collapse_limit = 5
            search_args["groupedby"] = "object_type"
            search_args["collapse"] = "object_type"
            search_args["collapse_limit"] = collapse_limit
            # Overall limit: room for `collapse_limit` hits of every type.
            search_args["limit"] = search_args["collapse_limit"] * max(
                len(object_types), 1
            )

        with index.searcher(closereader=False) as searcher:
            # closereader=False is needed, else results cannot be used outside
            # the 'with' statement
            results = searcher.search(query, **search_args)

            if facet_by_type:
                # Map each document id in the scored hits to its position in
                # the results, so grouped ids can be turned back into hits.
                positions = {
                    doc_id: pos
                    for pos, doc_id in enumerate(i[1] for i in results.top_n)
                }
                sr = results
                results = {}
                for typename, doc_ids in sr.groups("object_type").items():
                    results[typename] = [
                        sr[positions[oid]] for oid in doc_ids[:collapse_limit]
                    ]

            return results