Exemplo n.º 1
0
    def finalize_query(self, query, fltr, session, qstring=None, order_by=None):
        """Build the final ORM query, optionally restricted by full-text search.

        :param query: filter expression applied to the result
        :param fltr: mapping of extra options; only its ``'limit'`` key is read
        :param session: session used to create the query
        :param qstring: free-text search string, or ``None`` for no text search
        :param order_by: explicit ordering; when given, no relevance ordering
            is requested from ``search`` or added here
        :return: tuple ``(query_result, ranked)``; ``ranked`` is ``True`` when
            the result rows carry an extra ``rank`` column
        """
        ranked = qstring is not None
        rank = None
        if ranked:
            # A single rank expression feeds both the selected ``rank`` column
            # and the relevance ordering below, so the two always agree.
            rank = func.ts_rank_cd(
                SearchObjectIndex.search_vector,
                func.plainto_tsquery(qstring)
            )
            ft_query = and_(SearchObjectIndex.so_uuid == ObjectInfoIndex.uuid, query)
            q = session.query(ObjectInfoIndex, rank.label('rank'))\
                .options(subqueryload(ObjectInfoIndex.search_object))\
                .options(subqueryload(ObjectInfoIndex.properties))\
                .filter(ft_query)

            query_result = search(q, qstring, vector=SearchObjectIndex.search_vector, sort=order_by is None, regconfig='simple')
        else:
            query_result = session.query(ObjectInfoIndex)\
                .options(subqueryload(ObjectInfoIndex.properties))\
                .filter(query)

        if order_by is not None:
            query_result = query_result.order_by(order_by)
        elif ranked:
            # Order by the real rank; a tsquery built from a NULL value would
            # yield a NULL rank for every row and therefore no ordering.
            query_result = query_result.order_by(desc(rank))
        if 'limit' in fltr:
            query_result = query_result.limit(fltr['limit'])
        return query_result, ranked
Exemplo n.º 2
0
 def handle_info_json(self, model, info, fulltextsearch=None):
     """Translate an ``info`` filter into SQLAlchemy clauses.

     When ``info`` contains ``::`` it is read as ``key::value`` pairs
     separated by ``|``; otherwise the whole ``info`` column, cast to text,
     is compared with the JSON-encoded filter value.

     :param model: mapped class whose ``info`` attribute is filtered
     :param info: filter value
     :param fulltextsearch: the string ``'1'`` turns each pair into a
         full-text match instead of an equality test
     :return: tuple ``(clauses, headlines, order_by_ranks)``; headline and
         rank expressions are only produced for the first full-text pair
     """
     filters, snippets, ranks = [], [], []

     if '::' not in info:
         encoded = json.dumps(info)
         filters.append(
             cast(_entity_descriptor(model, 'info'), Text) == encoded)
         return filters, snippets, ranks

     use_fulltext = fulltextsearch == '1'
     for chunk in info.split('|'):
         if chunk == '':
             continue
         key, value = chunk.split("::")
         field_text = _entity_descriptor(model, 'info')[key].astext
         if not use_fulltext:
             filters.append(field_text == value)
             continue
         filters.append(func.to_tsvector(field_text).match(value))
         if not snippets:
             # Only the first full-text pair drives the highlighted
             # excerpt and the ranking expression.
             snippets.append(
                 func.ts_headline(
                     self.language, field_text, func.to_tsquery(value)))
             ranks.append(
                 func.ts_rank_cd(func.to_tsvector(field_text),
                                 func.to_tsquery(value),
                                 4).label('rank'))
     return filters, snippets, ranks
Exemplo n.º 3
0
def search(query, search_query, vector=None, regconfig=None, sort=False):
    """
    Restrict ``query`` with a full text search condition.

    :param query: query object to restrict
    :param search_query: the search query; when it is blank the given
        ``query`` is returned untouched
    :param vector: search vector to use; when omitted, the first search
        vector found on the query's first entity is used
    :param regconfig: postgresql regconfig to be used; when omitted it is
        read from ``search_manager.options["regconfig"]``
    :param sort: order results by relevance (quality of hit)
    :return: the restricted query, with ``term`` bound to ``search_query``
    """
    stripped = search_query.strip()
    if not stripped:
        return query

    if vector is None:
        first_entity = query._entities[0].entity_zero.class_
        vector = inspect_search_vectors(first_entity)[0]

    config = (
        search_manager.options["regconfig"] if regconfig is None else regconfig
    )

    matches = vector.op("@@")(func.tsq_parse(config, search_query))
    result = query.filter(matches)
    if sort:
        relevance = func.ts_rank_cd(vector, func.tsq_parse(search_query))
        result = result.order_by(desc(relevance))

    return result.params(term=search_query)
Exemplo n.º 4
0
 def handle_info_json(self, model, info, fulltextsearch=None):
     """Build the filter clauses for the JSON ``info`` column.

     ``info`` containing ``::`` is parsed as ``|``-separated ``key::value``
     pairs. Any other value is JSON-encoded and compared with the column
     cast to text.

     :param model: mapped class owning the ``info`` attribute
     :param info: filter value
     :param fulltextsearch: ``'1'`` requests full-text matching per pair
     :return: ``(clauses, headlines, order_by_ranks)``; the last two hold at
         most one entry, built from the first full-text pair
     """
     clauses, headlines, order_by_ranks = [], [], []
     if '::' in info:
         fulltext = fulltextsearch == '1'
         entries = (item.split("::") for item in info.split('|')
                    if item != '')
         for name, needle in entries:
             column_text = _entity_descriptor(model, 'info')[name].astext
             if fulltext:
                 clauses.append(func.to_tsvector(column_text).match(needle))
                 if headlines:
                     # Headline and rank were already taken from an
                     # earlier pair.
                     continue
                 excerpt = func.ts_headline(
                     self.language, column_text, func.to_tsquery(needle))
                 headlines.append(excerpt)
                 relevance = func.ts_rank_cd(
                     func.to_tsvector(column_text),
                     func.to_tsquery(needle),
                     4)
                 order_by_ranks.append(relevance.label('rank'))
             else:
                 clauses.append(column_text == needle)
     else:
         as_json = json.dumps(info)
         info_as_text = cast(_entity_descriptor(model, 'info'), Text)
         clauses.append(info_as_text == as_json)
     return clauses, headlines, order_by_ranks
Exemplo n.º 5
0
def create_fulltext_ingredient_search(ingredients,
                                      limit=DEFAULT_SEARCH_RESULT_SIZE,
                                      op=and_,
                                      backup_search=False):
    """
    Run a fulltext recipe search for the given ingredients.

    Recipes are joined to their ingredients, filtered by the per-ingredient
    fulltext filters (combined with ``op``), grouped by recipe and ordered by
    a weighted sum of four ranks: any-ingredient match on the title,
    any-ingredient match on the recipe's ingredients text, the summed
    any-ingredient match over the joined ingredient names, and
    all-ingredients match on the recipe's ingredients text.

    :param ingredients: List<string> ["onion", "chicken", "peppers"]
    :param limit: number of recipes to return
    :param op: callable combining the fulltext filters into one condition
        (``and_`` by default)
    :param backup_search: forwarded to ``_apply_dynamic_fulltext_filters``
    :return: List<Recipe>
    """
    ingredients = _clean_and_stringify_ingredients_query(ingredients)

    # The same two tsqueries are used by several rank terms; build them once.
    any_ingredient = func.to_tsquery(FULLTEXT_INDEX_CONFIG, '|'.join(ingredients))
    all_ingredients = func.to_tsquery(FULLTEXT_INDEX_CONFIG, '&'.join(ingredients))

    def _document(column):
        # coalesce() needs a fallback value to have any effect. Without it a
        # NULL text gives a NULL rank, which makes the whole weighted sum
        # NULL, and PostgreSQL sorts NULLs first under DESC.
        return func.to_tsvector(FULLTEXT_INDEX_CONFIG, func.coalesce(column, ''))

    # Normalization flag 32 maps each cover-density rank to rank / (rank + 1).
    relevance = (
        func.ts_rank_cd(_document(Recipe.title), any_ingredient, 32)
        * RECIPE_TITLE_WEIGHT +
        func.ts_rank_cd(
            _document(Recipe.recipe_ingredients_text), any_ingredient, 32
        ) * RECIPE_INGREDIENTS_WEIGHT +
        func.sum(
            func.ts_rank(_document(Ingredient.name), any_ingredient)
        ) * INGREDIENTS_WEIGHT +
        func.ts_rank_cd(
            _document(Recipe.recipe_ingredients_text), all_ingredients, 32
        ) * RECIPE_MODIFIERS_WEIGHT
    )

    return db.session.query(Recipe). \
        join(IngredientRecipe). \
        join(Ingredient). \
        filter(
            op(
                *_apply_dynamic_fulltext_filters(ingredients, backup_search=backup_search)
            )
        ). \
        group_by(Recipe.pk). \
        order_by(desc(relevance)). \
        limit(limit).all()
Exemplo n.º 6
0
def _full_text_search_query(session, model, tsquery):
    """Return a ``(type, id, rank)`` query over ``model`` rows matching ``tsquery``.

    ``type`` is a literal holding the model's table name without its last
    character, ``id`` is the model id and ``rank`` is the cover-density rank
    of ``model.search_tsv`` against ``tsquery``.
    """
    normalisation = 2  # TODO tweak
    type_name = model.__table__.name[:-1]
    matches = model.search_tsv.op('@@')(tsquery)
    relevance = func.ts_rank_cd(model.search_tsv, tsquery, normalisation)
    columns = (
        literal(type_name).label("type"),
        model.id.label("id"),
        relevance.label("rank"),
    )
    return session.query(*columns).filter(matches)
Exemplo n.º 7
0
    def finalize_query(self,
                       query,
                       fltr,
                       session,
                       qstring=None,
                       order_by=None):
        """Assemble the final ORM query, with optional full-text matching.

        :param query: filter expression applied to the result
        :param fltr: mapping of extra options; only ``'limit'`` is read
        :param session: session used to create the query
        :param qstring: raw search string; parsed with
            ``parse_search_query`` when it is not ``None``
        :param order_by: explicit ordering, taking precedence over rank
        :return: tuple ``(query_result, ranked)``; ``ranked`` tells whether
            a ``rank`` column was added to the selected rows
        """
        search_query = None
        ft_query = query
        if qstring is not None:
            search_query = parse_search_query(qstring)
            text_match = SearchObjectIndex.search_vector.match(
                search_query,
                sort=order_by is None,
                postgresql_regconfig='simple')
            ft_query = and_(
                text_match,
                SearchObjectIndex.so_uuid == ObjectInfoIndex.uuid,
                query)

        # Ranking depends on the parsed query, not on the raw string.
        ranked = search_query is not None

        if ranked:
            rank_column = func.ts_rank_cd(
                SearchObjectIndex.search_vector,
                func.to_tsquery(search_query)).label('rank')
            query_result = session.query(ObjectInfoIndex, rank_column)
            query_result = query_result.options(
                joinedload(ObjectInfoIndex.search_object))
            query_result = query_result.options(
                joinedload(ObjectInfoIndex.properties))
            query_result = query_result.filter(ft_query)
        else:
            query_result = session.query(ObjectInfoIndex)
            query_result = query_result.options(
                joinedload(ObjectInfoIndex.properties))
            query_result = query_result.filter(ft_query)

        if order_by is not None:
            query_result = query_result.order_by(order_by)
        elif ranked:
            relevance = func.ts_rank_cd(SearchObjectIndex.search_vector,
                                        func.to_tsquery(search_query))
            query_result = query_result.order_by(desc(relevance))

        if 'limit' in fltr:
            query_result = query_result.limit(fltr['limit'])
        return query_result, ranked
Exemplo n.º 8
0
 def get_order_by(self, query_select=''):
     """Return the ordering expression requested through the GET parameters.

     ``column`` (default ``'username'``) names a ``UserProfile`` attribute.
     When no such attribute exists, rows are ordered by the rank of
     ``UserProfile.searchable_text`` against ``query_select.c.query``.
     ``order`` picks ``asc`` or ``desc``; any other value means ``asc``.
     """
     params = self.request.GET
     column_name = params.get('column', 'username')
     direction = params.get('order', 'asc')

     sort_key = getattr(UserProfile, column_name, None)
     if sort_key is None:
         sort_key = func.ts_rank_cd(UserProfile.searchable_text,
                                    select([query_select.c.query]))

     directions = {'asc': asc, 'desc': desc}
     return directions.get(direction, asc)(sort_key)
Exemplo n.º 9
0
 def get_order_by(self, query_select=''):
     """Build the ORDER BY expression from the request's GET parameters.

     The ``column`` parameter (default ``'username'``) is looked up on
     ``UserProfile``; a missing attribute falls back to ranking
     ``UserProfile.searchable_text`` against ``query_select.c.query``.
     The ``order`` parameter selects ``asc`` or ``desc``, defaulting to
     ``asc`` for unknown values.
     """
     request_args = self.request.GET
     column = getattr(
         UserProfile, request_args.get('column', 'username'), None)
     order = request_args.get('order', 'asc')
     if column is None:
         rank_query = select([query_select.c.query])
         column = func.ts_rank_cd(UserProfile.searchable_text, rank_query)

     orderings = {'asc': asc, 'desc': desc}
     if order not in orderings:
         # Unknown direction: fall back to ascending.
         order = 'asc'
     return orderings[order](column)
Exemplo n.º 10
0
def search(*, query_str: str, query: Query, model: str, sort=False):
    """Restrict ``query`` to rows of ``model`` matching ``query_str``.

    The model class is resolved from its table name. A blank ``query_str``
    returns ``query`` unchanged. With ``sort`` set, results are ordered by
    descending rank. ``term`` is bound to ``query_str`` on the result.
    """
    model_cls = get_class_by_tablename(model)

    if not query_str.strip():
        return query

    tsv = model_cls.search_vector
    matched = query.filter(tsv.op("@@")(func.tsq_parse(query_str)))
    if sort:
        relevance = func.ts_rank_cd(tsv, func.tsq_parse(query_str))
        matched = matched.order_by(desc(relevance))

    return matched.params(term=query_str)
Exemplo n.º 11
0
def search_products_by_param(search_query: str,
                             product_id: int = None,
                             category_id: int = None) -> list or None:
    """Build a relevance-ordered ``Product`` query for ``search_query``.

    Products whose ``search_vector`` matches ``search_query`` are selected,
    optionally narrowed by ``producer_id`` and ``category_id``, and ordered
    by descending rank against the first search vector of ``Product``.

    NOTE(review): ``product_id`` is compared with the ``producer_id``
    column - confirm the parameter name against callers.
    NOTE(review): the value returned is whatever ``order_by`` yields, not an
    evaluated list - confirm what callers expect.

    :param search_query: text to search for
    :param product_id: when truthy, value required for ``producer_id``
    :param category_id: when truthy, value required for ``category_id``
    :return: the ordered query, or ``None`` if building the base query
        raises ``exc.ProgrammingError``
    """
    vector = inspect_search_vectors(Product)[0]
    try:
        text_match = Product.search_vector.match(search_query)
        products = db.session.query(Product).filter(text_match)
    except exc.ProgrammingError:
        return None

    optional_filters = (
        ('producer_id', product_id),
        ('category_id', category_id),
    )
    for column_name, wanted in optional_filters:
        if wanted:
            products = products.filter_by(**{column_name: wanted})

    relevance = func.ts_rank_cd(vector, func.tsq_parse(search_query))
    return products.order_by(desc(relevance))
Exemplo n.º 12
0
def search(*, db_session, search_query: str, model: str, sort=False):
    """Query ``model`` rows, restricted to those matching ``search_query``.

    The model class is resolved from its table name and queried through
    ``db_session``. A blank ``search_query`` returns the unrestricted
    query. With ``sort`` set, results are ordered by descending rank.
    ``term`` is bound to ``search_query`` on the result.
    """
    model_cls = get_class_by_tablename(model)
    base_query = db_session.query(model_cls)

    if not search_query.strip():
        return base_query

    tsv = model_cls.search_vector
    matched = base_query.filter(tsv.op("@@")(func.tsq_parse(search_query)))
    if sort:
        relevance = func.ts_rank_cd(tsv, func.tsq_parse(search_query))
        matched = matched.order_by(desc(relevance))

    return matched.params(term=search_query)
Exemplo n.º 13
0
    def handle_info_json(self, model, info, fulltextsearch=None):
        """Handle info JSON query filter.

        A truthy ``info`` containing ``::`` is read as ``|``-separated
        ``key::value`` pairs. A dict is compared with the ``info`` column
        for equality. A string is JSON-decoded when possible and used in a
        containment test. Any other value produces no clause.

        :param model: mapped class owning the ``info`` attribute
        :param info: filter value (pair string, dict, or JSON/plain string)
        :param fulltextsearch: ``'1'`` turns each pair into a full-text
            match instead of an equality test
        :return: tuple ``(clauses, headlines, order_by_ranks)``; headline
            and rank expressions come from the first full-text pair only
        """
        clauses = []
        headlines = []
        order_by_ranks = []

        if info and '::' in info:
            for pair in info.split('|'):
                if pair == '':
                    continue
                k, v = pair.split("::")
                if fulltextsearch == '1':
                    vector = _entity_descriptor(model, 'info')[k].astext
                    clauses.append(func.to_tsvector(vector).match(v))
                    if not headlines:
                        headlines.append(
                            func.ts_headline(
                                self.language,
                                vector,
                                func.to_tsquery(v)))
                        order_by_ranks.append(
                            func.ts_rank_cd(
                                func.to_tsvector(vector),
                                func.to_tsquery(v), 4).label('rank'))
                else:
                    clauses.append(
                        _entity_descriptor(model, 'info')[k].astext == v)
        elif isinstance(info, dict):
            clauses.append(_entity_descriptor(model, 'info') == info)
        elif isinstance(info, str):
            try:
                decoded = json.loads(info)
            except ValueError:
                # Not valid JSON: treat the raw text as a JSON string.
                decoded = '"%s"' % info
            else:
                # Bare numbers are matched as strings. bool is an int
                # subclass and must stay untouched.
                if (isinstance(decoded, (int, float))
                        and not isinstance(decoded, bool)):
                    decoded = '"%s"' % decoded
            clauses.append(_entity_descriptor(model,
                                              'info').contains(decoded))
        return clauses, headlines, order_by_ranks
Exemplo n.º 14
0
 def resolve_search(self, info, title, types=None, result=None):
     """Return a query of titles whose search column matches ``title``.

     :param info: not used by this method
     :param title: text to search for, matched as one quoted tsquery phrase
     :param types: optional collection of ``_type`` values to keep
     :param result: value passed to ``limit()``
     :return: the query, joined to the rating and ordered by: at least
         1000 votes first, then titles whose ``primaryTitle`` matches
         ``title`` with ILIKE, then vote count, then text-search rank
     """
     # Inside a quoted tsquery phrase, embedded quotes and backslashes have
     # to be doubled. Otherwise a title such as "Schindler's List" is
     # rejected as a tsquery syntax error.
     escaped = title.replace('\\', '\\\\').replace("'", "''")
     tsquery = func.to_tsquery(f"'{escaped}'")

     query = TitleModel.query.filter(
         TitleModel.title_search_col.op('@@')(tsquery))
     if types is not None:
         query = query.filter(TitleModel._type.in_(types))

     return (
         query
         .join(TitleModel.rating)
         .order_by(
             desc(RatingModel.numVotes >= 1000),
             desc(TitleModel.primaryTitle.ilike(title)),
             desc(RatingModel.numVotes),
             desc(func.ts_rank_cd(TitleModel.title_search_col, tsquery, 1))
         )
         .limit(result)
     )
Exemplo n.º 15
0
    def handle_info_json(self, model, info, fulltextsearch=None):
        """Handle info JSON query filter.

        A truthy ``info`` containing ``::`` is read as ``|``-separated
        ``key::value`` pairs. A dict is compared with the ``info`` column
        for equality. A string is JSON-decoded when possible and used in a
        containment test. Any other value produces no clause.

        :param model: mapped class owning the ``info`` attribute
        :param info: filter value (pair string, dict, or JSON/plain string)
        :param fulltextsearch: ``'1'`` turns each pair into a full-text
            match instead of an equality test
        :return: tuple ``(clauses, headlines, order_by_ranks)``; headline
            and rank expressions come from the first full-text pair only
        """
        clauses = []
        headlines = []
        order_by_ranks = []

        if info and '::' in info:
            for pair in info.split('|'):
                if pair == '':
                    continue
                k, v = pair.split("::")
                if fulltextsearch == '1':
                    vector = _entity_descriptor(model, 'info')[k].astext
                    clauses.append(func.to_tsvector(vector).match(v))
                    if not headlines:
                        headlines.append(
                            func.ts_headline(self.language, vector,
                                             func.to_tsquery(v)))
                        order_by_ranks.append(
                            func.ts_rank_cd(func.to_tsvector(vector),
                                            func.to_tsquery(v),
                                            4).label('rank'))
                else:
                    clauses.append(_entity_descriptor(model,
                                                      'info')[k].astext == v)
        else:
            # ``unicode`` only exists on Python 2. Referencing it directly
            # raises NameError on Python 3 for any non-str ``info``.
            try:
                text_types = (str, unicode)
            except NameError:
                text_types = (str,)

            if isinstance(info, dict):
                clauses.append(_entity_descriptor(model, 'info') == info)
            elif isinstance(info, text_types):
                try:
                    decoded = json.loads(info)
                except ValueError:
                    # Not valid JSON: treat the raw text as a JSON string.
                    decoded = '"%s"' % info
                else:
                    # Bare numbers are matched as strings. bool is an int
                    # subclass and must stay untouched.
                    if (isinstance(decoded, (int, float))
                            and not isinstance(decoded, bool)):
                        decoded = '"%s"' % decoded
                clauses.append(_entity_descriptor(model,
                                                  'info').contains(decoded))
        return clauses, headlines, order_by_ranks
Exemplo n.º 16
0
    def fulltextsearch(self):
        """Run a full-text search and return the hits as a feature collection.

        Request parameters read: ``query`` (required), ``limit``,
        ``partitionlimit`` and ``interface``. Both limits are capped by the
        ``maxlimit`` setting.

        :return: a ``FeatureCollection`` of the hits, or an HTTP error
            response object: ``HTTPInternalServerError`` when the negotiated
            locale has no entry in ``self.languages``, ``HTTPBadRequest``
            when ``query`` is missing or a limit is not an integer
        """
        request = self.request
        params = request.params
        lang = locale_negotiator(request)

        try:
            language = self.languages[lang]
        except KeyError:
            return HTTPInternalServerError(
                detail="%s not defined in languages" % lang)

        if "query" not in params:
            return HTTPBadRequest(detail="no query")
        terms = params.get("query")

        maxlimit = self.settings.get("maxlimit", 200)

        requested_limit = params.get(
            "limit", self.settings.get("defaultlimit", 30))
        try:
            limit = int(requested_limit)
        except ValueError:
            return HTTPBadRequest(detail="limit value is incorrect")
        limit = min(limit, maxlimit)

        try:
            partitionlimit = int(params.get("partitionlimit", 0))
        except ValueError:
            return HTTPBadRequest(detail="partitionlimit value is incorrect")
        partitionlimit = min(partitionlimit, maxlimit)

        # Every remaining word becomes a prefix match, and all must match.
        words = [w for w in IGNORED_CHARS_RE.sub(" ", terms).split(" ") if w != ""]
        terms_ts = "&".join(word + ":*" for word in words)

        conditions = [
            FullTextSearch.ts.op("@@")(func.to_tsquery(language, terms_ts)),
        ]

        user = request.user
        if user is None or user.role is None:
            conditions.append(FullTextSearch.public.is_(True))
        else:
            conditions.append(or_(
                FullTextSearch.public.is_(True),
                FullTextSearch.role_id.is_(None),
                FullTextSearch.role_id == user.role.id
            ))

        if "interface" in params:
            conditions.append(or_(
                FullTextSearch.interface_id.is_(None),
                FullTextSearch.interface_id == self._get_interface_id(
                    params["interface"]
                )
            ))
        else:
            conditions.append(FullTextSearch.interface_id.is_(None))

        conditions.append(or_(
            FullTextSearch.lang.is_(None),
            FullTextSearch.lang == lang,
        ))
        _filter = and_(*conditions)

        # The last argument of ts_rank_cd() selects the normalization method;
        # several methods can be combined with |.
        # 2 divides the rank by the document length.
        # 8 divides the rank by the number of unique words in the document.
        # Combined, shorter results seem to be preferred over longer ones
        # with the same ratio of matching words. This relies only on testing
        # and on assumptions about how the rank might be calculated (with 2
        # and 8 combined the normalization is applied twice, so the effect
        # on at least one-word results is stronger).
        rank = func.ts_rank_cd(FullTextSearch.ts, func.to_tsquery(language, terms_ts), 2 | 8)

        if partitionlimit:
            # Partition the search results by layer_name and limit the
            # number of rows kept in each partition.
            row_number = func.row_number().over(
                partition_by=FullTextSearch.layer_name,
                order_by=(desc(rank), FullTextSearch.label)
            ).label("row_number")
            subq = DBSession.query(FullTextSearch) \
                .add_columns(row_number).filter(_filter).subquery()
            query = DBSession.query(
                subq.c.id, subq.c.label, subq.c.params, subq.c.layer_name,
                subq.c.the_geom, subq.c.actions
            ).filter(subq.c.row_number <= partitionlimit)
        else:
            query = DBSession.query(FullTextSearch).filter(_filter)
            query = query.order_by(desc(rank)).order_by(FullTextSearch.label)

        features = []
        for row in query.limit(limit).all():
            properties = {"label": row.label}
            if row.layer_name is not None:
                properties["layer_name"] = row.layer_name
            if row.params is not None:
                properties["params"] = row.params
            if row.actions is not None:
                properties["actions"] = row.actions
            elif row.layer_name is not None:
                # No explicit actions: default to adding the hit's layer.
                properties["actions"] = [{
                    "action": "add_layer",
                    "data": row.layer_name,
                }]

            if row.the_geom is None:
                feature = Feature(id=row.id, properties=properties)
            else:
                geom = to_shape(row.the_geom)
                feature = Feature(
                    id=row.id, geometry=geom,
                    properties=properties, bbox=geom.bounds
                )
            features.append(feature)

        # TODO: add callback function if provided in self.request, else return geojson
        return FeatureCollection(features)
Exemplo n.º 17
0
    def fulltextsearch(self):
        """Run a full-text search and return the hits as a feature collection.

        Request parameters read: ``query`` (required), ``limit``,
        ``partitionlimit``, ``interface`` and ``ranksystem``. Both limits
        are capped by the ``maxlimit`` setting. Hits are ranked with
        ``ts_rank_cd`` when ``ranksystem`` is ``"ts_rank_cd"``, otherwise
        with ``similarity`` of the label to the raw query.

        :return: a ``FeatureCollection`` of the hits, or an HTTP error
            response object: ``HTTPInternalServerError`` when the negotiated
            locale has no entry in ``self.languages``, ``HTTPBadRequest``
            when ``query`` is missing or a limit is not an integer
        """
        request = self.request
        params = request.params
        lang = locale_negotiator(request)

        try:
            language = self.languages[lang]
        except KeyError:
            return HTTPInternalServerError(
                detail="{0!s} not defined in languages".format(lang))

        if "query" not in params:
            return HTTPBadRequest(detail="no query")
        terms = params.get("query")

        maxlimit = self.settings.get("maxlimit", 200)

        raw_limit = params.get("limit", self.settings.get("defaultlimit", 30))
        try:
            limit = int(raw_limit)
        except ValueError:
            return HTTPBadRequest(detail="limit value is incorrect")
        limit = min(limit, maxlimit)

        try:
            partitionlimit = int(params.get("partitionlimit", 0))
        except ValueError:
            return HTTPBadRequest(detail="partitionlimit value is incorrect")
        partitionlimit = min(partitionlimit, maxlimit)

        # Every remaining word becomes a prefix match, and all must match.
        tokens = [
            w for w in IGNORED_CHARS_RE.sub(" ", terms).split(" ") if w != ""
        ]
        terms_ts = "&".join(token + ":*" for token in tokens)

        criteria = [
            FullTextSearch.ts.op("@@")(func.to_tsquery(language, terms_ts)),
        ]

        user = request.user
        if user is None:
            criteria.append(FullTextSearch.public.is_(True))
        else:
            role_ids = [r.id for r in user.roles]
            criteria.append(or_(
                FullTextSearch.public.is_(True),
                FullTextSearch.role_id.is_(None),
                FullTextSearch.role_id.in_(role_ids),
            ))

        if "interface" in params:
            interface_id = self._get_interface_id(params["interface"])
            criteria.append(or_(
                FullTextSearch.interface_id.is_(None),
                FullTextSearch.interface_id == interface_id,
            ))
        else:
            criteria.append(FullTextSearch.interface_id.is_(None))

        criteria.append(
            or_(FullTextSearch.lang.is_(None), FullTextSearch.lang == lang))
        _filter = and_(*criteria)

        if params.get("ranksystem") == "ts_rank_cd":
            # The last argument of ts_rank_cd() selects the normalization
            # method; several methods can be combined with |.
            # 2 divides the rank by the document length.
            # 8 divides the rank by the number of unique words in the document.
            # Combined, shorter results seem to be preferred over longer ones
            # with the same ratio of matching words. This relies only on
            # testing and on assumptions about how the rank might be
            # calculated (with 2 and 8 combined the normalization is applied
            # twice, so the effect on at least one-word results is stronger).
            rank = func.ts_rank_cd(FullTextSearch.ts,
                                   func.to_tsquery(language, terms_ts), 2 | 8)
        else:
            # Use similarity ranking system from module pg_trgm.
            rank = func.similarity(FullTextSearch.label, terms)

        if partitionlimit:
            # Partition the search results by layer_name and limit the
            # number of rows kept in each partition.
            row_number = func.row_number().over(
                partition_by=FullTextSearch.layer_name,
                order_by=(desc(rank), FullTextSearch.label),
            ).label("row_number")
            subq = (DBSession.query(FullTextSearch)
                    .add_columns(row_number)
                    .filter(_filter)
                    .subquery())
            query = DBSession.query(subq.c.id, subq.c.label, subq.c.params,
                                    subq.c.layer_name, subq.c.the_geom,
                                    subq.c.actions)
            query = query.filter(subq.c.row_number <= partitionlimit)
        else:
            query = DBSession.query(FullTextSearch).filter(_filter)
            query = query.order_by(desc(rank)).order_by(FullTextSearch.label)

        features = []
        for hit in query.limit(limit).all():
            properties = {"label": hit.label}
            if hit.layer_name is not None:
                properties["layer_name"] = hit.layer_name
            if hit.params is not None:
                properties["params"] = hit.params
            if hit.actions is not None:
                properties["actions"] = hit.actions
            elif hit.layer_name is not None:
                # No explicit actions: default to adding the hit's layer.
                properties["actions"] = [{
                    "action": "add_layer",
                    "data": hit.layer_name
                }]

            if hit.the_geom is None:
                feature = Feature(id=hit.id, properties=properties)
            else:
                geom = to_shape(hit.the_geom)
                feature = Feature(id=hit.id,
                                  geometry=geom,
                                  properties=properties,
                                  bbox=geom.bounds)
            features.append(feature)

        return FeatureCollection(features)