def finalize_query(self, query, fltr, session, qstring=None, order_by=None):
    """Build the final object-index query, optionally with full-text search.

    :param query: filter expression applied to the object index
    :param fltr: filter options; only the optional ``'limit'`` key is read
    :param session: database session used to create the query
    :param qstring: free-text search string, or ``None`` for no text search
    :param order_by: explicit ordering; when ``None`` a text search is
        ordered by relevance instead
    :return: tuple ``(query_result, ranked)``; ``ranked`` is ``True`` when a
        text search was requested and each row carries a ``rank`` column
    """
    ranked = qstring is not None

    if ranked:
        ft_query = and_(SearchObjectIndex.so_uuid == ObjectInfoIndex.uuid, query)
        rank = func.ts_rank_cd(
            SearchObjectIndex.search_vector,
            func.plainto_tsquery(qstring)
        ).label('rank')
        q = session.query(ObjectInfoIndex, rank)\
            .options(subqueryload(ObjectInfoIndex.search_object))\
            .options(subqueryload(ObjectInfoIndex.properties))\
            .filter(ft_query)
        # Relevance ordering is delegated to search(): it is asked to sort
        # exactly when the caller gave no explicit ordering.
        query_result = search(
            q, qstring,
            vector=SearchObjectIndex.search_vector,
            sort=order_by is None,
            regconfig='simple')
    else:
        query_result = session.query(ObjectInfoIndex)\
            .options(subqueryload(ObjectInfoIndex.properties))\
            .filter(query)

    # The previous fallback ordered by ts_rank_cd(..., to_tsquery(None)):
    # the tsquery was never set, so that sort key was always NULL and has
    # been dropped.
    if order_by is not None:
        query_result = query_result.order_by(order_by)

    if 'limit' in fltr:
        query_result = query_result.limit(fltr['limit'])

    return query_result, ranked
def handle_info_json(self, model, info, fulltextsearch=None):
    """Handle info JSON query filter.

    :param model: mapped class whose ``info`` attribute is filtered
    :param info: either a string of ``key::value`` pairs separated by ``|``
        or any other value, which is JSON-encoded and compared against the
        whole ``info`` column cast to text
    :param fulltextsearch: when ``'1'``, each pair is matched with
        PostgreSQL full-text search instead of string equality
    :return: tuple ``(clauses, headlines, order_by_ranks)`` of lists; the
        last two hold at most one element, built from the first pair
    :raises ValueError: if a non-empty pair does not split into exactly
        one key and one value
    """
    clauses = []
    headlines = []
    order_by_ranks = []

    # `info and ...` keeps None/empty values away from the substring test,
    # which would raise TypeError on None.
    if info and '::' in info:
        for pair in info.split('|'):
            if pair == '':
                continue
            k, v = pair.split("::")
            field_text = _entity_descriptor(model, 'info')[k].astext
            if fulltextsearch == '1':
                clauses.append(func.to_tsvector(field_text).match(v))
                # Only the first pair contributes a headline and a rank.
                if not headlines:
                    headlines.append(
                        func.ts_headline(
                            self.language, field_text, func.to_tsquery(v)))
                    order_by_ranks.append(
                        func.ts_rank_cd(
                            func.to_tsvector(field_text),
                            func.to_tsquery(v),
                            4).label('rank'))
            else:
                clauses.append(field_text == v)
    else:
        info = json.dumps(info)
        clauses.append(
            cast(_entity_descriptor(model, 'info'), Text) == info)

    return clauses, headlines, order_by_ranks
def search(query, search_query, vector=None, regconfig=None, sort=False):
    """
    Restrict ``query`` to rows matching a full text search.

    :param query: the query object to filter
    :param search_query: the search query; when it is empty or whitespace
        only, ``query`` is returned unchanged
    :param vector: search vector to use; defaults to the first search vector
        of the query's first entity
    :param regconfig: postgresql regconfig to be used; defaults to the
        ``regconfig`` option of ``search_manager``
    :param sort: order results by relevance (quality of hit)
    :return: the filtered (and optionally ordered) query, with
        ``search_query`` attached as the ``term`` parameter
    """
    if not search_query.strip():
        return query

    if vector is None:
        # No explicit vector: look it up on the query's first entity.
        primary_entity = query._entities[0].entity_zero.class_
        vector = inspect_search_vectors(primary_entity)[0]

    if regconfig is None:
        regconfig = search_manager.options["regconfig"]

    match_clause = vector.op("@@")(func.tsq_parse(regconfig, search_query))
    filtered = query.filter(match_clause)

    if sort:
        relevance = func.ts_rank_cd(vector, func.tsq_parse(search_query))
        filtered = filtered.order_by(desc(relevance))

    return filtered.params(term=search_query)
def handle_info_json(self, model, info, fulltextsearch=None):
    """Handle info JSON query filter.

    ``info`` is either a ``|``-separated list of ``key::value`` pairs, each
    turned into one clause on ``info[key]``, or any other value, which is
    JSON-encoded and compared with the whole ``info`` column cast to text.

    With ``fulltextsearch == '1'`` the pairs are matched through PostgreSQL
    full-text search, and the first pair additionally yields a headline
    expression and a rank expression labelled ``rank``.

    Returns the tuple ``(clauses, headlines, order_by_ranks)``.
    Raises ``ValueError`` when a non-empty pair does not split into exactly
    one key and one value.
    """
    clauses = []
    headlines = []
    order_by_ranks = []

    # A falsy `info` (None in particular) must not reach the `in` test:
    # `'::' in None` raises TypeError.
    has_pairs = bool(info) and '::' in info

    if not has_pairs:
        encoded = json.dumps(info)
        clauses.append(cast(_entity_descriptor(model, 'info'), Text) == encoded)
        return clauses, headlines, order_by_ranks

    use_fulltext = fulltextsearch == '1'
    for pair in info.split('|'):
        if pair != '':
            k, v = pair.split("::")
            vector = _entity_descriptor(model, 'info')[k].astext
            if not use_fulltext:
                clauses.append(vector == v)
                continue
            clauses.append(func.to_tsvector(vector).match(v))
            if not headlines:
                # Headline and rank come from the first pair only.
                headlines.append(
                    func.ts_headline(self.language, vector, func.to_tsquery(v)))
                order_by_ranks.append(
                    func.ts_rank_cd(func.to_tsvector(vector),
                                    func.to_tsquery(v), 4).label('rank'))

    return clauses, headlines, order_by_ranks
def create_fulltext_ingredient_search(ingredients, limit=DEFAULT_SEARCH_RESULT_SIZE, op=and_, backup_search=False):
    """
    Run a fulltext query for recipes matching the given ingredients.

    Recipes are joined to their ingredients, filtered by the dynamic
    fulltext filters, grouped per recipe and ordered (best first) by a
    weighted sum of four rank terms:

    * recipe title against any of the ingredients,
    * recipe ingredients text against any of the ingredients,
    * the summed rank of the ingredient names against any of the ingredients,
    * recipe ingredients text against all of the ingredients.

    :param ingredients: List<string> ["onion", "chicken", "peppers"]
    :param limit: number of recipes to return
    :param op: callable combining the individual fulltext filters into one
        filter expression (``and_`` by default)
    :param backup_search: forwarded unchanged to
        ``_apply_dynamic_fulltext_filters``
    :return: List<Recipe>
    """
    ingredients = _clean_and_stringify_ingredients_query(ingredients)

    def text_vector(column):
        # tsvector of a text column under the shared fulltext config
        return func.to_tsvector(FULLTEXT_INDEX_CONFIG, func.coalesce(column))

    def terms_query(operator):
        # tsquery joining every ingredient with the given tsquery operator
        return func.to_tsquery(FULLTEXT_INDEX_CONFIG, operator.join(ingredients))

    candidates = (
        db.session.query(Recipe)
        .join(IngredientRecipe)
        .join(Ingredient)
        .filter(
            op(*_apply_dynamic_fulltext_filters(ingredients, backup_search=backup_search))
        )
        .group_by(Recipe.pk)
    )

    title_score = func.ts_rank_cd(
        text_vector(Recipe.title), terms_query('|'), 32
    ) * RECIPE_TITLE_WEIGHT
    ingredients_text_score = func.ts_rank_cd(
        text_vector(Recipe.recipe_ingredients_text), terms_query('|'), 32
    ) * RECIPE_INGREDIENTS_WEIGHT
    ingredient_names_score = func.sum(
        func.ts_rank(text_vector(Ingredient.name), terms_query('|'))
    ) * INGREDIENTS_WEIGHT
    all_terms_score = func.ts_rank_cd(
        text_vector(Recipe.recipe_ingredients_text), terms_query('&'), 32
    ) * RECIPE_MODIFIERS_WEIGHT

    relevance = (
        title_score
        + ingredients_text_score
        + ingredient_names_score
        + all_terms_score
    )
    return candidates.order_by(desc(relevance)).limit(limit).all()
def _full_text_search_query(session, model, tsquery):
    """Build a query of ``(type, id, rank)`` rows for ``model`` rows matching ``tsquery``.

    ``type`` is the model's table name without its last character, ``id`` is
    ``model.id`` and ``rank`` is the ``ts_rank_cd`` of ``model.search_tsv``
    against ``tsquery``. Only rows whose ``search_tsv`` matches ``tsquery``
    (``@@``) are selected. The query is returned un-executed.
    """
    normalisation = 2  # TODO tweak
    search_column = model.search_tsv

    # Table name minus its final character (presumably the plural "s" --
    # confirm against the table naming convention).
    type_name = model.__table__.name[:-1]

    columns = (
        literal(type_name).label("type"),
        model.id.label("id"),
        func.ts_rank_cd(search_column, tsquery, normalisation).label("rank"),
    )
    matches = model.search_tsv.op('@@')(tsquery)
    return session.query(*columns).filter(matches)
def finalize_query(self, query, fltr, session, qstring=None, order_by=None):
    """Build the final object-index query, optionally with full-text search.

    :param query: filter expression applied to the object index
    :param fltr: filter options; only the optional ``'limit'`` key is read
    :param session: database session used to create the query
    :param qstring: free-text search string; parsed with
        ``parse_search_query`` when not ``None``
    :param order_by: explicit ordering; without it, ranked results are
        ordered by descending rank
    :return: tuple ``(query_result, ranked)``; ``ranked`` is ``True`` when the
        parsed search query is not ``None`` and rows carry a ``rank`` column
    """
    search_query = None
    ft_query = query
    if qstring is not None:
        search_query = parse_search_query(qstring)
        ft_query = and_(
            SearchObjectIndex.search_vector.match(
                search_query,
                sort=order_by is None,
                postgresql_regconfig='simple'),
            SearchObjectIndex.so_uuid == ObjectInfoIndex.uuid,
            query)

    def relevance():
        # A fresh rank expression per use (select list and ORDER BY).
        return func.ts_rank_cd(
            SearchObjectIndex.search_vector,
            func.to_tsquery(search_query))

    # Ranking depends on the *parsed* query, not on qstring itself.
    ranked = search_query is not None
    if ranked:
        query_result = session.query(ObjectInfoIndex, relevance().label('rank'))
        query_result = query_result.options(
            joinedload(ObjectInfoIndex.search_object))
        query_result = query_result.options(
            joinedload(ObjectInfoIndex.properties))
    else:
        query_result = session.query(ObjectInfoIndex)
        query_result = query_result.options(
            joinedload(ObjectInfoIndex.properties))
    query_result = query_result.filter(ft_query)

    if order_by is not None:
        query_result = query_result.order_by(order_by)
    elif ranked:
        query_result = query_result.order_by(desc(relevance()))

    if 'limit' in fltr:
        query_result = query_result.limit(fltr['limit'])

    return query_result, ranked
def get_order_by(self, query_select=''):
    """Return the ORDER BY expression requested through the GET parameters.

    Reads ``column`` (default ``'username'``) and ``order`` (default
    ``'asc'``) from ``self.request.GET``. When ``UserProfile`` has no
    attribute named after ``column`` (or it is ``None``), results are ordered
    by the ``ts_rank_cd`` of ``UserProfile.searchable_text`` against the
    ``query`` column of ``query_select``. Any ``order`` other than ``'asc'``
    or ``'desc'`` falls back to ascending.

    NOTE(review): with the default ``query_select=''`` the rank fallback
    fails on ``''.c`` -- callers relying on it must pass a selectable.
    """
    params = self.request.GET
    column_name = params.get('column', 'username')
    direction = params.get('order', 'asc')

    sort_key = getattr(UserProfile, column_name, None)
    if sort_key is None:
        rank_query = select([query_select.c.query])
        sort_key = func.ts_rank_cd(UserProfile.searchable_text, rank_query)

    apply_direction = {'asc': asc, 'desc': desc}.get(direction, asc)
    return apply_direction(sort_key)
def get_order_by(self, query_select=''):
    """Build the ordering expression selected by the request.

    ``column`` (GET, default ``'username'``) names the ``UserProfile``
    attribute to sort on; if no such attribute exists the sort key is the
    ``ts_rank_cd`` of ``UserProfile.searchable_text`` against
    ``query_select.c.query``. ``order`` (GET, default ``'asc'``) picks
    ``asc`` or ``desc``; unknown values mean ascending.

    NOTE(review): the rank fallback needs a real selectable in
    ``query_select``; the default ``''`` has no ``.c``.
    """
    directions = {'asc': asc, 'desc': desc}

    column_name = self.request.GET.get('column', 'username')
    order = self.request.GET.get('order', 'asc')

    column = getattr(UserProfile, column_name, None)
    if column is None:
        # Not a UserProfile attribute: rank by search relevance instead.
        column = func.ts_rank_cd(
            UserProfile.searchable_text,
            select([query_select.c.query]))

    order_func = directions[order] if order in directions else asc
    return order_func(column)
def search(*, query_str: str, query: Query, model: str, sort=False):
    """Perform a search based on the query.

    :param query_str: the text to search for; when it is empty or whitespace
        only, ``query`` is returned unchanged
    :param query: the query to restrict
    :param model: table name, resolved with ``get_class_by_tablename`` to the
        class providing ``search_vector``
    :param sort: order the results by descending ``ts_rank_cd``
    :return: the filtered query with ``query_str`` attached as the ``term``
        parameter
    """
    # Resolved before the empty-string shortcut, so an unknown table name is
    # still reported for blank searches.
    search_model = get_class_by_tablename(model)

    if not query_str.strip():
        return query

    vector = search_model.search_vector
    matches = vector.op("@@")(func.tsq_parse(query_str))
    filtered = query.filter(matches)

    if not sort:
        return filtered.params(term=query_str)

    relevance = func.ts_rank_cd(vector, func.tsq_parse(query_str))
    return filtered.order_by(desc(relevance)).params(term=query_str)
def search_products_by_param(search_query: str, product_id: int = None, category_id: int = None) -> list or None:
    """Build a relevance-ordered product query for ``search_query``.

    Products whose ``search_vector`` matches ``search_query`` are selected,
    optionally narrowed by ``product_id`` and ``category_id`` (each ignored
    when falsy), and ordered by descending ``ts_rank_cd``.

    Returns the ordered query object, or ``None`` if building the base
    query raises ``ProgrammingError``.

    NOTE(review): the return annotation evaluates to plain ``list``, while
    the value returned is the result of ``order_by`` -- confirm what callers
    expect.
    NOTE(review): nothing here visibly executes the query, so the
    ``ProgrammingError`` handler may never fire -- confirm.
    """
    vector = inspect_search_vectors(Product)[0]

    try:
        matches = db.session.query(Product).filter(
            Product.search_vector.match(search_query))
    except exc.ProgrammingError:
        return None

    # NOTE(review): `product_id` filters on `producer_id` -- confirm the
    # parameter name is intended.
    optional_filters = (
        ("producer_id", product_id),
        ("category_id", category_id),
    )
    for column_name, wanted in optional_filters:
        if wanted:
            matches = matches.filter_by(**{column_name: wanted})

    relevance = func.ts_rank_cd(vector, func.tsq_parse(search_query))
    return matches.order_by(desc(relevance))
def search(*, db_session, search_query: str, model: str, sort=False):
    """Perform a search based on the query.

    :param db_session: session used to create the query on the model
    :param search_query: the text to search for; when it is empty or
        whitespace only, the unfiltered model query is returned
    :param model: table name, resolved with ``get_class_by_tablename``
    :param sort: order the results by descending ``ts_rank_cd``
    :return: the query, with ``search_query`` attached as the ``term``
        parameter when a search was applied
    """
    search_model = get_class_by_tablename(model)
    base_query = db_session.query(search_model)

    has_terms = bool(search_query.strip())
    if not has_terms:
        return base_query

    vector = search_model.search_vector
    results = base_query.filter(
        vector.op("@@")(func.tsq_parse(search_query)))

    if sort:
        # Best matches first.
        rank = func.ts_rank_cd(vector, func.tsq_parse(search_query))
        results = results.order_by(desc(rank))

    return results.params(term=search_query)
def handle_info_json(self, model, info, fulltextsearch=None):
    """Handle info JSON query filter.

    :param model: mapped class whose ``info`` attribute is filtered
    :param info: one of

        * a string of ``key::value`` pairs separated by ``|``: one clause
          per pair on ``info[key]``;
        * a dict: compared for equality with the ``info`` column;
        * any other string: parsed as JSON (numbers and unparsable text are
          wrapped in double quotes) and used in a ``contains`` clause;
        * anything else: no clause is produced.
    :param fulltextsearch: when ``'1'``, pairs are matched with PostgreSQL
        full-text search; the first pair also yields a headline and a rank
        expression labelled ``rank``
    :return: tuple ``(clauses, headlines, order_by_ranks)`` of lists
    :raises ValueError: if a non-empty pair does not split into exactly
        one key and one value
    """
    clauses = []
    headlines = []
    order_by_ranks = []

    if info and '::' in info:
        for pair in info.split('|'):
            if pair == '':
                continue
            k, v = pair.split("::")
            field_text = _entity_descriptor(model, 'info')[k].astext
            if fulltextsearch == '1':
                clauses.append(func.to_tsvector(field_text).match(v))
                # Only the first pair contributes a headline and a rank.
                if not headlines:
                    headlines.append(
                        func.ts_headline(
                            self.language, field_text, func.to_tsquery(v)))
                    order_by_ranks.append(
                        func.ts_rank_cd(
                            func.to_tsvector(field_text),
                            func.to_tsquery(v),
                            4).label('rank'))
            else:
                clauses.append(field_text == v)
    elif isinstance(info, dict):
        clauses.append(_entity_descriptor(model, 'info') == info)
    elif isinstance(info, str):
        try:
            info = json.loads(info)
        except ValueError:
            # Not valid JSON: search for it as a JSON string.
            info = '"%s"' % info
        else:
            # Bare numbers are searched as JSON strings too; booleans are
            # ints for isinstance() but must stay untouched.
            if isinstance(info, (int, float)) and not isinstance(info, bool):
                info = '"%s"' % info
        clauses.append(_entity_descriptor(model, 'info').contains(info))

    return clauses, headlines, order_by_ranks
def resolve_search(self, info, title, types=None, result=None):
    """Return a query of titles matching ``title``, best candidates first.

    :param info: resolver context; not used by this method
    :param title: text to search for; matched as one quoted tsquery term
        against ``TitleModel.title_search_col``
    :param types: optional collection of title types; when given, only
        titles whose ``_type`` is in it are returned
    :param result: maximum number of rows, passed to ``limit``
    :return: the un-executed query, ordered by (all descending): having at
        least 1000 votes, case-insensitive ``LIKE`` match of the primary
        title, vote count, and full-text rank
    """
    # Inside a quoted tsquery term, quotes and backslashes must be doubled;
    # otherwise a title such as "Schindler's List" is a tsquery syntax error.
    quoted_title = f'{title}'.replace('\\', '\\\\').replace("'", "''")
    tsquery = func.to_tsquery(f"'{quoted_title}'")

    query = TitleModel.query.filter(
        TitleModel.title_search_col.op('@@')(tsquery))

    if types is not None:
        query = query.filter(TitleModel._type.in_(types))

    return (
        query
        .join(TitleModel.rating)
        .order_by(
            desc(RatingModel.numVotes >= 1000),
            desc(TitleModel.primaryTitle.ilike(title)),
            desc(RatingModel.numVotes),
            desc(func.ts_rank_cd(TitleModel.title_search_col, tsquery, 1))
        )
        .limit(result)
    )
def handle_info_json(self, model, info, fulltextsearch=None):
    """Handle info JSON query filter.

    A truthy ``info`` containing ``::`` is read as ``|``-separated
    ``key::value`` pairs, one clause per pair on ``info[key]``: a full-text
    match when ``fulltextsearch == '1'`` (the first pair also yields a
    headline and a rank expression labelled ``rank``), plain equality
    otherwise.

    Any other ``info`` is handled by exact type: a ``dict`` is compared for
    equality with the ``info`` column; a ``str``/``unicode`` is parsed as
    JSON (ints, floats and unparsable text are wrapped in double quotes) and
    used in a ``contains`` clause; other types add no clause.

    Returns the tuple ``(clauses, headlines, order_by_ranks)``.
    """
    clauses, headlines, order_by_ranks = [], [], []

    if info and '::' in info:
        text_search = fulltextsearch == '1'
        for pair in info.split('|'):
            if pair == '':
                continue
            k, v = pair.split("::")
            vector = _entity_descriptor(model, 'info')[k].astext
            if not text_search:
                clauses.append(vector == v)
                continue
            clauses.append(func.to_tsvector(vector).match(v))
            if not headlines:
                # Headline and rank are taken from the first pair only.
                headlines.append(
                    func.ts_headline(self.language, vector, func.to_tsquery(v)))
                order_by_ranks.append(
                    func.ts_rank_cd(func.to_tsvector(vector),
                                    func.to_tsquery(v), 4).label('rank'))
        return clauses, headlines, order_by_ranks

    info_type = type(info)
    if info_type == dict:
        clauses.append(_entity_descriptor(model, 'info') == info)
    if info_type == str or info_type == unicode:
        try:
            info = json.loads(info)
            if type(info) in (int, float):
                info = '"%s"' % info
        except ValueError:
            # Not valid JSON: search for it as a JSON string.
            info = '"%s"' % info
        clauses.append(_entity_descriptor(model, 'info').contains(info))

    return clauses, headlines, order_by_ranks
def fulltextsearch(self):
    """Run a full-text search and return the hits as a feature collection.

    Request parameters read:

    * ``query`` (required): the search terms;
    * ``limit``: maximum number of results (default: the ``defaultlimit``
      setting, else 30), capped at the ``maxlimit`` setting (default 200);
    * ``partitionlimit``: when non-zero, maximum number of results per
      ``layer_name``, also capped at ``maxlimit``;
    * ``interface``: restricts hits to entries without an interface or
      bound to that interface.

    Returns ``HTTPInternalServerError`` when the negotiated locale is not in
    ``self.languages``, ``HTTPBadRequest`` when ``query`` is missing or a
    limit is not an integer, otherwise a ``FeatureCollection``.
    """
    request = self.request

    lang = locale_negotiator(request)
    try:
        language = self.languages[lang]
    except KeyError:
        return HTTPInternalServerError(
            detail="%s not defined in languages" % lang)

    if "query" not in request.params:
        return HTTPBadRequest(detail="no query")
    terms = request.params.get("query")

    maxlimit = self.settings.get("maxlimit", 200)

    try:
        limit = int(request.params.get(
            "limit", self.settings.get("defaultlimit", 30)))
    except ValueError:
        return HTTPBadRequest(detail="limit value is incorrect")
    if limit > maxlimit:
        limit = maxlimit

    try:
        partitionlimit = int(request.params.get("partitionlimit", 0))
    except ValueError:
        return HTTPBadRequest(detail="partitionlimit value is incorrect")
    if partitionlimit > maxlimit:
        partitionlimit = maxlimit

    # Every remaining word becomes a prefix term; all terms must match.
    words = [
        w for w in IGNORED_CHARS_RE.sub(" ", terms).split(" ") if w != ""
    ]
    terms_ts = "&".join(w + ":*" for w in words)

    conditions = [
        FullTextSearch.ts.op("@@")(func.to_tsquery(language, terms_ts)),
    ]

    # Visibility: public entries only without a user or role, otherwise also
    # role-less entries and those bound to the user's role.
    if request.user is None or request.user.role is None:
        conditions.append(FullTextSearch.public.is_(True))
    else:
        conditions.append(or_(
            FullTextSearch.public.is_(True),
            FullTextSearch.role_id.is_(None),
            FullTextSearch.role_id == request.user.role.id
        ))

    if "interface" in request.params:
        conditions.append(or_(
            FullTextSearch.interface_id.is_(None),
            FullTextSearch.interface_id == self._get_interface_id(
                request.params["interface"]
            )
        ))
    else:
        conditions.append(FullTextSearch.interface_id.is_(None))

    conditions.append(or_(
        FullTextSearch.lang.is_(None),
        FullTextSearch.lang == lang,
    ))

    _filter = and_(*conditions)

    # The numbers used in ts_rank_cd() below indicate a normalization method.
    # Several normalization methods can be combined using |.
    # 2 divides the rank by the document length
    # 8 divides the rank by the number of unique words in document
    # Combined, they seem to prefer shorter results over longer ones with the
    # same ratio of matching words. This relies only on testing and on
    # assumptions about how the rank might be calculated (with 2 and 8
    # combined the normalization is applied twice, so the effect on at least
    # the one-word results is stronger).
    rank = func.ts_rank_cd(
        FullTextSearch.ts, func.to_tsquery(language, terms_ts), 2 | 8)

    if partitionlimit:
        # Partition the search results by layer_name and limit each
        # partition.
        row_number = func.row_number().over(
            partition_by=FullTextSearch.layer_name,
            order_by=(desc(rank), FullTextSearch.label)
        ).label("row_number")
        subq = DBSession.query(FullTextSearch) \
            .add_columns(row_number).filter(_filter).subquery()
        query = DBSession.query(
            subq.c.id, subq.c.label, subq.c.params,
            subq.c.layer_name, subq.c.the_geom, subq.c.actions
        ).filter(subq.c.row_number <= partitionlimit)
    else:
        query = DBSession.query(FullTextSearch).filter(_filter) \
            .order_by(desc(rank)) \
            .order_by(FullTextSearch.label)

    objs = query.limit(limit).all()

    features = []
    for o in objs:
        properties = {"label": o.label}
        if o.layer_name is not None:
            properties["layer_name"] = o.layer_name
        if o.params is not None:
            properties["params"] = o.params
        if o.actions is not None:
            properties["actions"] = o.actions
        elif o.layer_name is not None:
            # No explicit actions: default to adding the layer.
            properties["actions"] = [{
                "action": "add_layer",
                "data": o.layer_name,
            }]

        feature_args = {"id": o.id, "properties": properties}
        if o.the_geom is not None:
            geom = to_shape(o.the_geom)
            feature_args["geometry"] = geom
            feature_args["bbox"] = geom.bounds
        features.append(Feature(**feature_args))

    # TODO: add callback function if provided in self.request, else return geojson
    return FeatureCollection(features)
def fulltextsearch(self):
    """Run a full-text search and return the hits as a feature collection.

    Request parameters read:

    * ``query`` (required): the search terms;
    * ``limit``: maximum number of results (default: the ``defaultlimit``
      setting, else 30), capped at the ``maxlimit`` setting (default 200);
    * ``partitionlimit``: when non-zero, maximum number of results per
      ``layer_name``, also capped at ``maxlimit``;
    * ``interface``: restricts hits to entries without an interface or
      bound to that interface;
    * ``ranksystem``: ``"ts_rank_cd"`` ranks with ``ts_rank_cd``; any other
      value ranks by ``similarity`` between the label and the raw terms.

    Returns ``HTTPInternalServerError`` when the negotiated locale is not in
    ``self.languages``, ``HTTPBadRequest`` when ``query`` is missing or a
    limit is not an integer, otherwise a ``FeatureCollection``.
    """
    params = self.request.params

    lang = locale_negotiator(self.request)
    try:
        language = self.languages[lang]
    except KeyError:
        return HTTPInternalServerError(
            detail="{0!s} not defined in languages".format(lang))

    if "query" not in params:
        return HTTPBadRequest(detail="no query")
    terms = params.get("query")

    maxlimit = self.settings.get("maxlimit", 200)

    try:
        limit = int(
            params.get("limit", self.settings.get("defaultlimit", 30)))
    except ValueError:
        return HTTPBadRequest(detail="limit value is incorrect")
    if limit > maxlimit:
        limit = maxlimit

    try:
        partitionlimit = int(params.get("partitionlimit", 0))
    except ValueError:
        return HTTPBadRequest(detail="partitionlimit value is incorrect")
    if partitionlimit > maxlimit:
        partitionlimit = maxlimit

    # Every remaining word becomes a prefix term; all terms must match.
    cleaned_words = IGNORED_CHARS_RE.sub(" ", terms).split(" ")
    terms_ts = "&".join(w + ":*" for w in cleaned_words if w != "")

    text_match = FullTextSearch.ts.op("@@")(
        func.to_tsquery(language, terms_ts))

    # Visibility: anonymous requests see public entries only; users also
    # see role-less entries and those bound to any of their roles.
    user = self.request.user
    if user is None:
        visible = FullTextSearch.public.is_(True)
    else:
        visible = or_(
            FullTextSearch.public.is_(True),
            FullTextSearch.role_id.is_(None),
            FullTextSearch.role_id.in_([r.id for r in user.roles]),
        )

    if "interface" in params:
        interface_match = or_(
            FullTextSearch.interface_id.is_(None),
            FullTextSearch.interface_id == self._get_interface_id(
                params["interface"]),
        )
    else:
        interface_match = FullTextSearch.interface_id.is_(None)

    lang_match = or_(
        FullTextSearch.lang.is_(None), FullTextSearch.lang == lang)

    _filter = and_(text_match, visible, interface_match, lang_match)

    rank_system = params.get("ranksystem")
    if rank_system == "ts_rank_cd":
        # The numbers used in ts_rank_cd() below indicate a normalization
        # method. Several normalization methods can be combined using |.
        # 2 divides the rank by the document length
        # 8 divides the rank by the number of unique words in document
        # Combined, they seem to prefer shorter results over longer ones
        # with the same ratio of matching words. This relies only on testing
        # and on assumptions about how the rank might be calculated (with 2
        # and 8 combined the normalization is applied twice, so the effect
        # on at least the one-word results is stronger).
        rank = func.ts_rank_cd(
            FullTextSearch.ts, func.to_tsquery(language, terms_ts), 2 | 8)
    else:
        # Use similarity ranking system from module pg_trgm.
        rank = func.similarity(FullTextSearch.label, terms)

    if partitionlimit:
        # Partition the search results by layer_name and limit each
        # partition.
        row_number = func.row_number().over(
            partition_by=FullTextSearch.layer_name,
            order_by=(desc(rank), FullTextSearch.label),
        ).label("row_number")
        subq = (
            DBSession.query(FullTextSearch)
            .add_columns(row_number)
            .filter(_filter)
            .subquery()
        )
        query = DBSession.query(
            subq.c.id, subq.c.label, subq.c.params,
            subq.c.layer_name, subq.c.the_geom, subq.c.actions,
        )
        query = query.filter(subq.c.row_number <= partitionlimit)
    else:
        query = DBSession.query(FullTextSearch).filter(_filter)
        query = query.order_by(desc(rank), FullTextSearch.label)

    objs = query.limit(limit).all()

    features = []
    for o in objs:
        properties = {"label": o.label}
        if o.layer_name is not None:
            properties["layer_name"] = o.layer_name
        if o.params is not None:
            properties["params"] = o.params
        if o.actions is not None:
            properties["actions"] = o.actions
        elif o.layer_name is not None:
            # No explicit actions: default to adding the layer.
            properties["actions"] = [{
                "action": "add_layer",
                "data": o.layer_name,
            }]

        if o.the_geom is None:
            features.append(Feature(id=o.id, properties=properties))
            continue
        geom = to_shape(o.the_geom)
        features.append(Feature(
            id=o.id,
            geometry=geom,
            properties=properties,
            bbox=geom.bounds,
        ))

    return FeatureCollection(features)