def execute_search(app, search_results, search_term, search_terms, type, type_config, limit_results=None):
    """Run a trigram-similarity search for each term over the configured columns.

    For every search term longer than 2 characters and every column listed in
    ``type_config['columns_to_search']``, queries the configured model for rows
    whose column matches the term (ilike), scored with pg_trgm ``similarity()``,
    and feeds each hit to ``append_result``.

    :param app: Flask application; used only to push an app context for the DB.
    :param search_results: mutable collection that ``append_result`` adds to.
    :param search_term: the original, unsplit search string (passed through).
    :param search_terms: iterable of individual terms to match.
    :param type: result-type tag forwarded to ``append_result``.
        NOTE(review): shadows the ``type`` builtin — kept for interface
        compatibility.
    :param type_config: dict with keys ``model``, ``columns_to_search``,
        ``entities_to_return``, ``has_deleted_ind``, ``id_field``,
        ``value_field``, ``score_multiplier``.
    :param limit_results: optional cap on rows per (term, column) query.
    """
    with app.app_context():
        for term in search_terms:
            if len(term) > 2:  # terms of <= 2 chars are too noisy for trigram search
                for column in type_config['columns_to_search']:
                    # Query should return with the calculated column "score",
                    # as well as the columns defined in the configuration
                    similarity = db.session.query(
                        type_config['model']).with_entities(
                        func.similarity(column, term).label('score'),
                        *type_config['entities_to_return']).filter(
                        column.ilike(f'%{term}%'))
                    if type_config['has_deleted_ind']:
                        # Exclude soft-deleted rows when the model supports it.
                        similarity = similarity.filter_by(deleted_ind=False)
                    similarity = similarity.order_by(
                        desc(func.similarity(column, term)))
                    if limit_results:
                        similarity = similarity.limit(limit_results)
                    similarity = similarity.all()
                    for item in similarity:
                        append_result(search_results, search_term, type, item,
                                      type_config['id_field'],
                                      type_config['value_field'],
                                      type_config['score_multiplier'])
def search_lectures():
    """Handle the lecture search form (POST only).

    Two modes:
      * tag search — when 'query_tags' is submitted, finds up to 3 tags with
        trigram similarity > 0.6, then lectures mapped to those tags;
      * full-text search — otherwise matches the tsvector/title against the
        free-text 'query' field.

    GET requests are redirected back to the lectures page.
    Renders ``lectures.html`` with the matched lectures.
    """
    search_by_tag = False
    lectures = []
    if request.method == 'GET':
        return redirect(url_for('main.lectures'))
    if 'query_tags' in request.form:
        query = request.form['query_tags']
        search_by_tag = True
        # Search in tags table
        tags = Tag.query \
            .filter(func.similarity(Tag.name, query) > 0.6) \
            .order_by(func.similarity(Tag.name, query).desc())\
            .limit(3)\
            .all()
        tag_ids = [t.id for t in tags]
        # NOTE(review): the tag->lecture mapping is also limited to 3 rows, so
        # at most 3 lectures come back even if a tag maps to more — confirm
        # this is intended.
        tag_map = TagLectureMap.query\
            .filter(TagLectureMap.tag_id.in_(tag_ids))\
            .limit(3)\
            .all()
        lecture_ids = [t.lecture_id for t in tag_map]
        lectures = Lecture.query.filter(Lecture.id.in_(lecture_ids)).all()
    if not search_by_tag:
        query = request.form['query']
        # Quote the term for the to_tsquery-style match() expressions.
        search_query = "'%s'" % query
        lectures = Lecture.query \
            .filter(
                or_(
                    Lecture.__ts_vector__.match(search_query),
                    Lecture.title.match(search_query)
                )
            ) \
            .all()
    context = {
        'lectures': lectures,
        'query': query,
        'search_by_tag': search_by_tag
    }
    return render_template('lectures.html', **context)
async def _search(db, guild_id, query, alias=None, limit=10):
    """
    Search for a sound.

    :param db: async database wrapper exposing ``fetch_all``.
    :param guild_id: The guild ID.
    :param query: The sound to search.
    :param alias: True for only aliases, False for no aliases, None for any.
    :param limit: Maximum number of results to produce.
    :return: rows of (name, is_alias), ordered by trigram similarity to
        ``query`` (best match first).
    """
    whereclause = and_(
        # The first % escapes the second % here: .op('%%') emits the pg_trgm
        # '%' similarity operator through the driver's paramstyle escaping.
        sound_names.c.name.op('%%')(query),
        sound_names.c.guild_id == guild_id
    )
    if alias is not None:
        # NOTE(review): mutates the and_() clause in place; ClauseList.append
        # exists in older SQLAlchemy versions — confirm against the pinned
        # SQLAlchemy release before upgrading.
        whereclause.append(
            sound_names.c.is_alias == alias
        )
    similarity = func.similarity(sound_names.c.name, query).label('similarity')
    return await db.fetch_all(
        select([
            sound_names.c.name,
            sound_names.c.is_alias,
            # similarity
        ])
        .where(whereclause)
        .order_by(similarity.desc())
        .limit(limit)
    )
def get_autocomplete_taxons_synthese():
    """
    Autocomplete taxon for web search (based on all taxon in Synthese)
    The request use trigram algorithm to get relevent results

    .. :quickref: Synthese;

    :query str search_name: the search name (use sql ilike statement and puts "%" for spaces)
    :query str regne: filter with kingdom
    :query str group2_inpn : filter with INPN group 2
    :query int limit: maximum number of results (default 20)
    :returns: list of taxon dicts ordered by trigram relevance
    """
    search_name = request.args.get("search_name", "")
    q = DB.session.query(
        VMTaxonsSyntheseAutocomplete,
        func.similarity(VMTaxonsSyntheseAutocomplete.search_name, search_name).label(
            "idx_trgm"
        ),
    )
    # Replace spaces with '%' so multi-word input still matches via ilike.
    search_name = search_name.replace(" ", "%")
    q = q.filter(
        VMTaxonsSyntheseAutocomplete.search_name.ilike("%" + search_name + "%")
    )
    regne = request.args.get("regne")
    if regne:
        q = q.filter(VMTaxonsSyntheseAutocomplete.regne == regne)
    group2_inpn = request.args.get("group2_inpn")
    if group2_inpn:
        q = q.filter(VMTaxonsSyntheseAutocomplete.group2_inpn == group2_inpn)
    # Put reference names (cd_nom == cd_ref) first.
    q = q.order_by(
        desc(VMTaxonsSyntheseAutocomplete.cd_nom == VMTaxonsSyntheseAutocomplete.cd_ref)
    )
    # BUG FIX: the 'limit' query parameter was read but a hard-coded 20 was
    # passed to .limit(); honor the parameter (still defaulting to 20).
    limit = int(request.args.get("limit", 20))
    data = q.order_by(desc("idx_trgm")).limit(limit).all()
    return [d[0].as_dict() for d in data]
def listeTaxonsSearch(session, search, limit=50):
    """
    ilike search on VmSearchTaxon, used for taxon-search autocompletion.

    :query SQLA_Session session
    :query str search: search string
    :query int limit: maximum number of results

    **Returns:**
        list: array of {'label': str, 'value': int}
        where label = search_name and value = cd_ref
    """
    # Score with the raw search string (before '%' substitution) so the
    # trigram ranking reflects what the user actually typed.
    trigram_score = func.similarity(VmSearchTaxon.search_name, search).label("idx_trgm")
    base_query = session.query(
        VmSearchTaxon.search_name,
        VmSearchTaxon.cd_ref,
        trigram_score,
    ).distinct()
    # Spaces become '%' so multi-word input still matches via ilike.
    pattern = "%" + search.replace(" ", "%") + "%"
    rows = (
        base_query
        .filter(VmSearchTaxon.search_name.ilike(pattern))
        .order_by(desc("idx_trgm"))
        .order_by(VmSearchTaxon.cd_ref == VmSearchTaxon.cd_nom)
        .limit(limit)
        .all()
    )
    return [{"label": label, "value": cd_ref} for label, cd_ref, _score in rows]
def search_order_by_similarity(cls, q: str):
    """
    Rank the base search results by trigram similarity to *q*, i.e.:

        SELECT * FROM devices WHERE name % 'SEARCH_KW'
        ORDER BY SIMILARITY(name, 'IPHONE 11') DESC;
    """
    ranked = cls.search(q).order_by(desc(func.similarity(cls.name, q)))
    return ranked
def getSearchInField(field, ilike):
    """
    Get the first 20 result of Habref table for a given field with an ilike query
    Use trigram algo to add relevance

    .. :quickref: Habref;

    :params field: a Habref column
    :type field: str
    :param ilike: the ilike where expression to filter
    :type ilike: str
    :returns: Array of dict, or an (error message, 500) tuple for an unknown field
    """
    habref_columns = Habref.__table__.columns
    if field not in habref_columns:
        # BUG FIX: the error tuple was built but never returned, so unknown
        # fields silently fell through and the view returned None. The message
        # is also corrected: this endpoint queries Habref, not Taxref.
        return "No column found in Habref for {}".format(field), 500
    value = unquote(ilike)
    # Spaces become '%' so multi-word input still matches via ilike.
    value = value.replace(" ", "%")
    column = habref_columns[field]
    q = (DB.session.query(
        Habref,
        func.similarity(column, value).label("idx_trgm")).filter(
            column.ilike("%" + value + "%")).order_by(desc("idx_trgm")))
    data = q.limit(20).all()
    return [d[0].as_dict() for d in data]
def getSearchInField(field, ilike):
    """
    Get the first 20 result of Taxref table for a given field with an ilike query
    Use trigram algo to add relevance

    :params field: a Taxref column
    :type field: str
    :param ilike: the ilike where expression to filter
    :type ilike: str
    :query str add_rank: join on table BibTaxrefRank and add the column 'nom_rang' to the result
    :query str rank_limit: return only the taxon where rank <= of the given rank (id_rang of BibTaxrefRang table)
    :returns: Array of dict, or a (jsonified error, 500) tuple
    """
    taxrefColumns = Taxref.__table__.columns
    if field not in taxrefColumns:
        # BUG FIX: the (jsonify(...), 500) tuple was built but never returned,
        # so unknown fields silently produced an empty 200 response.
        return jsonify("No column found in Taxref for {}".format(field)), 500
    value = unquote(ilike)
    # Spaces become '%' so multi-word input still matches via ilike.
    value = value.replace(" ", "%")
    column = taxrefColumns[field]
    q = (
        db.session.query(
            column,
            Taxref.cd_nom,
            Taxref.cd_ref,
            func.similarity(column, value).label("idx_trgm"),
        )
        .filter(column.ilike("%" + value + "%"))
        .order_by(desc("idx_trgm"))
    )
    if request.args.get("is_inbibnoms"):
        q = q.join(BibNoms, BibNoms.cd_nom == Taxref.cd_nom)
    join_on_bib_rang = False
    if request.args.get("add_rank"):
        q = q.join(BibTaxrefRangs, Taxref.id_rang == BibTaxrefRangs.id_rang)
        q = q.add_columns(BibTaxrefRangs.nom_rang)
        join_on_bib_rang = True
    if "rank_limit" in request.args:
        if not join_on_bib_rang:
            q = q.join(BibTaxrefRangs, Taxref.id_rang == BibTaxrefRangs.id_rang)
        try:
            # Resolve the requested rank to its sort position (tri_rang).
            sub_q_id_rang = (
                db.session.query(BibTaxrefRangs.tri_rang)
                .filter(BibTaxrefRangs.id_rang == request.args["rank_limit"])
                .one()
            )
        except NoResultFound:
            return (
                jsonify("No rank found for {}".format(request.args["rank_limit"])),
                500,
            )
        q = q.filter(BibTaxrefRangs.tri_rang <= sub_q_id_rang[0])
    results = q.limit(20).all()
    return jsonify(serializeQuery(results, q.column_descriptions))
def search():
    """Handle the question search form.

    Two modes:
      * tag search — when 'query_tags' is submitted, finds up to 3 tags with
        trigram similarity > 0.4, then questions mapped to those tags;
      * free-text search — otherwise ranks questions by trigram similarity
        to the 'query' field and keeps the top 3.

    Renders ``home.html`` with the matched questions.
    """
    search_by_tag = False
    questions = []
    if 'query_tags' in request.form:
        query = request.form['query_tags']
        search_by_tag = True
        # Search in tags table
        tags = Tag.query \
            .filter(func.similarity(Tag.name, query) > 0.4) \
            .order_by(func.similarity(Tag.name, query).desc())\
            .limit(3)\
            .all()
        tag_ids = [t.id for t in tags]
        tag_map = TagQuestionMap.query\
            .filter(TagQuestionMap.tag_id.in_(tag_ids))\
            .limit(3)\
            .all()
        question_ids = [t.question_id for t in tag_map]
        questions = Question.query.filter(Question.id.in_(question_ids)).all()
    if not search_by_tag:
        query = request.form['query']
        # NOTE(review): search_query is currently unused — the ilike filter
        # that consumed it is commented out below.
        search_query = "%{}%".format(query)
        # order_by_str = text("LEVENSHTEIN(Question.question,'" + query "')"
        # questions = Question.query.filter(Question.question.ilike(search_query))\
        questions = Question.query \
            .order_by(func.similarity(Question.question, query).desc())\
            .limit(3)\
            .all()
    context = {
        'questions': questions,
        'query': query,
        'search_by_tag': search_by_tag
    }
    return render_template('home.html', **context)
def get_id_by_search_terms(cls, search_emotion, session=None):
    """Return the id of the single best trigram match for *search_emotion*.

    Only a match with similarity strictly above 0.4 is accepted; otherwise
    (or when there is no row at all) ``False`` is returned.
    """
    score = func.similarity(cast(cls.name, TEXT), search_emotion)
    stmt = select([cls.id, score]).order_by(score.desc()).limit(1)
    active_session = session or db.session
    # limit(1) means at most one row comes back.
    for matched_id, matched_score in active_session.execute(stmt):
        return matched_id if matched_score > 0.4 else False
    return False
def get_id_by_search_terms(cls, search_emotion, session=None):
    """Look up the closest-named row by trigram similarity.

    Returns the row id when the best match scores above 0.4, else ``False``.
    """
    similarity_score = func.similarity(cast(cls.name, TEXT), search_emotion)
    best_match = select([cls.id, similarity_score]).order_by(
        similarity_score.desc()).limit(1)
    if not session:
        session = db.session
    rows = list(session.execute(best_match))
    if rows:
        row_id, row_score = rows[0]
        if row_score > 0.4:
            return row_id
    return False
def resolve_by_synonym( args: Dict, info: graphene.ResolveInfo, synonym: str, limit: Union[int, None] = None, ) -> List[ModelDescriptor]: """Retrieves a list of `ModelDescriptor` objects with a tree-number prefix-matching `tree_number_prefix`. Args: args (dict): The resolver arguments. info (graphene.ResolveInfo): The resolver info. synonym (str): The synonym query by which to perform the search. limit (int, optional): The number of closest-matching descriptors to return. Defaults to `None`. Returns: list[DescriptorModel]: The list of matched `ModelDescriptor` objects or an empty list if no match was found. """ # Retrieve the session out of the context as the `get_query` method # automatically selects the model. session = info.context.get("session") # type: sqlalchemy.orm.Session # Define a function to calculate the maximum similarity between a # descriptor's synonyms and the synonym query. func_similarity = sqlalchemy_func.max( sqlalchemy_func.similarity( ModelDescriptorSynonym.synonym, synonym, )).label("synonym_similarity") # Query out `ModelDescriptor`. query = session.query(ModelDescriptor) # type: sqlalchemy.orm.Query query = query.join(ModelDescriptor.synonyms) query = query.filter(ModelDescriptorSynonym.synonym.op("%%")(synonym)) query = query.order_by(func_similarity.desc()) query = query.group_by(ModelDescriptor.descriptor_id) if limit is not None: query = query.limit(limit=limit) # Limit query to fields requested in the GraphQL query adding # `load_only` and `joinedload` options as required. query = apply_requested_fields( info=info, query=query, orm_class=ModelDescriptor, ) objs = query.all() return objs
def _apply_dynamic_fulltext_filters(ingredients, backup_search=False):
    """
    Applies full text filters similar to v1 but uses full text search approach
    for lexemes and speed up with index usage.

    N.B. that spaces in ingredient names will be replaced with '&'

    :param ingredients: List<string>: ["onion", "chicken", "pork tenderloin"]
    :param backup_search: when True, searches 3-way OR-combinations of
        ingredients (sampled) instead of individual ingredients, and also
        matches against recipe titles.
    :return: list of SQLAlchemy filter clauses (one or_() per ingredient)
    """
    dynamic_filters = []
    # Limits the amount of matched ingredients; necessary in large or backup
    # search.
    max_ingredients = 500 if len(
        ingredients
    ) > 2 else 25  # 5 if backup_search else 250
    # Function to add title checking for the backup query; the default matches
    # nothing (recipe id -1 never exists) so the or_() below reduces to the
    # ingredient subquery in the normal path.
    title_subquery = lambda _: IngredientRecipe.recipe.in_(
        [-1])
    if backup_search:
        max_ingredients = 10
        # Searches for some combination of ingredients instead of individual
        # ingredients in the backup search ('|' is the tsquery OR operator).
        ingredients = ['|'.join(i) for i in combinations(ingredients, 3)] \
            if len(ingredients) >= 3 else ['|'.join(ingredients)]
        # print(len(ingredients))
        # Sample at most 5 random combinations to bound query cost.
        random.shuffle(ingredients)
        ingredients = ingredients[:5]
        title_subquery = lambda ingredient: (IngredientRecipe.recipe.in_(
            db.session.query(Recipe.pk).filter(
                func.to_tsquery(FULLTEXT_INDEX_CONFIG, ingredient).op('@@')
                (func.to_tsvector(FULLTEXT_INDEX_CONFIG,
                                  func.coalesce(Recipe.title)))).limit(50)))
    # print(ingredients)
    for ingredient in ingredients:
        # Each filter: recipe uses a matching ingredient OR (backup only)
        # recipe title matches the ingredient tsquery.
        dynamic_filters.append(
            or_(
                IngredientRecipe.recipe.in_(
                    db.session.query(IngredientRecipe.recipe).filter(
                        IngredientRecipe.ingredient.in_(
                            db.session.query(Ingredient.pk).filter(
                                func.to_tsquery(FULLTEXT_INDEX_CONFIG,
                                                func.coalesce(ingredient)).op('@@')(
                                    func.to_tsvector(FULLTEXT_INDEX_CONFIG,
                                                     func.coalesce(Ingredient.name))
                                )
                            ).\
                            order_by(
                                desc(
                                    func.similarity(
                                        ingredient,
                                        Ingredient.name
                                    )
                                )
                            ).limit(max_ingredients)
                        )
                    ),
                ),
                title_subquery(ingredient)
            )
        )
    return dynamic_filters
def my_view(request):
    """Render/handle the galaxy search form.

    Validates the submitted form, then runs a pg_trgm similarity search for
    the requested system name against either the populated-systems table or
    the full systems table, depending on the chosen search area.

    :param request: the Pyramid request object.
    """
    schema = Galaxy()
    gform = Form(schema, buttons=('Search', ))
    if 'submit' in request.POST:
        controls = request.POST.items()
        try:
            appstruct = gform.validate(controls)
            system_name = appstruct.pop('system_name', "InvalidSystem")
            search_area = appstruct.pop('search_area', "InvalidSearchArea")
            # Pick the model once instead of duplicating the whole query.
            model = PopulatedSystem if search_area == 'Populated' else System
            res = request.dbsession.query(
                model,
                func.similarity(model.name, system_name).label('similarity')
            ).filter(model.name % system_name).order_by(
                func.similarity(model.name, system_name).desc())
            results = []
            for candidate in res:
                # BUG FIX: results.append() was called with no argument,
                # which raised TypeError on the first result (and was then
                # swallowed by the bare except). Collect the candidate rows.
                results.append(candidate)
        except Exception:
            # Narrowed from a bare except (which also caught KeyboardInterrupt
            # / SystemExit). TODO(review): handle deform.ValidationFailure
            # explicitly and re-render the form instead of printing.
            print("OOps?!")
def search_by_binomial_name(self, name: str) -> Dict[Bird, float]:
    """Find birds whose binomial name trigram-matches *name*.

    Returns a mapping of Bird -> similarity score, keeping only matches with
    similarity above 0.2.
    """
    similarity_col = func.similarity(Bird.binomial_name, name).label('similarity')
    scores = self.session.query(Bird.id, similarity_col).subquery()
    rows = (
        self.session.query(Bird, scores.c.similarity)
        .outerjoin(scores, Bird.id == scores.c.id)
        .filter(scores.c.similarity > 0.2)
        .all()
    )
    return {bird: score for bird, score in rows}
def get_AllTaxrefNameByListe(id_liste):
    """
    Route used for autocompletes.
    If the search_name parameter is given, the SQL query uses the trigram
    algorithm to improve result relevance.

    URL params:
        - id_liste: identifier of the list
    GET params:
        - search_name: searched name. Search based on the SQL ilike function,
          with spaces replaced by %
        - regne: filter on the INPN kingdom
        - group2_inpn: filter on INPN group 2
        - limit: maximum number of results (default 20)
    """
    q = db.session.query(
        VMTaxrefListForautocomplete
    ).filter(
        VMTaxrefListForautocomplete.id_liste == id_liste
    )
    search_name = request.args.get('search_name')
    if search_name:
        # Rebuild the query with a trigram-similarity score column so results
        # can be ordered by relevance.
        q = db.session.query(
            VMTaxrefListForautocomplete,
            func.similarity(
                VMTaxrefListForautocomplete.search_name, search_name
            ).label('idx_trgm')
        ).filter(
            VMTaxrefListForautocomplete.id_liste == id_liste
        )
        # Spaces become '%' so multi-word input still matches via ilike.
        search_name = search_name.replace(' ', '%')
        q = q.filter(
            VMTaxrefListForautocomplete.search_name.ilike('%'+search_name+"%")
        ).order_by(desc('idx_trgm'))
    regne = request.args.get('regne')
    if regne:
        q = q.filter(VMTaxrefListForautocomplete.regne == regne)
    group2_inpn = request.args.get('group2_inpn')
    if group2_inpn:
        q = q.filter(VMTaxrefListForautocomplete.group2_inpn == group2_inpn)
    # Put reference names (cd_nom == cd_ref) first.
    q = q.order_by(desc(
        VMTaxrefListForautocomplete.cd_nom == VMTaxrefListForautocomplete.cd_ref
    ))
    limit = request.args.get('limit', 20)
    data = q.limit(limit).all()
    # With search_name the rows are (entity, idx_trgm) tuples; without it they
    # are plain entities — hence the two unpacking paths.
    if search_name:
        return [d[0].as_dict() for d in data]
    return [d.as_dict() for d in data]
def search_by_language_names(self, name: str) -> Dict[Bird, float]:
    """Find birds whose common names trigram-match *name*.

    Returns a mapping of Bird -> best similarity score across all of its
    common names, keeping only scores above 0.01.
    """
    score_col = func.similarity(BirdCommonName.name, name).label('similarity')
    scored = (
        self.session.query(BirdCommonName.id, score_col)
        .filter(BirdCommonName.name.ilike(fr'%{name}%'))
        .subquery()
    )
    rows = (
        self.session.query(BirdCommonName, scored.c.similarity)
        .outerjoin(scored, BirdCommonName.id == scored.c.id)
        .options(joinedload(BirdCommonName.bird))
        .filter(scored.c.similarity > 0.01)
        .all()
    )
    matches: Dict[Bird, float] = {}
    for common_name, similarity in rows:
        bird = common_name.bird
        # Keep the highest score seen for each bird.
        if similarity > matches.get(bird, 0):
            matches[bird] = similarity
    return matches
def search():
    """Search players by display name or player id.

    First tries an exact (case-normalized) match; a single exact hit
    redirects straight to that player's page. If nothing matched exactly,
    falls back to a trigram-similarity search (> 0.2) over non-restricted
    players. Renders the search results page.
    """
    query = request.args["query"]
    if not query:
        return flask.redirect('/')
    norm_query = query.lower()
    session = get_db()
    db_query = session.query(Player).filter(
        or_(Player.normalized_display_name == norm_query,
            Player.player_id == norm_query))
    search_results = [player for player in db_query.all()]
    if not search_results:
        # Fuzzy fallback; visibility-restricted players are excluded here but
        # ARE reachable via the exact match above.
        db_query = session.query(Player) \
            .filter(Player.visibility_restricted == False) \
            .filter(func.similarity(Player.normalized_display_name, norm_query) > 0.2) \
            .order_by(func.similarity(Player.normalized_display_name, norm_query).desc())
        search_results = [player for player in db_query.all()]
    elif len(search_results) == 1:
        # Unambiguous exact match: skip the results page entirely.
        return flask.redirect(
            flask.url_for('show_user', userid=search_results[0].player_id))
    return flask.render_template("search.jinja2", query=query,
                                 search_results=search_results)
def get_station_list(args, id, **kwargs):
    """Private.

    List a supplier's stations, optionally filtered by a name substring and
    ordered by trigram similarity to the filter term.
    """
    # NOTE: parameter `id` shadows the builtin but is part of the public
    # interface and kept as-is.
    query = Station.query.filter(Station.supplier.has(id=id))
    name_filter = args.get('filter', None)
    if name_filter:
        pattern = '%{}%'.format(name_filter)
        query = (query
                 .filter(Station.name.ilike(pattern))
                 .order_by(func.similarity(Station.name, name_filter).desc()))
    query, count = create_filter(Station, query, kwargs)
    stations = query.all()
    return make_response(status_code=200,
                         status='success',
                         message=None,
                         count=count,
                         data=StationSchema(many=True).dump(stations).data)
def get_AllTaxrefNameByListe(id_liste):
    """
    Route used for autocompletes.
    If the search_name parameter is given, the SQL query uses the trigram
    algorithm to improve result relevance.

    NOTE(review): near-duplicate of another get_AllTaxrefNameByListe
    definition elsewhere in this file — consider consolidating.

    URL params:
        - id_liste: identifier of the list
    GET params:
        - search_name: searched name. Search based on the SQL ilike function,
          with spaces replaced by %
        - regne: filter on the INPN kingdom
        - group2_inpn: filter on INPN group 2
        - limit: maximum number of results (default 20)
    """
    q = db.session.query(VMTaxrefListForautocomplete).filter(
        VMTaxrefListForautocomplete.id_liste == id_liste)
    search_name = request.args.get('search_name')
    if search_name:
        # Rebuild the query with a trigram-similarity score column so results
        # can be ordered by relevance.
        q = db.session.query(
            VMTaxrefListForautocomplete,
            func.similarity(
                VMTaxrefListForautocomplete.search_name,
                search_name).label('idx_trgm')).filter(
                    VMTaxrefListForautocomplete.id_liste == id_liste)
        # Spaces become '%' so multi-word input still matches via ilike.
        search_name = search_name.replace(' ', '%')
        q = q.filter(
            VMTaxrefListForautocomplete.search_name.ilike(
                '%' + search_name + "%")).order_by(
                    desc('idx_trgm'))
    regne = request.args.get('regne')
    if regne:
        q = q.filter(VMTaxrefListForautocomplete.regne == regne)
    group2_inpn = request.args.get('group2_inpn')
    if group2_inpn:
        q = q.filter(VMTaxrefListForautocomplete.group2_inpn == group2_inpn)
    # Put reference names (cd_nom == cd_ref) first.
    q = q.order_by(
        desc(VMTaxrefListForautocomplete.cd_nom ==
             VMTaxrefListForautocomplete.cd_ref))
    limit = request.args.get('limit', 20)
    data = q.limit(limit).all()
    # With search_name the rows are (entity, idx_trgm) tuples; without it they
    # are plain entities — hence the two unpacking paths.
    if search_name:
        return [d[0].as_dict() for d in data]
    return [d.as_dict() for d in data]
def get_station_list(args, **kwargs):
    """Private. Deprecated — see the response message.

    Filters stations by supplier key and a required name substring
    ('filter'), ordered by trigram similarity to the filter term.

    NOTE(review): args['filter'] and args['supplier_key'] are accessed without
    guards, so missing parameters raise KeyError — presumably validated
    upstream; confirm against the caller.
    """
    filter_str = '%{}%'.format(args['filter'])
    q = Station.query.filter(Station.supplier.has(key=args['supplier_key']),
                             Station.name.ilike(filter_str)).order_by(
        func.similarity(Station.name, args['filter']).desc())
    q, count = create_filter(Station, q, kwargs)
    stations = q.all()
    return make_response(
        status_code=200,
        status='success',
        message=
        'This method has been replaced by "../supplier/<id>/station/ and should be considered deprecated.',
        count=count,
        data=stations_schema.dump(stations).data)
def get_habref_autocomplete():
    """
    Get all habref items of a list for autocomplete

    .. :quickref: Habref;

    :query id_list int: the id of the habref list
    :query search_name str: the pattern to filter with
    :query cd_typo int: filter by typology
    :query limit int: number of results, default = 20
    :returns: Array<AutoCompleteHabitat>
    """
    params = request.args
    # NOTE(review): a missing search_name yields None and .replace() below
    # would raise — presumably the route guarantees the parameter; confirm.
    search_name = params.get("search_name")
    q = DB.session.query(
        AutoCompleteHabitat,
        func.similarity(AutoCompleteHabitat.search_name, search_name).label("idx_trgm"),
    )
    if "id_list" in params:
        q = q.join(CorListHabitat, CorListHabitat.cd_hab == AutoCompleteHabitat.cd_hab).filter(
            CorListHabitat.id_list == params.get("id_list"))
    # Spaces become '%' so multi-word input still matches via ilike.
    search_name = search_name.replace(" ", "%")
    q = q.filter(AutoCompleteHabitat.search_name.ilike("%" + search_name + "%"))
    # filter by typology
    if "cd_typo" in params:
        q = q.filter(AutoCompleteHabitat.cd_typo == params.get("cd_typo"))
    limit = request.args.get("limit", 20)
    # order by lb_code to have first high hierarchy hab first
    q = q.order_by(AutoCompleteHabitat.lb_code.asc())
    # order by trigram
    q = q.order_by(desc("idx_trgm"))
    # BUG FIX: removed a stray debug print(q) that dumped the SQL on every call.
    data = q.limit(limit).all()
    if data:
        return [d[0].as_dict() for d in data]
    else:
        return "No Result", 404
def get_autocomplete_taxons_synthese():
    """
    Route used by the synthese autocompletes (based on all taxa present in
    the synthese). The SQL query uses the trigram algorithm to improve
    result relevance.

    GET params:
        - search_name: searched name. Search based on the SQL ilike function,
          with spaces replaced by %
        - regne: filter on the INPN kingdom
        - group2_inpn: filter on INPN group 2
        - limit: maximum number of results (default 20)
    """
    search_name = request.args.get("search_name", "")
    q = DB.session.query(
        VMTaxonsSyntheseAutocomplete,
        func.similarity(VMTaxonsSyntheseAutocomplete.search_name, search_name).label(
            "idx_trgm"
        ),
    )
    # Replace spaces with '%' so multi-word input still matches via ilike.
    search_name = search_name.replace(" ", "%")
    q = q.filter(
        VMTaxonsSyntheseAutocomplete.search_name.ilike("%" + search_name + "%")
    )
    regne = request.args.get("regne")
    if regne:
        q = q.filter(VMTaxonsSyntheseAutocomplete.regne == regne)
    group2_inpn = request.args.get("group2_inpn")
    if group2_inpn:
        q = q.filter(VMTaxonsSyntheseAutocomplete.group2_inpn == group2_inpn)
    # Put reference names (cd_nom == cd_ref) first.
    q = q.order_by(
        desc(VMTaxonsSyntheseAutocomplete.cd_nom == VMTaxonsSyntheseAutocomplete.cd_ref)
    )
    # BUG FIX: the 'limit' query parameter was read but a hard-coded 20 was
    # passed to .limit(); honor the parameter (still defaulting to 20).
    limit = int(request.args.get("limit", 20))
    data = q.order_by(desc("idx_trgm")).limit(limit).all()
    return [d[0].as_dict() for d in data]
def search(request):
    """
    Multi-purpose search endpoint, taking various search types.

    :param request: The Pyramid request object
    :return: A JSON response
    """
    if 'type' in request.params:
        searchtype = request.params['type']
        if searchtype not in valid_searches:
            return exc.HTTPBadRequest(
                detail=f"Invalid search type '{searchtype}'.")
    else:
        searchtype = 'lev'
    if 'term' in request.params:
        # Autocomplete (XHR) mode: CORS headers, uppercase term, forced 'lev'.
        xhr = True
        request.response.headers.update({
            'Access-Control-Allow-Origin': '*',
            'Access-Control-Allow-Methods': 'POST,GET,DELETE,PUT,OPTIONS',
            'Access-Control-Allow-Headers':
            'Origin, Content-Type, Accept, Authorization',
            'Access-Control-Allow-Credentials': 'true',
            'Access-Control-Max-Age': '1728000',
        })
        name = unquote(request.params['term']).upper()
        searchtype = "lev"
    else:
        xhr = False
        if 'name' not in request.params:
            return exc.HTTPBadRequest(detail="No name in search request.")
        name = unquote(request.params['name'])
    if 'limit' not in request.params:
        limit = 20
    else:
        if int(request.params['limit']) > 200:
            return exc.HTTPBadRequest(detail="Limit too high (Over 200)")
        limit = int(request.params['limit'])
    # BUG FIX: the original checked len('name') < 3 — the length of the
    # literal string 'name', always 4 — so the check was dead code. The real
    # short-name rejection happens below, after the exact-match attempt
    # (short exact names are intentionally allowed), so the dead check is
    # simply removed.
    permsystems = request.dbsession.query(Permits)
    perm_systems = []
    candidates = []
    result = None
    for system in permsystems:
        perm_systems.append(system.id64)
    # Ensure we're not wasting cycles on someone searching an exact system
    # name on this endpoint.
    match = request.dbsession.query(
        System, func.similarity(System.name, name).label('similarity')). \
        filter(System.name.ilike(name)).order_by(
            func.similarity(System.name, name).desc()).limit(1)
    for candidate in match:
        candidates.append({
            'name': candidate[0].name,
            'similarity': 1,
            'id64': candidate[0].id64,
            'permit_required':
            True if candidate[0].id64 in perm_systems else False,
            'permit_name':
            checkpermitname(candidate[0].id64, permsystems, perm_systems)
        })
    if match.count() > 0:
        return {
            'meta': {
                'name': candidate[0].name,
                'type': 'Perfect match'
            },
            'data': candidates
        }
    if len(name) < 3:
        return exc.HTTPBadRequest(
            detail="Search term too short (Minimum 3 characters)")
    if searchtype == 'lev':
        result = request.dbsession.query(
            System, func.similarity(System.name, name).label('similarity')). \
            filter(System.name.ilike(f"{name}%")).order_by(
                func.similarity(System.name, name).desc()).limit(limit)
        for row in result:
            candidates.append({
                'name': row[0].name,
                'similarity': row[1],
                'id64': row[0].id64,
                'permit_required':
                True if row[0].id64 in perm_systems else False,
                'permit_name':
                checkpermitname(row[0].id64, permsystems, perm_systems)
            })
        return {
            'meta': {
                'name': name,
                'type': searchtype,
                'limit': limit
            },
            'data': candidates
        }
    # SECURITY FIX: the raw-SQL branches below interpolated user input into
    # the statements with f-strings (SQL injection). All user-supplied values
    # are now passed as bound parameters via text().bindparams().
    if searchtype == 'soundex':
        sql = text(
            "SELECT *, similarity(name, :name) AS similarity FROM systems "
            "WHERE soundex(name) = soundex(:name) ORDER BY "
            "similarity(name, :name) DESC LIMIT :limit"
        ).bindparams(name=name, limit=limit)
    if searchtype == 'meta':
        if 'sensitivity' not in request.params:
            sensitivity = 5
        else:
            sensitivity = request.params['sensitivity']
        # Also fixes the original's unquoted {name} interpolation here, which
        # produced invalid SQL for any non-numeric name.
        sql = text(
            "SELECT *, similarity(name, :name) AS similarity FROM systems "
            "WHERE metaphone(name, :sens) = metaphone(:name, :sens) "
            "ORDER BY similarity DESC LIMIT :limit"
        ).bindparams(name=name, sens=int(sensitivity), limit=limit)
    if searchtype == 'dmeta':
        sql = text(
            "SELECT *, similarity(name, :name) AS similarity FROM systems "
            "WHERE dmetaphone(name) = dmetaphone(:name) "
            "ORDER BY similarity DESC LIMIT :limit"
        ).bindparams(name=name, limit=limit)
    if searchtype == "fulltext":
        sql = text(
            "SELECT name, id64, similarity(name, :name) AS similarity "
            "FROM systems WHERE name LIKE :pattern "
            "ORDER BY similarity DESC LIMIT :limit"
        ).bindparams(name=name, pattern=f"{name}%", limit=limit)
    if not result:
        # We haven't gotten a ORM result yet, execute manual SQL.
        result = request.dbsession.execute(sql)
    if xhr:
        for row in result:
            candidates.append({row['name']})
        return candidates
    else:
        for row in result:
            candidates.append({
                'name': row['name'],
                'similarity': row['similarity'],
                'id64': row['id64'],
                'permit_required':
                True if row.id64 in perm_systems else False,
                'permit_name':
                checkpermitname(row.id64, permsystems, perm_systems)
            })
        return {
            'meta': {
                'name': name,
                'type': searchtype,
                'limit': limit
            },
            'data': candidates
        }
def mecha(request):
    """
    Mecha dedicated endpoint that tries to be smart about searching.

    Escalates through progressively more expensive strategies: exact match,
    case-insensitive match, trigram match for procedurally-generated names,
    soundex/dmetaphone with Levenshtein filtering, wildcard ILIKE, and
    finally a whole-table GIN trigram search.

    :param request: The Pyramid request object
    :return: A JSON response
    """
    if 'name' not in request.params:
        return exc.HTTPBadRequest(detail="Missing 'name' parameter.")
    name = unquote(request.params['name'])
    candidates = []
    permsystems = request.dbsession.query(Permits)
    perm_systems = []
    for system in permsystems:
        perm_systems.append(system.id64)
    if len(name) < 3:
        # Too short for trigram searches. Either return an exact match, or fail.
        query = request.dbsession.query(System).filter(System.name == name)
        for candidate in query:
            candidates.append({'name': candidate.name, 'similarity': 1,
                               'id64': candidate.id64,
                               'coords': candidate.coords,
                               'permit_required': True if candidate.id64 in perm_systems else False,
                               'permit_name': checkpermitname(candidate.id64, permsystems, perm_systems)
                               })
        if len(candidates) > 0:
            return {'meta': {'name': name, 'type': 'Perfect match'}, 'data': candidates}
        else:
            return exc.HTTPBadRequest(detail="Search term too short (Minimum 3 characters)")
    # Prevent SAPI from choking on a search that contains just a PG sector's mass code.
    m = pg_system_regex.match(name.strip())
    if m:
        return {'meta': {'error': 'Incomplete PG system name.', 'type': 'incomplete_name'}}
    # Check for immediate match, case insensitive.
    query = request.dbsession.query(System).filter(System.name.ilike(name))
    for candidate in query:
        candidates.append({'name': candidate.name, 'similarity': 1,
                           'id64': candidate.id64,
                           'coords': candidate.coords,
                           'permit_required': True if candidate.id64 in perm_systems else False,
                           'permit_name': checkpermitname(candidate.id64, permsystems, perm_systems)
                           })
    if len(candidates) > 0:
        return {'meta': {'name': name, 'type': 'Perfect match'}, 'data': candidates}
    if 'fast' in request.params:
        return {'meta': {'error': 'System not found. Query again without fast flag for in-depth search.',
                         'type': 'notfound'}}
    if is_pg_system_name(name):
        # If the system is a PGName, don't try soundex and dmeta first, as
        # they are most likely to fail.
        qtext = text("select *, similarity(lower(name), lower(:name)) as lev from systems where name % :name"
                     " ORDER BY lev DESC LIMIT 10")
        pmatch = request.dbsession.query(System, "lev").from_statement(qtext).params(name=name).all()
        for candidate in pmatch:
            candidates.append({'name': candidate[0].name, 'similarity': candidate[1],
                               'id64': candidate[0].id64,
                               'coords': candidate[0].coords,
                               'permit_required': True if candidate[0].id64 in perm_systems else False,
                               'permit_name': checkpermitname(candidate[0].id64, permsystems, perm_systems)
                               })
            if len(candidates) > 10:
                break
        if len(candidates) > 1:
            return {'meta': {'name': name, 'type': 'gin_trgm'}, 'data': candidates}
    # Try soundex and dmetaphone matches on the name, look for low levenshtein distances.
    qtext = text("select *, levenshtein(lower(name), lower(:name)) as lev from systems where dmetaphone(name) "
                 "= dmetaphone(:name) OR soundex(name) = soundex(:name) order by lev limit 10")
    query = request.dbsession.query(System, "lev").from_statement(qtext).params(name=name).all()
    for candidate in query:
        # BUG FIX: removed a stray debug print(candidate) that ran per row.
        if candidate[1] < 3:
            candidates.append({'name': candidate[0].name, 'distance': candidate[1],
                               'id64': candidate[0].id64,
                               'coords': candidate[0].coords,
                               'permit_required': True if candidate[0].id64 in perm_systems else False,
                               'permit_name': checkpermitname(candidate[0].id64, permsystems, perm_systems)
                               })
    if len(candidates) > 0:
        return {'meta': {'name': name, 'type': 'dmeta+soundex'}, 'data': candidates}
    # Try an ILIKE with wildcard on end. Slower.
    query = request.dbsession.query(System, func.similarity(System.name, name).label('similarity')).\
        filter(System.name.ilike(name + "%")).limit(5000).from_self().\
        order_by(func.similarity(System.name, name).desc())
    for candidate in query:
        candidates.append({'name': candidate[0].name, 'similarity': candidate[1],
                           'id64': candidate[0].id64,
                           'coords': candidate[0].coords,
                           'permit_required': True if candidate[0].id64 in perm_systems else False,
                           'permit_name': checkpermitname(candidate[0].id64, permsystems, perm_systems)
                           })
        if len(candidates) > 10:
            break
    if len(candidates) > 0:
        return {'meta': {'name': name, 'type': 'wildcard'}, 'data': candidates}
    # Try a GIN trigram similarity search on the entire database. Slow as hell.
    qtext = text("select *, similarity(lower(name), lower(:name)) as lev from systems where name % :name"
                 " ORDER BY lev DESC LIMIT 10")
    pmatch = request.dbsession.query(System, text("lev")).from_statement(qtext).params(name=name).all()
    # BUG FIX: .all() returns a list, so the original pmatch.count() raised
    # TypeError (list.count needs an argument); the except branch then
    # reported 'no_dbrows' even when matches existed. Iterate directly.
    for candidate in pmatch:
        candidates.append({'name': candidate[0].name, 'similarity': candidate[1],
                           'id64': candidate[0].id64,
                           'coords': candidate[0].coords,
                           'permit_required': True if candidate[0].id64 in perm_systems else False,
                           'permit_name': checkpermitname(candidate[0].id64, permsystems, perm_systems)
                           })
        if len(candidates) > 10:
            break
    if len(candidates) < 1:
        # We ain't got shit. Give up.
        return {'meta': {'error': 'System not found.', 'type': 'notfound'}}
    return {'meta': {'name': name, 'type': 'gin_trgm'}, 'data': candidates}
def similar(self):
    """Find similar objects.

    Yields merge candidates whose titles fuzzy-match this object's titles
    (PostgreSQL pg_trgm ``%`` operator), each scored by a product of
    attribute-agreement criteria (date, duration, country, name, title).

    Returns
    -------
    list of :obj:`ExternalObject`
        other objects that are similar to this one
    """
    from .value import Value

    session = object_session(self)

    # FIXME: use other_value aliased name instead of value_1
    # pg_trgm set_limit(): raise the similarity threshold used by the
    # `%` operator below to 0.6 for this session.
    session.execute("SELECT set_limit(0.6)")
    other_value = aliased(Value)

    # For every other object sharing a trigram-similar TITLE value,
    # accumulate the summed pg_trgm similarity as a base score.
    matches = (session.query(
        other_value.external_object_id,
        func.sum(func.similarity(Value.text, other_value.text)),
    ).join(
        Value,
        and_(
            Value.type == ValueType.TITLE,
            Value.external_object == self,
            Value.text % other_value.text,
        ),
    ).join(other_value.external_object).filter(
        ExternalObject.type == self.type).filter(
            other_value.type == ValueType.TITLE).group_by(
                other_value.external_object_id))

    @trace(logger)
    def links_overlap(a, b):
        # Platforms present in both link sets, excluding GLOBAL platforms
        # and platforms that explicitly allow overlapping links.
        platforms = set([li.platform for li in a]) & set([li.platform for li in b])
        return [
            p for p in platforms
            if p.type != PlatformType.GLOBAL and not p.allow_links_overlap
        ]

    # Keep only candidates that do not share a conflicting platform link.
    objects = [
        MergeCandidate(obj=self.id, into=v[0], score=v[1]) for v in matches
        if not links_overlap(
            list(
                session.query(ObjectLink).filter(
                    ObjectLink.external_object_id == v[0])),
            self.links,
        )
    ]

    def into_year(text):
        # Extract the first 4-digit run as a year; None when absent.
        m = re.search(r"(\d{4})", text)
        return int(m.group(1)) if m else None

    def into_float(i):
        try:
            return float(i)
        except ValueError:
            return None

    def curve(target, max_factor=3, min_factor=0.2):
        # Exponentially decaying factor: max_factor + min_factor at x == 0,
        # approaching min_factor as x grows; `target` is the halving distance.
        return lambda x: math.pow(2, -(x / target)) * (
            max_factor - min_factor) + min_factor

    def filter_and_pick(iterable, filter_, count, process=lambda x: x):
        # Take the `count` highest-cached_score values matching `filter_`,
        # mapped through `process`, dropping Nones produced by `process`.
        return itertools.islice(
            filter(
                None.__ne__,  # Filter out None values
                map(
                    lambda x: process(x.text),
                    sorted(filter(filter_, iterable),
                           key=attrgetter('cached_score'),
                           reverse=True),
                ),
            ),
            count)

    @trace(logger)
    def numeric_attr(mine, their, type, curve, process=into_float, count=3):
        # NOTE: parameter `curve` shadows the outer `curve` factory on purpose —
        # callers pass an already-built curve function.
        my_attrs = set(
            filter_and_pick(
                mine.values,
                filter_=lambda attr: attr.type == type,
                count=count,
                process=process,
            ))
        their_attrs = set(
            filter_and_pick(
                their.values,
                filter_=lambda attr: attr.type == type,
                count=count,
                process=process,
            ))

        if not my_attrs or not their_attrs:
            # One of the object does not have the attribute, this should
            # not influence anything
            return 1

        # Score from the closest pair of values across the two objects.
        min_diff = min(abs(x - y) for x in my_attrs for y in their_attrs)
        return curve(min_diff)

    @trace(logger)
    def text_attr(mine, their, type,
                  process=lambda n: n.lower(),
                  filter_=lambda n: True,
                  count=3):
        my_attrs = list(
            filter_and_pick(
                mine.values,
                filter_=lambda attr: attr.type == type and filter_(attr.text),
                count=count,
                process=process,
            ))
        their_attrs = list(
            filter_and_pick(
                their.values,
                filter_=lambda attr: attr.type == type and filter_(attr.text),
                count=count,
                process=process,
            ))

        if not my_attrs or not their_attrs:
            # One of the object does not have the attribute, this should
            # not influence anything
            return 1

        def sanitize(s):
            # Compare on lowercase alphanumerics only (accents folded).
            return "".join(filter(str.isalnum, unidecode(s).lower()))

        matching = len([
            True for x in my_attrs for y in their_attrs
            if x is not None and y is not None and sanitize(x) == sanitize(y)
        ])

        # log2((matching + 1) / 3) + 2: < 1 penalizes zero matches,
        # grows slowly with additional matches.
        return math.log2((matching + 1) / 3) + 2

    criterias = [
        lambda self, their: numeric_attr(
            self, their, ValueType.DATE, curve(2), into_year, count=2),
        lambda self, their: numeric_attr(
            self, their, ValueType.DURATION, curve(5), into_float, count=2),
        lambda self, their: text_attr(self, their, ValueType.COUNTRY,
                                      filter_=lambda x: len(x) == 2,
                                      count=3),
        lambda self, their: text_attr(self, their, ValueType.NAME, count=3),
        lambda self, their: text_attr(self, their, ValueType.TITLE, count=5),
    ]

    for candidate in objects:
        # Never propose merging an object into itself.
        if candidate.obj == candidate.into:
            continue

        factor = 1
        # Eager-load values (with cached_score) so the criteria below
        # don't trigger per-attribute queries.
        their = session.query(ExternalObject).options(
            joinedload(ExternalObject.values).undefer(Value.cached_score),
        ).get(candidate.into)
        for criteria in criterias:
            factor *= criteria(self, their)
        yield MergeCandidate(obj=candidate.obj, into=candidate.into,
                             score=factor)
def get_fuzzy_matching_query(session, context):
    """Get the query for fuzzy tag matching.

    All stickers that have been found in strict search are excluded via left outer join.
    The stickers are sorted by score, StickerSet.name and Sticker.file_id in this respective order.

    Score is calculated like this:
    + average tag 'similarity_value' (0-1) over the searched tags
    + greatest name/title 'similarity_value' (0-1) per tag, when above threshold
    + 0.3 if text similar to a tag found in OCR text
    """
    user = context.user
    tags = context.tags
    nsfw = context.nsfw
    furry = context.furry

    # Minimum pg_trgm similarity for a tag/name/title/text to count at all.
    threshold = 0.3
    # Create a query for each tag, which fuzzy matches all tags and computes the distance
    # Todo:
    # 1. Fuzzy check on tag table
    # 2. Join with similarity sum on sticker
    similarities = []
    threshold_check = []
    for tag in tags:
        similarities.append(func.similarity(Tag.name, tag))
        threshold_check.append(func.similarity(Tag.name, tag) >= threshold)

    # Per Tag.name: the best similarity against any searched tag,
    # restricted to the user's language setting (international or default).
    tag_query = session.query(
        Tag.name,
        greatest(*similarities).label('tag_similarity'),
    ) \
        .filter(or_(*threshold_check)) \
        .filter(or_(Tag.international == user.international,
                    Tag.international.is_(False))) \
        .group_by(Tag.name) \
        .subquery('tag_query')

    # Get all stickers which match a tag, together with the accumulated score of the fuzzy matched tags.
    # avg (not sum) so stickers with many weakly-similar tags don't dominate.
    tag_score = func.avg(tag_query.c.tag_similarity).label("tag_score")
    tag_score_subq = session.query(sticker_tag.c.sticker_file_id, tag_score) \
        .join(tag_query, sticker_tag.c.tag_name == tag_query.c.name) \
        .group_by(sticker_tag.c.sticker_file_id) \
        .subquery("tag_score_subq")

    # Condition for matching sticker set names and titles
    # Create a subquery which get's the greatest similarity for name and title
    # Default to 0 if no element is found
    sticker_set_score = []
    sticker_set_subqs = []
    for tag in tags:
        set_score_subq = session.query(
            greatest(
                func.similarity(StickerSet.name, tag),
                func.similarity(StickerSet.title, tag),
            ).label('set_score'),
            StickerSet.name) \
            .filter(or_(
                func.similarity(StickerSet.name, tag) >= threshold,
                func.similarity(StickerSet.title, tag) >= threshold,
            )) \
            .filter(StickerSet.deleted.is_(False)) \
            .filter(StickerSet.banned.is_(False)) \
            .filter(StickerSet.reviewed.is_(True))

        # Handle default nsfw/furry stuff search
        if nsfw:
            set_score_subq = set_score_subq.filter(StickerSet.nsfw.is_(True))
        elif user.nsfw is False:
            set_score_subq = set_score_subq.filter(StickerSet.nsfw.is_(False))
        if furry:
            set_score_subq = set_score_subq.filter(StickerSet.furry.is_(True))
        elif user.furry is False:
            set_score_subq = set_score_subq.filter(StickerSet.furry.is_(False))

        set_score_subq = set_score_subq.subquery()
        sticker_set_subqs.append(set_score_subq)
        # coalesce: sets without a similar name/title contribute 0.
        sticker_set_score.append(func.coalesce(set_score_subq.c.set_score, 0))

    # Condition for matching sticker text
    # OCR text matches add a flat `threshold` (0.3) bonus, not the similarity.
    text_score = []
    for tag in tags:
        text_score.append(
            case(
                [(func.similarity(Sticker.text, tag) >= threshold, threshold)],
                else_=0))

    # Compute the whole score
    score = cast(func.coalesce(tag_score_subq.c.tag_score, 0), Numeric)
    for condition in sticker_set_score + text_score:
        score = score + condition
    score = score.label('score')

    # Query all strict matching results to exclude them.
    strict_subquery = get_strict_matching_query(session, context) \
        .subquery('strict_subquery')

    # Compute the score for all stickers and filter nsfw stuff
    # We do the score computation in a subquery, since it would otherwise be recomputed for statement.
    matching_stickers = session.query(Sticker.file_id, StickerSet.name, score) \
        .outerjoin(tag_score_subq,
                   Sticker.file_id == tag_score_subq.c.sticker_file_id) \
        .outerjoin(strict_subquery,
                   Sticker.file_id == strict_subquery.c.file_id) \
        .join(Sticker.sticker_set)

    # Add the sticker sets with matching name/title via outer join (performance)
    for subq in sticker_set_subqs:
        matching_stickers = matching_stickers.outerjoin(
            subq, Sticker.sticker_set_name == subq.c.name)

    # strict_subquery.c.file_id IS NULL == "not already in strict results".
    matching_stickers = matching_stickers.filter(Sticker.banned.is_(False)) \
        .filter(strict_subquery.c.file_id.is_(None)) \
        .filter(StickerSet.deleted.is_(False)) \
        .filter(StickerSet.banned.is_(False)) \
        .filter(StickerSet.reviewed.is_(True)) \
        .filter(score > 0)

    # Handle default nsfw/furry stuff search
    if nsfw:
        matching_stickers = matching_stickers.filter(StickerSet.nsfw.is_(True))
    elif user.nsfw is False:
        matching_stickers = matching_stickers.filter(
            StickerSet.nsfw.is_(False))
    if furry:
        matching_stickers = matching_stickers.filter(
            StickerSet.furry.is_(True))
    elif user.furry is False:
        matching_stickers = matching_stickers.filter(
            StickerSet.furry.is_(False))

    # Only query default language sticker sets
    if not user.international:
        matching_stickers = matching_stickers.filter(
            StickerSet.international.is_(False))

    # Only query deluxe sticker sets
    if user.deluxe:
        matching_stickers = matching_stickers.filter(
            StickerSet.deluxe.is_(True))

    # Deterministic ordering: best score first, then set name, then file id.
    matching_stickers = matching_stickers.order_by(
        score.desc(), StickerSet.name, Sticker.file_id)

    return matching_stickers
def similar(self):
    """Find similar objects.

    Yields merge candidates whose TITLE values trigram-match this object's
    titles; the base similarity score is boosted by attribute agreement
    (date, duration, country, title).

    Returns
    -------
    list of :obj:`ExternalObject`
        other objects that are similar to this one
    """
    # FIXME: use other_value aliased name instead of value_1
    # pg_trgm set_limit(): raise the threshold of the `%` operator to 0.6.
    db.session.execute('SELECT set_limit(0.6)')
    other_value = aliased(Value)
    # For each other object with a trigram-similar TITLE, sum the
    # pg_trgm similarity as the candidate's base score.
    matches = db.session.query(
        other_value.external_object_id,
        func.sum(func.similarity(Value.text, other_value.text))
    )\
        .join(Value, and_(
            Value.type == ValueType.TITLE,
            Value.external_object == self,
            Value.text % other_value.text,
        ))\
        .join(other_value.external_object)\
        .filter(ExternalObject.type == self.type)\
        .filter(other_value.type == ValueType.TITLE)\
        .group_by(other_value.external_object_id)

    def links_overlap(a, b):
        # Platforms shared by both link collections, ignoring GLOBAL ones.
        platforms = set([l.platform for l in a]) & set([l.platform for l in b])
        return [p for p in platforms if p.type != PlatformType.GLOBAL]

    # Drop candidates that already share a non-global platform link.
    objects = [
        MergeCandidate(obj=self.id, into=v[0], score=v[1])
        for v in matches
        if not links_overlap(
            ObjectLink.query.filter(
                ObjectLink.external_object_id == v[0]),
            self.links)
    ]

    def into_year(text):
        # First 4-digit run interpreted as a year; None when absent.
        m = re.search(r'(\d{4})', text)
        return int(m.group(1)) if m else None

    def into_float(i):
        try:
            return float(i)
        except ValueError:
            return None

    def numeric_attr(mine, their, type, process=into_float):
        # Count value pairs of the given type that differ by less than 1.
        my_attrs = set([
            process(attr.text)
            for attr in mine.attributes if attr.type == type
        ])
        their_attrs = set([
            process(attr.text)
            for attr in their.attributes if attr.type == type
        ])
        return len([
            True for x in my_attrs for y in their_attrs
            if x is not None and y is not None and abs(x - y) < 1
        ])

    def text_attr(mine, their, type, process=lambda n: n.lower()):
        # Count exact (case-folded) value matches of the given type.
        my_attrs = set([
            process(attr.text)
            for attr in mine.attributes if attr.type == type
        ])
        their_attrs = set([
            process(attr.text)
            for attr in their.attributes if attr.type == type
        ])
        return len([
            True for x in my_attrs for y in their_attrs
            if x is not None and y is not None and x == y
        ])

    criterias = [
        lambda self, their: numeric_attr(self, their, ValueType.DATE,
                                         into_year),
        lambda self, their: numeric_attr(self, their, ValueType.DURATION,
                                         into_float),
        lambda self, their: text_attr(self, their, ValueType.COUNTRY),
        lambda self, their: text_attr(self, their, ValueType.TITLE),
    ]

    for candidate in objects:
        factor = 1
        their = ExternalObject.query.get(candidate.into)
        for criteria in criterias:
            # NOTE(review): pow(2, log2(1 + c)) is mathematically just
            # (1 + c); each matching criterion multiplies the score.
            factor *= math.pow(2, math.log2(1 + criteria(self, their)))
        yield MergeCandidate(obj=candidate.obj, into=candidate.into,
                             score=candidate.score * factor)
def get_AllTaxrefNameByListe(id_liste): """ Route utilisée pour les autocompletes Si le paramètre search_name est passé, la requête SQL utilise l'algorithme des trigrames pour améliorer la pertinence des résultats Route utilisé par le mobile pour remonter la liste des taxons params URL: - id_liste : identifiant de la liste params GET (facultatifs): - search_name : nom recherché. Recherche basé sur la fonction ilike de sql avec un remplacement des espaces par % - regne : filtre sur le regne INPN - group2_inpn : filtre sur le groupe 2 de l'INPN - limit: nombre de résultat - offset: numéro de la page """ search_name = request.args.get("search_name") q = ( db.session.query(VMTaxrefListForautocomplete) .join(BibNoms, BibNoms.cd_nom == VMTaxrefListForautocomplete.cd_nom) .join( CorNomListe, and_( CorNomListe.id_nom == BibNoms.id_nom, CorNomListe.id_liste == id_liste ), ) ) if search_name: q = q.add_columns( func.similarity( VMTaxrefListForautocomplete.search_name, search_name ).label("idx_trgm") ) search_name = search_name.replace(" ", "%") q = q.filter( VMTaxrefListForautocomplete.search_name.ilike("%" + search_name + "%") ).order_by(desc("idx_trgm")) regne = request.args.get("regne") if regne: q = q.filter(VMTaxrefListForautocomplete.regne == regne) group2_inpn = request.args.get("group2_inpn") if group2_inpn: q = q.filter(VMTaxrefListForautocomplete.group2_inpn == group2_inpn) q = q.order_by( desc(VMTaxrefListForautocomplete.cd_nom == VMTaxrefListForautocomplete.cd_ref) ) limit = request.args.get("limit", 20, int) page = request.args.get("page", 1, int) offset = request.args.get("offset", 0, int) (limit, offset, page) = calculate_offset_page(limit, offset, page) data = q.limit(limit).offset(offset).all() if search_name: return [d[0].as_dict() for d in data] else: return [d.as_dict() for d in data]
def get_fuzzy_matching_query(session, context):
    """Query all fuzzy matching stickers.

    Builds a query returning ``(file_id, score, title)`` rows for stickers
    whose tags, set name/title or OCR text trigram-match the searched tags,
    excluding everything the strict search already found.

    :param session: Database session the query is built on.
    :param context: Search context carrying ``user``, ``tags``, ``nsfw``
        and ``furry``.
    :return: An unexecuted query ordered by score desc, title, file_id.
    """
    user = context.user
    tags = context.tags
    nsfw = context.nsfw
    furry = context.furry

    # Minimum pg_trgm similarity for a match to count at all.
    threshold = 0.3
    # Create a query for each tag, which fuzzy matches all tags and computes the distance
    matching_tags = []
    for tag in tags:
        tag_query = session.query(
            sticker_tag.c.tag_name,
            func.similarity(sticker_tag.c.tag_name, tag).label('tag_similarity')
        ) \
            .join(Tag, sticker_tag.c.tag_name == Tag.name) \
            .filter(func.similarity(sticker_tag.c.tag_name, tag) >= threshold) \
            .filter(or_(Tag.is_default_language == user.is_default_language,
                        Tag.is_default_language.is_(True)))
        matching_tags.append(tag_query)

    # Union all fuzzy matched tags
    if len(matching_tags) > 1:
        matching_tags = matching_tags[0].union(*matching_tags[1:])
        matching_tags = matching_tags.subquery('matching_tags')
        # Due to using a union, we need to use another column name as below
        tag_name_column = matching_tags.c.sticker_tag_tag_name.label('tag_name')
    else:
        matching_tags = matching_tags[0]
        matching_tags = matching_tags.subquery('matching_tags')
        # Normal single tag search column
        tag_name_column = matching_tags.c.tag_name.label('tag_name')

    # Group all matching tags to get the max score of the best matching searched tag.
    fuzzy_subquery = session.query(
        tag_name_column,
        func.max(matching_tags.c.tag_similarity).label('tag_similarity')) \
        .group_by(tag_name_column) \
        .subquery()

    # Get all stickers which match a tag, together with the accumulated score of the fuzzy matched tags.
    fuzzy_score = func.sum(fuzzy_subquery.c.tag_similarity).label("fuzzy_score")
    tag_subq = session.query(sticker_tag.c.sticker_file_id, fuzzy_score) \
        .join(fuzzy_subquery,
              sticker_tag.c.tag_name == fuzzy_subquery.c.tag_name) \
        .group_by(sticker_tag.c.sticker_file_id) \
        .subquery("tag_subq")

    # Condition for matching sticker set names and titles
    # Adds the actual similarity value when name or title crosses the threshold.
    set_conditions = []
    for tag in tags:
        set_conditions.append(
            case([
                (func.similarity(StickerSet.name, tag) >= threshold,
                 func.similarity(StickerSet.name, tag)),
                (func.similarity(StickerSet.title, tag) >= threshold,
                 func.similarity(StickerSet.title, tag)),
            ], else_=0))

    # Condition for matching sticker text
    # OCR text matches add a flat 0.30 bonus per matching tag.
    text_conditions = []
    for tag in tags:
        text_conditions.append(
            case([(func.similarity(Sticker.text, tag) >= threshold, 0.30)],
                 else_=0))

    # Compute the whole score
    score = cast(func.coalesce(tag_subq.c.fuzzy_score, 0), Numeric)
    for condition in set_conditions + text_conditions:
        score = score + condition
    score = score.label('score')

    # Query all strict matching results to exclude them.
    strict_subquery = get_strict_matching_query(session, context) \
        .subquery('strict_subquery')

    # Compute the score for all stickers and filter nsfw stuff
    # We do the score computation in a subquery, since it would otherwise be recomputed for statement.
    intermediate_query = session.query(Sticker.file_id, StickerSet.title, score) \
        .outerjoin(tag_subq, Sticker.file_id == tag_subq.c.sticker_file_id) \
        .outerjoin(strict_subquery,
                   Sticker.file_id == strict_subquery.c.file_id) \
        .join(Sticker.sticker_set) \
        .filter(Sticker.banned.is_(False)) \
        .filter(strict_subquery.c.file_id.is_(None)) \
        .filter(StickerSet.deleted.is_(False)) \
        .filter(StickerSet.banned.is_(False)) \
        .filter(StickerSet.reviewed.is_(True)) \
        .filter(StickerSet.nsfw.is_(nsfw)) \
        .filter(StickerSet.furry.is_(furry))

    # Only query default language sticker sets
    if user.is_default_language:
        intermediate_query = intermediate_query.filter(
            StickerSet.is_default_language.is_(True))

    # Only query deluxe sticker sets
    if user.deluxe:
        intermediate_query = intermediate_query.filter(
            StickerSet.deluxe.is_(True))

    intermediate_query = intermediate_query.subquery('fuzzy_intermediate')

    # Now filter and sort by the score. Ignore the score threshold when searching for nsfw
    # BUGFIX: the original had a stray trailing line-continuation backslash
    # after the .order_by(...) call (immediately before `return`), which is
    # invalid Python; the continuation is removed here.
    matching_stickers = session.query(intermediate_query.c.file_id,
                                      intermediate_query.c.score,
                                      intermediate_query.c.title) \
        .filter(or_(intermediate_query.c.score > 0, nsfw, furry)) \
        .order_by(intermediate_query.c.score.desc(),
                  intermediate_query.c.title,
                  intermediate_query.c.file_id)

    return matching_stickers
def fulltextsearch(self):
    """Full-text search endpoint returning a GeoJSON FeatureCollection.

    Reads ``query``, ``limit``, ``partitionlimit``, ``interface`` and
    ``ranksystem`` from the request params, filters ``FullTextSearch``
    rows by tsquery match, visibility (public/role), interface and
    language, ranks them, and serializes matches as GeoJSON features.
    """
    lang = locale_negotiator(self.request)

    try:
        language = self.languages[lang]
    except KeyError:
        return HTTPInternalServerError(
            detail="{0!s} not defined in languages".format(lang))

    if "query" not in self.request.params:
        return HTTPBadRequest(detail="no query")
    terms = self.request.params.get("query")

    maxlimit = self.settings.get("maxlimit", 200)

    try:
        limit = int(
            self.request.params.get("limit",
                                    self.settings.get("defaultlimit", 30)))
    except ValueError:
        return HTTPBadRequest(detail="limit value is incorrect")
    if limit > maxlimit:
        limit = maxlimit

    try:
        partitionlimit = int(self.request.params.get("partitionlimit", 0))
    except ValueError:
        return HTTPBadRequest(detail="partitionlimit value is incorrect")
    if partitionlimit > maxlimit:
        partitionlimit = maxlimit

    # Build a prefix-match tsquery: each cleaned word becomes "word:*"
    # and the words are AND-ed together with "&".
    terms_ts = "&".join(
        w + ":*" for w in IGNORED_CHARS_RE.sub(" ", terms).split(" ")
        if w != "")
    _filter = FullTextSearch.ts.op("@@")(func.to_tsquery(
        language, terms_ts))

    # Visibility: anonymous users see only public rows; authenticated
    # users additionally see role-less rows and rows matching their roles.
    if self.request.user is None:
        _filter = and_(_filter, FullTextSearch.public.is_(True))
    else:
        _filter = and_(
            _filter,
            or_(
                FullTextSearch.public.is_(True),
                FullTextSearch.role_id.is_(None),
                FullTextSearch.role_id.in_(
                    [r.id for r in self.request.user.roles]),
            ),
        )

    if "interface" in self.request.params:
        _filter = and_(
            _filter,
            or_(
                FullTextSearch.interface_id.is_(None),
                FullTextSearch.interface_id == self._get_interface_id(
                    self.request.params["interface"]),
            ),
        )
    else:
        _filter = and_(_filter, FullTextSearch.interface_id.is_(None))

    _filter = and_(
        _filter,
        or_(FullTextSearch.lang.is_(None), FullTextSearch.lang == lang))

    rank_system = self.request.params.get("ranksystem")
    if rank_system == "ts_rank_cd":
        # The numbers used in ts_rank_cd() below indicate a normalization method.
        # Several normalization methods can be combined using |.
        # 2 divides the rank by the document length
        # 8 divides the rank by the number of unique words in document
        # By combining them, shorter results seem to be preferred over longer ones
        # with the same ratio of matching words. But this relies only on testing it
        # and on some assumptions about how it might be calculated
        # (the normalization is applied two times with the combination of 2 and 8,
        # so the effect on at least the one-word-results is therefore stronger).
        rank = func.ts_rank_cd(FullTextSearch.ts,
                               func.to_tsquery(language, terms_ts), 2 | 8)
    else:
        # Use similarity ranking system from module pg_trgm.
        rank = func.similarity(FullTextSearch.label, terms)

    if partitionlimit:
        # Here we want to partition the search results based on
        # layer_name and limit each partition.
        row_number = (func.row_number().over(
            partition_by=FullTextSearch.layer_name,
            order_by=(desc(rank), FullTextSearch.label)).label("row_number"))
        subq = DBSession.query(FullTextSearch).add_columns(
            row_number).filter(_filter).subquery()
        query = DBSession.query(subq.c.id, subq.c.label, subq.c.params,
                                subq.c.layer_name, subq.c.the_geom,
                                subq.c.actions)
        query = query.filter(subq.c.row_number <= partitionlimit)
    else:
        query = DBSession.query(FullTextSearch).filter(_filter)
        query = query.order_by(desc(rank))
        query = query.order_by(FullTextSearch.label)

    # NOTE(review): `limit` is applied to the unpartitioned branch here;
    # assumed (from statement order in the source) that it belongs after
    # the if/else so it caps both branches — confirm against upstream.
    query = query.limit(limit)
    objs = query.all()

    features = []
    for o in objs:
        properties = {"label": o.label}
        if o.layer_name is not None:
            properties["layer_name"] = o.layer_name
        if o.params is not None:
            properties["params"] = o.params
        if o.actions is not None:
            properties["actions"] = o.actions
        # Default action: rows tied to a layer but without explicit
        # actions get a single "add_layer" action.
        if o.actions is None and o.layer_name is not None:
            properties["actions"] = [{
                "action": "add_layer",
                "data": o.layer_name
            }]

        if o.the_geom is not None:
            geom = to_shape(o.the_geom)
            feature = Feature(id=o.id,
                              geometry=geom,
                              properties=properties,
                              bbox=geom.bounds)
            features.append(feature)
        else:
            feature = Feature(id=o.id, properties=properties)
            features.append(feature)

    return FeatureCollection(features)
async def get_similar_idol(session, idol): statement = select(Idol).order_by( func.similarity(Idol.group + " " + Idol.name, str(idol)).desc()) result = (await session.execute(statement)).first() return result[0] if result else None