def search_locations(search_string): q = db.session.query(Location).filter(db.text('location.search_column @@ to_tsquery(:terms)')) q = q.params(terms=search_string) q = q.order_by(db.text('ts_rank_cd(location.search_column, to_tsquery(:terms)) DESC')) q = q.add_column(func.ts_headline('pg_catalog.english',db.text('location.body_text'),func.to_tsquery(search_string),'MaxFragments=1, StartSel = <strong>, StopSel = </strong>')) return [{'object':location_object_to_dict(location), 'fragment':fragment} for location, fragment in q]
def index(self): current_page = 'page' in request.params \ and int(request.params['page']) or 1 q = request.params['q'] c.results = webhelpers.paginate.Page( Session.query(Post, Topic, User). \ options(orm.joinedload(Post.avatar)). \ filter(Topic.id==Post.topic_id). \ filter(User.id==Post.user_id). \ filter('tsv @@ plainto_tsquery(:terms)'). \ params(terms=q). \ add_column(func.ts_headline('pg_catalog.simple', Post.body, func.plainto_tsquery(q), 'HighlightAll=TRUE, ' \ 'StartSel=<strong>, ' \ 'StopSel=</strong>')). \ order_by(Post.created_at.desc()), page=current_page, items_per_page=40, url=webhelpers.paginate.PageURL_WebOb(request)) c.title = 'rezultati iskanja' return render('/search.mako')
def highlight_request (self, query, args): highlighted_fields = [(Release.description, 'h_description'), (Supplier.name, 'h_supplier_name'), (Buyer.name, 'h_buyer_name'), (Release.dossier, 'h_dossier'), (Release.decision, 'h_decision') ] if 'q' in args and args['q'] != None and 'highlight' in args and args['highlight'] == True: for (field, label) in highlighted_fields: query = query.add_column(func.ts_headline(app.config["FTS_LANG"], field, func.plainto_tsquery(app.config["FTS_LANG"], args['q']), 'HighlightAll=TRUE, StartSel="%s", StopSel = "%s"' % (app.config["START_HIGHLIGHT"], app.config["END_HIGHLIGHT"])) .label(label)) return query
def _gen_search_elements(query, title_only, next_rank, page_size, A, B=[], C=[], D=[]): """ Given a query and four sets of fields, (A, B, C, D), generates a bunch of postgres expressions for full text search. The four sets are in decreasing order of "importance" for ranking. A should be the "title", the others can be anything. If title_only=True, we only perform a trigram search against A only """ if not title_only: # a postgres tsquery object that can be used to match against a tsvector tsq = func.websearch_to_tsquery(REGCONFIG, query) # the tsvector object that we want to search against with our tsquery tsv = _build_tsv(A, B, C, D) # document to generate snippet from doc = _build_doc(A, B, C, D) title = _build_doc(A) # trigram based text similarity between title and query string sim = _similarity(query, title) # ranking algo, weigh the similarity a lot, the text-based ranking less rank = (TRI_SIMILARITY_WEIGHT * sim + func.ts_rank_cd(tsv, tsq)).label("rank") # the snippet with results highlighted snippet = func.ts_headline(REGCONFIG, doc, tsq, "StartSel=**,StopSel=**").label("snippet") def do_search_query(orig_query): """ Does the right search filtering, limiting, and ordering for the query """ return (orig_query.filter( or_(tsv.op("@@")(tsq), sim > TRI_SIMILARITY_THRESHOLD)).filter( rank <= next_rank if next_rank is not None else True). order_by(rank.desc()).limit(page_size + 1).all()) else: title = _build_doc(A) # trigram based text similarity between title and query string sim = _similarity(query, title) # ranking algo, weigh the similarity a lot, the text-based ranking less rank = sim.label("rank") # used only for headline tsq = func.websearch_to_tsquery(REGCONFIG, query) doc = _build_doc(A, B, C, D) # the snippet with results highlighted snippet = func.ts_headline(REGCONFIG, doc, tsq, "StartSel=**,StopSel=**").label("snippet") def do_search_query(orig_query): """ Does the right search filtering, limiting, and ordering for the query """ return (orig_query.filter(sim > TRI_SIMILARITY_THRESHOLD).filter( rank <= next_rank if next_rank is not None else True).order_by( rank.desc()).limit(page_size + 1).all()) return rank, snippet, do_search_query
def do_search(search_query, offset, result_count, new_domains, framework_slug): try: sort_dir = list(search_query['sort'][0].values())[0]['order'] except (KeyError, IndexError): sort_dir = 'asc' try: sort_by = list(search_query['sort'][0].values())[0]['sort_by'] except (KeyError, IndexError): sort_by = None try: terms = search_query['query']['filtered']['filter']['terms'] except (KeyError, IndexError): terms = {} roles_list = None seller_types_list = None if terms: new_domains = 'prices.serviceRole.role' not in terms try: if new_domains: roles_list = terms['domains.assessed'] else: roles = terms['prices.serviceRole.role'] roles_list = set(_['role'][7:] for _ in roles) except KeyError: pass try: seller_types_list = terms['seller_types'] except: # noqa pass try: search_term = search_query['query']['match_phrase_prefix']['name'] except KeyError: search_term = '' EXCLUDE_LEGACY_ROLES = not current_app.config['LEGACY_ROLE_MAPPING'] if new_domains: q = db.session.query(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \ .outerjoin(SupplierFramework).outerjoin(Framework) else: q = db.session.query(Supplier).outerjoin(PriceSchedule).outerjoin(ServiceRole) \ .outerjoin(SupplierFramework).outerjoin(Framework) q = q.filter( Supplier.status != 'deleted', Supplier.abn != Supplier.DUMMY_ABN, or_(Framework.slug == framework_slug, ~Supplier.frameworks.any())) tsquery = None if search_term: if any(c in search_term for c in ['#', '-', '_', '/', '\\']): tsquery = func.phraseto_tsquery(search_term) elif ' ' in search_term: tsquery = func.plainto_tsquery(search_term) else: tsquery = func.to_tsquery(search_term + ":*") q = q.add_column( func.ts_headline( 'english', func.concat(Supplier.summary, ' ', Supplier.data['tools'].astext, ' ', Supplier.data['methodologies'].astext, ' ', Supplier.data['technologies'].astext, ''), tsquery, 'MaxWords=25, MinWords=20, ShortWord=3, HighlightAll=FALSE, MaxFragments=1' )) q = q.group_by(Supplier.id) try: code = search_query['query']['term']['code'] q = q.filter(Supplier.code == code) except KeyError: pass if roles_list is not None: if new_domains: if EXCLUDE_LEGACY_ROLES: d_agg = postgres.array_agg(cast(Domain.name, TEXT)) q = q.filter(SupplierDomain.status == 'assessed') q = q.having(d_agg.contains(array(roles_list))) else: sr_agg = postgres.array_agg( cast(func.substring(ServiceRole.name, 8), TEXT)) q = q.having(sr_agg.contains(array(roles_list))) if seller_types_list is not None and 'recruiter' in seller_types_list: q = q.filter(Supplier.is_recruiter == 'true') seller_types_list.remove('recruiter') if len(seller_types_list) == 0: seller_types_list = None if seller_types_list is not None: selected_seller_types = select( [postgres.array_agg(column('key'))], from_obj=func.json_each_text(Supplier.data[('seller_type', )]), whereclause=cast(column('value'), Boolean)).as_scalar() q = q.filter(selected_seller_types.contains(array(seller_types_list))) if sort_by: if sort_by == 'latest': ob = [desc(Supplier.last_update_time)] else: ob = [asc(Supplier.name)] else: if sort_dir == 'desc': ob = [desc(Supplier.name)] else: ob = [asc(Supplier.name)] if search_term: ob = [desc(func.ts_rank_cd(Supplier.text_vector, tsquery))] + ob q = q.filter(Supplier.text_vector.op('@@')(tsquery)) q = q.order_by(*ob) raw_results = list(q) results = [] for x in range(len(raw_results)): if type(raw_results[x]) is Supplier: result = raw_results[x] else: result = raw_results[x][0] if raw_results[x][1] is not None and raw_results[x][1] != '': result.summary = raw_results[x][1] results.append(result) sliced_results = results[offset:(offset + result_count)] q = db.session.query(Supplier.code, Supplier.name, Supplier.summary, Supplier.is_recruiter, Supplier.data, Domain.name.label('domain_name'), SupplierDomain.status.label('domain_status'))\ .outerjoin(SupplierDomain, Domain)\ .filter(Supplier.id.in_([sr.id for sr in sliced_results]))\ .order_by(Supplier.name) suppliers = [r._asdict() for r in q] sliced_results = [] for key, group in groupby(suppliers, key=itemgetter('code')): supplier = group.next() supplier['seller_type'] = supplier.get( 'data') and supplier['data'].get('seller_type') supplier['domains'] = {'assessed': [], 'unassessed': []} for s in chain([supplier], group): domain, status = s['domain_name'], s['domain_status'] if domain: if status == 'assessed': supplier['domains']['assessed'].append(domain) else: supplier['domains']['unassessed'].append(domain) for e in ['domain_name', 'domain_status', 'data']: supplier.pop(e, None) sliced_results.append(supplier) return sliced_results, len(results)
def casestudies_search(): search_query = get_json_from_request() offset = get_nonnegative_int_or_400(request.args, 'from', 0) result_count = get_positive_int_or_400( request.args, 'size', current_app.config['DM_API_SUPPLIERS_PAGE_SIZE']) sort_dir = search_query.get('sort_dir', 'asc') sort_by = search_query.get('sort_by', None) domains = search_query.get('domains', None) seller_types = search_query.get('seller_types', None) search_term = search_query.get('search_term', None) framework_slug = request.args.get('framework', 'digital-marketplace') q = db.session.query(CaseStudy).join(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \ .outerjoin(SupplierFramework).outerjoin(Framework) q = q.filter( Supplier.status != 'deleted', or_(Framework.slug == framework_slug, ~Supplier.frameworks.any())) tsquery = None if search_term: if ' ' in search_term: tsquery = func.plainto_tsquery(search_term) else: tsquery = func.to_tsquery(search_term + ":*") q = q.add_column( func.ts_headline( 'english', func.concat(CaseStudy.data['approach'].astext, ' ', CaseStudy.data['role'].astext), tsquery, 'MaxWords=150, MinWords=75, ShortWord=3, HighlightAll=FALSE, FragmentDelimiter=" ... " ' )) else: q = q.add_column("''") q = q.add_column(Supplier.name) q = q.add_column(postgres.array_agg(Supplier.data)) q = q.group_by(CaseStudy.id, Supplier.name) if domains: d_agg = postgres.array_agg(cast(Domain.name, TEXT)) q = q.having(d_agg.contains(array(domains))) if seller_types: selected_seller_types = select( [postgres.array_agg(column('key'))], from_obj=func.json_each_text(Supplier.data[('seller_type', )]), whereclause=cast(column('value'), Boolean)).as_scalar() q = q.filter(selected_seller_types.contains(array(seller_types))) if sort_dir in ('desc', 'z-a'): ob = [desc(CaseStudy.data['title'].astext)] else: ob = [asc(CaseStudy.data['title'].astext)] if search_term: ob = [ desc( func.ts_rank_cd( func.to_tsvector( func.concat( Supplier.name, CaseStudy.data['title'].astext, CaseStudy.data['approach'].astext)), tsquery)) ] + ob condition = func.to_tsvector( func.concat(Supplier.name, CaseStudy.data['title'].astext, CaseStudy.data['approach'].astext)).op('@@')(tsquery) q = q.filter(condition) q = q.order_by(*ob) raw_results = list(q) results = [] for x in range(len(raw_results)): result = raw_results[x][0].serialize() if raw_results[x][1] is not None and raw_results[x][1] != '': result['approach'] = raw_results[x][1] if raw_results[x][2] is not None: result['supplierName'] = raw_results[x][2] if raw_results[x][3] is not None and raw_results[x][3][0] is not None: result['seller_type'] = raw_results[x][3][0].get('seller_type') results.append(result) total_results = len(results) sliced_results = results[offset:(offset + result_count)] result = { 'hits': { 'total': total_results, 'hits': [{ '_source': r } for r in sliced_results] } } try: response = jsonify(result), 200 except Exception as e: response = jsonify(message=str(e)), 500 return response
def do_search(search_query, offset, result_count, new_domains, framework_slug): try: sort_dir = list(search_query['sort'][0].values())[0]['order'] except (KeyError, IndexError): sort_dir = 'asc' try: sort_by = list(search_query['sort'][0].values())[0]['sort_by'] except (KeyError, IndexError): sort_by = None try: terms = search_query['query']['filtered']['filter']['terms'] except (KeyError, IndexError): terms = {} roles_list = None seller_types_list = None if terms: new_domains = 'prices.serviceRole.role' not in terms try: if new_domains: roles_list = terms['domains.assessed'] else: roles = terms['prices.serviceRole.role'] roles_list = set(_['role'][7:] for _ in roles) except KeyError: pass try: seller_types_list = terms['seller_types'] except: # noqa pass try: search_term = search_query['query']['match_phrase_prefix']['name'] except KeyError: search_term = '' EXCLUDE_LEGACY_ROLES = not current_app.config['LEGACY_ROLE_MAPPING'] if new_domains: q = db.session.query(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \ .outerjoin(SupplierFramework).outerjoin(Framework) else: q = db.session.query(Supplier).outerjoin(PriceSchedule).outerjoin(ServiceRole) \ .outerjoin(SupplierFramework).outerjoin(Framework) q = q.filter(Supplier.status != 'deleted', Supplier.abn != Supplier.DUMMY_ABN, or_(Framework.slug == framework_slug, ~Supplier.frameworks.any())) tsquery = None if search_term: if any(c in search_term for c in ['#', '-', '_', '/', '\\']): tsquery = func.phraseto_tsquery(search_term) elif ' ' in search_term: tsquery = func.plainto_tsquery(search_term) else: tsquery = func.to_tsquery(search_term + ":*") q = q.add_column(func.ts_headline( 'english', func.concat(Supplier.summary, ' ', Supplier.data['tools'].astext, ' ', Supplier.data['methodologies'].astext, ' ', Supplier.data['technologies'].astext, ''), tsquery, 'MaxWords=25, MinWords=20, ShortWord=3, HighlightAll=FALSE, MaxFragments=1' )) q = q.group_by(Supplier.id) try: code = search_query['query']['term']['code'] q = q.filter(Supplier.code == code) except KeyError: pass if roles_list is not None: if new_domains: if EXCLUDE_LEGACY_ROLES: d_agg = postgres.array_agg(cast(Domain.name, TEXT)) q = q.having(d_agg.contains(array(roles_list))) else: sr_agg = postgres.array_agg(cast(func.substring(ServiceRole.name, 8), TEXT)) q = q.having(sr_agg.contains(array(roles_list))) if seller_types_list is not None and 'recruiter' in seller_types_list: q = q.filter(Supplier.is_recruiter == 'true') seller_types_list.remove('recruiter') if len(seller_types_list) == 0: seller_types_list = None if seller_types_list is not None: selected_seller_types = select( [postgres.array_agg(column('key'))], from_obj=func.json_each_text(Supplier.data[('seller_type',)]), whereclause=cast(column('value'), Boolean) ).as_scalar() q = q.filter(selected_seller_types.contains(array(seller_types_list))) if sort_by: if sort_by == 'latest': ob = [desc(Supplier.last_update_time)] else: ob = [asc(Supplier.name)] else: if sort_dir == 'desc': ob = [desc(Supplier.name)] else: ob = [asc(Supplier.name)] if search_term: ob = [desc(func.ts_rank_cd(Supplier.text_vector, tsquery))] + ob q = q.filter(Supplier.text_vector.op('@@')(tsquery)) q = q.order_by(*ob) raw_results = list(q) results = [] for x in range(len(raw_results)): if type(raw_results[x]) is Supplier: result = raw_results[x] else: result = raw_results[x][0] if raw_results[x][1] is not None and raw_results[x][1] != '': result.summary = raw_results[x][1] results.append(result) sliced_results = results[offset:(offset + result_count)] q = db.session.query(Supplier.code, Supplier.name, Supplier.summary, Supplier.is_recruiter, Supplier.data, Domain.name.label('domain_name'), SupplierDomain.status.label('domain_status'))\ .outerjoin(SupplierDomain, Domain)\ .filter(Supplier.id.in_([sr.id for sr in sliced_results]))\ .order_by(Supplier.name) suppliers = [r._asdict() for r in q] sliced_results = [] for key, group in groupby(suppliers, key=itemgetter('code')): supplier = group.next() supplier['seller_type'] = supplier.get('data') and supplier['data'].get('seller_type') supplier['domains'] = {'assessed': [], 'unassessed': []} for s in chain([supplier], group): domain, status = s['domain_name'], s['domain_status'] if domain: if status == 'assessed': supplier['domains']['assessed'].append(domain) else: supplier['domains']['unassessed'].append(domain) for e in ['domain_name', 'domain_status', 'data']: supplier.pop(e, None) sliced_results.append(supplier) return sliced_results, len(results)
def casestudies_search(): search_query = get_json_from_request() offset = get_nonnegative_int_or_400(request.args, 'from', 0) result_count = get_positive_int_or_400(request.args, 'size', current_app.config['DM_API_SUPPLIERS_PAGE_SIZE']) sort_dir = search_query.get('sort_dir', 'asc') sort_by = search_query.get('sort_by', None) domains = search_query.get('domains', None) seller_types = search_query.get('seller_types', None) search_term = search_query.get('search_term', None) framework_slug = request.args.get('framework', 'digital-marketplace') q = db.session.query(CaseStudy).join(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \ .outerjoin(SupplierFramework).outerjoin(Framework) q = q.filter(Supplier.status != 'deleted', or_(Framework.slug == framework_slug, ~Supplier.frameworks.any())) tsquery = None if search_term: if ' ' in search_term: tsquery = func.plainto_tsquery(search_term) else: tsquery = func.to_tsquery(search_term + ":*") q = q.add_column(func.ts_headline( 'english', func.concat( CaseStudy.data['approach'].astext, ' ', CaseStudy.data['role'].astext), tsquery, 'MaxWords=150, MinWords=75, ShortWord=3, HighlightAll=FALSE, FragmentDelimiter=" ... " ' )) else: q = q.add_column("''") q = q.add_column(Supplier.name) q = q.add_column(postgres.array_agg(Supplier.data)) q = q.group_by(CaseStudy.id, Supplier.name) if domains: d_agg = postgres.array_agg(cast(Domain.name, TEXT)) q = q.having(d_agg.contains(array(domains))) if seller_types: selected_seller_types = select( [postgres.array_agg(column('key'))], from_obj=func.json_each_text(Supplier.data[('seller_type',)]), whereclause=cast(column('value'), Boolean) ).as_scalar() q = q.filter(selected_seller_types.contains(array(seller_types))) if sort_dir in ('desc', 'z-a'): ob = [desc(CaseStudy.data['title'].astext)] else: ob = [asc(CaseStudy.data['title'].astext)] if search_term: ob = [desc(func.ts_rank_cd(func.to_tsvector( func.concat(Supplier.name, CaseStudy.data['title'].astext, CaseStudy.data['approach'].astext)), tsquery))] + ob condition = func.to_tsvector(func.concat(Supplier.name, CaseStudy.data['title'].astext, CaseStudy.data['approach'].astext)).op('@@')(tsquery) q = q.filter(condition) q = q.order_by(*ob) raw_results = list(q) results = [] for x in range(len(raw_results)): result = raw_results[x][0].serialize() if raw_results[x][1] is not None and raw_results[x][1] != '': result['approach'] = raw_results[x][1] if raw_results[x][2] is not None: result['supplierName'] = raw_results[x][2] if raw_results[x][3] is not None and raw_results[x][3][0] is not None: result['seller_type'] = raw_results[x][3][0].get('seller_type') results.append(result) total_results = len(results) sliced_results = results[offset:(offset + result_count)] result = { 'hits': { 'total': total_results, 'hits': [{'_source': r} for r in sliced_results] } } try: response = jsonify(result), 200 except Exception as e: response = jsonify(message=str(e)), 500 return response