Пример #1
0
def search_locations(search_string):
    q = db.session.query(Location).filter(db.text('location.search_column @@ to_tsquery(:terms)'))
    q = q.params(terms=search_string)
    q = q.order_by(db.text('ts_rank_cd(location.search_column, to_tsquery(:terms)) DESC'))
    q = q.add_column(func.ts_headline('pg_catalog.english',db.text('location.body_text'),func.to_tsquery(search_string),'MaxFragments=1, StartSel = <strong>, StopSel = </strong>'))

    return [{'object':location_object_to_dict(location), 'fragment':fragment} for location, fragment in q]
Пример #2
0
    def index(self):
        current_page = 'page' in request.params \
            and int(request.params['page']) or 1
        q = request.params['q']

        c.results = webhelpers.paginate.Page(
            Session.query(Post, Topic, User). \
                options(orm.joinedload(Post.avatar)). \
                filter(Topic.id==Post.topic_id). \
                filter(User.id==Post.user_id). \

                filter('tsv @@ plainto_tsquery(:terms)'). \
                params(terms=q). \

                add_column(func.ts_headline('pg_catalog.simple',
                                            Post.body,
                                            func.plainto_tsquery(q),
                                            'HighlightAll=TRUE, ' \
                                            'StartSel=<strong>, ' \
                                            'StopSel=</strong>')). \

                order_by(Post.created_at.desc()),
            page=current_page,
            items_per_page=40,
            url=webhelpers.paginate.PageURL_WebOb(request))

        c.title = 'rezultati iskanja'
        return render('/search.mako')
Пример #3
0
    def highlight_request (self, query, args):

        highlighted_fields = [(Release.description, 'h_description'), 
                                (Supplier.name, 'h_supplier_name'),
                                (Buyer.name, 'h_buyer_name'),
                                (Release.dossier, 'h_dossier'),
                                (Release.decision, 'h_decision')
        ]

        if 'q' in args and args['q'] != None and 'highlight' in args and args['highlight'] == True: 
            for (field, label) in highlighted_fields:
                query = query.add_column(func.ts_headline(app.config["FTS_LANG"], 
                           field, 
                           func.plainto_tsquery(app.config["FTS_LANG"], args['q']),
                           'HighlightAll=TRUE, StartSel="%s", StopSel = "%s"' % (app.config["START_HIGHLIGHT"], app.config["END_HIGHLIGHT"]))
                            .label(label))   
                            
        return query     
Пример #4
0
def _gen_search_elements(query,
                         title_only,
                         next_rank,
                         page_size,
                         A,
                         B=[],
                         C=[],
                         D=[]):
    """
    Given a query and four sets of fields, (A, B, C, D), generates a bunch of postgres expressions for full text search.

    The four sets are in decreasing order of "importance" for ranking.

    A should be the "title", the others can be anything.

    If title_only=True, we only perform a trigram search against A only
    """
    if not title_only:
        # a postgres tsquery object that can be used to match against a tsvector
        tsq = func.websearch_to_tsquery(REGCONFIG, query)

        # the tsvector object that we want to search against with our tsquery
        tsv = _build_tsv(A, B, C, D)

        # document to generate snippet from
        doc = _build_doc(A, B, C, D)

        title = _build_doc(A)

        # trigram based text similarity between title and query string
        sim = _similarity(query, title)

        # ranking algo, weigh the similarity a lot, the text-based ranking less
        rank = (TRI_SIMILARITY_WEIGHT * sim +
                func.ts_rank_cd(tsv, tsq)).label("rank")

        # the snippet with results highlighted
        snippet = func.ts_headline(REGCONFIG, doc, tsq,
                                   "StartSel=**,StopSel=**").label("snippet")

        def do_search_query(orig_query):
            """
            Does the right search filtering, limiting, and ordering for the query
            """
            return (orig_query.filter(
                or_(tsv.op("@@")(tsq), sim > TRI_SIMILARITY_THRESHOLD)).filter(
                    rank <= next_rank if next_rank is not None else True).
                    order_by(rank.desc()).limit(page_size + 1).all())

    else:
        title = _build_doc(A)

        # trigram based text similarity between title and query string
        sim = _similarity(query, title)

        # ranking algo, weigh the similarity a lot, the text-based ranking less
        rank = sim.label("rank")

        # used only for headline
        tsq = func.websearch_to_tsquery(REGCONFIG, query)
        doc = _build_doc(A, B, C, D)

        # the snippet with results highlighted
        snippet = func.ts_headline(REGCONFIG, doc, tsq,
                                   "StartSel=**,StopSel=**").label("snippet")

        def do_search_query(orig_query):
            """
            Does the right search filtering, limiting, and ordering for the query
            """
            return (orig_query.filter(sim > TRI_SIMILARITY_THRESHOLD).filter(
                rank <= next_rank if next_rank is not None else True).order_by(
                    rank.desc()).limit(page_size + 1).all())

    return rank, snippet, do_search_query
Пример #5
0
def do_search(search_query, offset, result_count, new_domains, framework_slug):
    try:
        sort_dir = list(search_query['sort'][0].values())[0]['order']
    except (KeyError, IndexError):
        sort_dir = 'asc'

    try:
        sort_by = list(search_query['sort'][0].values())[0]['sort_by']
    except (KeyError, IndexError):
        sort_by = None

    try:
        terms = search_query['query']['filtered']['filter']['terms']
    except (KeyError, IndexError):
        terms = {}

    roles_list = None
    seller_types_list = None

    if terms:
        new_domains = 'prices.serviceRole.role' not in terms

        try:
            if new_domains:
                roles_list = terms['domains.assessed']
            else:
                roles = terms['prices.serviceRole.role']
                roles_list = set(_['role'][7:] for _ in roles)
        except KeyError:
            pass

        try:
            seller_types_list = terms['seller_types']
        except:  # noqa
            pass

    try:
        search_term = search_query['query']['match_phrase_prefix']['name']
    except KeyError:
        search_term = ''

    EXCLUDE_LEGACY_ROLES = not current_app.config['LEGACY_ROLE_MAPPING']

    if new_domains:
        q = db.session.query(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
            .outerjoin(SupplierFramework).outerjoin(Framework)
    else:
        q = db.session.query(Supplier).outerjoin(PriceSchedule).outerjoin(ServiceRole) \
            .outerjoin(SupplierFramework).outerjoin(Framework)

    q = q.filter(
        Supplier.status != 'deleted', Supplier.abn != Supplier.DUMMY_ABN,
        or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))

    tsquery = None
    if search_term:
        if any(c in search_term for c in ['#', '-', '_', '/', '\\']):
            tsquery = func.phraseto_tsquery(search_term)
        elif ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        q = q.add_column(
            func.ts_headline(
                'english',
                func.concat(Supplier.summary, ' ',
                            Supplier.data['tools'].astext, ' ',
                            Supplier.data['methodologies'].astext, ' ',
                            Supplier.data['technologies'].astext, ''), tsquery,
                'MaxWords=25, MinWords=20, ShortWord=3, HighlightAll=FALSE, MaxFragments=1'
            ))

    q = q.group_by(Supplier.id)

    try:
        code = search_query['query']['term']['code']
        q = q.filter(Supplier.code == code)
    except KeyError:
        pass

    if roles_list is not None:
        if new_domains:
            if EXCLUDE_LEGACY_ROLES:
                d_agg = postgres.array_agg(cast(Domain.name, TEXT))
                q = q.filter(SupplierDomain.status == 'assessed')
                q = q.having(d_agg.contains(array(roles_list)))
        else:
            sr_agg = postgres.array_agg(
                cast(func.substring(ServiceRole.name, 8), TEXT))
            q = q.having(sr_agg.contains(array(roles_list)))

    if seller_types_list is not None and 'recruiter' in seller_types_list:
        q = q.filter(Supplier.is_recruiter == 'true')
        seller_types_list.remove('recruiter')
        if len(seller_types_list) == 0:
            seller_types_list = None

    if seller_types_list is not None:
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type', )]),
            whereclause=cast(column('value'), Boolean)).as_scalar()

        q = q.filter(selected_seller_types.contains(array(seller_types_list)))

    if sort_by:
        if sort_by == 'latest':
            ob = [desc(Supplier.last_update_time)]
        else:
            ob = [asc(Supplier.name)]
    else:
        if sort_dir == 'desc':
            ob = [desc(Supplier.name)]
        else:
            ob = [asc(Supplier.name)]

    if search_term:
        ob = [desc(func.ts_rank_cd(Supplier.text_vector, tsquery))] + ob

        q = q.filter(Supplier.text_vector.op('@@')(tsquery))

    q = q.order_by(*ob)

    raw_results = list(q)
    results = []

    for x in range(len(raw_results)):
        if type(raw_results[x]) is Supplier:
            result = raw_results[x]
        else:
            result = raw_results[x][0]
            if raw_results[x][1] is not None and raw_results[x][1] != '':
                result.summary = raw_results[x][1]
        results.append(result)

    sliced_results = results[offset:(offset + result_count)]

    q = db.session.query(Supplier.code, Supplier.name, Supplier.summary, Supplier.is_recruiter,
                         Supplier.data, Domain.name.label('domain_name'),
                         SupplierDomain.status.label('domain_status'))\
        .outerjoin(SupplierDomain, Domain)\
        .filter(Supplier.id.in_([sr.id for sr in sliced_results]))\
        .order_by(Supplier.name)

    suppliers = [r._asdict() for r in q]

    sliced_results = []
    for key, group in groupby(suppliers, key=itemgetter('code')):
        supplier = group.next()

        supplier['seller_type'] = supplier.get(
            'data') and supplier['data'].get('seller_type')

        supplier['domains'] = {'assessed': [], 'unassessed': []}
        for s in chain([supplier], group):
            domain, status = s['domain_name'], s['domain_status']
            if domain:
                if status == 'assessed':
                    supplier['domains']['assessed'].append(domain)
                else:
                    supplier['domains']['unassessed'].append(domain)

        for e in ['domain_name', 'domain_status', 'data']:
            supplier.pop(e, None)

        sliced_results.append(supplier)

    return sliced_results, len(results)
Пример #6
0
def casestudies_search():
    search_query = get_json_from_request()

    offset = get_nonnegative_int_or_400(request.args, 'from', 0)
    result_count = get_positive_int_or_400(
        request.args, 'size', current_app.config['DM_API_SUPPLIERS_PAGE_SIZE'])

    sort_dir = search_query.get('sort_dir', 'asc')
    sort_by = search_query.get('sort_by', None)
    domains = search_query.get('domains', None)
    seller_types = search_query.get('seller_types', None)
    search_term = search_query.get('search_term', None)
    framework_slug = request.args.get('framework', 'digital-marketplace')

    q = db.session.query(CaseStudy).join(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
        .outerjoin(SupplierFramework).outerjoin(Framework)
    q = q.filter(
        Supplier.status != 'deleted',
        or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))
    tsquery = None
    if search_term:
        if ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        q = q.add_column(
            func.ts_headline(
                'english',
                func.concat(CaseStudy.data['approach'].astext, ' ',
                            CaseStudy.data['role'].astext), tsquery,
                'MaxWords=150, MinWords=75, ShortWord=3, HighlightAll=FALSE, FragmentDelimiter=" ... " '
            ))
    else:
        q = q.add_column("''")
    q = q.add_column(Supplier.name)
    q = q.add_column(postgres.array_agg(Supplier.data))
    q = q.group_by(CaseStudy.id, Supplier.name)

    if domains:
        d_agg = postgres.array_agg(cast(Domain.name, TEXT))
        q = q.having(d_agg.contains(array(domains)))

    if seller_types:
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type', )]),
            whereclause=cast(column('value'), Boolean)).as_scalar()

        q = q.filter(selected_seller_types.contains(array(seller_types)))

    if sort_dir in ('desc', 'z-a'):
        ob = [desc(CaseStudy.data['title'].astext)]
    else:
        ob = [asc(CaseStudy.data['title'].astext)]

    if search_term:
        ob = [
            desc(
                func.ts_rank_cd(
                    func.to_tsvector(
                        func.concat(
                            Supplier.name, CaseStudy.data['title'].astext,
                            CaseStudy.data['approach'].astext)), tsquery))
        ] + ob

        condition = func.to_tsvector(
            func.concat(Supplier.name, CaseStudy.data['title'].astext,
                        CaseStudy.data['approach'].astext)).op('@@')(tsquery)

        q = q.filter(condition)
    q = q.order_by(*ob)

    raw_results = list(q)
    results = []

    for x in range(len(raw_results)):
        result = raw_results[x][0].serialize()
        if raw_results[x][1] is not None and raw_results[x][1] != '':
            result['approach'] = raw_results[x][1]
        if raw_results[x][2] is not None:
            result['supplierName'] = raw_results[x][2]
        if raw_results[x][3] is not None and raw_results[x][3][0] is not None:
            result['seller_type'] = raw_results[x][3][0].get('seller_type')
        results.append(result)

    total_results = len(results)

    sliced_results = results[offset:(offset + result_count)]

    result = {
        'hits': {
            'total': total_results,
            'hits': [{
                '_source': r
            } for r in sliced_results]
        }
    }

    try:
        response = jsonify(result), 200
    except Exception as e:
        response = jsonify(message=str(e)), 500

    return response
def do_search(search_query, offset, result_count, new_domains, framework_slug):
    try:
        sort_dir = list(search_query['sort'][0].values())[0]['order']
    except (KeyError, IndexError):
        sort_dir = 'asc'

    try:
        sort_by = list(search_query['sort'][0].values())[0]['sort_by']
    except (KeyError, IndexError):
        sort_by = None

    try:
        terms = search_query['query']['filtered']['filter']['terms']
    except (KeyError, IndexError):
        terms = {}

    roles_list = None
    seller_types_list = None

    if terms:
        new_domains = 'prices.serviceRole.role' not in terms

        try:
            if new_domains:
                roles_list = terms['domains.assessed']
            else:
                roles = terms['prices.serviceRole.role']
                roles_list = set(_['role'][7:] for _ in roles)
        except KeyError:
            pass

        try:
            seller_types_list = terms['seller_types']
        except:  # noqa
            pass

    try:
        search_term = search_query['query']['match_phrase_prefix']['name']
    except KeyError:
        search_term = ''

    EXCLUDE_LEGACY_ROLES = not current_app.config['LEGACY_ROLE_MAPPING']

    if new_domains:
        q = db.session.query(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
            .outerjoin(SupplierFramework).outerjoin(Framework)
    else:
        q = db.session.query(Supplier).outerjoin(PriceSchedule).outerjoin(ServiceRole) \
            .outerjoin(SupplierFramework).outerjoin(Framework)

    q = q.filter(Supplier.status != 'deleted', Supplier.abn != Supplier.DUMMY_ABN,
                 or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))

    tsquery = None
    if search_term:
        if any(c in search_term for c in ['#', '-', '_', '/', '\\']):
            tsquery = func.phraseto_tsquery(search_term)
        elif ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        q = q.add_column(func.ts_headline(
            'english',
            func.concat(Supplier.summary,
                         ' ',
                         Supplier.data['tools'].astext,
                         ' ',
                         Supplier.data['methodologies'].astext,
                         ' ',
                         Supplier.data['technologies'].astext, ''),
            tsquery,
            'MaxWords=25, MinWords=20, ShortWord=3, HighlightAll=FALSE, MaxFragments=1'
        ))

    q = q.group_by(Supplier.id)

    try:
        code = search_query['query']['term']['code']
        q = q.filter(Supplier.code == code)
    except KeyError:
        pass

    if roles_list is not None:
        if new_domains:
            if EXCLUDE_LEGACY_ROLES:
                d_agg = postgres.array_agg(cast(Domain.name, TEXT))
                q = q.having(d_agg.contains(array(roles_list)))
        else:
            sr_agg = postgres.array_agg(cast(func.substring(ServiceRole.name, 8), TEXT))
            q = q.having(sr_agg.contains(array(roles_list)))

    if seller_types_list is not None and 'recruiter' in seller_types_list:
        q = q.filter(Supplier.is_recruiter == 'true')
        seller_types_list.remove('recruiter')
        if len(seller_types_list) == 0:
            seller_types_list = None

    if seller_types_list is not None:
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type',)]),
            whereclause=cast(column('value'), Boolean)
        ).as_scalar()

        q = q.filter(selected_seller_types.contains(array(seller_types_list)))

    if sort_by:
        if sort_by == 'latest':
            ob = [desc(Supplier.last_update_time)]
        else:
            ob = [asc(Supplier.name)]
    else:
        if sort_dir == 'desc':
            ob = [desc(Supplier.name)]
        else:
            ob = [asc(Supplier.name)]

    if search_term:
        ob = [desc(func.ts_rank_cd(Supplier.text_vector, tsquery))] + ob

        q = q.filter(Supplier.text_vector.op('@@')(tsquery))

    q = q.order_by(*ob)

    raw_results = list(q)
    results = []

    for x in range(len(raw_results)):
        if type(raw_results[x]) is Supplier:
            result = raw_results[x]
        else:
            result = raw_results[x][0]
            if raw_results[x][1] is not None and raw_results[x][1] != '':
                result.summary = raw_results[x][1]
        results.append(result)

    sliced_results = results[offset:(offset + result_count)]

    q = db.session.query(Supplier.code, Supplier.name, Supplier.summary, Supplier.is_recruiter,
                         Supplier.data, Domain.name.label('domain_name'),
                         SupplierDomain.status.label('domain_status'))\
        .outerjoin(SupplierDomain, Domain)\
        .filter(Supplier.id.in_([sr.id for sr in sliced_results]))\
        .order_by(Supplier.name)

    suppliers = [r._asdict() for r in q]

    sliced_results = []
    for key, group in groupby(suppliers, key=itemgetter('code')):
        supplier = group.next()

        supplier['seller_type'] = supplier.get('data') and supplier['data'].get('seller_type')

        supplier['domains'] = {'assessed': [], 'unassessed': []}
        for s in chain([supplier], group):
            domain, status = s['domain_name'], s['domain_status']
            if domain:
                if status == 'assessed':
                    supplier['domains']['assessed'].append(domain)
                else:
                    supplier['domains']['unassessed'].append(domain)

        for e in ['domain_name', 'domain_status', 'data']:
            supplier.pop(e, None)

        sliced_results.append(supplier)

    return sliced_results, len(results)
def casestudies_search():
    search_query = get_json_from_request()

    offset = get_nonnegative_int_or_400(request.args, 'from', 0)
    result_count = get_positive_int_or_400(request.args, 'size', current_app.config['DM_API_SUPPLIERS_PAGE_SIZE'])

    sort_dir = search_query.get('sort_dir', 'asc')
    sort_by = search_query.get('sort_by', None)
    domains = search_query.get('domains', None)
    seller_types = search_query.get('seller_types', None)
    search_term = search_query.get('search_term', None)
    framework_slug = request.args.get('framework', 'digital-marketplace')

    q = db.session.query(CaseStudy).join(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
        .outerjoin(SupplierFramework).outerjoin(Framework)
    q = q.filter(Supplier.status != 'deleted', or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))
    tsquery = None
    if search_term:
        if ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        q = q.add_column(func.ts_headline(
            'english',
            func.concat(
                CaseStudy.data['approach'].astext,
                ' ',
                CaseStudy.data['role'].astext),
            tsquery,
            'MaxWords=150, MinWords=75, ShortWord=3, HighlightAll=FALSE, FragmentDelimiter=" ... " '
        ))
    else:
        q = q.add_column("''")
    q = q.add_column(Supplier.name)
    q = q.add_column(postgres.array_agg(Supplier.data))
    q = q.group_by(CaseStudy.id, Supplier.name)

    if domains:
        d_agg = postgres.array_agg(cast(Domain.name, TEXT))
        q = q.having(d_agg.contains(array(domains)))

    if seller_types:
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type',)]),
            whereclause=cast(column('value'), Boolean)
        ).as_scalar()

        q = q.filter(selected_seller_types.contains(array(seller_types)))

    if sort_dir in ('desc', 'z-a'):
        ob = [desc(CaseStudy.data['title'].astext)]
    else:
        ob = [asc(CaseStudy.data['title'].astext)]

    if search_term:
        ob = [desc(func.ts_rank_cd(func.to_tsvector(
            func.concat(Supplier.name, CaseStudy.data['title'].astext,
                        CaseStudy.data['approach'].astext)), tsquery))] + ob

        condition = func.to_tsvector(func.concat(Supplier.name,
                                                 CaseStudy.data['title'].astext,
                                                 CaseStudy.data['approach'].astext)).op('@@')(tsquery)

        q = q.filter(condition)
    q = q.order_by(*ob)

    raw_results = list(q)
    results = []

    for x in range(len(raw_results)):
        result = raw_results[x][0].serialize()
        if raw_results[x][1] is not None and raw_results[x][1] != '':
            result['approach'] = raw_results[x][1]
        if raw_results[x][2] is not None:
            result['supplierName'] = raw_results[x][2]
        if raw_results[x][3] is not None and raw_results[x][3][0] is not None:
            result['seller_type'] = raw_results[x][3][0].get('seller_type')
        results.append(result)

    total_results = len(results)

    sliced_results = results[offset:(offset + result_count)]

    result = {
        'hits': {
            'total': total_results,
            'hits': [{'_source': r} for r in sliced_results]
        }
    }

    try:
        response = jsonify(result), 200
    except Exception as e:
        response = jsonify(message=str(e)), 500

    return response