Exemplo n.º 1
0
def tag_search(text, count=5):
    """Search in the tag_search_items table (for location tags)."""
    QTag = aliased(LocationTag)
    QParent = aliased(LocationTag)
    text = text.lower().strip()
    query = meta.Session.query(TagSearchItem)\
        .join(QTag)\
        .outerjoin((QParent, QParent.id==QTag.parent_id))\
        .filter(TagSearchItem.terms.op('@@')(func.plainto_tsquery(text)))\
        .order_by(or_(func.lower(func.btrim(QParent.title)) == text, func.lower(func.btrim(QParent.title_short)) == text).desc())\
        .order_by(or_(func.lower(func.btrim(QTag.title)) == text, func.lower(func.btrim(QTag.title_short)) == text).desc())\
        .order_by(func.ts_rank_cd(TagSearchItem.terms, func.plainto_tsquery(text)))
    if count is not None:
        query = query.limit(count)
    return query.all()
Exemplo n.º 2
0
def search_foods(search_query):
    query = FoodDetail.query.filter(
        func.to_tsvector("english",
                         FoodDetail.food_desc).op("@@")(func.plainto_tsquery(
                             "english", search_query)))
    foods = [food.search_result_view() for food in query.all()]
    return foods
Exemplo n.º 3
0
    def index(self):
        current_page = 'page' in request.params \
            and int(request.params['page']) or 1
        q = request.params['q']

        c.results = webhelpers.paginate.Page(
            Session.query(Post, Topic, User). \
                options(orm.joinedload(Post.avatar)). \
                filter(Topic.id==Post.topic_id). \
                filter(User.id==Post.user_id). \

                filter('tsv @@ plainto_tsquery(:terms)'). \
                params(terms=q). \

                add_column(func.ts_headline('pg_catalog.simple',
                                            Post.body,
                                            func.plainto_tsquery(q),
                                            'HighlightAll=TRUE, ' \
                                            'StartSel=<strong>, ' \
                                            'StopSel=</strong>')). \

                order_by(Post.created_at.desc()),
            page=current_page,
            items_per_page=40,
            url=webhelpers.paginate.PageURL_WebOb(request))

        c.title = 'rezultati iskanja'
        return render('/search.mako')
Exemplo n.º 4
0
def construct_fulltext_query_and_rank(searched):
    # Text version of the text search query directly constructed from
    # the searched string
    simple_ts_query_text = cast(func.plainto_tsquery('english', searched),
                                TEXT)
    # Text version of the search query using prefix search for the last
    # word in the base query
    # Case expression is necessary to capture the case of empty base query
    # (arising e.g. when the query consists entirely of special characters
    # and stop words)
    prefix_ts_query_text = case([(simple_ts_query_text == "", cast("", TEXT))],
                                else_=simple_ts_query_text.op('||')(cast(
                                    ":*", TEXT)))
    # Final text search query
    ts_query = func.to_tsquery('english', prefix_ts_query_text)
    # Rank for each search result
    ts_rank = func.ts_rank_cd(Post.__ts_vector__, ts_query).label("rank")
    return (ts_query, ts_rank)
Exemplo n.º 5
0
    def highlight_request (self, query, args):

        highlighted_fields = [(Release.description, 'h_description'), 
                                (Supplier.name, 'h_supplier_name'),
                                (Buyer.name, 'h_buyer_name'),
                                (Release.dossier, 'h_dossier'),
                                (Release.decision, 'h_decision')
        ]

        if 'q' in args and args['q'] != None and 'highlight' in args and args['highlight'] == True: 
            for (field, label) in highlighted_fields:
                query = query.add_column(func.ts_headline(app.config["FTS_LANG"], 
                           field, 
                           func.plainto_tsquery(app.config["FTS_LANG"], args['q']),
                           'HighlightAll=TRUE, StartSel="%s", StopSel = "%s"' % (app.config["START_HIGHLIGHT"], app.config["END_HIGHLIGHT"]))
                            .label(label))   
                            
        return query     
Exemplo n.º 6
0
    def get(self):
        """
        Searches and returns a list of items.
        """
        args = p.pub_search_args.parse_args(request)

        publisher_id = args.get('publisher_id')
        q = args.get('q')
        bbox = args.get('bbox')
        time = args.get('time')
        page = args.get('page')
        per_page = args.get('per_page')

        # Initialize items query
        items = Item.query

        # Search by publisher
        if publisher_id:
            items = items.filter(Item.publisher_id == publisher_id)

        # Add filtering (optional)
        if q and q.strip() != "":
            items = items.filter(
                Item.ts_vector.op('@@')(func.plainto_tsquery(q)))

        # Spatial query
        if bbox:
            xmin, ymin, xmax, ymax = bbox.split(',')
            bbox_polygon = 'POLYGON (({0} {1}, {0} {3}, {2} {3}, {2} {1}, {0} {1} ))'.format(
                xmin, ymin, xmax, ymax)
            items = items.filter(
                Item.geographic_location.ST_Within(bbox_polygon))

        # Time extent
        if time:
            time_start, time_end = time.split('/')
            items = items.filter(Item.date_start >= time_start).filter(
                Item.date_end <= time_end)

        # Add pagination
        result = items.paginate(page, per_page, error_out=False)

        items_geojson = []

        # Post-process to get item_geojson
        if result.items:
            for i in result.items:
                items_geojson.append(i.item_geojson)
            result.items = items_geojson
            return {
                'result':  marshal(result, page_of_items),
                'success': True,
                'message': {
                    'code': 200,
                    'description': 'Items found'
                }
            }, 200
        else:
            return {
                'success': False,
                'message': {
                    'code': 404,
                    'description': 'No items found for this query'
                }
            }, 404
Exemplo n.º 7
0
def search_mv_foods(search_query):
    query = TotalNutritionView.query.filter(
        func.to_tsvector("english", TotalNutritionView.food_desc).op("@@")(
            func.plainto_tsquery("english", search_query)))
    foods = [food.search_result_view() for food in query.all()]
    return foods
Exemplo n.º 8
0
 def search_all(query):
     rank = func.ts_rank(IndexLine.weighted, func.plainto_tsquery(query))
     maxrank = func.max(rank).label('maxrank')
     stmt = s.query(IndexLine.page_id, maxrank).group_by(IndexLine.almanac_id, IndexLine.page_id).filter(rank > 0).subquery()
     return s.query(Page).join(stmt).filter(Page.published==True).order_by(stmt.c.maxrank.desc())
Exemplo n.º 9
0
def do_search(search_query, offset, result_count, new_domains, framework_slug):
    try:
        sort_dir = list(search_query['sort'][0].values())[0]['order']
    except (KeyError, IndexError):
        sort_dir = 'asc'

    try:
        sort_by = list(search_query['sort'][0].values())[0]['sort_by']
    except (KeyError, IndexError):
        sort_by = None

    try:
        terms = search_query['query']['filtered']['filter']['terms']
    except (KeyError, IndexError):
        terms = {}

    roles_list = None
    seller_types_list = None

    if terms:
        new_domains = 'prices.serviceRole.role' not in terms

        try:
            if new_domains:
                roles_list = terms['domains.assessed']
            else:
                roles = terms['prices.serviceRole.role']
                roles_list = set(_['role'][7:] for _ in roles)
        except KeyError:
            pass

        try:
            seller_types_list = terms['seller_types']
        except:  # noqa
            pass

    try:
        search_term = search_query['query']['match_phrase_prefix']['name']
    except KeyError:
        search_term = ''

    EXCLUDE_LEGACY_ROLES = not current_app.config['LEGACY_ROLE_MAPPING']

    if new_domains:
        q = db.session.query(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
            .outerjoin(SupplierFramework).outerjoin(Framework)
    else:
        q = db.session.query(Supplier).outerjoin(PriceSchedule).outerjoin(ServiceRole) \
            .outerjoin(SupplierFramework).outerjoin(Framework)

    q = q.filter(
        Supplier.status != 'deleted', Supplier.abn != Supplier.DUMMY_ABN,
        or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))

    tsquery = None
    if search_term:
        if any(c in search_term for c in ['#', '-', '_', '/', '\\']):
            tsquery = func.phraseto_tsquery(search_term)
        elif ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        q = q.add_column(
            func.ts_headline(
                'english',
                func.concat(Supplier.summary, ' ',
                            Supplier.data['tools'].astext, ' ',
                            Supplier.data['methodologies'].astext, ' ',
                            Supplier.data['technologies'].astext, ''), tsquery,
                'MaxWords=25, MinWords=20, ShortWord=3, HighlightAll=FALSE, MaxFragments=1'
            ))

    q = q.group_by(Supplier.id)

    try:
        code = search_query['query']['term']['code']
        q = q.filter(Supplier.code == code)
    except KeyError:
        pass

    if roles_list is not None:
        if new_domains:
            if EXCLUDE_LEGACY_ROLES:
                d_agg = postgres.array_agg(cast(Domain.name, TEXT))
                q = q.filter(SupplierDomain.status == 'assessed')
                q = q.having(d_agg.contains(array(roles_list)))
        else:
            sr_agg = postgres.array_agg(
                cast(func.substring(ServiceRole.name, 8), TEXT))
            q = q.having(sr_agg.contains(array(roles_list)))

    if seller_types_list is not None and 'recruiter' in seller_types_list:
        q = q.filter(Supplier.is_recruiter == 'true')
        seller_types_list.remove('recruiter')
        if len(seller_types_list) == 0:
            seller_types_list = None

    if seller_types_list is not None:
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type', )]),
            whereclause=cast(column('value'), Boolean)).as_scalar()

        q = q.filter(selected_seller_types.contains(array(seller_types_list)))

    if sort_by:
        if sort_by == 'latest':
            ob = [desc(Supplier.last_update_time)]
        else:
            ob = [asc(Supplier.name)]
    else:
        if sort_dir == 'desc':
            ob = [desc(Supplier.name)]
        else:
            ob = [asc(Supplier.name)]

    if search_term:
        ob = [desc(func.ts_rank_cd(Supplier.text_vector, tsquery))] + ob

        q = q.filter(Supplier.text_vector.op('@@')(tsquery))

    q = q.order_by(*ob)

    raw_results = list(q)
    results = []

    for x in range(len(raw_results)):
        if type(raw_results[x]) is Supplier:
            result = raw_results[x]
        else:
            result = raw_results[x][0]
            if raw_results[x][1] is not None and raw_results[x][1] != '':
                result.summary = raw_results[x][1]
        results.append(result)

    sliced_results = results[offset:(offset + result_count)]

    q = db.session.query(Supplier.code, Supplier.name, Supplier.summary, Supplier.is_recruiter,
                         Supplier.data, Domain.name.label('domain_name'),
                         SupplierDomain.status.label('domain_status'))\
        .outerjoin(SupplierDomain, Domain)\
        .filter(Supplier.id.in_([sr.id for sr in sliced_results]))\
        .order_by(Supplier.name)

    suppliers = [r._asdict() for r in q]

    sliced_results = []
    for key, group in groupby(suppliers, key=itemgetter('code')):
        supplier = group.next()

        supplier['seller_type'] = supplier.get(
            'data') and supplier['data'].get('seller_type')

        supplier['domains'] = {'assessed': [], 'unassessed': []}
        for s in chain([supplier], group):
            domain, status = s['domain_name'], s['domain_status']
            if domain:
                if status == 'assessed':
                    supplier['domains']['assessed'].append(domain)
                else:
                    supplier['domains']['unassessed'].append(domain)

        for e in ['domain_name', 'domain_status', 'data']:
            supplier.pop(e, None)

        sliced_results.append(supplier)

    return sliced_results, len(results)
Exemplo n.º 10
0
def casestudies_search():
    search_query = get_json_from_request()

    offset = get_nonnegative_int_or_400(request.args, 'from', 0)
    result_count = get_positive_int_or_400(
        request.args, 'size', current_app.config['DM_API_SUPPLIERS_PAGE_SIZE'])

    sort_dir = search_query.get('sort_dir', 'asc')
    sort_by = search_query.get('sort_by', None)
    domains = search_query.get('domains', None)
    seller_types = search_query.get('seller_types', None)
    search_term = search_query.get('search_term', None)
    framework_slug = request.args.get('framework', 'digital-marketplace')

    q = db.session.query(CaseStudy).join(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
        .outerjoin(SupplierFramework).outerjoin(Framework)
    q = q.filter(
        Supplier.status != 'deleted',
        or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))
    tsquery = None
    if search_term:
        if ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        q = q.add_column(
            func.ts_headline(
                'english',
                func.concat(CaseStudy.data['approach'].astext, ' ',
                            CaseStudy.data['role'].astext), tsquery,
                'MaxWords=150, MinWords=75, ShortWord=3, HighlightAll=FALSE, FragmentDelimiter=" ... " '
            ))
    else:
        q = q.add_column("''")
    q = q.add_column(Supplier.name)
    q = q.add_column(postgres.array_agg(Supplier.data))
    q = q.group_by(CaseStudy.id, Supplier.name)

    if domains:
        d_agg = postgres.array_agg(cast(Domain.name, TEXT))
        q = q.having(d_agg.contains(array(domains)))

    if seller_types:
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type', )]),
            whereclause=cast(column('value'), Boolean)).as_scalar()

        q = q.filter(selected_seller_types.contains(array(seller_types)))

    if sort_dir in ('desc', 'z-a'):
        ob = [desc(CaseStudy.data['title'].astext)]
    else:
        ob = [asc(CaseStudy.data['title'].astext)]

    if search_term:
        ob = [
            desc(
                func.ts_rank_cd(
                    func.to_tsvector(
                        func.concat(
                            Supplier.name, CaseStudy.data['title'].astext,
                            CaseStudy.data['approach'].astext)), tsquery))
        ] + ob

        condition = func.to_tsvector(
            func.concat(Supplier.name, CaseStudy.data['title'].astext,
                        CaseStudy.data['approach'].astext)).op('@@')(tsquery)

        q = q.filter(condition)
    q = q.order_by(*ob)

    raw_results = list(q)
    results = []

    for x in range(len(raw_results)):
        result = raw_results[x][0].serialize()
        if raw_results[x][1] is not None and raw_results[x][1] != '':
            result['approach'] = raw_results[x][1]
        if raw_results[x][2] is not None:
            result['supplierName'] = raw_results[x][2]
        if raw_results[x][3] is not None and raw_results[x][3][0] is not None:
            result['seller_type'] = raw_results[x][3][0].get('seller_type')
        results.append(result)

    total_results = len(results)

    sliced_results = results[offset:(offset + result_count)]

    result = {
        'hits': {
            'total': total_results,
            'hits': [{
                '_source': r
            } for r in sliced_results]
        }
    }

    try:
        response = jsonify(result), 200
    except Exception as e:
        response = jsonify(message=str(e)), 500

    return response
Exemplo n.º 11
0
def do_search(search_query, offset, result_count, new_domains, framework_slug):
    try:
        sort_dir = list(search_query['sort'][0].values())[0]['order']
    except (KeyError, IndexError):
        sort_dir = 'asc'

    try:
        sort_by = list(search_query['sort'][0].values())[0]['sort_by']
    except (KeyError, IndexError):
        sort_by = None

    try:
        terms = search_query['query']['filtered']['filter']['terms']
    except (KeyError, IndexError):
        terms = {}

    roles_list = None
    seller_types_list = None

    if terms:
        new_domains = 'prices.serviceRole.role' not in terms

        try:
            if new_domains:
                roles_list = terms['domains.assessed']
            else:
                roles = terms['prices.serviceRole.role']
                roles_list = set(_['role'][7:] for _ in roles)
        except KeyError:
            pass

        try:
            seller_types_list = terms['seller_types']
        except:  # noqa
            pass

    try:
        search_term = search_query['query']['match_phrase_prefix']['name']
    except KeyError:
        search_term = ''

    EXCLUDE_LEGACY_ROLES = not current_app.config['LEGACY_ROLE_MAPPING']

    if new_domains:
        q = db.session.query(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
            .outerjoin(SupplierFramework).outerjoin(Framework)
    else:
        q = db.session.query(Supplier).outerjoin(PriceSchedule).outerjoin(ServiceRole) \
            .outerjoin(SupplierFramework).outerjoin(Framework)

    q = q.filter(Supplier.status != 'deleted', Supplier.abn != Supplier.DUMMY_ABN,
                 or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))

    tsquery = None
    if search_term:
        if any(c in search_term for c in ['#', '-', '_', '/', '\\']):
            tsquery = func.phraseto_tsquery(search_term)
        elif ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        q = q.add_column(func.ts_headline(
            'english',
            func.concat(Supplier.summary,
                         ' ',
                         Supplier.data['tools'].astext,
                         ' ',
                         Supplier.data['methodologies'].astext,
                         ' ',
                         Supplier.data['technologies'].astext, ''),
            tsquery,
            'MaxWords=25, MinWords=20, ShortWord=3, HighlightAll=FALSE, MaxFragments=1'
        ))

    q = q.group_by(Supplier.id)

    try:
        code = search_query['query']['term']['code']
        q = q.filter(Supplier.code == code)
    except KeyError:
        pass

    if roles_list is not None:
        if new_domains:
            if EXCLUDE_LEGACY_ROLES:
                d_agg = postgres.array_agg(cast(Domain.name, TEXT))
                q = q.having(d_agg.contains(array(roles_list)))
        else:
            sr_agg = postgres.array_agg(cast(func.substring(ServiceRole.name, 8), TEXT))
            q = q.having(sr_agg.contains(array(roles_list)))

    if seller_types_list is not None and 'recruiter' in seller_types_list:
        q = q.filter(Supplier.is_recruiter == 'true')
        seller_types_list.remove('recruiter')
        if len(seller_types_list) == 0:
            seller_types_list = None

    if seller_types_list is not None:
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type',)]),
            whereclause=cast(column('value'), Boolean)
        ).as_scalar()

        q = q.filter(selected_seller_types.contains(array(seller_types_list)))

    if sort_by:
        if sort_by == 'latest':
            ob = [desc(Supplier.last_update_time)]
        else:
            ob = [asc(Supplier.name)]
    else:
        if sort_dir == 'desc':
            ob = [desc(Supplier.name)]
        else:
            ob = [asc(Supplier.name)]

    if search_term:
        ob = [desc(func.ts_rank_cd(Supplier.text_vector, tsquery))] + ob

        q = q.filter(Supplier.text_vector.op('@@')(tsquery))

    q = q.order_by(*ob)

    raw_results = list(q)
    results = []

    for x in range(len(raw_results)):
        if type(raw_results[x]) is Supplier:
            result = raw_results[x]
        else:
            result = raw_results[x][0]
            if raw_results[x][1] is not None and raw_results[x][1] != '':
                result.summary = raw_results[x][1]
        results.append(result)

    sliced_results = results[offset:(offset + result_count)]

    q = db.session.query(Supplier.code, Supplier.name, Supplier.summary, Supplier.is_recruiter,
                         Supplier.data, Domain.name.label('domain_name'),
                         SupplierDomain.status.label('domain_status'))\
        .outerjoin(SupplierDomain, Domain)\
        .filter(Supplier.id.in_([sr.id for sr in sliced_results]))\
        .order_by(Supplier.name)

    suppliers = [r._asdict() for r in q]

    sliced_results = []
    for key, group in groupby(suppliers, key=itemgetter('code')):
        supplier = group.next()

        supplier['seller_type'] = supplier.get('data') and supplier['data'].get('seller_type')

        supplier['domains'] = {'assessed': [], 'unassessed': []}
        for s in chain([supplier], group):
            domain, status = s['domain_name'], s['domain_status']
            if domain:
                if status == 'assessed':
                    supplier['domains']['assessed'].append(domain)
                else:
                    supplier['domains']['unassessed'].append(domain)

        for e in ['domain_name', 'domain_status', 'data']:
            supplier.pop(e, None)

        sliced_results.append(supplier)

    return sliced_results, len(results)
Exemplo n.º 12
0
def casestudies_search():
    search_query = get_json_from_request()

    offset = get_nonnegative_int_or_400(request.args, 'from', 0)
    result_count = get_positive_int_or_400(request.args, 'size', current_app.config['DM_API_SUPPLIERS_PAGE_SIZE'])

    sort_dir = search_query.get('sort_dir', 'asc')
    sort_by = search_query.get('sort_by', None)
    domains = search_query.get('domains', None)
    seller_types = search_query.get('seller_types', None)
    search_term = search_query.get('search_term', None)
    framework_slug = request.args.get('framework', 'digital-marketplace')

    q = db.session.query(CaseStudy).join(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
        .outerjoin(SupplierFramework).outerjoin(Framework)
    q = q.filter(Supplier.status != 'deleted', or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))
    tsquery = None
    if search_term:
        if ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        q = q.add_column(func.ts_headline(
            'english',
            func.concat(
                CaseStudy.data['approach'].astext,
                ' ',
                CaseStudy.data['role'].astext),
            tsquery,
            'MaxWords=150, MinWords=75, ShortWord=3, HighlightAll=FALSE, FragmentDelimiter=" ... " '
        ))
    else:
        q = q.add_column("''")
    q = q.add_column(Supplier.name)
    q = q.add_column(postgres.array_agg(Supplier.data))
    q = q.group_by(CaseStudy.id, Supplier.name)

    if domains:
        d_agg = postgres.array_agg(cast(Domain.name, TEXT))
        q = q.having(d_agg.contains(array(domains)))

    if seller_types:
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type',)]),
            whereclause=cast(column('value'), Boolean)
        ).as_scalar()

        q = q.filter(selected_seller_types.contains(array(seller_types)))

    if sort_dir in ('desc', 'z-a'):
        ob = [desc(CaseStudy.data['title'].astext)]
    else:
        ob = [asc(CaseStudy.data['title'].astext)]

    if search_term:
        ob = [desc(func.ts_rank_cd(func.to_tsvector(
            func.concat(Supplier.name, CaseStudy.data['title'].astext,
                        CaseStudy.data['approach'].astext)), tsquery))] + ob

        condition = func.to_tsvector(func.concat(Supplier.name,
                                                 CaseStudy.data['title'].astext,
                                                 CaseStudy.data['approach'].astext)).op('@@')(tsquery)

        q = q.filter(condition)
    q = q.order_by(*ob)

    raw_results = list(q)
    results = []

    for x in range(len(raw_results)):
        result = raw_results[x][0].serialize()
        if raw_results[x][1] is not None and raw_results[x][1] != '':
            result['approach'] = raw_results[x][1]
        if raw_results[x][2] is not None:
            result['supplierName'] = raw_results[x][2]
        if raw_results[x][3] is not None and raw_results[x][3][0] is not None:
            result['seller_type'] = raw_results[x][3][0].get('seller_type')
        results.append(result)

    total_results = len(results)

    sliced_results = results[offset:(offset + result_count)]

    result = {
        'hits': {
            'total': total_results,
            'hits': [{'_source': r} for r in sliced_results]
        }
    }

    try:
        response = jsonify(result), 200
    except Exception as e:
        response = jsonify(message=str(e)), 500

    return response