示例#1
0
def search_terms(request):
    """Return type-ahead suggestions for the search box as JSON.

    The ``q`` GET parameter is lower-cased and run, with the same query,
    against the ``terms`` and the ``concepts`` index. ``lang`` (default
    ``settings.LANGUAGE_CODE``) is passed to ``get_preflabel_from_conceptid``
    to label the top concept of concept suggestions.

    Returns:
        ``JSONResponse`` wrapping ``{'terms': [...], 'concepts': [...]}``.
        Every entry is a dict with the keys ``type``, ``context``,
        ``context_label``, ``id``, ``text`` and ``value``. ``id`` is a running
        counter shared by both indexes; concept entries that come from the
        same top-concept bucket share one id. An index whose search returns
        ``None`` contributes an empty list.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    # Every Match clause uses the lower-cased text, so normalise it once.
    search_text = request.GET.get('q', '').lower()
    user_is_reviewer = request.user.groups.filter(name='Resource Reviewer').exists()

    i = 0
    ret = {}
    for index in ['terms', 'concepts']:
        query = Query(se, start=0, limit=0)
        boolquery = Bool()
        boolquery.should(Match(field='value', query=search_text, type='phrase_prefix'))
        boolquery.should(Match(field='value.folded', query=search_text, type='phrase_prefix'))
        boolquery.should(Match(field='value.folded', query=search_text, fuzziness='AUTO', prefix_length=settings.SEARCH_TERM_SENSITIVITY))

        # Users outside the 'Resource Reviewer' group only get terms whose
        # 'provisional' field is 'false'.
        if not user_is_reviewer and index == 'terms':
            boolquery.filter(Terms(field='provisional', terms=['false']))

        query.add_query(boolquery)

        # Bucket the hits by their raw value, best-scoring values first, and
        # attach the concept / nodegroup breakdown to every bucket.
        base_agg = Aggregation(name='value_agg', type='terms', field='value.raw', size=settings.SEARCH_DROPDOWN_LENGTH, order={"max_score": "desc"})
        nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
        top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
        conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
        max_score_agg = MaxAgg(name='max_score', script='_score')

        top_concept_agg.add_aggregation(conceptid_agg)
        base_agg.add_aggregation(max_score_agg)
        base_agg.add_aggregation(top_concept_agg)
        base_agg.add_aggregation(nodegroupid_agg)
        query.add_aggregation(base_agg)

        ret[index] = []
        results = query.search(index=index)
        if results is None:
            # The search produced nothing to read; keep the empty list
            # instead of failing on the subscript below.
            continue

        for result in results['aggregations']['value_agg']['buckets']:
            top_concepts = result['top_concept']['buckets']
            if top_concepts:
                for top_concept in top_concepts:
                    top_concept_id = top_concept['key']
                    top_concept_label = get_preflabel_from_conceptid(top_concept_id, lang)['value']
                    for concept in top_concept['conceptid']['buckets']:
                        ret[index].append({
                            'type': 'concept',
                            'context': top_concept_id,
                            'context_label': top_concept_label,
                            'id': i,
                            'text': result['key'],
                            'value': concept['key']
                        })
                    i = i + 1
            else:
                # No top concept on the bucket: report it as a plain term.
                ret[index].append({
                    'type': 'term',
                    'context': '',
                    'context_label': get_resource_model_label(result),
                    'id': i,
                    'text': result['key'],
                    'value': result['key']
                })
                i = i + 1

    return JSONResponse(ret)
示例#2
0
文件: search.py 项目: tavitm/arches
def search_terms(request):
    """Return type-ahead suggestions from the ``strings`` index as JSON.

    The ``q`` GET parameter is lower-cased and matched against the ``value``
    and ``value.folded`` fields. ``lang`` (default ``settings.LANGUAGE_CODE``)
    is passed to ``get_preflabel_from_conceptid`` to label the top concept of
    concept suggestions.

    Returns:
        ``JSONResponse`` wrapping a list of dicts with the keys ``type``,
        ``context``, ``context_label``, ``id``, ``text`` and ``value``. The
        list is empty when the search returns nothing. Concept entries that
        come from the same top-concept bucket share one ``id``.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    # Every Match clause uses the lower-cased text, so normalise it once.
    search_text = request.GET.get('q', '').lower()
    query = Query(se, start=0, limit=0)

    boolquery = Bool()
    boolquery.should(Match(field='value', query=search_text, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=search_text, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=search_text, fuzziness='AUTO'))
    query.add_query(boolquery)

    # Bucket the hits by their raw value, best-scoring values first, and
    # attach the concept / nodegroup breakdown to every bucket.
    base_agg = Aggregation(name='value_agg', type='terms', field='value.raw', size=settings.SEARCH_DROPDOWN_LENGTH, order={"max_score": "desc"})
    nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
    top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
    conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
    max_score_agg = MaxAgg(name='max_score', script='_score')

    top_concept_agg.add_aggregation(conceptid_agg)
    base_agg.add_aggregation(max_score_agg)
    base_agg.add_aggregation(top_concept_agg)
    base_agg.add_aggregation(nodegroupid_agg)
    query.add_aggregation(base_agg)

    results = query.search(index='strings')
    # A falsy result has no 'aggregations' key to read; treat it as "no
    # suggestions" so the view answers with an empty list.
    buckets = results['aggregations']['value_agg']['buckets'] if results else []

    i = 0
    ret = []
    for result in buckets:
        top_concepts = result['top_concept']['buckets']
        if top_concepts:
            for top_concept in top_concepts:
                top_concept_id = top_concept['key']
                top_concept_label = get_preflabel_from_conceptid(top_concept_id, lang)['value']
                for concept in top_concept['conceptid']['buckets']:
                    ret.append({
                        'type': 'concept',
                        'context': top_concept_id,
                        'context_label': top_concept_label,
                        'id': i,
                        'text': result['key'],
                        'value': concept['key']
                    })
                i = i + 1
        else:
            # No top concept on the bucket: report it as a plain term.
            ret.append({
                'type': 'term',
                'context': '',
                'context_label': '',
                'id': i,
                'text': result['key'],
                'value': result['key']
            })
            i = i + 1

    return JSONResponse(ret)
示例#3
0
def search_terms(request):
    """Produce search-box suggestions for the ``terms`` and ``concepts`` indexes.

    Reads ``q`` (matched lower-cased) and ``lang`` (default
    ``settings.LANGUAGE_CODE``) from the GET parameters and issues one
    aggregation query per index.

    Returns:
        ``JSONResponse`` wrapping ``{"terms": [...], "concepts": [...]}``.
        Entries are dicts with the keys ``type``, ``context``,
        ``context_label``, ``id``, ``text`` and ``value``; an index whose
        search returns ``None`` keeps an empty list.
    """
    lang = request.GET.get("lang", settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    typed_text = request.GET.get("q", "")
    user_is_reviewer = user_is_resource_reviewer(request.user)
    needle = typed_text.lower()

    next_id = 0  # shared across both indexes
    ret = {}
    for index in ["terms", "concepts"]:
        query = Query(se, start=0, limit=0)

        matcher = Bool()
        matcher.should(Match(field="value", query=needle, type="phrase_prefix"))
        matcher.should(Match(field="value.folded", query=needle, type="phrase_prefix"))
        matcher.should(
            Match(
                field="value.folded",
                query=needle,
                fuzziness="AUTO",
                prefix_length=settings.SEARCH_TERM_SENSITIVITY,
            )
        )
        # Non-reviewers only see terms whose "provisional" field is "false".
        if user_is_reviewer is False and index == "terms":
            matcher.filter(Terms(field="provisional", terms=["false"]))
        query.add_query(matcher)

        # One bucket per raw value, best score first, each broken down by
        # top concept (and concept id within it) and by nodegroup.
        by_value = Aggregation(
            name="value_agg",
            type="terms",
            field="value.raw",
            size=settings.SEARCH_DROPDOWN_LENGTH,
            order={"max_score": "desc"},
        )
        by_nodegroup = Aggregation(name="nodegroupid", type="terms", field="nodegroupid")
        by_top_concept = Aggregation(name="top_concept", type="terms", field="top_concept")
        by_concept = Aggregation(name="conceptid", type="terms", field="conceptid")
        best_score = MaxAgg(name="max_score", script="_score")

        by_top_concept.add_aggregation(by_concept)
        by_value.add_aggregation(best_score)
        by_value.add_aggregation(by_top_concept)
        by_value.add_aggregation(by_nodegroup)
        query.add_aggregation(by_value)

        suggestions = []
        ret[index] = suggestions
        response = query.search(index=index)
        if response is None:
            continue

        for bucket in response["aggregations"]["value_agg"]["buckets"]:
            concept_groups = bucket["top_concept"]["buckets"]
            if len(concept_groups) == 0:
                # Not tied to any concept: offer the value as a plain term.
                suggestions.append({
                    "type": "term",
                    "context": "",
                    "context_label": get_resource_model_label(bucket),
                    "id": next_id,
                    "text": bucket["key"],
                    "value": bucket["key"],
                })
                next_id += 1
                continue

            for group in concept_groups:
                group_id = group["key"]
                group_label = get_preflabel_from_conceptid(group_id, lang)["value"]
                # Every concept under the same top concept gets the same id.
                suggestions.extend([
                    {
                        "type": "concept",
                        "context": group_id,
                        "context_label": group_label,
                        "id": next_id,
                        "text": bucket["key"],
                        "value": member["key"],
                    }
                    for member in group["conceptid"]["buckets"]
                ])
                next_id += 1

    return JSONResponse(ret)
示例#4
0
文件: search.py 项目: k-int/arches
def time_wheel_config(request):
    """Return the time wheel hierarchy for the ``resource`` index as JSON.

    A first query fetches the minimum and maximum ``dates.date`` values. The
    span between them, rounded outwards to whole millennia, is divided into
    millennium, century and decade filter aggregations that a second query
    evaluates. The aggregation result is converted into a ``d3Item`` tree.

    Only the millennium filters are restricted to the nodegroups returned by
    ``get_permitted_nodegroups(request.user)``; century and decade
    aggregations are nested inside them.

    Returns:
        ``JSONResponse`` with the root ``d3Item`` on success, or
        ``HttpResponseNotFound`` when a search returns ``None`` or the index
        reports no minimum / maximum date.
    """
    se = SearchEngineFactory().create()
    query = Query(se, limit=0)
    nested_agg = NestedAgg(path='dates', name='min_max_agg')
    nested_agg.add_aggregation(MinAgg(field='dates.date'))
    nested_agg.add_aggregation(MaxAgg(field='dates.date'))
    query.add_aggregation(nested_agg)
    results = query.search(index='resource')

    bounds = results['aggregations']['min_max_agg'] if results is not None else None
    if (bounds is None
            or bounds['min_dates.date']['value'] is None
            or bounds['max_dates.date']['value'] is None):
        return HttpResponseNotFound(
            _('Error retrieving the time wheel config'))

    # NOTE(review): presumably the indexed value encodes the year in its
    # leading digits (YYYYMMDD-like), so / 10000 yields the year - confirm.
    min_date = int(bounds['min_dates.date']['value']) / 10000
    max_date = int(bounds['max_dates.date']['value']) / 10000
    # round min and max date to the nearest 1000 years
    min_date = math.ceil(math.fabs(min_date) /
                         1000) * -1000 if min_date < 0 else math.floor(
                             min_date / 1000) * 1000
    max_date = math.floor(math.fabs(max_date) /
                          1000) * -1000 if max_date < 0 else math.ceil(
                              max_date / 1000) * 1000

    # The permitted nodegroups do not change between millennia; look them
    # up once instead of once per loop iteration.
    user_nodegroups = get_permitted_nodegroups(request.user)

    query = Query(se, limit=0)
    range_lookup = {}

    def gen_range_agg(gte=None, lte=None, permitted_nodegroups=None):
        """Build the filter matching anything that intersects [gte, lte].

        ``permitted_nodegroups=None`` means "do not restrict by nodegroup".
        Any list, including an empty one, is applied as a filter, so a user
        without permitted nodegroups matches nothing rather than everything.
        """
        def scoped_filter(range_field, nodegroup_field):
            scoped = Bool()
            scoped.filter(
                Range(field=range_field,
                      gte=gte,
                      lte=lte,
                      relation='intersects'))
            if permitted_nodegroups is not None:
                scoped.filter(
                    Terms(field=nodegroup_field, terms=permitted_nodegroups))
            return scoped

        date_query = scoped_filter('dates.date', 'dates.nodegroup_id')
        date_ranges_query = scoped_filter('date_ranges.date_range',
                                          'date_ranges.nodegroup_id')
        wrapper_query = Bool()
        wrapper_query.should(
            Nested(path='date_ranges', query=date_ranges_query))
        wrapper_query.should(Nested(path='dates', query=date_query))
        return wrapper_query

    def build_period_agg(name, low, high, permitted_nodegroups=None):
        """Create the named filter aggregation for [low, high] and record its range."""
        period_agg = FiltersAgg(name=name)
        period_agg.add_filter(
            gen_range_agg(gte=ExtendedDateFormat(low).lower,
                          lte=ExtendedDateFormat(high).lower,
                          permitted_nodegroups=permitted_nodegroups))
        range_lookup[name] = [low, high]
        return period_agg

    # The stop value is one step past the rounded maximum, so the rounded
    # maximum itself is also used as a millennium start.
    for millennium in range(int(min_date), int(max_date) + 1000, 1000):
        millennium_end = millennium + 1000
        millenium_agg = build_period_agg(
            "Millennium (%s - %s)" % (millennium, millennium_end),
            millennium,
            millennium_end,
            permitted_nodegroups=user_nodegroups)

        for century in range(millennium, millennium_end, 100):
            century_end = century + 100
            century_agg = build_period_agg(
                "Century (%s - %s)" % (century, century_end), century,
                century_end)
            millenium_agg.add_aggregation(century_agg)

            for decade in range(century, century_end, 10):
                decade_end = decade + 10
                decade_agg = build_period_agg(
                    "Decade (%s - %s)" % (decade, decade_end), decade,
                    decade_end)
                century_agg.add_aggregation(decade_agg)

        query.add_aggregation(millenium_agg)

    wheel_results = query.search(index='resource')
    if wheel_results is None:
        # Same failure mode as the first search: report it as "not found"
        # instead of failing on the subscript below.
        return HttpResponseNotFound(
            _('Error retrieving the time wheel config'))

    root = d3Item(name='root')
    results = {'buckets': [wheel_results['aggregations']]}
    results_with_ranges = appendDateRanges(results, range_lookup)
    transformESAggToD3Hierarchy(results_with_ranges, root)
    return JSONResponse(root, indent=4)
示例#5
0
    def time_wheel_config(self, user):
        """Build the time wheel tree for ``user`` from ``RESOURCES_INDEX``.

        A first query reads the minimum and maximum ``dates.date`` values;
        the rounded span is then split into nested period aggregations
        described by a "date tier" dict (``name``, ``interval``, optional
        ``root`` / ``child`` / ``range`` keys). ``settings.TIMEWHEEL_DATE_TIERS``
        overrides the built-in tiers when it is not ``None``.

        Only root-tier filters are restricted to
        ``self.get_permitted_nodegroups(user)``.

        Returns:
            The root ``d3Item`` whose ``size`` is the sum of its children's
            sizes, or ``None`` when the first search returns ``None`` or
            reports no minimum / maximum date. When ``user.username`` is a
            key of ``settings.CACHE_BY_USER`` the root is also stored in
            ``cache`` under ``"time_wheel_config_<username>"``.
        """
        se = SearchEngineFactory().create()
        bounds_query = Query(se, limit=0)
        bounds_agg = NestedAgg(path="dates", name="min_max_agg")
        bounds_agg.add_aggregation(MinAgg(field="dates.date"))
        bounds_agg.add_aggregation(MaxAgg(field="dates.date"))
        bounds_query.add_aggregation(bounds_agg)
        results = bounds_query.search(index=RESOURCES_INDEX)

        # Without a result or without both date bounds there is no wheel.
        if results is None:
            return None
        date_bounds = results["aggregations"]["min_max_agg"]
        if date_bounds["min_dates.date"]["value"] is None:
            return None
        if date_bounds["max_dates.date"]["value"] is None:
            return None

        min_date = int(date_bounds["min_dates.date"]["value"]) / 10000
        max_date = int(date_bounds["max_dates.date"]["value"]) / 10000

        # round min and max date to the nearest 1000 years
        if min_date < 0:
            min_date = math.ceil(math.fabs(min_date) / 1000) * -1000
        else:
            min_date = math.floor(min_date / 1000) * 1000
        if max_date < 0:
            max_date = math.floor(math.fabs(max_date) / 1000) * -1000
        else:
            max_date = math.ceil(max_date / 1000) * 1000

        wheel_query = Query(se, limit=0)
        range_lookup = {}

        def gen_range_agg(gte=None, lte=None, permitted_nodegroups=None):
            """Return the filter matching anything that intersects [gte, lte]."""

            def scoped_filter(range_field, nodegroup_field):
                scoped = Bool()
                scoped.filter(
                    Range(field=range_field,
                          gte=gte,
                          lte=lte,
                          relation="intersects"))
                if permitted_nodegroups is not None:
                    scoped.filter(
                        Terms(field=nodegroup_field,
                              terms=permitted_nodegroups))
                return scoped

            date_query = scoped_filter("dates.date", "dates.nodegroup_id")
            date_ranges_query = scoped_filter("date_ranges.date_range",
                                              "date_ranges.nodegroup_id")
            either = Bool()
            either.should(Nested(path="date_ranges", query=date_ranges_query))
            either.should(Nested(path="dates", query=date_query))
            return either

        # Pick the tier layout: the settings override wins, otherwise the
        # width of the date span decides between the two built-in layouts.
        span_exceeds_millennium = abs(int(min_date) - int(max_date)) > 1000
        if settings.TIMEWHEEL_DATE_TIERS is not None:
            date_tiers = settings.TIMEWHEEL_DATE_TIERS
        elif span_exceeds_millennium:
            date_tiers = {
                "name": "Millennium",
                "interval": 1000,
                "root": True,
                "child": {
                    "name": "Half-millennium",
                    "interval": 500,
                    "child": {
                        "name": "Century",
                        "interval": 100
                    }
                },
            }
        else:
            date_tiers = {
                "name": "Millennium",
                "interval": 1000,
                "root": True,
                "child": {
                    "name": "Century",
                    "interval": 100,
                    "child": {
                        "name": "Decade",
                        "interval": 10
                    }
                },
            }

        def add_date_tier(date_tier,
                          low_date,
                          high_date,
                          previous_period_agg=None):
            """Add one aggregation per period of ``date_tier`` and recurse into its child."""
            step = date_tier["interval"]
            label = date_tier["name"]
            is_root = "root" in date_tier
            in_range = True
            # The root tier runs one extra step past the upper bound.
            stop = int(high_date) + step if is_root else int(high_date)

            for start in range(int(low_date), stop, step):
                end = start + step
                if "range" in date_tier:
                    limits = date_tier["range"]
                    in_range = start >= limits["min"] and end <= limits["max"]

                period_name = "{0} ({1} - {2})".format(label, start, end)
                nodegroups = self.get_permitted_nodegroups(
                    user) if is_root else None
                period_filter = gen_range_agg(
                    gte=ExtendedDateFormat(start).lower,
                    lte=ExtendedDateFormat(end).lower,
                    permitted_nodegroups=nodegroups)
                period_agg = FiltersAgg(name=period_name)
                period_agg.add_filter(period_filter)

                # Non-root periods hang off their parent, unless the tier's
                # optional "range" excludes them.
                if not is_root and in_range is True:
                    previous_period_agg.add_aggregation(period_agg)
                range_lookup[period_name] = [start, end]

                if "child" in date_tier:
                    add_date_tier(date_tier["child"], start, end, period_agg)
                if is_root:
                    wheel_query.add_aggregation(period_agg)

        add_date_tier(date_tiers, min_date, max_date)

        root = d3Item(name="root")
        wheel_aggregations = wheel_query.search(
            index=RESOURCES_INDEX)["aggregations"]
        results_with_ranges = self.appendDateRanges(
            {"buckets": [wheel_aggregations]}, range_lookup)
        self.transformESAggToD3Hierarchy(results_with_ranges, root)

        # calculate total number of docs
        for child in root.children:
            root.size += child.size

        username = user.username
        if username in settings.CACHE_BY_USER:
            cache_key = "time_wheel_config_{0}".format(username)
            # NOTE(review): third argument is presumably the cache timeout - confirm.
            cache.set(cache_key, root, settings.CACHE_BY_USER[username])

        return root
示例#6
0
def time_wheel_config(request):
    """Return the millennium > century > decade time wheel as JSON.

    A first query reads the minimum and maximum of the ``dates`` field
    formatted as years. The rounded span is then covered with nested
    ``DateRangeAgg`` aggregations that a second query evaluates, and the
    result is converted into a ``d3Item`` tree.

    Returns:
        ``JSONResponse`` with the root ``d3Item``, or ``HttpResponseNotFound``
        when the first search returns ``None`` or reports no minimum /
        maximum date.
    """
    se = SearchEngineFactory().create()
    bounds_query = Query(se, limit=0)
    bounds_query.add_aggregation(MinAgg(field='dates', format='y'))
    bounds_query.add_aggregation(MaxAgg(field='dates', format='y'))
    results = bounds_query.search(index='resource')

    has_bounds = (
        results is not None
        and results['aggregations']['min_dates']['value'] is not None
        and results['aggregations']['max_dates']['value'] is not None)
    if not has_bounds:
        return HttpResponseNotFound(
            _('Error retrieving the time wheel config'))

    year_bounds = results['aggregations']
    min_date = int(year_bounds['min_dates']['value_as_string'])
    max_date = int(year_bounds['max_dates']['value_as_string'])

    # round min and max date to the nearest 1000 years
    if min_date < 0:
        min_date = math.ceil(math.fabs(min_date) / 1000) * -1000
    else:
        min_date = math.floor(min_date / 1000) * 1000
    if max_date < 0:
        max_date = math.floor(math.fabs(max_date) / 1000) * -1000
    else:
        max_date = math.ceil(max_date / 1000) * 1000

    def year_range_agg(name, first_year, last_year):
        """Create a ``DateRangeAgg`` on ``dates`` spanning the two years."""
        return DateRangeAgg(name=name,
                            field='dates',
                            format='y',
                            min_date=str(first_year),
                            max_date=str(last_year))

    wheel_query = Query(se, limit=0)
    for millennium_start in range(int(min_date), int(max_date) + 1000, 1000):
        millennium_end = millennium_start + 1000
        millennium_agg = year_range_agg(
            "Millennium (%s-%s)" % (millennium_start, millennium_end),
            millennium_start, millennium_end)

        for century_start in range(millennium_start, millennium_end, 100):
            century_end = century_start + 100
            century_agg = year_range_agg(
                "Century (%s-%s)" % (century_start, century_end),
                century_start, century_end)
            millennium_agg.add_aggregation(century_agg)

            for decade_start in range(century_start, century_end, 10):
                decade_end = decade_start + 10
                decade_agg = year_range_agg(
                    "Decade (%s-%s)" % (decade_start, decade_end),
                    decade_start, decade_end)
                century_agg.add_aggregation(decade_agg)

        wheel_query.add_aggregation(millennium_agg)

    root = d3Item(name='root')
    wheel_aggregations = wheel_query.search(index='resource')['aggregations']
    transformESAggToD3Hierarchy({'buckets': [wheel_aggregations]}, root)
    return JSONResponse(root, indent=4)