Пример #1
0
 def append_to_document(self, document, nodevalue, nodeid, tile, provisional=False):
     """Add the concept value(s) held by a node to a search document.

     Every value id in ``nodevalue`` is resolved with ``self.get_value``.
     For each one a "domains" entry and a "strings" entry are appended to
     ``document``; when ``self.get_concept_dates`` returns a date range for
     the value's concept, a "date_ranges" entry is appended as well.

     Args:
         document: mapping holding lists under the keys "date_ranges",
             "domains" and "strings"; it is modified in place.
         nodevalue: a single value id, or a list/tuple of value ids.
         nodeid: not used by this implementation.
         tile: object whose ``nodegroup_id`` is copied onto every entry.
         provisional: flag copied onto every entry.
     """
     # Explicit type check instead of assert/except AssertionError: asserts
     # are stripped when Python runs with -O, which would let a scalar string
     # id be iterated character by character.
     if not isinstance(nodevalue, (list, tuple)):
         nodevalue = [nodevalue]
     for valueid in nodevalue:
         value = self.get_value(valueid)
         date_range = self.get_concept_dates(value.concept)
         if date_range is not None:
             min_date = ExtendedDateFormat(date_range["min_year"]).lower
             max_date = ExtendedDateFormat(date_range["max_year"]).upper
             date_range_entry = {
                 "date_range": {"gte": min_date, "lte": max_date},
                 "nodegroup_id": tile.nodegroup_id,
                 "provisional": provisional,
             }
             # Compare the complete entry: the list stores full entries, so
             # testing for the bare gte/lte dict could never find a duplicate.
             if date_range_entry not in document["date_ranges"]:
                 document["date_ranges"].append(date_range_entry)
         document["domains"].append(
             {
                 "label": value.value,
                 "conceptid": value.concept_id,
                 "valueid": valueid,
                 "nodegroup_id": tile.nodegroup_id,
                 "provisional": provisional,
             }
         )
         document["strings"].append({"string": value.value, "nodegroup_id": tile.nodegroup_id, "provisional": provisional})
Пример #2
0
    def parse(self, test_case):
        """Parse the date string in ``test_case`` and check the parsed bounds.

        ``test_case[0]`` is the string to parse, or a tuple whose first
        element is that string. The length of ``test_case`` selects the
        checks:

        * 1 - the parsed value must be invalid.
        * 2 - ``test_case[1]`` is the expected lower bound, or, when the
          parser yields a result set, a sequence of (lower, upper) pairs.
        * 3 - expected lower and upper bounds.
        * 4 - one value expected for both lower and upper, followed by the
          expected fuzzy lower and fuzzy upper bounds.
        * 5 - expected lower, upper, fuzzy lower and fuzzy upper bounds.

        Any other length raises ``Exception``.
        """
        source = test_case[0]
        if isinstance(source, tuple):
            source, _ = source

        print("parsing '%s'" % source)
        parsed = ExtendedDateFormat(source)
        size = len(test_case)

        if size == 1:
            self.assertEqual(parsed.is_valid(), False)
            return

        if size == 2:
            if not parsed.result_set:
                self.assertEqual(str(parsed.lower), test_case[1])
                return
            expected_pairs = test_case[1]
            for position, member in enumerate(parsed.result_set):
                self.assertEqual(str(member.lower), expected_pairs[position][0])
                self.assertEqual(str(member.upper), expected_pairs[position][1])
            return

        # attribute to read -> index of the expected value, per tuple length
        checks_by_size = {
            3: (("lower", 1), ("upper", 2)),
            4: (("lower", 1), ("upper", 1), ("lower_fuzzy", 2), ("upper_fuzzy", 3)),
            5: (("lower", 1), ("upper", 2), ("lower_fuzzy", 3), ("upper_fuzzy", 4)),
        }
        if size not in checks_by_size:
            raise Exception(test_case)
        for attribute, expected_index in checks_by_size[size]:
            self.assertEqual(str(getattr(parsed, attribute)), test_case[expected_index])
Пример #3
0
 def add_date_tier(date_tier,
                   low_date,
                   high_date,
                   previous_period_agg=None):
     """Add one tier of period aggregations and recurse into child tiers.

     The span from ``low_date`` to ``high_date`` is walked in steps of
     ``date_tier["interval"]``. Each step gets a ``FiltersAgg`` named after
     ``date_tier["name"]`` and its bounds, and the bounds are recorded in the
     enclosing ``range_lookup``. A tier containing the key "root" extends the
     span by one interval, restricts its filter to the nodegroups permitted
     for the enclosing ``user`` and attaches its aggregations to the
     enclosing ``query``. Any other tier attaches them to
     ``previous_period_agg``, but only while the period lies inside
     ``date_tier["range"]`` when that key is present.

     Args:
         date_tier: mapping with "interval" and "name", and optionally
             "root", "range" (with "min"/"max") and "child".
         low_date: first period start (converted with ``int``).
         high_date: end of the span, exclusive (converted with ``int``).
         previous_period_agg: parent aggregation for non-root tiers.
     """
     step = date_tier["interval"]
     label = date_tier["name"]
     is_root = "root" in date_tier
     in_bounds = True  # stays True for tiers that declare no "range"
     if is_root:
         high_date = int(high_date) + step
     for lower_bound in range(int(low_date), int(high_date), step):
         upper_bound = lower_bound + step
         if "range" in date_tier:
             limits = date_tier["range"]
             in_bounds = lower_bound >= limits["min"] and upper_bound <= limits["max"]
         period_name = "{0} ({1} - {2})".format(label, lower_bound, upper_bound)

         # only the root tier is restricted to the user's permitted nodegroups
         nodegroups = None
         if is_root:
             nodegroups = self.get_permitted_nodegroups(user)

         period_boolquery = gen_range_agg(
             gte=ExtendedDateFormat(lower_bound).lower,
             lte=ExtendedDateFormat(upper_bound).lower,
             permitted_nodegroups=nodegroups)
         bucket = FiltersAgg(name=period_name)
         bucket.add_filter(period_boolquery)

         if not is_root and in_bounds is True:
             previous_period_agg.add_aggregation(bucket)
         range_lookup[period_name] = [lower_bound, upper_bound]

         if "child" in date_tier:
             add_date_tier(date_tier["child"], lower_bound, upper_bound, bucket)
         if is_root:
             query.add_aggregation(bucket)
Пример #4
0
 def append_to_document(self, document, nodevalue, nodeid, tile, provisional=False):
     """Add the concept value(s) held by a node to a search document.

     Every value id in ``nodevalue`` is resolved with ``self.get_value``.
     For each one a 'domains' entry and a 'strings' entry are appended to
     ``document``; when ``self.get_concept_dates`` returns a date range for
     the value's concept, a 'date_ranges' entry is appended as well.

     Args:
         document: mapping holding lists under the keys 'date_ranges',
             'domains' and 'strings'; it is modified in place.
         nodevalue: a single value id, or a list/tuple of value ids.
         nodeid: not used by this implementation.
         tile: object whose ``nodegroup_id`` is copied onto every entry.
         provisional: flag copied onto every entry.
     """
     # Explicit type check instead of assert/except AssertionError: asserts
     # are stripped when Python runs with -O, which would let a scalar string
     # id be iterated character by character.
     if not isinstance(nodevalue, (list, tuple)):
         nodevalue = [nodevalue]
     for valueid in nodevalue:
         value = self.get_value(valueid)
         date_range = self.get_concept_dates(value.concept)
         if date_range is not None:
             min_date = ExtendedDateFormat(date_range['min_year']).lower
             max_date = ExtendedDateFormat(date_range['max_year']).upper
             date_range_entry = {
                 'date_range': {'gte': min_date, 'lte': max_date},
                 'nodegroup_id': tile.nodegroup_id,
                 'provisional': provisional,
             }
             # Compare the complete entry: the list stores full entries, so
             # testing for the bare gte/lte dict could never find a duplicate.
             if date_range_entry not in document['date_ranges']:
                 document['date_ranges'].append(date_range_entry)
         document['domains'].append({
             'label': value.value,
             'conceptid': value.concept_id,
             'valueid': valueid,
             'nodegroup_id': tile.nodegroup_id,
             'provisional': provisional,
         })
         document['strings'].append({'string': value.value, 'nodegroup_id': tile.nodegroup_id, 'provisional': provisional})
Пример #5
0
def time_wheel_config(request):
    """Build the time wheel configuration as a hierarchy of date buckets.

    A first search of the 'resource' index fetches the minimum and maximum
    of 'dates.date'. Those bounds are widened outward to whole millennia and
    a second query is built with nested Millennium > Century > Decade filter
    aggregations. Each aggregation matches documents whose 'dates' or
    'date_ranges' intersect the period. Only the millennium level is
    restricted to the nodegroups permitted for ``request.user``.

    Args:
        request: the incoming request; ``request.user`` is used to resolve
            the permitted nodegroups.

    Returns:
        A ``JSONResponse`` wrapping the root ``d3Item`` of the hierarchy, or
        ``HttpResponseNotFound`` when the first search returns nothing or
        has no minimum/maximum date value.
    """
    se = SearchEngineFactory().create()
    query = Query(se, limit=0)
    nested_agg = NestedAgg(path='dates', name='min_max_agg')
    nested_agg.add_aggregation(MinAgg(field='dates.date'))
    nested_agg.add_aggregation(MaxAgg(field='dates.date'))
    query.add_aggregation(nested_agg)
    results = query.search(index='resource')

    # Guard clause: without both bounds there is nothing to build.
    bounds = None if results is None else results['aggregations']['min_max_agg']
    if (bounds is None or bounds['min_dates.date']['value'] is None
            or bounds['max_dates.date']['value'] is None):
        return HttpResponseNotFound(
            _('Error retrieving the time wheel config'))

    # NOTE(review): dividing by 10000 presumably strips the month/day digits
    # from the indexed numeric date, leaving the year - confirm against the
    # indexing code.
    min_date = int(bounds['min_dates.date']['value']) / 10000
    max_date = int(bounds['max_dates.date']['value']) / 10000

    # Widen the span outward to whole millennia: the minimum is rounded down
    # and the maximum is rounded up (not to the nearest 1000).
    if min_date < 0:
        min_date = math.ceil(math.fabs(min_date) / 1000) * -1000
    else:
        min_date = math.floor(min_date / 1000) * 1000
    if max_date < 0:
        max_date = math.floor(math.fabs(max_date) / 1000) * -1000
    else:
        max_date = math.ceil(max_date / 1000) * 1000

    query = Query(se, limit=0)
    range_lookup = {}
    # The permitted nodegroups do not change between millennia, so resolve
    # them once instead of once per loop iteration.
    user_nodegroups = get_permitted_nodegroups(request.user)

    def gen_range_agg(gte=None, lte=None, permitted_nodegroups=None):
        """Return a Bool matching 'dates' or 'date_ranges' that intersect gte..lte."""
        date_query = Bool()
        date_query.filter(
            Range(field='dates.date',
                  gte=gte,
                  lte=lte,
                  relation='intersects'))
        if permitted_nodegroups:
            date_query.filter(
                Terms(field='dates.nodegroup_id',
                      terms=permitted_nodegroups))
        date_ranges_query = Bool()
        date_ranges_query.filter(
            Range(field='date_ranges.date_range',
                  gte=gte,
                  lte=lte,
                  relation='intersects'))
        if permitted_nodegroups:
            date_ranges_query.filter(
                Terms(field='date_ranges.nodegroup_id',
                      terms=permitted_nodegroups))
        wrapper_query = Bool()
        wrapper_query.should(
            Nested(path='date_ranges', query=date_ranges_query))
        wrapper_query.should(Nested(path='dates', query=date_query))
        return wrapper_query

    def build_period_agg(label, low, high, nodegroups=None):
        """Create the filter aggregation for one period and record its bounds."""
        period_name = "%s (%s - %s)" % (label, low, high)
        period_boolquery = gen_range_agg(
            gte=ExtendedDateFormat(low).lower,
            lte=ExtendedDateFormat(high).lower,
            permitted_nodegroups=nodegroups)
        period_agg = FiltersAgg(name=period_name)
        period_agg.add_filter(period_boolquery)
        range_lookup[period_name] = [low, high]
        return period_agg

    for millennium in range(int(min_date), int(max_date) + 1000, 1000):
        millennium_agg = build_period_agg(
            "Millennium", millennium, millennium + 1000, user_nodegroups)

        for century in range(millennium, millennium + 1000, 100):
            century_agg = build_period_agg("Century", century, century + 100)
            millennium_agg.add_aggregation(century_agg)

            for decade in range(century, century + 100, 10):
                decade_agg = build_period_agg("Decade", decade, decade + 10)
                century_agg.add_aggregation(decade_agg)

        query.add_aggregation(millennium_agg)

    root = d3Item(name='root')
    results = {'buckets': [query.search(index='resource')['aggregations']]}
    results_with_ranges = appendDateRanges(results, range_lookup)
    transformESAggToD3Hierarchy(results_with_ranges, root)
    return JSONResponse(root, indent=4)
Пример #6
0
def build_search_results_dsl(request):
    """Build the search query described by the request's GET parameters.

    The following GET parameters are read: 'termFilter', 'mapFilter',
    'temporalFilter' and 'advanced' (all deserialized with
    ``JSONDeserializer``), plus 'export', 'mobiledownload' and 'page', which
    control paging. The provisional mode comes from
    ``get_provisional_type(request)`` and is compared against ``True``,
    ``False`` and the string 'only provisional'.

    Returns:
        A dict with two keys: 'query', the assembled ``Query``, and
        'search_buffer', the buffered map-filter geometry as GeoJSON, or
        ``None`` when no map filter feature was supplied.
    """
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(
        request.GET.get('mapFilter', '{}'))
    include_provisional = get_provisional_type(request)

    export = request.GET.get('export', None)
    mobile_download = request.GET.get('mobiledownload', None)
    # an empty 'page' parameter is treated as page 1
    page = 1 if request.GET.get('page') == '' else int(
        request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(
        request.GET.get('temporalFilter', '{}'))
    advanced_filters = JSONDeserializer().deserialize(
        request.GET.get('advanced', '[]'))
    search_buffer = None
    se = SearchEngineFactory().create()

    # page size depends on whether this is an export, a mobile download or a normal search
    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    elif mobile_download != None:
        limit = settings.MOBILE_DOWNLOAD_RESOURCE_LIMIT
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * int(page - 1), limit=limit)
    search_query = Bool()

    # aggregation over the nested 'points' documents (geohash grid and bounds are added below)
    nested_agg = NestedAgg(path='points', name='geo_aggs')
    nested_agg_filter = FiltersAgg(name='inner')

    if include_provisional == True:
        nested_agg_filter.add_filter(
            Terms(field='points.provisional', terms=['false', 'true']))

    else:
        provisional_resource_filter = Bool()

        if include_provisional == False:
            provisional_resource_filter.filter(
                Terms(field='provisional', terms=['false', 'partial']))
            nested_agg_filter.add_filter(
                Terms(field='points.provisional', terms=['false']))

        elif include_provisional == 'only provisional':
            provisional_resource_filter.filter(
                Terms(field='provisional', terms=['true', 'partial']))
            nested_agg_filter.add_filter(
                Terms(field='points.provisional', terms=['true']))

        search_query.must(provisional_resource_filter)

    nested_agg_filter.add_aggregation(
        GeoHashGridAgg(field='points.point',
                       name='grid',
                       precision=settings.HEX_BIN_PRECISION))
    nested_agg_filter.add_aggregation(
        GeoBoundsAgg(field='points.point', name='bounds'))
    nested_agg.add_aggregation(nested_agg_filter)
    query.add_aggregation(nested_agg)

    # resolved once and reused by every nodegroup restriction below
    permitted_nodegroups = get_permitted_nodegroups(request.user)

    # term filter: each entry is either a string match ('term' / 'string') or a concept match
    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            # NOTE(review): term_query and provisional_term_filter are created but never used in this function
            term_query = Bool()
            provisional_term_filter = Bool()
            if term['type'] == 'term' or term['type'] == 'string':
                string_filter = Bool()
                if term['type'] == 'term':
                    string_filter.must(
                        Match(field='strings.string',
                              query=term['value'],
                              type='phrase'))
                elif term['type'] == 'string':
                    string_filter.should(
                        Match(field='strings.string',
                              query=term['value'],
                              type='phrase_prefix'))
                    string_filter.should(
                        Match(field='strings.string.folded',
                              query=term['value'],
                              type='phrase_prefix'))

                if include_provisional == False:
                    string_filter.must_not(
                        Match(field='strings.provisional',
                              query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    string_filter.must_not(
                        Match(field='strings.provisional',
                              query='false',
                              type='phrase'))

                string_filter.filter(
                    Terms(field='strings.nodegroup_id',
                          terms=permitted_nodegroups))
                nested_string_filter = Nested(path='strings',
                                              query=string_filter)
                if term['inverted']:
                    search_query.must_not(nested_string_filter)
                else:
                    search_query.must(nested_string_filter)
                    # need to set min_score because the query returns results with score 0 and those have to be removed, which I don't think it should be doing
                    query.min_score('0.01')
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Bool()
                conceptid_filter.filter(
                    Terms(field='domains.conceptid', terms=concept_ids))
                conceptid_filter.filter(
                    Terms(field='domains.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    conceptid_filter.must_not(
                        Match(field='domains.provisional',
                              query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    conceptid_filter.must_not(
                        Match(field='domains.provisional',
                              query='false',
                              type='phrase'))

                nested_conceptid_filter = Nested(path='domains',
                                                 query=conceptid_filter)
                if term['inverted']:
                    search_query.must_not(nested_conceptid_filter)
                else:
                    search_query.filter(nested_conceptid_filter)

    # spatial filter: only the first feature of the map filter is used
    if 'features' in spatial_filter:

        if len(spatial_filter['features']) > 0:
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = {}
            if 'properties' in spatial_filter['features'][0]:
                feature_properties = spatial_filter['features'][0][
                    'properties']
            # default buffer (zero width) used when the feature does not supply one
            buffer = {'width': 0, 'unit': 'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            search_buffer = _buffer(feature_geom, buffer['width'],
                                    buffer['unit'])
            # from here on the buffered geometry replaces the submitted one
            feature_geom = JSONDeserializer().deserialize(search_buffer.json)
            geoshape = GeoShape(field='geometries.geom.features.geometry',
                                type=feature_geom['type'],
                                coordinates=feature_geom['coordinates'])

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            spatial_query = Bool()
            if invert_spatial_search == True:
                spatial_query.must_not(geoshape)
            else:
                spatial_query.filter(geoshape)

            # get the nodegroup_ids that the user has permission to search
            spatial_query.filter(
                Terms(field='geometries.nodegroup_id',
                      terms=permitted_nodegroups))

            if include_provisional == False:
                spatial_query.filter(
                    Terms(field='geometries.provisional', terms=['false']))

            elif include_provisional == 'only provisional':
                spatial_query.filter(
                    Terms(field='geometries.provisional', terms=['true']))

            search_query.filter(Nested(path='geometries', query=spatial_query))

    # temporal filter: applied only when both 'fromDate' and 'toDate' are present
    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        # NOTE(review): 'now' is assigned but never used in this function
        now = str(datetime.utcnow())
        start_date = ExtendedDateFormat(temporal_filter['fromDate'])
        end_date = ExtendedDateFormat(temporal_filter['toDate'])
        # a missing or empty 'dateNodeId' means no single date node is targeted
        date_nodeid = str(
            temporal_filter['dateNodeId']
        ) if 'dateNodeId' in temporal_filter and temporal_filter[
            'dateNodeId'] != '' else None
        query_inverted = False if 'inverted' not in temporal_filter else temporal_filter[
            'inverted']

        temporal_query = Bool()

        if query_inverted:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            inverted_date_query = Bool()
            inverted_date_ranges_query = Bool()

            if start_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', lt=start_date.lower))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range', lt=start_date.lower))
            if end_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', gt=end_date.upper))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range', gt=end_date.upper))

            date_query = Bool()
            date_query.filter(inverted_date_query)
            date_query.filter(
                Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))

            if include_provisional == False:
                date_query.filter(
                    Terms(field='dates.provisional', terms=['false']))

            elif include_provisional == 'only provisional':
                date_query.filter(
                    Terms(field='dates.provisional', terms=['true']))

            # with a specific date node only 'dates' is searched; otherwise 'date_ranges' is searched as well
            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(inverted_date_ranges_query)
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['false']))

                elif include_provisional == 'only provisional':
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional', terms=['true']))

                temporal_query.should(
                    Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        else:
            # regular search: dates between the lower bound of fromDate and the upper bound of toDate
            date_query = Bool()
            date_query.filter(
                Range(field='dates.date',
                      gte=start_date.lower,
                      lte=end_date.upper))
            date_query.filter(
                Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))

            if include_provisional == False:
                date_query.filter(
                    Terms(field='dates.provisional', terms=['false']))
            elif include_provisional == 'only provisional':
                date_query.filter(
                    Terms(field='dates.provisional', terms=['true']))

            # with a specific date node only 'dates' is searched; otherwise 'date_ranges' is searched as well
            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(
                    Range(field='date_ranges.date_range',
                          gte=start_date.lower,
                          lte=end_date.upper,
                          relation='intersects'))
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['false']))
                if include_provisional == 'only provisional':
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional', terms=['true']))

                temporal_query.should(
                    Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        search_query.filter(temporal_query)

    # advanced filters: consecutive filters are combined with must; a filter whose 'op' is 'or'
    # (other than the first) starts a new group, and the groups are combined with should
    datatype_factory = DataTypeFactory()
    if len(advanced_filters) > 0:
        advanced_query = Bool()
        grouped_query = Bool()
        grouped_queries = [grouped_query]
        for index, advanced_filter in enumerate(advanced_filters):
            tile_query = Bool()
            # NOTE(review): dict.iteritems() exists only on Python 2
            for key, val in advanced_filter.iteritems():
                # every key other than 'op' is used as a Node primary key
                if key != 'op':
                    node = models.Node.objects.get(pk=key)
                    # nodes the user may not read contribute no filter
                    if request.user.has_perm('read_nodegroup', node.nodegroup):
                        datatype = datatype_factory.get_instance(node.datatype)
                        datatype.append_search_filters(val, node, tile_query,
                                                       request)
            nested_query = Nested(path='tiles', query=tile_query)
            if advanced_filter['op'] == 'or' and index != 0:
                grouped_query = Bool()
                grouped_queries.append(grouped_query)
            grouped_query.must(nested_query)
        for grouped_query in grouped_queries:
            advanced_query.should(grouped_query)
        search_query.must(advanced_query)

    query.add_query(search_query)
    # hand the buffer back as GeoJSON rather than as the geometry object
    if search_buffer != None:
        search_buffer = search_buffer.geojson

    return {'query': query, 'search_buffer': search_buffer}
Пример #7
0
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        """Add the request's temporal filter to the search results query.

        The filter is read from the GET parameter named by
        ``details["componentname"]`` (``details`` is defined outside this
        method) and deserialized with ``JSONDeserializer``. Nothing is added
        unless it contains both "fromDate" and "toDate".

        A regular filter matches dates between the lower bound of "fromDate"
        and the upper bound of "toDate"; an inverted filter matches dates
        before the former or after the latter. When "dateNodeId" is set only
        "dates" entries of that node are searched, otherwise "date_ranges"
        are searched as well.

        Args:
            search_results_object: mapping whose "query" entry receives the
                filter through ``add_query``.
            permitted_nodegroups: nodegroup ids the search is restricted to.
            include_provisional: ``False`` restricts to non-provisional
                entries, "only provisional" to provisional ones; any other
                value adds no provisional restriction.
        """
        search_query = Bool()
        raw_filter = self.request.GET.get(details["componentname"], "")
        temporal_filter = JSONDeserializer().deserialize(raw_filter)
        if not ("fromDate" in temporal_filter and "toDate" in temporal_filter):
            return

        start_date = ExtendedDateFormat(temporal_filter["fromDate"])
        end_date = ExtendedDateFormat(temporal_filter["toDate"])

        # a missing or empty "dateNodeId" means no single date node is targeted
        date_nodeid = None
        if "dateNodeId" in temporal_filter and temporal_filter["dateNodeId"] != "":
            date_nodeid = str(temporal_filter["dateNodeId"])

        if "inverted" in temporal_filter:
            query_inverted = temporal_filter["inverted"]
        else:
            query_inverted = False

        def restrict(bool_query, nodegroup_field, provisional_field):
            # limit to the permitted nodegroups, then apply the provisional mode
            bool_query.filter(
                Terms(field=nodegroup_field, terms=permitted_nodegroups))
            if include_provisional is False:
                bool_query.filter(
                    Terms(field=provisional_field, terms=["false"]))
            elif include_provisional == "only provisional":
                bool_query.filter(
                    Terms(field=provisional_field, terms=["true"]))

        temporal_query = Bool()
        inverted_date_ranges_query = None

        if query_inverted:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            date_criterion = Bool()
            inverted_date_ranges_query = Bool()

            if start_date.is_valid():
                date_criterion.should(
                    Range(field="dates.date", lt=start_date.lower))
                inverted_date_ranges_query.should(
                    Range(field="date_ranges.date_range",
                          lt=start_date.lower))
            if end_date.is_valid():
                date_criterion.should(
                    Range(field="dates.date", gt=end_date.upper))
                inverted_date_ranges_query.should(
                    Range(field="date_ranges.date_range",
                          gt=end_date.upper))
        else:
            date_criterion = Range(field="dates.date",
                                   gte=start_date.lower,
                                   lte=end_date.upper)

        date_query = Bool()
        date_query.filter(date_criterion)
        restrict(date_query, "dates.nodegroup_id", "dates.provisional")

        if date_nodeid:
            date_query.filter(Term(field="dates.nodeid", term=date_nodeid))
        else:
            date_ranges_query = Bool()
            if query_inverted:
                date_ranges_query.filter(inverted_date_ranges_query)
            else:
                date_ranges_query.filter(
                    Range(field="date_ranges.date_range",
                          gte=start_date.lower,
                          lte=end_date.upper,
                          relation="intersects"))
            restrict(date_ranges_query, "date_ranges.nodegroup_id",
                     "date_ranges.provisional")
            temporal_query.should(
                Nested(path="date_ranges", query=date_ranges_query))

        # the "dates" clause is always added, after any "date_ranges" clause
        temporal_query.should(Nested(path="dates", query=date_query))

        search_query.filter(temporal_query)
        search_results_object["query"].add_query(search_query)
Пример #8
0
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        """Add the request's temporal filter to the search results query.

        The filter is read from the GET parameter named by
        ``details['componentname']`` (``details`` is defined outside this
        method) and deserialized with ``JSONDeserializer``. Nothing is added
        unless it contains both 'fromDate' and 'toDate'.

        A regular filter matches dates between the lower bound of 'fromDate'
        and the upper bound of 'toDate'; an inverted filter matches dates
        before the former or after the latter. When 'dateNodeId' is set only
        'dates' entries of that node are searched, otherwise 'date_ranges'
        are searched as well.

        Args:
            search_results_object: mapping whose 'query' entry receives the
                filter through ``add_query``.
            permitted_nodegroups: nodegroup ids the search is restricted to.
            include_provisional: ``False`` restricts to non-provisional
                entries, 'only provisional' to provisional ones; any other
                value adds no provisional restriction.
        """
        search_query = Bool()
        raw_filter = self.request.GET.get(details['componentname'], '')
        temporal_filter = JSONDeserializer().deserialize(raw_filter)
        if not ('fromDate' in temporal_filter and 'toDate' in temporal_filter):
            return

        start_date = ExtendedDateFormat(temporal_filter['fromDate'])
        end_date = ExtendedDateFormat(temporal_filter['toDate'])

        # a missing or empty 'dateNodeId' means no single date node is targeted
        date_nodeid = None
        if 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != '':
            date_nodeid = str(temporal_filter['dateNodeId'])

        if 'inverted' in temporal_filter:
            query_inverted = temporal_filter['inverted']
        else:
            query_inverted = False

        def restrict(bool_query, nodegroup_field, provisional_field):
            # limit to the permitted nodegroups, then apply the provisional mode
            bool_query.filter(
                Terms(field=nodegroup_field, terms=permitted_nodegroups))
            if include_provisional is False:
                bool_query.filter(
                    Terms(field=provisional_field, terms=['false']))
            elif include_provisional == 'only provisional':
                bool_query.filter(
                    Terms(field=provisional_field, terms=['true']))

        temporal_query = Bool()
        inverted_date_ranges_query = None

        if query_inverted:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            date_criterion = Bool()
            inverted_date_ranges_query = Bool()

            if start_date.is_valid():
                date_criterion.should(
                    Range(field='dates.date', lt=start_date.lower))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range',
                          lt=start_date.lower))
            if end_date.is_valid():
                date_criterion.should(
                    Range(field='dates.date', gt=end_date.upper))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range',
                          gt=end_date.upper))
        else:
            date_criterion = Range(field='dates.date',
                                   gte=start_date.lower,
                                   lte=end_date.upper)

        date_query = Bool()
        date_query.filter(date_criterion)
        restrict(date_query, 'dates.nodegroup_id', 'dates.provisional')

        if date_nodeid:
            date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
        else:
            date_ranges_query = Bool()
            if query_inverted:
                date_ranges_query.filter(inverted_date_ranges_query)
            else:
                date_ranges_query.filter(
                    Range(field='date_ranges.date_range',
                          gte=start_date.lower,
                          lte=end_date.upper,
                          relation='intersects'))
            restrict(date_ranges_query, 'date_ranges.nodegroup_id',
                     'date_ranges.provisional')
            temporal_query.should(
                Nested(path='date_ranges', query=date_ranges_query))

        # the 'dates' clause is always added, after any 'date_ranges' clause
        temporal_query.should(Nested(path='dates', query=date_query))

        search_query.filter(temporal_query)
        search_results_object['query'].add_query(search_query)
Пример #9
0
def build_search_results_dsl(request):
    """Build the search query described by the GET parameters of *request*.

    The following ``request.GET`` keys are read:

    * ``termFilter``, ``mapFilter``, ``temporalFilter``, ``advanced`` --
      JSON-encoded filter descriptions;
    * ``page`` -- 1-based page number (missing or empty means page 1);
    * ``export`` / ``mobiledownload`` -- when present, select the export or
      mobile-download page size from ``settings`` instead of the default.

    Every nested filter is restricted to the nodegroups returned by
    ``get_permitted_nodegroups(request.user)`` and, depending on the value
    returned by ``get_provisional_type(request)``, to provisional or
    non-provisional entries.

    Args:
        request: the incoming request; ``request.GET`` and ``request.user``
            are read.

    Returns:
        dict: ``{'query': query, 'search_buffer': search_buffer}`` where
        ``query`` is the assembled ``Query`` and ``search_buffer`` is the
        ``geojson`` attribute of the buffered map-filter geometry, or
        ``None`` when no map feature was supplied.

    Raises:
        ValueError: if ``page`` is present, non-empty and not an integer.
    """
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('mapFilter', '{}'))
    include_provisional = get_provisional_type(request)

    export = request.GET.get('export', None)
    mobile_download = request.GET.get('mobiledownload', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', '{}'))
    advanced_filters = JSONDeserializer().deserialize(request.GET.get('advanced', '[]'))
    search_buffer = None
    se = SearchEngineFactory().create()

    # The three provisional modes are evaluated once.  Equality (rather than
    # identity or truthiness) is kept on purpose so that the accepted values
    # are exactly the ones the original comparisons accepted.
    show_all = include_provisional == True  # noqa: E712
    exclude_provisional = include_provisional == False  # noqa: E712
    only_provisional = include_provisional == 'only provisional'

    def restrict_provisional(bool_query, field):
        """Add the provisional ``Terms`` filter on *field* to *bool_query*."""
        if exclude_provisional:
            bool_query.filter(Terms(field=field, terms=['false']))
        elif only_provisional:
            bool_query.filter(Terms(field=field, terms=['true']))

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    elif mobile_download is not None:
        limit = settings.MOBILE_DOWNLOAD_RESOURCE_LIMIT
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * (page - 1), limit=limit)
    search_query = Bool()

    # Geo aggregation over the nested 'points' documents.
    nested_agg = NestedAgg(path='points', name='geo_aggs')
    nested_agg_filter = FiltersAgg(name='inner')

    if show_all:
        nested_agg_filter.add_filter(Terms(field='points.provisional', terms=['false', 'true']))
    else:
        provisional_resource_filter = Bool()

        if exclude_provisional:
            provisional_resource_filter.filter(Terms(field='provisional', terms=['false', 'partial']))
            nested_agg_filter.add_filter(Terms(field='points.provisional', terms=['false']))
        elif only_provisional:
            provisional_resource_filter.filter(Terms(field='provisional', terms=['true', 'partial']))
            nested_agg_filter.add_filter(Terms(field='points.provisional', terms=['true']))

        search_query.must(provisional_resource_filter)

    nested_agg_filter.add_aggregation(GeoHashGridAgg(field='points.point', name='grid', precision=settings.HEX_BIN_PRECISION))
    nested_agg_filter.add_aggregation(GeoBoundsAgg(field='points.point', name='bounds'))
    nested_agg.add_aggregation(nested_agg_filter)
    query.add_aggregation(nested_agg)

    permitted_nodegroups = get_permitted_nodegroups(request.user)

    # ---- term / string / concept filters ---------------------------------
    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] in ('term', 'string'):
                string_filter = Bool()
                if term['type'] == 'term':
                    string_filter.must(Match(field='strings.string', query=term['value'], type='phrase'))
                else:
                    string_filter.should(Match(field='strings.string', query=term['value'], type='phrase_prefix'))
                    string_filter.should(Match(field='strings.string.folded', query=term['value'], type='phrase_prefix'))

                if exclude_provisional:
                    string_filter.must_not(Match(field='strings.provisional', query='true', type='phrase'))
                elif only_provisional:
                    string_filter.must_not(Match(field='strings.provisional', query='false', type='phrase'))

                string_filter.filter(Terms(field='strings.nodegroup_id', terms=permitted_nodegroups))
                nested_string_filter = Nested(path='strings', query=string_filter)
                if term['inverted']:
                    search_query.must_not(nested_string_filter)
                else:
                    search_query.must(nested_string_filter)
                    # min_score is needed because the query otherwise returns
                    # hits with score 0, which have to be dropped (the
                    # original author noted it arguably should not do that).
                    query.min_score('0.01')
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Bool()
                conceptid_filter.filter(Terms(field='domains.conceptid', terms=concept_ids))
                conceptid_filter.filter(Terms(field='domains.nodegroup_id', terms=permitted_nodegroups))

                if exclude_provisional:
                    conceptid_filter.must_not(Match(field='domains.provisional', query='true', type='phrase'))
                elif only_provisional:
                    conceptid_filter.must_not(Match(field='domains.provisional', query='false', type='phrase'))

                nested_conceptid_filter = Nested(path='domains', query=conceptid_filter)
                if term['inverted']:
                    search_query.must_not(nested_conceptid_filter)
                else:
                    search_query.filter(nested_conceptid_filter)

    # ---- spatial filter (only the first feature is used) ------------------
    if 'features' in spatial_filter and spatial_filter['features']:
        feature = spatial_filter['features'][0]
        # GeoJSON allows "properties": null, so fall back to an empty dict.
        feature_properties = feature.get('properties') or {}
        buffer_spec = feature_properties.get('buffer', {'width': 0, 'unit': 'ft'})

        search_buffer = _buffer(feature['geometry'], buffer_spec['width'], buffer_spec['unit'])
        feature_geom = JSONDeserializer().deserialize(search_buffer.json)
        geoshape = GeoShape(field='geometries.geom.features.geometry', type=feature_geom['type'], coordinates=feature_geom['coordinates'])

        spatial_query = Bool()
        if feature_properties.get('inverted', False) == True:  # noqa: E712
            spatial_query.must_not(geoshape)
        else:
            spatial_query.filter(geoshape)

        # only search the nodegroups the user has permission to read
        spatial_query.filter(Terms(field='geometries.nodegroup_id', terms=permitted_nodegroups))
        restrict_provisional(spatial_query, 'geometries.provisional')

        search_query.filter(Nested(path='geometries', query=spatial_query))

    # ---- temporal filter --------------------------------------------------
    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        start_date = ExtendedDateFormat(temporal_filter['fromDate'])
        end_date = ExtendedDateFormat(temporal_filter['toDate'])
        raw_date_nodeid = temporal_filter.get('dateNodeId', '')
        date_nodeid = str(raw_date_nodeid) if raw_date_nodeid != '' else None
        query_inverted = temporal_filter.get('inverted', False)

        temporal_query = Bool()

        if query_inverted:
            # Inverted date searches are expressed as an OR clause
            # (less than START_DATE OR greater than END_DATE) because ES
            # must_not cannot be used here.
            dates_clause = Bool()
            inverted_ranges_clause = Bool()

            if start_date.is_valid():
                dates_clause.should(Range(field='dates.date', lt=start_date.lower))
                inverted_ranges_clause.should(Range(field='date_ranges.date_range', lt=start_date.lower))
            if end_date.is_valid():
                dates_clause.should(Range(field='dates.date', gt=end_date.upper))
                inverted_ranges_clause.should(Range(field='date_ranges.date_range', gt=end_date.upper))
        else:
            dates_clause = Range(field='dates.date', gte=start_date.lower, lte=end_date.upper)

        date_query = Bool()
        date_query.filter(dates_clause)
        date_query.filter(Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))
        restrict_provisional(date_query, 'dates.provisional')

        if date_nodeid:
            # A specific date node was requested: date ranges are not searched.
            date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
        else:
            if query_inverted:
                ranges_clause = inverted_ranges_clause
            else:
                ranges_clause = Range(field='date_ranges.date_range', gte=start_date.lower, lte=end_date.upper, relation='intersects')

            date_ranges_query = Bool()
            date_ranges_query.filter(ranges_clause)
            date_ranges_query.filter(Terms(field='date_ranges.nodegroup_id', terms=permitted_nodegroups))
            restrict_provisional(date_ranges_query, 'date_ranges.provisional')

            temporal_query.should(Nested(path='date_ranges', query=date_ranges_query))
        temporal_query.should(Nested(path='dates', query=date_query))

        search_query.filter(temporal_query)

    # ---- advanced (per-node) filters --------------------------------------
    datatype_factory = DataTypeFactory()
    if advanced_filters:
        advanced_query = Bool()
        grouped_query = Bool()
        grouped_queries = [grouped_query]
        for index, advanced_filter in enumerate(advanced_filters):
            tile_query = Bool()
            # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
            for key, val in advanced_filter.items():
                if key != 'op':
                    node = models.Node.objects.get(pk=key)
                    if request.user.has_perm('read_nodegroup', node.nodegroup):
                        datatype = datatype_factory.get_instance(node.datatype)
                        datatype.append_search_filters(val, node, tile_query, request)
            nested_query = Nested(path='tiles', query=tile_query)
            # An 'or' operator (except on the first filter) starts a new group;
            # filters inside a group are ANDed, groups are ORed together below.
            if advanced_filter['op'] == 'or' and index != 0:
                grouped_query = Bool()
                grouped_queries.append(grouped_query)
            grouped_query.must(nested_query)
        for grouped_query in grouped_queries:
            advanced_query.should(grouped_query)
        search_query.must(advanced_query)

    query.add_query(search_query)
    if search_buffer is not None:
        search_buffer = search_buffer.geojson

    return {'query': query, 'search_buffer': search_buffer}