Example #1
def append_instance_permission_filter_dsl(request, search_results_object):
    if request.user.is_superuser is False:
        has_access = Bool()
        terms = Terms(field="permissions.users_with_no_access",
                      terms=[str(request.user.id)])
        has_access.must_not(terms)
        search_results_object["query"].add_query(has_access)
Example #2
def build_search_results_dsl(request):
    temporal_filters = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))

    query = build_base_search_results_dsl(request)  
    boolfilter = Bool()
    
    if 'filters' in temporal_filters:
        for temporal_filter in temporal_filters['filters']:
            terms = Terms(field='date_groups.conceptid', terms=temporal_filter['date_types__value'])
            boolfilter.must(terms)

            date_value = datetime.strptime(temporal_filter['date'], '%d/%m/%Y').isoformat()

            if temporal_filter['date_operators__value'] == '1': # equals query
                range = Range(field='date_groups.value', gte=date_value, lte=date_value)
            elif temporal_filter['date_operators__value'] == '0': # greater than query 
                range = Range(field='date_groups.value', lt=date_value)
            elif temporal_filter['date_operators__value'] == '2': # less than query
                range = Range(field='date_groups.value', gt=date_value)

            if 'inverted' not in temporal_filters:
                temporal_filters['inverted'] = False

            if temporal_filters['inverted']:
                boolfilter.must_not(range)
            else:
                boolfilter.must(range)

            query.add_filter(boolfilter)

    return query
Example #3
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        search_query = Bool()
        querysting_params = self.request.GET.get(details["componentname"], "")
        spatial_filter = JSONDeserializer().deserialize(querysting_params)
        if "features" in spatial_filter:
            if len(spatial_filter["features"]) > 0:
                feature_geom = spatial_filter["features"][0]["geometry"]
                feature_properties = {}
                if "properties" in spatial_filter["features"][0]:
                    feature_properties = spatial_filter["features"][0][
                        "properties"]
                buffer = {"width": 0, "unit": "ft"}
                if "buffer" in feature_properties:
                    buffer = feature_properties["buffer"]
                search_buffer = _buffer(feature_geom, buffer["width"],
                                        buffer["unit"])
                feature_geom = JSONDeserializer().deserialize(
                    search_buffer.geojson)
                geoshape = GeoShape(field="geometries.geom.features.geometry",
                                    type=feature_geom["type"],
                                    coordinates=feature_geom["coordinates"])

                invert_spatial_search = False
                if "inverted" in feature_properties:
                    invert_spatial_search = feature_properties["inverted"]

                spatial_query = Bool()
                if invert_spatial_search is True:
                    spatial_query.must_not(geoshape)
                else:
                    spatial_query.filter(geoshape)

                # get the nodegroup_ids that the user has permission to search
                spatial_query.filter(
                    Terms(field="geometries.nodegroup_id",
                          terms=permitted_nodegroups))

                if include_provisional is False:
                    spatial_query.filter(
                        Terms(field="geometries.provisional", terms=["false"]))

                elif include_provisional == "only provisional":
                    spatial_query.filter(
                        Terms(field="geometries.provisional", terms=["true"]))

                search_query.filter(
                    Nested(path="geometries", query=spatial_query))

        search_results_object["query"].add_query(search_query)

        if details["componentname"] not in search_results_object:
            search_results_object[details["componentname"]] = {}

        try:
            search_results_object[
                details["componentname"]]["search_buffer"] = feature_geom
        except NameError:
            logger.info(_("Feature geometry is not defined"))
Example #4
    def append_null_search_filters(self, value, node, query, request):
        """
        Appends the search query dsl to search for fields that have not been populated
        """
        base_query = Bool()
        base_query.filter(Terms(field="graph_id", terms=[str(node.graph_id)]))

        null_query = Bool()
        data_exists_query = Exists(field="tiles.data.%s" % (str(node.pk)))
        nested_query = Nested(path="tiles", query=data_exists_query)
        null_query.must(nested_query)
        if value["op"] == "null":
            # search for tiles that don't exist
            exists_query = Bool()
            exists_query.must_not(null_query)
            base_query.should(exists_query)

            # search for tiles that do exist, but that have null or [] as values
            func_query = Dsl()
            func_query.dsl = {
                "function_score": {
                    "min_score": 1,
                    "query": {"match_all": {}},
                    "functions": [
                        {
                            "script_score": {
                                "script": {
                                    "source": """
                                    int null_docs = 0;
                                    for(tile in params._source.tiles){
                                        if(tile.data.containsKey(params.node_id)){
                                            def val = tile.data.get(params.node_id);
                                            if (val == null || (val instanceof List && val.length==0)) {
                                                null_docs++;
                                                break;
                                            }
                                        }
                                    }
                                    return null_docs;
                                """,
                                    "lang": "painless",
                                    "params": {"node_id": "%s" % (str(node.pk))},
                                }
                            }
                        }
                    ],
                    "score_mode": "max",
                    "boost": 1,
                    "boost_mode": "replace",
                }
            }
            base_query.should(func_query)
        elif value["op"] == "not_null":
            base_query.must(null_query)
        query.must(base_query)
Example #5
def build_search_results_dsl(request):
    temporal_filters = JSONDeserializer().deserialize(
        request.GET.get('temporalFilter', None))
    sorting = {
        "child_entities.label": {
            "order": "asc",
            "nested_path": "child_entities",
            "nested_filter": {
                "term": {
                    "child_entities.entitytypeid": "EAMENA_ID.E42"
                }
            }
        }
    }
    query = build_base_search_results_dsl(request)
    boolfilter = Bool()

    if 'filters' in temporal_filters:
        for temporal_filter in temporal_filters['filters']:
            date_type = ''
            date = ''
            date_operator = ''
            for node in temporal_filter['nodes']:
                if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                    date_operator = node['value']
                elif node['entitytypeid'] == 'date':
                    date = node['value']
                else:
                    date_type = node['value']

            date_value = datetime.strptime(date, '%Y-%m-%d').isoformat()

            if date_operator == '1':  # equals query
                range = Range(field='dates.value',
                              gte=date_value,
                              lte=date_value)
            elif date_operator == '0':  # greater than query
                range = Range(field='dates.value', lt=date_value)
            elif date_operator == '2':  # less than query
                range = Range(field='dates.value', gt=date_value)

            nested = Nested(path='dates', query=range)
            if 'inverted' not in temporal_filters:
                temporal_filters['inverted'] = False

            if temporal_filters['inverted']:
                boolfilter.must_not(nested)
            else:
                boolfilter.must(nested)

            query.add_filter(boolfilter)
    #  Sorting criterion added to query (AZ 08/02/17)
    query.dsl.update({'sort': sorting})

    return query
Example #6
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        search_query = Bool()
        querystring_params = self.request.GET.get(details['componentname'], '')
        spatial_filter = JSONDeserializer().deserialize(querystring_params)
        if 'features' in spatial_filter:
            if len(spatial_filter['features']) > 0:
                feature_geom = spatial_filter['features'][0]['geometry']
                feature_properties = {}
                if 'properties' in spatial_filter['features'][0]:
                    feature_properties = spatial_filter['features'][0][
                        'properties']
                buffer = {'width': 0, 'unit': 'ft'}
                if 'buffer' in feature_properties:
                    buffer = feature_properties['buffer']
                search_buffer = _buffer(feature_geom, buffer['width'],
                                        buffer['unit'])
                feature_geom = JSONDeserializer().deserialize(
                    search_buffer.json)
                geoshape = GeoShape(field='geometries.geom.features.geometry',
                                    type=feature_geom['type'],
                                    coordinates=feature_geom['coordinates'])

                invert_spatial_search = False
                if 'inverted' in feature_properties:
                    invert_spatial_search = feature_properties['inverted']

                spatial_query = Bool()
                if invert_spatial_search is True:
                    spatial_query.must_not(geoshape)
                else:
                    spatial_query.filter(geoshape)

                # get the nodegroup_ids that the user has permission to search
                spatial_query.filter(
                    Terms(field='geometries.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional is False:
                    spatial_query.filter(
                        Terms(field='geometries.provisional', terms=['false']))

                elif include_provisional == 'only provisional':
                    spatial_query.filter(
                        Terms(field='geometries.provisional', terms=['true']))

                search_query.filter(
                    Nested(path='geometries', query=spatial_query))

        search_results_object['query'].add_query(search_query)

        if details['componentname'] not in search_results_object:
            search_results_object[details['componentname']] = {}
        search_results_object[
            details['componentname']]['search_buffer'] = search_buffer.geojson
Example #7
    def append_null_search_filters(self, value, node, query, request):
        """
        Appends the search query dsl to search for fields that haven't been populated
        """
        base_query = Bool()
        null_query = Bool()
        data_exists_query = Exists(field="tiles.data.%s" % (str(node.pk)))
        nested_query = Nested(path="tiles", query=data_exists_query)
        null_query.must(nested_query)
        base_query.filter(Terms(field="graph_id", terms=[str(node.graph_id)]))
        if value["op"] == "null":
            base_query.must_not(null_query)
        elif value["op"] == "not_null":
            base_query.must(null_query)
        query.must(base_query)
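As a rough picture of what the not_null branch above builds, assuming the Bool, Exists, Nested and Terms wrappers map directly onto the standard Elasticsearch clauses (the UUIDs below are placeholders, not real identifiers):

# Illustrative sketch only: approximate raw DSL for the not_null branch,
# with placeholder UUIDs standing in for graph_id and node.pk.
not_null_dsl = {
    "bool": {
        "filter": [{"terms": {"graph_id": ["<graph-uuid>"]}}],
        "must": [
            {"nested": {
                "path": "tiles",
                "query": {"exists": {"field": "tiles.data.<node-uuid>"}}
            }}
        ]
    }
}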
Example #8
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        search_query = Bool()
        querystring_params = self.request.GET.get(details['componentname'], '')

        for resourceTypeFilter in JSONDeserializer().deserialize(
                querystring_params):
            term = Term(field='graph_id',
                        term=str(resourceTypeFilter['graphid']))
            if resourceTypeFilter['inverted'] is True:
                search_query.must_not(term)
            else:
                search_query.must(term)

        search_results_object['query'].add_query(search_query)
Example #9
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        search_query = Bool()
        querysting_params = self.request.GET.get(details["componentname"], "")

        graph_ids = []
        for resouceTypeFilter in JSONDeserializer().deserialize(
                querysting_params):
            graph_ids.append(str(resouceTypeFilter["graphid"]))

        terms = Terms(field="graph_id", terms=graph_ids)
        if resouceTypeFilter["inverted"] is True:
            search_query.must_not(terms)
        else:
            search_query.filter(terms)

        search_results_object["query"].add_query(search_query)
Example #10
def add_neg_filter(query):
    """adds a boolfilter that omits any resource that is protected with a certain
conceptid, this is simply a negative test for a specific conceptid"""

    # get all the protection level conceptid
    conceptid = get_protection_conceptids(settings.PROTECTION_LEVEL_NODE)

    # create boolfilter
    boolfilter = Bool()
    terms = Terms(field="domains.conceptid", terms=conceptid)
    nested = Nested(path="domains", query=terms)
    boolfilter.must_not(nested)

    # add filter to query
    query.add_filter(boolfilter)

    return query
Example #11
def build_search_results_dsl(request):
    temporal_filters = JSONDeserializer().deserialize(
        request.GET.get('temporalFilter', None))

    query = build_base_search_results_dsl(request)
    boolfilter = Bool()

    if 'filters' in temporal_filters:
        for temporal_filter in temporal_filters['filters']:
            date_type = ''
            date = ''
            date_operator = ''
            for node in temporal_filter['nodes']:
                if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                    date_operator = node['value']
                elif node['entitytypeid'] == 'date':
                    date = node['value']
                else:
                    date_type = node['value']

            terms = Terms(field='date_groups.conceptid', terms=date_type)
            boolfilter.must(terms)

            date_value = datetime.strptime(date, '%Y-%m-%d').isoformat()

            if date_operator == '1':  # equals query
                range = Range(field='date_groups.value',
                              gte=date_value,
                              lte=date_value)
            elif date_operator == '0':  # greater than query
                range = Range(field='date_groups.value', lt=date_value)
            elif date_operator == '2':  # less than query
                range = Range(field='date_groups.value', gt=date_value)

            if 'inverted' not in temporal_filters:
                temporal_filters['inverted'] = False

            if temporal_filters['inverted']:
                boolfilter.must_not(range)
            else:
                boolfilter.must(range)

            query.add_filter(boolfilter)

    return query
Example #12
def build_search_results_dsl(request):
    temporal_filters = JSONDeserializer().deserialize(request.GET.get("temporalFilter", None))

    query = build_base_search_results_dsl(request)
    boolfilter = Bool()

    if "filters" in temporal_filters:
        for temporal_filter in temporal_filters["filters"]:
            date_type = ""
            date = ""
            date_operator = ""
            for node in temporal_filter["nodes"]:
                if node["entitytypeid"] == "DATE_COMPARISON_OPERATOR.E55":
                    date_operator = node["value"]
                elif node["entitytypeid"] == "date":
                    date = node["value"]
                else:
                    date_type = node["value"]

            terms = Terms(field="date_groups.conceptid", terms=date_type)
            boolfilter.must(terms)

            date_value = datetime.strptime(date, "%Y-%m-%d").isoformat()

            if date_operator == "1":  # equals query
                range = Range(field="date_groups.value", gte=date_value, lte=date_value)
            elif date_operator == "0":  # greater than query
                range = Range(field="date_groups.value", lt=date_value)
            elif date_operator == "2":  # less than query
                range = Range(field="date_groups.value", gt=date_value)

            if "inverted" not in temporal_filters:
                temporal_filters["inverted"] = False

            if temporal_filters["inverted"]:
                boolfilter.must_not(range)
            else:
                boolfilter.must(range)

            query.add_filter(boolfilter)

    return query
Example #13
    def append_search_filters(self, value, node, query, request):
        try:
            if value["op"] == "null" or value["op"] == "not_null":
                self.append_null_search_filters(value, node, query, request)
            elif value["val"] != "":
                base_query = Bool()
                base_query.filter(
                    Terms(field="graph_id", terms=[str(node.graph_id)]))
                match_query = Nested(path="tiles",
                                     query=Match(field="tiles.data.%s" %
                                                 (str(node.pk)),
                                                 type="phrase",
                                                 query=value["val"]))
                if "!" in value["op"]:
                    base_query.must_not(match_query)
                    # base_query.filter(Exists(field="tiles.data.%s" % (str(node.pk))))
                else:
                    base_query.must(match_query)
                query.must(base_query)

        except KeyError as e:
            pass
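For context, the value dict that drives the branching above only needs "op" and "val" keys; a few hypothetical inputs follow (the exact op tokens are assumptions, since the code itself only checks for "null", "not_null" and the presence of a "!" character):

# Hypothetical inputs illustrating the branches handled above.
null_check = {"op": "null", "val": ""}          # routed to append_null_search_filters
negated_match = {"op": "!eq", "val": "castle"}  # "!" in op -> phrase match wrapped in must_not
exact_match = {"op": "eq", "val": "castle"}     # otherwise the phrase match goes into must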
Example #14
def build_search_results_dsl(request):
#    Results are sorted in ascending order by the value of SITE_ID.E42, which is displayed as the primary name of Heritage Resources.
#    Must revisit this method once the new Automatic Resource ID has been fully developed (AZ 10/08/16). Update 06/09/16: EAMENA_ID.E42 is now used as the sorting criterion.

    sorting = {
        "child_entities.label": {
            "order": "asc",
            "nested_path": "child_entities",
            "nested_filter": {
                "term": {"child_entities.entitytypeid": "EAMENA_ID.E42"}
            }
        }
    }
    
    term_filter = request.GET.get('termFilter', '')
    
    
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', None)) 
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))
    boolean_search = request.GET.get('booleanSearch', '')
    filter_and_or = JSONDeserializer().deserialize(request.GET.get('termFilterAndOr', ''))
    filter_grouping = JSONDeserializer().deserialize(request.GET.get('termFilterGroup', ''))
    
    filter_combine_flags = JSONDeserializer().deserialize(request.GET.get('termFilterCombineWithPrev', ''))
    # Ignore the first entry as it is a dummy
    filter_combine_flags = filter_combine_flags[1:]
    # filter_combine_flags = [False, True, False, False, False]
    
    # filter_groups = JSONDeserializer().deserialize(request.GET.get('termFilterGroups', ''))
    # Not here yet, so put in some bogus data
    # filter_groups = [
    #     'NAME.E41',
    #     'NAME.E41',
    #     'DISTURBANCE_STATE.E3',
    #     'THREAT_STATE.E3'
    # ]
    
    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE  
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE
    
    query = Query(se, start=limit*int(page-1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()
    is_empty_temporal_filter = True

    # store each search term in a list initially. These will be combined based on the global and/or and the optional groupings
    terms_queries = []

    # logging.warning("-------QUERY-------")

    if term_filter != '' or not is_empty_temporal_filter:
        for index, select_box in enumerate(JSONDeserializer().deserialize(term_filter)):
            selectbox_boolfilter = Bool()
            
            groupid = filter_grouping[index]
            if not groupid == 'No group':
                # build a nested query against the nested_entities

                # build a nested query for each resource type
                for resourcetype in settings.RESOURCE_TYPE_CONFIGS().keys():
                    # trace the path from each term to the group root
                    term_paths = []
                    for term in select_box:

                        # trace path from group root to this term
                        if term['type'] == 'concept':

                            # get all the parent concepts for this value i.e. the field
                            concept_relations = models.ConceptRelations.objects.filter(conceptidto=term['value'], relationtype="member")
                            for relation in concept_relations:
                                term_parent_concept = models.Concepts.objects.get(conceptid=relation.conceptidfrom)

                                # get the steps from the root to that concept
                                if term_parent_concept.nodetype.nodetype == "Collection":
                                    term_schema = Entity.get_mapping_schema_to(term_parent_concept.legacyoid)
                                elif term_parent_concept.nodetype.nodetype == 'Concept':
                                    # need to get at the parent until we reach the root collection. concepts are arranged hierarchically
                                    parent_relations_to = models.ConceptRelations.objects.filter(conceptidto=term_parent_concept.conceptid, relationtype='member')
                                    grandparent = models.Concepts.objects.filter(conceptid=parent_relations_to[0].conceptidfrom)
                                    term_schema = Entity.get_mapping_schema_to(grandparent[0].legacyoid)

                                #this path begins at the root, and ends up at the node in question
                                if resourcetype in term_schema:
                                    term_path = term_schema[resourcetype]['steps']

                                    term_paths.append({
                                        'term': term,
                                        'path': term_path
                                    })
                                    break

                        elif term['type'] == 'term':

                            concept = models.Concepts.objects.get(conceptid=term['context'])
                            term_schema = Entity.get_mapping_schema_to(concept.legacyoid)
                            if resourcetype in term_schema:
                                term_path = term_schema[resourcetype]['steps']

                                term_paths.append({
                                    'term': term,
                                    'path': term_path
                                })

                        elif term['type'] == 'string':
                            term_schema = Entity.get_mapping_schema_to(groupid)
                            if resourcetype in term_schema:
                                term_path = term_schema[resourcetype]['steps']

                                term_paths.append({
                                    'term': term,
                                    'path': term_path
                                })

                    if 'year_min_max' in temporal_filter[index] and len(temporal_filter[index]['year_min_max']) == 2:
                        start_date = date(temporal_filter[index]['year_min_max'][0], 1, 1)
                        end_date = date(temporal_filter[index]['year_min_max'][1], 12, 31)
                        if start_date:
                            start_date = start_date.isoformat()
                        if end_date:
                            end_date = end_date.isoformat()

                        if 'inverted' not in temporal_filter[index]:
                            inverted_temporal_filter = False
                        else:
                            if temporal_filter[index]['inverted']:
                                inverted_temporal_filter = True
                            else:
                                inverted_temporal_filter = False

                        term_paths.append({
                            'term': {
                                'date_operator': '3',
                                'start_date': start_date,
                                'end_date': end_date,
                                'type': 'date',
                                'inverted': inverted_temporal_filter
                            },
                            'path': term_path
                        })


                    if 'filters' in temporal_filter[index]:
                        term_schema = Entity.get_mapping_schema_to(groupid)
                        if resourcetype in term_schema:
                            term_path = term_schema[resourcetype]['steps']

                            for temporal_filter_item in temporal_filter[index]['filters']:
                                date_type = ''
                                searchdate = ''
                                date_operator = ''
                                for node in temporal_filter_item['nodes']:
                                    if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                                        date_operator = node['value']
                                    elif node['entitytypeid'] == 'date':
                                        searchdate = node['value']
                                    else:
                                        date_type = node['value']

                                date_value = datetime.strptime(searchdate, '%Y-%m-%d').isoformat()
                                if 'inverted' not in temporal_filter[index]:
                                    inverted_temporal_filter = False
                                else:
                                    if temporal_filter[index]['inverted']:
                                        inverted_temporal_filter = True
                                    else:
                                        inverted_temporal_filter = False

                                term_paths.append({
                                    'term': {
                                        'date_operator': date_operator,
                                        'date_value': date_value,
                                        'type': 'date',
                                        'inverted': inverted_temporal_filter
                                    },
                                    'path': term_path
                                })

                    # combine the traced path to build a nested query
                    group_query = nested_query_from_pathed_values(term_paths, 'nested_entity.child_entities')


                    # add nested query to overall query
                    selectbox_boolfilter.should(group_query)
                
                # logging.warning("BOX QUERY - %s", JSONSerializer().serialize(selectbox_boolfilter, indent=2))

            else:    
                for term in select_box:
                    
                    if term['type'] == 'term':
                        entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                        boolfilter_nested = Bool()
                        boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                        boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                        nested = Nested(path='child_entities', query=boolfilter_nested)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                selectbox_boolfilter.should(nested)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(nested)
                            else:    
                                selectbox_boolfilter.must(nested)
                                
                    elif term['type'] == 'concept':
                        concept_ids = _get_child_concepts(term['value'])
                        terms = Terms(field='domains.conceptid', terms=concept_ids)
                        nested = Nested(path='domains', query=terms)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                    selectbox_boolfilter.should(nested)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(nested)
                            else:
                                selectbox_boolfilter.must(nested)
                                
                    elif term['type'] == 'string':
                        boolquery2 = Bool() #This bool contains the subset of nested string queries on both domains and child_entities paths
                        boolfilter_folded = Bool() #This bool searches by string in child_entities, where free text strings get indexed
                        boolfilter_folded2 = Bool() #This bool searches by string in the domains path,where controlled vocabulary concepts get indexed
                        boolfilter_folded.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        nested = Nested(path='child_entities', query=boolfilter_folded)
                        boolfilter_folded2.should(Match(field='domains.label', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded2.should(Match(field='domains.label.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded2.should(Match(field='domains.label.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        nested2 = Nested(path='domains', query=boolfilter_folded2)
                        boolquery2.should(nested)
                        boolquery2.should(nested2)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                # use boolfilter here instead of boolquery because boolquery
                                # can't be combined with other boolfilters using boolean OR
                                selectbox_boolfilter.should(boolquery2)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(boolquery2)
                            else:    
                                selectbox_boolfilter.must(boolquery2)
                            
                if 'year_min_max' in temporal_filter[index] and len(temporal_filter[index]['year_min_max']) == 2:
                    start_date = date(temporal_filter[index]['year_min_max'][0], 1, 1)
                    end_date = date(temporal_filter[index]['year_min_max'][1], 12, 31)
                    if start_date:
                        start_date = start_date.isoformat()
                    if end_date:
                        end_date = end_date.isoformat()
                    range = Range(field='dates.value', gte=start_date, lte=end_date)
                    nested = Nested(path='dates', query=range)
            
                    if 'inverted' not in temporal_filter[index]:
                        temporal_filter[index]['inverted'] = False

                    if temporal_filter[index]['inverted']:
                        selectbox_boolfilter.must_not(nested)
                    else:
                        selectbox_boolfilter.must(nested)
                        
                if 'filters' in temporal_filter[index]:
                    for temporal_filter_item in temporal_filter[index]['filters']:
                        date_type = ''
                        searchdate = ''
                        date_operator = ''
                        for node in temporal_filter_item['nodes']:
                            if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                                date_operator = node['value']
                            elif node['entitytypeid'] == 'date':
                                searchdate = node['value']
                            else:
                                date_type = node['value']


                        date_value = datetime.strptime(searchdate, '%Y-%m-%d').isoformat()

                        if date_operator == '1': # equals query
                            range = Range(field='dates.value', gte=date_value, lte=date_value)
                        elif date_operator == '0': # greater than query 
                            range = Range(field='dates.value', lt=date_value)
                        elif date_operator == '2': # less than query
                            range = Range(field='dates.value', gt=date_value)
                        
                        nested = Nested(path='dates', query=range)
                        if 'inverted' not in temporal_filter[index]:
                            temporal_filter[index]['inverted'] = False

                        if temporal_filter[index]['inverted']:
                            selectbox_boolfilter.must_not(nested)
                        else:
                            selectbox_boolfilter.must(nested)


            terms_queries.append(selectbox_boolfilter)
            # if not selectbox_boolfilter.empty:
            #     if boolean_search == 'or':
            #         boolfilter.should(selectbox_boolfilter)
            #     else:
            #         boolfilter.must(selectbox_boolfilter)
        
        # We now have individual query terms for each of the search components. Combine into one group now
        # Start by building an array of groups which will be combined according to the global And/Or
        # Queries within one of these groups will be combined by the complement of the global And/Or
        # We may end up with [ [A,B], [C], [D,E] ], which would translate to either:
        #    (A || B) && C && (D || E)
        #       or
        #    (A && B) || C || (D && E)
        # for global AND or OR respectively
        
        # logging.warning("TERMS QUERIES %s", terms_queries)
        
        bool_components = []
        
        for i, term_query in enumerate(terms_queries):
            if i == 0:
                bool_components.append([term_query])
            else:
                should_group_with_previous = filter_combine_flags[i-1]
                if should_group_with_previous:
                    bool_components[-1].append(term_query)
                else:
                    bool_components.append([term_query])
            
        # logging.warning("BOOL COMPONENTS %s", bool_components)
        # Now build the ES queries
        for bool_component in bool_components:
            if len(bool_component) == 1:
                # just combine this on its own
                q = bool_component[0]
            else:
                q = Bool()
                for sub_component in bool_component:
                    if boolean_search == 'or':
                        #apply the OPPOSITE of the global boolean operator
                        q.must(sub_component)
                    else:
                        q.should(sub_component)
                        
            # combine to the overall query according to the global boolean operator
            if boolean_search == 'or':
                boolfilter.should(q)
            else:
                boolfilter.must(q)

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            coordinates = [[geojson['coordinates'][0],geojson['coordinates'][3]], [geojson['coordinates'][2],geojson['coordinates'][1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates )
            nested = Nested(path='geometries', query=geoshape)
        else:
            buffer = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson,buffer['width'],buffer['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'] )
            nested = Nested(path='geometries', query=geoshape)

        if 'inverted' not in spatial_filter:
            spatial_filter['inverted'] = False

        if spatial_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)
    
#  Sorting criterion added to query (AZ 10/08/16)
    query.dsl.update({'sort': sorting})
    # logging.warning("-=-==-=-===-=--=-==-=-===-=- query: -=-==-=-===-=--=-==-=-===-=-> %s", query)

    return query
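The flag-driven grouping in the middle of the function (terms_queries into bool_components) is easier to see in isolation; a minimal sketch with string labels standing in for the per-select-box Bool filters, reproducing the [[A, B], [C], [D, E]] example from the comments:

# Minimal sketch of the grouping step above, using string labels instead of
# Bool filters. filter_combine_flags has already had its dummy first entry removed.
terms_queries = ["A", "B", "C", "D", "E"]
filter_combine_flags = [True, False, False, True]

bool_components = []
for i, term_query in enumerate(terms_queries):
    if i == 0 or not filter_combine_flags[i - 1]:
        bool_components.append([term_query])    # start a new group
    else:
        bool_components[-1].append(term_query)  # combine with the previous group

print(bool_components)  # [['A', 'B'], ['C'], ['D', 'E']]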
Example #15
def build_search_results_dsl(request):
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(
        request.GET.get('mapFilter', '{}'))
    include_provisional = get_provisional_type(request)

    export = request.GET.get('export', None)
    mobile_download = request.GET.get('mobiledownload', None)
    page = 1 if request.GET.get('page') == '' else int(
        request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(
        request.GET.get('temporalFilter', '{}'))
    advanced_filters = JSONDeserializer().deserialize(
        request.GET.get('advanced', '[]'))
    search_buffer = None
    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    elif mobile_download != None:
        limit = settings.MOBILE_DOWNLOAD_RESOURCE_LIMIT
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * int(page - 1), limit=limit)
    search_query = Bool()

    nested_agg = NestedAgg(path='points', name='geo_aggs')
    nested_agg_filter = FiltersAgg(name='inner')

    if include_provisional == True:
        nested_agg_filter.add_filter(
            Terms(field='points.provisional', terms=['false', 'true']))

    else:
        provisional_resource_filter = Bool()

        if include_provisional == False:
            provisional_resource_filter.filter(
                Terms(field='provisional', terms=['false', 'partial']))
            nested_agg_filter.add_filter(
                Terms(field='points.provisional', terms=['false']))

        elif include_provisional == 'only provisional':
            provisional_resource_filter.filter(
                Terms(field='provisional', terms=['true', 'partial']))
            nested_agg_filter.add_filter(
                Terms(field='points.provisional', terms=['true']))

        search_query.must(provisional_resource_filter)

    nested_agg_filter.add_aggregation(
        GeoHashGridAgg(field='points.point',
                       name='grid',
                       precision=settings.HEX_BIN_PRECISION))
    nested_agg_filter.add_aggregation(
        GeoBoundsAgg(field='points.point', name='bounds'))
    nested_agg.add_aggregation(nested_agg_filter)
    query.add_aggregation(nested_agg)

    permitted_nodegroups = get_permitted_nodegroups(request.user)

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            term_query = Bool()
            provisional_term_filter = Bool()
            if term['type'] == 'term' or term['type'] == 'string':
                string_filter = Bool()
                if term['type'] == 'term':
                    string_filter.must(
                        Match(field='strings.string',
                              query=term['value'],
                              type='phrase'))
                elif term['type'] == 'string':
                    string_filter.should(
                        Match(field='strings.string',
                              query=term['value'],
                              type='phrase_prefix'))
                    string_filter.should(
                        Match(field='strings.string.folded',
                              query=term['value'],
                              type='phrase_prefix'))

                if include_provisional == False:
                    string_filter.must_not(
                        Match(field='strings.provisional',
                              query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    string_filter.must_not(
                        Match(field='strings.provisional',
                              query='false',
                              type='phrase'))

                string_filter.filter(
                    Terms(field='strings.nodegroup_id',
                          terms=permitted_nodegroups))
                nested_string_filter = Nested(path='strings',
                                              query=string_filter)
                if term['inverted']:
                    search_query.must_not(nested_string_filter)
                else:
                    search_query.must(nested_string_filter)
                    # need to set min_score because the query returns results with score 0 and those have to be removed, which I don't think it should be doing
                    query.min_score('0.01')
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Bool()
                conceptid_filter.filter(
                    Terms(field='domains.conceptid', terms=concept_ids))
                conceptid_filter.filter(
                    Terms(field='domains.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    conceptid_filter.must_not(
                        Match(field='domains.provisional',
                              query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    conceptid_filter.must_not(
                        Match(field='domains.provisional',
                              query='false',
                              type='phrase'))

                nested_conceptid_filter = Nested(path='domains',
                                                 query=conceptid_filter)
                if term['inverted']:
                    search_query.must_not(nested_conceptid_filter)
                else:
                    search_query.filter(nested_conceptid_filter)

    if 'features' in spatial_filter:

        if len(spatial_filter['features']) > 0:
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = {}
            if 'properties' in spatial_filter['features'][0]:
                feature_properties = spatial_filter['features'][0][
                    'properties']
            buffer = {'width': 0, 'unit': 'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            search_buffer = _buffer(feature_geom, buffer['width'],
                                    buffer['unit'])
            feature_geom = JSONDeserializer().deserialize(search_buffer.json)
            geoshape = GeoShape(field='geometries.geom.features.geometry',
                                type=feature_geom['type'],
                                coordinates=feature_geom['coordinates'])

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            spatial_query = Bool()
            if invert_spatial_search == True:
                spatial_query.must_not(geoshape)
            else:
                spatial_query.filter(geoshape)

            # get the nodegroup_ids that the user has permission to search
            spatial_query.filter(
                Terms(field='geometries.nodegroup_id',
                      terms=permitted_nodegroups))

            if include_provisional == False:
                spatial_query.filter(
                    Terms(field='geometries.provisional', terms=['false']))

            elif include_provisional == 'only provisional':
                spatial_query.filter(
                    Terms(field='geometries.provisional', terms=['true']))

            search_query.filter(Nested(path='geometries', query=spatial_query))

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        now = str(datetime.utcnow())
        start_date = ExtendedDateFormat(temporal_filter['fromDate'])
        end_date = ExtendedDateFormat(temporal_filter['toDate'])
        date_nodeid = str(
            temporal_filter['dateNodeId']
        ) if 'dateNodeId' in temporal_filter and temporal_filter[
            'dateNodeId'] != '' else None
        query_inverted = False if 'inverted' not in temporal_filter else temporal_filter[
            'inverted']

        temporal_query = Bool()

        if query_inverted:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            inverted_date_query = Bool()
            inverted_date_ranges_query = Bool()

            if start_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', lt=start_date.lower))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range', lt=start_date.lower))
            if end_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', gt=end_date.upper))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range', gt=end_date.upper))

            date_query = Bool()
            date_query.filter(inverted_date_query)
            date_query.filter(
                Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))

            if include_provisional == False:
                date_query.filter(
                    Terms(field='dates.provisional', terms=['false']))

            elif include_provisional == 'only provisional':
                date_query.filter(
                    Terms(field='dates.provisional', terms=['true']))

            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(inverted_date_ranges_query)
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['false']))

                elif include_provisional == 'only provisional':
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional', terms=['true']))

                temporal_query.should(
                    Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        else:
            date_query = Bool()
            date_query.filter(
                Range(field='dates.date',
                      gte=start_date.lower,
                      lte=end_date.upper))
            date_query.filter(
                Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))

            if include_provisional == False:
                date_query.filter(
                    Terms(field='dates.provisional', terms=['false']))
            elif include_provisional == 'only provisional':
                date_query.filter(
                    Terms(field='dates.provisional', terms=['true']))

            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(
                    Range(field='date_ranges.date_range',
                          gte=start_date.lower,
                          lte=end_date.upper,
                          relation='intersects'))
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['false']))
                if include_provisional == 'only provisional':
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional', terms=['true']))

                temporal_query.should(
                    Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        search_query.filter(temporal_query)

    datatype_factory = DataTypeFactory()
    if len(advanced_filters) > 0:
        advanced_query = Bool()
        grouped_query = Bool()
        grouped_queries = [grouped_query]
        for index, advanced_filter in enumerate(advanced_filters):
            tile_query = Bool()
            for key, val in advanced_filter.iteritems():
                if key != 'op':
                    node = models.Node.objects.get(pk=key)
                    if request.user.has_perm('read_nodegroup', node.nodegroup):
                        datatype = datatype_factory.get_instance(node.datatype)
                        datatype.append_search_filters(val, node, tile_query,
                                                       request)
            nested_query = Nested(path='tiles', query=tile_query)
            if advanced_filter['op'] == 'or' and index != 0:
                grouped_query = Bool()
                grouped_queries.append(grouped_query)
            grouped_query.must(nested_query)
        for grouped_query in grouped_queries:
            advanced_query.should(grouped_query)
        search_query.must(advanced_query)

    query.add_query(search_query)
    if search_buffer != None:
        search_buffer = search_buffer.geojson

    return {'query': query, 'search_buffer': search_buffer}
Example #16
def build_search_results_dsl(request):
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('mapFilter', '{}'))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', '{}'))
    advanced_filters = JSONDeserializer().deserialize(request.GET.get('advanced', '[]'))
    search_buffer = None
    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit*int(page-1), limit=limit)
    nested_agg = NestedAgg(path='points', name='geo_aggs')
    nested_agg.add_aggregation(GeoHashGridAgg(field='points.point', name='grid', precision=settings.HEX_BIN_PRECISION))
    nested_agg.add_aggregation(GeoBoundsAgg(field='points.point', name='bounds'))
    query.add_aggregation(nested_agg)

    search_query = Bool()
    permitted_nodegroups = get_permitted_nodegroups(request.user)

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            term_query = Bool()
            if term['type'] == 'term' or term['type'] == 'string':
                string_filter = Bool()
                if term['type'] == 'term':
                    string_filter.must(Match(field='strings.string', query=term['value'], type='phrase'))
                elif term['type'] == 'string':
                    string_filter.should(Match(field='strings.string', query=term['value'], type='phrase_prefix'))
                    string_filter.should(Match(field='strings.string.folded', query=term['value'], type='phrase_prefix'))

                string_filter.filter(Terms(field='strings.nodegroup_id', terms=permitted_nodegroups))
                nested_string_filter = Nested(path='strings', query=string_filter)
                if term['inverted']:
                    search_query.must_not(nested_string_filter)
                else:
                    search_query.must(nested_string_filter)
                    # need to set min_score because the query returns results with score 0 and those have to be removed, which I don't think it should be doing
                    query.min_score('0.01')
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Bool()
                conceptid_filter.filter(Terms(field='domains.conceptid', terms=concept_ids))
                conceptid_filter.filter(Terms(field='domains.nodegroup_id', terms=permitted_nodegroups))
                nested_conceptid_filter = Nested(path='domains', query=conceptid_filter)
                if term['inverted']:
                    search_query.must_not(nested_conceptid_filter)
                else:
                    search_query.filter(nested_conceptid_filter)

    if 'features' in spatial_filter:
        if len(spatial_filter['features']) > 0:
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = spatial_filter['features'][0]['properties']
            buffer = {'width':0,'unit':'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            search_buffer = _buffer(feature_geom, buffer['width'], buffer['unit'])
            feature_geom = JSONDeserializer().deserialize(search_buffer.json)
            geoshape = GeoShape(field='geometries.geom.features.geometry', type=feature_geom['type'], coordinates=feature_geom['coordinates'] )

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            spatial_query = Bool()
            if invert_spatial_search == True:
                spatial_query.must_not(geoshape)
            else:
                spatial_query.filter(geoshape)

            # get the nodegroup_ids that the user has permission to search
            spatial_query.filter(Terms(field='geometries.nodegroup_id', terms=permitted_nodegroups))
            search_query.filter(Nested(path='geometries', query=spatial_query))

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        now = str(datetime.utcnow())
        start_date = SortableDate(temporal_filter['fromDate'])
        end_date = SortableDate(temporal_filter['toDate'])
        date_nodeid = str(temporal_filter['dateNodeId']) if 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != '' else None
        query_inverted = False if 'inverted' not in temporal_filter else temporal_filter['inverted']

        temporal_query = Bool()

        if query_inverted:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
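            # e.g. with fromDate=1900 and toDate=1950 the intent is:
            #   (date < 1900) OR (date > 1950)
            # which is built from the should() clauses below rather than a single must_not(range)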
            inverted_date_query = Bool()
            inverted_date_ranges_query = Bool()

            if start_date.is_valid():
                inverted_date_query.should(Range(field='dates.date', lt=start_date.as_float()))
                inverted_date_ranges_query.should(Range(field='date_ranges.date_range', lt=start_date.as_float()))
            if end_date.is_valid():
                inverted_date_query.should(Range(field='dates.date', gt=end_date.as_float()))
                inverted_date_ranges_query.should(Range(field='date_ranges.date_range', gt=end_date.as_float()))

            date_query = Bool()
            date_query.filter(inverted_date_query)
            date_query.filter(Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))
            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(inverted_date_ranges_query)
                date_ranges_query.filter(Terms(field='date_ranges.nodegroup_id', terms=permitted_nodegroups))
                temporal_query.should(Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        else:
            date_query = Bool()
            date_query.filter(Range(field='dates.date', gte=start_date.as_float(), lte=end_date.as_float()))
            date_query.filter(Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))
            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(Range(field='date_ranges.date_range', gte=start_date.as_float(), lte=end_date.as_float(), relation='intersects'))
                date_ranges_query.filter(Terms(field='date_ranges.nodegroup_id', terms=permitted_nodegroups))
                temporal_query.should(Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))


        search_query.filter(temporal_query)
        #print search_query.dsl

    datatype_factory = DataTypeFactory()
    if len(advanced_filters) > 0:
        advanced_query = Bool()
        grouped_query = Bool()
        grouped_queries = [grouped_query]
        for index, advanced_filter in enumerate(advanced_filters):
            tile_query = Bool()
            for key, val in advanced_filter.iteritems():
                if key != 'op':
                    node = models.Node.objects.get(pk=key)
                    if request.user.has_perm('read_nodegroup', node.nodegroup):
                        datatype = datatype_factory.get_instance(node.datatype)
                        datatype.append_search_filters(val, node, tile_query, request)
            nested_query = Nested(path='tiles', query=tile_query)
            if advanced_filter['op'] == 'or' and index != 0:
                grouped_query = Bool()
                grouped_queries.append(grouped_query)
            grouped_query.must(nested_query)
        for grouped_query in grouped_queries:
            advanced_query.should(grouped_query)
        search_query.must(advanced_query)

    query.add_query(search_query)
    if search_buffer != None:
        search_buffer = search_buffer.geojson
    return {'query': query, 'search_buffer':search_buffer}
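
# Minimal usage sketch for the function above (assumed caller, not part of the
# original snippet): the returned dict is unpacked, the ES query is executed,
# and the optional buffer geometry is handed back alongside the hits.
def _example_search_view(request):
    dsl = build_search_results_dsl(request)           # function name assumed from the surrounding file
    results = dsl['query'].search(index='resources')  # index name is an assumption
    return {'results': results, 'search_buffer': dsl['search_buffer']}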
Пример #17
0
def build_search_results_dsl(request):
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(
        request.GET.get('mapFilter', '{}'))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(
        request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(
        request.GET.get('temporalFilter', '{}'))

    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * int(page - 1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term':
                term_filter = Bool()
                term_filter.must(
                    Match(field='strings', query=term['value'], type='phrase'))
                if term['inverted']:
                    boolfilter.must_not(term_filter)
                else:
                    boolfilter.must(term_filter)
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Terms(field='domains.conceptid',
                                         terms=concept_ids)
                if term['inverted']:
                    boolfilter.must_not(conceptid_filter)
                else:
                    boolfilter.must(conceptid_filter)
            elif term['type'] == 'string':
                string_filter = Bool()
                string_filter.should(
                    Match(field='strings',
                          query=term['value'],
                          type='phrase_prefix'))
                string_filter.should(
                    Match(field='strings.folded',
                          query=term['value'],
                          type='phrase_prefix'))
                if term['inverted']:
                    boolfilter.must_not(string_filter)
                else:
                    boolfilter.must(string_filter)

    if 'features' in spatial_filter:
        if len(spatial_filter['features']) > 0:
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = spatial_filter['features'][0]['properties']
            buffer = {'width': 0, 'unit': 'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            feature_geom = JSONDeserializer().deserialize(
                _buffer(feature_geom, buffer['width'], buffer['unit']).json)
            geoshape = GeoShape(field='geometries.features.geometry',
                                type=feature_geom['type'],
                                coordinates=feature_geom['coordinates'])

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            if invert_spatial_search == True:
                boolfilter.must_not(geoshape)
            else:
                boolfilter.must(geoshape)

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        start_date = None
        end_date = None
        try:
            start_date = parser.parse(temporal_filter['fromDate'])
            start_date = start_date.isoformat()
        except:
            pass
        try:
            end_date = parser.parse(temporal_filter['toDate'])
            end_date = end_date.isoformat()
        except:
            pass

        if 'dateNodeId' in temporal_filter and temporal_filter[
                'dateNodeId'] != '':
            range = Range(field='tiles.data.%s' %
                          (temporal_filter['dateNodeId']),
                          gte=start_date,
                          lte=end_date)
            time_query_dsl = Nested(path='tiles', query=range)
        else:
            time_query_dsl = Range(field='dates', gte=start_date, lte=end_date)

        if 'inverted' not in temporal_filter:
            temporal_filter['inverted'] = False

        if temporal_filter['inverted']:
            boolfilter.must_not(time_query_dsl)
        else:
            boolfilter.must(time_query_dsl)

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    return query
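
# Minimal usage sketch (assumes Django's RequestFactory; the GET parameter names
# are taken from the lookups at the top of the function, the filter payloads are
# placeholders):
from django.test import RequestFactory

def _example_build_and_run():
    request = RequestFactory().get('/search/resources', {
        'termFilter': '[{"type": "string", "value": "church", "inverted": false}]',
        'mapFilter': '{}',
        'temporalFilter': '{}',
        'page': '1',
    })
    query = build_search_results_dsl(request)
    return query.search(index='resources')  # index name is an assumption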
Пример #18
0
def build_base_search_results_dsl(request):
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', None)) 
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))

    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE  
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE
    
    query = Query(se, start=limit*int(page-1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()
    
    if term_filter != '':
        # If the user is not authenticated, show only valid resources
        # (this probably still needs work - maybe they should only see their own???)
        if (request.user.username == 'anonymous'):
            auto_filter = []
            for item in JSONDeserializer().deserialize(term_filter):
                auto_filter.append(item)

            # Find the concept id and context for the Published status
            AUTO_TERM_FILTER = get_auto_filter(request)

            auto_filter.append(AUTO_TERM_FILTER)
            term_filter = JSONSerializer().serialize(auto_filter)
            
    print 'term_filter'
    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            print term
            if term['type'] == 'term':
                entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                boolfilter_nested = Bool()
                boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                nested = Nested(path='child_entities', query=boolfilter_nested)
                if term['inverted']:
                    boolfilter.must_not(nested)
                else:    
                    boolfilter.must(nested)
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                terms = Terms(field='domains.conceptid', terms=concept_ids)
                nested = Nested(path='domains', query=terms)
                if term['inverted']:
                    boolfilter.must_not(nested)
                else:
                    boolfilter.must(nested)
            elif term['type'] == 'string':
                boolfilter_folded = Bool()
                boolfilter_folded.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix'))
                boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix'))
                nested = Nested(path='child_entities', query=boolfilter_folded)
                if term['inverted']:
                    boolquery.must_not(nested)
                else:    
                    boolquery.must(nested)
    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            coordinates = [[geojson['coordinates'][0],geojson['coordinates'][3]], [geojson['coordinates'][2],geojson['coordinates'][1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates )
            nested = Nested(path='geometries', query=geoshape)
        else:
            buffer = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson,buffer['width'],buffer['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'] )
            nested = Nested(path='geometries', query=geoshape)

        if 'inverted' not in spatial_filter:
            spatial_filter['inverted'] = False

        if spatial_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if 'year_min_max' in temporal_filter and len(temporal_filter['year_min_max']) == 2:
        start_date = date(temporal_filter['year_min_max'][0], 1, 1)
        end_date = date(temporal_filter['year_min_max'][1], 12, 31)
        if start_date:
            start_date = start_date.isoformat()
        if end_date:
            end_date = end_date.isoformat()
        range = Range(field='dates.value', gte=start_date, lte=end_date)
        nested = Nested(path='dates', query=range)
        
        if 'inverted' not in temporal_filter:
            temporal_filter['inverted'] = False

        if temporal_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)
        
    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    return query
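
# The examples above call a helper named _buffer(geometry, width, unit) whose body
# is not shown here. The sketch below is only an assumption of what such a helper
# could look like (GeoJSON in, buffered GEOS geometry out); it is not the actual
# implementation.
import json
from django.contrib.gis.geos import GEOSGeometry

def _buffer_sketch(feature_geom, width=0, unit='ft'):
    geom = GEOSGeometry(json.dumps(feature_geom))
    if geom.srid is None:
        geom.srid = 4326                    # GeoJSON is assumed to be lon/lat
    width = float(width)
    if width > 0:
        if unit == 'ft':
            width = width * 0.3048          # feet -> meters
        geom.transform(3857)                # project so the buffer distance is in meters
        geom = geom.buffer(width)
        geom.transform(4326)                # back to lon/lat for the geo_shape query
    return geom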
Пример #19
0
class SiteFilter(BaseSearchFilter):

    def __init__(self, request=None):

        self.request = request
        self.existing_query = False
        self.doc_types = []
        self.paramount = Bool()

    def append_dsl(self, search_results_object, permitted_nodegroups, include_provisional):

        ## set some class properties here, as this is the access method Arches uses
        ## to instantiate this object.
        self.doc_types = self.get_doc_types(self.request)

        original_dsl = search_results_object["query"]._dsl
        ## manual test to see if any criteria have been added to the query yet
        try:
            if original_dsl['query']['match_all'] == {}:
                self.existing_query = True
        except KeyError:
            pass

        if settings.LOG_LEVEL == "DEBUG":
            with open(os.path.join(settings.LOG_DIR, "dsl_before_fpan.json"), "w") as output:
                json.dump(original_dsl, output, indent=1)

        querystring_params = self.request.GET.get(details["componentname"], "")

        ## This filter should always be enabled; if it isn't (e.g. someone typed a different URL), raise an exception.
        if querystring_params != "enabled":
            raise(Exception("Site filter is registered but not shown as enabled."))

        try:

            ## collect all of the graph-specific rules for this user
            collected_rules = {}
            for graphid in self.doc_types:
                collected_rules[graphid] = self.get_rules(self.request.user, graphid)

            ## iterate rules and generate a composite query based on them
            for graphid, rule in collected_rules.items():

                if rule["access_level"] == "full_access":
                    self.add_full_access_clause(graphid)

                elif rule["access_level"] == "no_access":
                    self.add_no_access_clause(graphid)

                else:
                    # make a call to the access table and, if fewer than 1500 resources
                    # are allowed, push all of those resource ids directly into the ES
                    # query. Otherwise, fall back to the actual query rules.

                    #allowed = UserXResourceInstanceAccess.objects.filter(
                    #    user=self.request.user,
                    #    resource__graph_id=graphid,
                    #)
                    # if len(allowed) < 1500:
                    #     # this should be altered to take the resids as an arg 
                    #     self.add_resourceid_filter_clause(graphid, self.request.user)
                    #else:
                        # if rule["access_level"] == "geo_filter":
                        #     self.add_geo_filter_clause(graphid, rule["filter_config"]["geometry"])
                        #     # self.create_geo_filter(graphid, rules["filter_config"]["geometry"])
                        #
                        # elif rule["access_level"] == "attribute_filter":
                        #     # self.create_attribute_filter(graphid, rule["filter_config"])
                        #     self.add_attribute_filter_clause(graphid, rule["filter_config"])
                        # else:
                            # raise(Exception("Invalid rules for filter."))

                    if hasattr(self.request.user, 'landmanager'):
                        self.add_resourceid_filter_clause(graphid, self.request.user)

                    else:
                        if rule["access_level"] == "geo_filter":
                            self.add_geo_filter_clause(graphid, rule["filter_config"]["geometry"])
                            # self.create_geo_filter(graphid, rules["filter_config"]["geometry"])

                        elif rule["access_level"] == "attribute_filter":
                            # self.create_attribute_filter(graphid, rule["filter_config"])
                            self.add_attribute_filter_clause(graphid, rule["filter_config"])

                        else:
                            raise(Exception("Invalid rules for filter."))

            search_results_object["query"].add_query(self.paramount)

            if settings.LOG_LEVEL == "DEBUG":
                with open(os.path.join(settings.LOG_DIR, "dsl_after_fpan.json"), "w") as output:
                    json.dump(search_results_object["query"]._dsl, output, indent=1)

        except Exception as e:
            print("\n\n")
            print(e)
            logger.debug(e)
            raise(e)

    def add_full_access_clause(self, graphid):
        self.paramount.should(Terms(field="graph_id", terms=graphid))

    def add_no_access_clause(self, graphid):
        self.paramount.must_not(Terms(field="graph_id", terms=graphid))

    def add_geo_filter_clause(self, graphid, geometry):

        nested = self.create_nested_geo_filter(geometry)
        if self.existing_query:
            self.paramount.should(nested)
        else:
            self.paramount.must(nested)

    def add_attribute_filter_clause(self, graphid, filter_config):

        nested = self.create_nested_attribute_filter(
            graphid,
            filter_config["nodegroup_id"],
            filter_config["value_list"],
        )
        if self.existing_query:
            self.paramount.should(nested)
        else:
            self.paramount.must(nested)


    def add_resourceid_filter_clause(self, graphid, user):

        allowed = UserXResourceInstanceAccess.objects.filter(
            user=user,
            resource__graph_id=graphid,
        )
        resids = [str(i.resource.resourceinstanceid) for i in allowed]

        new_resid_filter = Bool()
        new_resid_filter.should(Terms(field='resourceinstanceid', terms=resids))
        if self.existing_query:
            self.paramount.should(new_resid_filter)
        else:
            self.paramount.must(new_resid_filter)

    def get_rules(self, user, doc_id):

        full_access = {"access_level": "full_access"}
        no_access = {"access_level": "no_access"}
        attribute_filter = {
            "access_level": "attribute_filter",
            "filter_config": {
                "node_name": "",
                "value": ""
            }
        }
        geo_filter = {
            "access_level": "geo_filter",
            "filter_config": {
                "geometry": None
            }
        }

        settings_perms = settings.RESOURCE_MODEL_USER_RESTRICTIONS

        # assume full access if there is no condition in the settings.
        # also, don't apply any extra filters to superuser admins.
        if not doc_id in settings_perms or user.is_superuser:
            return full_access

        ## standard, basic check to apply restrictions to public users
        if user_is_anonymous(user):
            rules = copy.deepcopy(settings_perms[doc_id]['default'])

        else:
            rules = full_access

        ## alternative, FPAN-specific scenarios for Archaeological Sites
        if doc_id == "f212980f-d534-11e7-8ca8-94659cf754d0":
            if user_is_scout(user):
                rules = copy.deepcopy(settings_perms[doc_id]['default'])

            # special handling of the state land manager permissions here
            if user_is_land_manager(user):
                ## TEMPORARY extra check to see whether this is a 1.0 or 2.0 land manager.
                ## In the case of 2.0, don't use any settings perms; handle it all in here.
                if hasattr(user, "landmanager"):
                    if user.landmanager.full_access is True:
                        rules = full_access

                    elif user.landmanager.apply_area_filter is True:
                        multipolygon = user.landmanager.areas_as_multipolygon
                        geo_filter["filter_config"]["geometry"] = multipolygon
                        rules = geo_filter

                    else:
                        rules = no_access
                else:
                    rules = self.get_state_node_match(user)

        ## do a little bit of processing on attribute filters to standardize
        ## their configs 1) change node name to nodegroupids 2) handle <username>
        ## directive 3) set all values to lists for later iteration.
        if rules["access_level"] == "attribute_filter":

            node_name = rules["filter_config"]["node_name"]
            node = Node.objects.filter(
                graph_id=doc_id,
                name=node_name,
            )
            if len(node) == 1:
                ngid = str(node[0].nodegroup_id)
            else:
                logger.warning(f"Error finding node '{node_name}' in {doc_id}. Check rules-filter settings.")
                return no_access

            rules["filter_config"]["nodegroup_id"] = ngid

            if rules["filter_config"]["value"] == "<username>":
                if user.username == "anonymous":
                    rules["filter_config"]["value"] = ["anonymous"]
                else:
                    rules["filter_config"]["value"] = [user.username, "anonymous"]

            if isinstance(rules["filter_config"]["value"], list):
                rules["filter_config"]["value_list"] = rules["filter_config"]["value"]
            else:
                rules["filter_config"]["value_list"] = [rules["filter_config"]["value"]]

        return rules


    def get_state_node_match(self, user):
        """ this method still determines Land Manager access levels. but
        eventually it will be refactored to use the new LandManager model"""

        from fpan.models import ManagedArea

        full_access = {"access_level": "full_access"}
        no_access = {"access_level": "no_access"}
        attribute_filter = {
            "access_level": "attribute_filter",
            "filter_config": {
                "node_name": "",
                "value": ""
            }
        }

        # The FL_BAR user gets full access to all sites
        if user.groups.filter(name="FL_BAR").exists():
            return full_access

        # The FMSF user gets full access to all sites
        if user.groups.filter(name="FMSF").exists():
            return full_access

        elif user.groups.filter(name="StatePark").exists():

            # for the SPAdmin account, allow access to all sites in the state parks category
            if user.username == "SPAdmin":
                attribute_filter["filter_config"]["node_name"] = "Managed Area Category"
                attribute_filter["filter_config"]["value"] = "State Parks"
                return attribute_filter

            # for district users, return a list of all the park names in their district
            elif user.username.startswith("SPDistrict"):
                try:
                    dist_num = int(user.username[-1])
                except:
                    rules["access_level"] = "no_access"
                    return no_access

                parks = ManagedArea.objects.filter(sp_district=dist_num,
                    agency="FL Dept. of Environmental Protection, Div. of Recreation and Parks")

                attribute_filter["filter_config"]["node_name"] = "Managed Area Name"
                attribute_filter["filter_config"]["value"] = [p.name for p in parks]
                return attribute_filter

            # finally, normal state park users are only allowed to see those that match their username
            else:
                try:
                    park = ManagedArea.objects.get(nickname=user.username)
                except ManagedArea.DoesNotExist:
                    return no_access

                attribute_filter["filter_config"]["node_name"] = "Managed Area Name"
                attribute_filter["filter_config"]["value"] = park.name
                return attribute_filter

        # handle state forest access
        elif user.groups.filter(name="FL_Forestry").exists():

            # for the SFAdmin account, allow access to all sites in the state parks category
            if user.username == "SFAdmin":

                attribute_filter["filter_config"]["node_name"] = "Managed Area Category"
                attribute_filter["filter_config"]["value"] = "State Forest"
                return attribute_filter

            else:
                try:
                    forest = ManagedArea.objects.get(nickname=user.username)
                except:
                    return no_access

                attribute_filter["filter_config"]["node_name"] = "Managed Area Name"
                attribute_filter["filter_config"]["value"] = forest.name
                return attribute_filter

        elif user.groups.filter(name="FWC").exists():

            try:
                fwc = ManagedArea.objects.get(nickname=user.username)
            except:
                return no_access

            attribute_filter["filter_config"]["node_name"] = "Managed Area Name"
            attribute_filter["filter_config"]["value"] = fwc.name
            return attribute_filter

        elif user.groups.filter(name="FL_AquaticPreserve").exists():

            attribute_filter["filter_config"]["node_name"] = "Managing Agency"
            attribute_filter["filter_config"]["value"] = "FL Dept. of Environmental Protection, Florida Coastal Office"
            return attribute_filter

        elif user.groups.filter(name="FL_WMD").exists():

            if user.username == "SJRWMD_Admin":

                attribute_filter["filter_config"]["node_name"] = "Managed Area Category"
                attribute_filter["filter_config"]["value"] = "Water Management District"
                return attribute_filter

            elif user.username == "SJRWMD_NorthRegion":
                districts = ["North","North Central","West"]
                ma = ManagedArea.objects.filter(wmd_district__in=districts)

            elif user.username == "SJRWMD_SouthRegion":
                districts = ["South", "South Central", "Southwest"]
                ma = ManagedArea.objects.filter(wmd_district__in=districts)

            else:
                districts = ["North", "North Central", "West", "South", "Southwest", "South Central"]
                for district in districts:
                    if user.username.startswith("SJRWMD") and district.replace(" ","") in user.username:
                        ma = ManagedArea.objects.filter(wmd_district=district)

            attribute_filter["filter_config"]["node_name"] = "Managed Area Name"
            attribute_filter["filter_config"]["value"] = [m.name for m in ma]
            return attribute_filter

        else:
            return no_access

    def get_doc_types(self, request):
        """ This is a more robust version of the analogous method in core Arches.
        It was moved here a long time ago, it's possible that current core Arches
        has been updated and this method could be removed to use that one."""

        all_resource_graphids = (
            GraphModel.objects.filter(isresource=True, isactive=True)
            .exclude(name="Arches System Settings")
        ).values_list('graphid', flat=True)

        type_filter = request.GET.get('typeFilter', '')
        use_ids = []

        if type_filter != '':
            type_filters = JSONDeserializer().deserialize(type_filter)

            ## add all positive filters to the list of good ids
            pos_filters = [i['graphid'] for i in type_filters if not i['inverted']]
            for pf in pos_filters:
                use_ids.append(pf)

            ## if there are negative filters, make a list of all possible ids and
            ## subtract the negative filter ids from it.
            neg_filters = [i['graphid'] for i in type_filters if i['inverted']]
            if len(neg_filters) > 0:
                use_ids = [str(i) for i in all_resource_graphids if not str(i) in neg_filters]

        else:
            use_ids = [str(i) for i in all_resource_graphids]

        if len(use_ids) == 0:
            ret = []
        else:
            ret = list(set(use_ids))
        return ret

    def create_nested_attribute_filter(self, doc_id, nodegroup_id, value_list):

        new_string_filter = Bool()
        new_string_filter.filter(Terms(field='strings.nodegroup_id', terms=[nodegroup_id]))
        for value in value_list:
            new_string_filter.should(Match(field='strings.string', query=value, type='phrase'))
        nested = Nested(path='strings', query=new_string_filter)
        return nested

    def create_nested_geo_filter(self, geometry):

        ## process GEOS geometry object into geojson and create ES filter
        geojson_geom = JSONDeserializer().deserialize(geometry.geojson)
        geoshape = GeoShape(
            field="geometries.geom.features.geometry",
            type=geojson_geom["type"],
            coordinates=geojson_geom["coordinates"]
        )

        new_spatial_filter = Bool()
        new_spatial_filter.filter(geoshape)
        nested = Nested(path='geometries', query=new_spatial_filter)
        return nested

    def quick_query(self, rules, doc):

        if rules["access_level"] == "attribute_filter":
            self.add_attribute_filter_clause(doc, rules["filter_config"])

        elif rules["access_level"] == "geo_filter":
            self.add_geo_filter_clause(doc, rules["filter_config"]["geometry"])

        se = SearchEngineFactory().create()
        query = Query(se, start=0, limit=10000)
        query.include('graph_id')
        query.include('resourceinstanceid')
        query.add_query(self.paramount)

        ## doc_type is deprecated, must use a filter for graphid instead (i think)
        results = query.search(index='resources', doc_type=doc)

        return results

    def get_resource_access_from_es_query(self, user, graphid, invert=False):
        """
        Returns the resourceinstanceids for all resources that a user is allowed to
        access from a given graph. Set invert=True to return
        ids that the user is NOT allowed to access.
        """

        response = {
            "access_level": "partial_access",
            "id_list": []
        }

        rules = self.get_rules(user, graphid)

        if rules["access_level"] == "full_access":
            response["access_level"] = "full_access"
            return response

        if rules["access_level"] == "no_access":
            response["access_level"] = "no_access"
            return response

        results = self.quick_query(rules, graphid)

        resourceids = list(set([i['_source']['resourceinstanceid'] for i in results['hits']['hits']]))

        if invert is True:
            inverted_res = ResourceInstance.objects.filter(graph_id=graphid).exclude(resourceinstanceid__in=resourceids)
            resourceids = [str(i.resourceinstanceid) for i in inverted_res]

        response["id_list"] = resourceids

        return response
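
# Minimal usage sketch for the access helper above (assumed caller; `request` and
# `graphid` are placeholders): the filter is instantiated with a request and asked
# which resources of a given graph the current user may access.
def _example_check_access(request, graphid):
    site_filter = SiteFilter(request=request)
    access = site_filter.get_resource_access_from_es_query(request.user, graphid)
    if access["access_level"] == "partial_access":
        return access["id_list"]
    return access["access_level"]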
Пример #20
0
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        search_query = Bool()
        querysting_params = self.request.GET.get(details['componentname'], '')
        for term in JSONDeserializer().deserialize(querysting_params):
            if term['type'] == 'term' or term['type'] == 'string':
                string_filter = Bool()
                if term['type'] == 'term':
                    string_filter.must(
                        Match(field='strings.string',
                              query=term['value'],
                              type='phrase'))
                elif term['type'] == 'string':
                    string_filter.should(
                        Match(field='strings.string',
                              query=term['value'],
                              type='phrase_prefix'))
                    string_filter.should(
                        Match(field='strings.string.folded',
                              query=term['value'],
                              type='phrase_prefix'))

                if include_provisional is False:
                    string_filter.must_not(
                        Match(field='strings.provisional',
                              query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    string_filter.must_not(
                        Match(field='strings.provisional',
                              query='false',
                              type='phrase'))

                string_filter.filter(
                    Terms(field='strings.nodegroup_id',
                          terms=permitted_nodegroups))
                nested_string_filter = Nested(path='strings',
                                              query=string_filter)
                if term['inverted']:
                    search_query.must_not(nested_string_filter)
                else:
                    search_query.must(nested_string_filter)
                    # set min_score because the query returns results with a score of 0 that need to be removed (arguably it shouldn't return them in the first place)
                    search_results_object['query'].min_score('0.01')
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Bool()
                conceptid_filter.filter(
                    Terms(field='domains.conceptid', terms=concept_ids))
                conceptid_filter.filter(
                    Terms(field='domains.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional is False:
                    conceptid_filter.must_not(
                        Match(field='domains.provisional',
                              query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    conceptid_filter.must_not(
                        Match(field='domains.provisional',
                              query='false',
                              type='phrase'))

                nested_conceptid_filter = Nested(path='domains',
                                                 query=conceptid_filter)
                if term['inverted']:
                    search_query.must_not(nested_conceptid_filter)
                else:
                    search_query.filter(nested_conceptid_filter)

        search_results_object['query'].add_query(search_query)
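
# Sketch of the term-filter payload this component deserializes from the
# querystring (keys taken from the loop above; the values are placeholders,
# not real data):
_example_termfilter_payload = [
    {"type": "term", "value": "Church of St Mary", "inverted": False},
    {"type": "string", "value": "chur", "inverted": False},
    {"type": "concept", "value": "<conceptid>", "inverted": True},
]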
Пример #21
0
def build_search_results_dsl(request):
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('mapFilter', '{}'))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', '{}'))

    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit*int(page-1), limit=limit)
    query.add_aggregation(GeoHashGridAgg(field='points', name='grid', precision=settings.HEX_BIN_PRECISION))
    query.add_aggregation(GeoBoundsAgg(field='points', name='bounds'))
    search_query = Bool()


    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term':
                term_filter = Match(field='strings', query=term['value'], type='phrase')
                if term['inverted']:
                    search_query.must_not(term_filter)
                else:
                    search_query.must(term_filter)
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Terms(field='domains.conceptid', terms=concept_ids)
                if term['inverted']:
                    search_query.must_not(conceptid_filter)
                else:
                    search_query.must(conceptid_filter)
            elif term['type'] == 'string':
                string_filter = Bool()
                string_filter.should(Match(field='strings', query=term['value'], type='phrase_prefix'))
                string_filter.should(Match(field='strings.folded', query=term['value'], type='phrase_prefix'))
                if term['inverted']:
                    search_query.must_not(string_filter)
                else:
                    search_query.must(string_filter)

    if 'features' in spatial_filter:
        if len(spatial_filter['features']) > 0:
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = spatial_filter['features'][0]['properties']
            buffer = {'width':0,'unit':'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            feature_geom = JSONDeserializer().deserialize(_buffer(feature_geom,buffer['width'],buffer['unit']).json)
            geoshape = GeoShape(field='geometries.features.geometry', type=feature_geom['type'], coordinates=feature_geom['coordinates'] )

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            if invert_spatial_search == True:
                search_query.must_not(geoshape)
            else:
                search_query.must(geoshape)

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        now = str(datetime.utcnow())
        start_date = None
        end_date = None
        start_year = 'null'
        end_year = 'null'
        try:
            # start_date = parser.parse(temporal_filter['fromDate'])
            # start_date = start_date.isoformat()
            sd = FlexiDate.from_str(temporal_filter['fromDate'])
            start_date = int((sd.as_float()-1970)*31556952*1000)
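            # 31556952 is the number of seconds in an average Gregorian year
            # (365.2425 * 86400), so this approximates milliseconds since the
            # 1970 epoch, e.g. '2000' -> (2000 - 1970) * 31556952 * 1000 = 946708560000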

            #start_year = parser.parse(start_date).year
            start_year = sd.year
        except:
            pass

        try:
            # end_date = parser.parse(temporal_filter['toDate'])
            # end_date = end_date.isoformat()
            ed = FlexiDate.from_str(temporal_filter['toDate'])
            end_date = int((ed.as_float()-1970)*31556952*1000)

            #end_year = parser.parse(end_date).year
            end_year = ed.year
        except:
            pass


        # add filter for concepts that define min or max dates
        sql = None
        basesql = """
            SELECT value.conceptid
            FROM (
                SELECT
                    {select_clause},
                    v.conceptid
                FROM
                    public."values" v,
                    public."values" v2
                WHERE
                    v.conceptid = v2.conceptid and
                    v.valuetype = 'min_year' and
                    v2.valuetype = 'max_year'
            ) as value
            WHERE overlap = true;
        """

        temporal_query = Bool()

        if 'inverted' not in temporal_filter:
            temporal_filter['inverted'] = False

        if temporal_filter['inverted']:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            select_clause = []
            inverted_date_filter = Bool()

            field = 'dates'
            if 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != '':
                field='tiles.data.%s' % (temporal_filter['dateNodeId'])

            if start_date is not None:
                inverted_date_filter.should(Range(field=field, lte=start_date))
                select_clause.append("(numrange(v.value::int, v2.value::int, '[]') && numrange(null,{start_year},'[]'))")
            if end_date is not None:
                inverted_date_filter.should(Range(field=field, gte=end_date))
                select_clause.append("(numrange(v.value::int, v2.value::int, '[]') && numrange({end_year},null,'[]'))")

            if 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != '':
                date_range_query = Nested(path='tiles', query=inverted_date_filter)
                temporal_query.should(date_range_query)
            else:
                temporal_query.should(inverted_date_filter)

                select_clause = " or ".join(select_clause) + " as overlap"
                sql = basesql.format(select_clause=select_clause).format(start_year=start_year, end_year=end_year)

        else:
            if 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != '':
                range = Range(field='tiles.data.%s' % (temporal_filter['dateNodeId']), gte=start_date, lte=end_date)
                date_range_query = Nested(path='tiles', query=range)
                temporal_query.should(date_range_query)
            else:
                date_range_query = Range(field='dates', gte=start_date, lte=end_date)
                temporal_query.should(date_range_query)

                select_clause = """
                    numrange(v.value::int, v2.value::int, '[]') && numrange({start_year},{end_year},'[]') as overlap
                """
                sql = basesql.format(select_clause=select_clause).format(start_year=start_year, end_year=end_year)

        # if a dateNodeId is not specified
        if sql is not None:
            cursor = connection.cursor()
            cursor.execute(sql)
            ret =  [str(row[0]) for row in cursor.fetchall()]

            if len(ret) > 0:
                conceptid_filter = Terms(field='domains.conceptid', terms=ret)
                temporal_query.should(conceptid_filter)


        search_query.must(temporal_query)

    query.add_query(search_query)
    return query
Пример #22
0
def nested_query_from_pathed_values(pathed_values, stem):
    """
    Given an array of pathed values to query terms from the root, return a nested query
    pathed_values: e.g.
    [
        {
            'term': {'type': ..., 'value': '29430-4955-...', 'inverted': ...},
            'path': [a, b, c]
        }
    ]
    stem: the path into the index for the nested terms. This will be of the form 'nested_entity.child_entities.child_entities'
    """
    
    # f( [[A,B,C], [A,B,D]] )
    # = Nested( AND( f( [[B,C],[B,D]] ) ))
    # = Nested( AND( Nested( AND( f([[C],[D]]) )) ))
    # = Nested( AND( Nested( AND( valueC, valueD )) ))

    # f( [[A,B,C], [A,B,D], [A,B,D]] )
    # = Nested( AND( f( [[B,C],[B,D],[B,D]] ) ))
    # = Nested( AND( Nested( AND( f([[C],[D],[D]]) )) ))
    # = Nested( AND( Nested( AND( valueC, valueD, valueD )) ))
    
    # group paths by their head: where the head of each path list is the same, make a single nested query and recurse on the tails
    
    branch_groups = {}      # those groups with a continuing tail, where we will recursively build a nested query
    leaf_groups = []        # those groups without a continuing tail, where we will use an ordinary term query
    
    
    # build the groups
    for v in pathed_values:
        path = v['path']
        if len(path) == 1:
            # this goes in its own group
            leaf_groups.append(v)
        else:
            # see if there is already a group using this head
            head = v['path'][0]['entitytyperange']
            if head not in branch_groups:
                branch_groups[head] = []
            branch_groups[head].append(v)

    # We should now have a set of groups
    # create the bool query
    bool_term = Bool()
        
    # add terms for any leaf groups
    for leaf_group in leaf_groups:
        
        if leaf_group['term']['type'] == 'concept':
            if leaf_group['term']['inverted']:
                terms = Terms(field=stem+'.conceptid', terms=leaf_group['term']['value'])
                n_terms = Nested(path=stem, query=terms)
                bool_term.must_not(n_terms)
            else:
                terms = Terms(field=stem+'.conceptid', terms=leaf_group['term']['value'])
                n_terms = Nested(path=stem, query=terms)
                bool_term.must(n_terms)
                
        elif leaf_group['term']['type'] == 'term':
            
                # boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                # boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
            
            entitytype = models.EntityTypes.objects.get(conceptid_id=leaf_group['term']['context'])
            sub_bool = Bool()
            
            if leaf_group['term']['inverted']:
                sub_bool.must_not(Terms(field=stem+'.entitytypeid', terms=[entitytype.pk]))
                sub_bool.must_not(Match(field=stem+'.value', query=leaf_group['term']['value'], type='phrase'))
            else:
                sub_bool.must(Terms(field=stem+'.entitytypeid', terms=[entitytype.pk]))
                sub_bool.must(Match(field=stem+'.value', query=leaf_group['term']['value'], type='phrase'))
                
            nsub_bool = Nested(path=stem, query=sub_bool)
            bool_term.must(nsub_bool)
    
        elif leaf_group['term']['type'] == 'string':
            boolfilter_folded = Bool()
            boolfilter_folded.should(Match(field=stem+'.flat_child_entities.label', query=leaf_group['term']['value'], type='phrase_prefix', fuzziness='AUTO'))
            boolfilter_folded.should(Match(field=stem+'.flat_child_entities.label.folded', query=leaf_group['term']['value'], type='phrase_prefix', fuzziness='AUTO'))
            boolfilter_folded.should(Match(field=stem+'.flat_child_entities.label.folded', query=leaf_group['term']['value'], fuzziness='AUTO'))
            nested = Nested(path=stem+'.flat_child_entities', query=boolfilter_folded)
            if leaf_group['term']['inverted']:
                bool_term.must_not(nested)
            else:    
                bool_term.must(nested)
    
        elif leaf_group['term']['type'] == 'date':
            if leaf_group['term']['date_operator'] == '1': # equals query
                daterange = Range(field=stem+'.flat_child_entities.date', gte=leaf_group['term']['date_value'], lte=leaf_group['term']['date_value'])
            elif leaf_group['term']['date_operator'] == '0': # greater than query 
                daterange = Range(field=stem+'.flat_child_entities.date', lt=leaf_group['term']['date_value'])
            elif leaf_group['term']['date_operator'] == '2': # less than query
                daterange = Range(field=stem+'.flat_child_entities.date', gt=leaf_group['term']['date_value'])
            elif leaf_group['term']['date_operator'] == '3': # greater than and less than query
                daterange = Range(field=stem+'.flat_child_entities.date', gte=leaf_group['term']['start_date'], lte=leaf_group['term']['end_date'])
            
            nested_date = Nested(path=stem+'.flat_child_entities', query=daterange)
            if leaf_group['term']['inverted']:
                bool_term.must_not(nested_date)
            else:
                bool_term.must(nested_date)
    
    # add terms for any branch groups
    for key in branch_groups:
        # add a nested term for each group
        branch_group = branch_groups[key]
        
        #remove head from each path and recurse
        for value in branch_group:
            value['path'] = value['path'][1:]
        sub_query = nested_query_from_pathed_values(branch_group, stem+'.child_entities')
        
        nsub_query = Nested(path=stem, query=sub_query)
        
        bool_term.must(nsub_query)
    
    return bool_term
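
# Minimal usage sketch for the function above (assumed input; the entity type
# names, concept id and search string are placeholders). The first value, whose
# path continues past its head, ends up in a recursive nested query; the second,
# with a single-step path, becomes a leaf term query.
_example_pathed_values = [
    {
        'term': {'type': 'concept', 'value': ['a1b2c3d4'], 'inverted': False},
        'path': [{'entitytyperange': 'PLACE.E53'}, {'entitytyperange': 'NAME.E41'}],
    },
    {
        'term': {'type': 'string', 'value': 'temple', 'inverted': False},
        'path': [{'entitytyperange': 'PLACE.E53'}],
    },
]
_example_group_query = nested_query_from_pathed_values(_example_pathed_values, 'nested_entity.child_entities')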
Пример #23
0
def build_search_results_dsl(request):
    # Results are sorted in ascending order by the value of SITE_ID.E42, which is displayed as the primary name of Heritage Resources.
    # Must revisit this method once the new Automatic Resource ID has been fully developed (AZ 10/08/16). Update 06/09/16: EAMENA_ID.E42 is now used as the sorting criterion.

    sorting = {
        "child_entities.label": {
            "order": "asc",
            "nested_path": "child_entities",
            "nested_filter": {
                "term": {"child_entities.entitytypeid": "EAMENA_ID.E42"}
            }
        }
    }
    
    term_filter = request.GET.get('termFilter', '')
    
    
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', None)) 
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))
    boolean_search = request.GET.get('booleanSearch', '')
    filter_and_or = JSONDeserializer().deserialize(request.GET.get('termFilterAndOr', ''))
    filter_grouping = JSONDeserializer().deserialize(request.GET.get('termFilterGroup', ''))
    
    filter_combine_flags = JSONDeserializer().deserialize(request.GET.get('termFilterCombineWithPrev', ''))
    #Ignore first entry as it is a dummy
    filter_combine_flags = filter_combine_flags[1:]
    # filter_combine_flags = [False, True, False, False, False]
    
    # filter_groups = JSONDeserializer().deserialize(request.GET.get('termFilterGroups', ''))
    # Not here yet, so put in some bogus data
    # filter_groups = [
    #     'NAME.E41',
    #     'NAME.E41',
    #     'DISTURBANCE_STATE.E3',
    #     'THREAT_STATE.E3'
    # ]
    
    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE  
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE
    
    query = Query(se, start=limit*int(page-1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()
    is_empty_temporal_filter = True

    # store each search term query in a list initially. These will be combined based on the global and/or and the optional groupings
    terms_queries = []

    # logging.warning("-------QUERY-------")

    if term_filter != '' or not is_empty_temporal_filter:
        for index, select_box in enumerate(JSONDeserializer().deserialize(term_filter)):
            selectbox_boolfilter = Bool()
            
            groupid = filter_grouping[index]
            if not groupid == 'No group':
                # build a nested query against the nested_entities
                
                # trace the path from each term to the group root
                term_paths = []
                for term in select_box:

                    # trace path from group root to this term
                    if term['type'] == 'concept':
                        
                        # get the parent concept for this value i.e. the field
                        term_parent_concept = Concept.get_parent_concept(term['value'])
                        
                        # get the steps from the root to that concept
                        if term_parent_concept.nodetype.nodetype == "Collection":
                            term_schema = Entity.get_mapping_schema_to(term_parent_concept.legacyoid)
                        elif term_parent_concept.nodetype.nodetype == 'Concept':
                            # need to get at the parent until we reach the root collection. concepts are arranged hierarchically
                            parent_relations_to = models.ConceptRelations.objects.filter(conceptidto=term_parent_concept.conceptid, relationtype='member')
                            grandparent = models.Concepts.objects.filter(conceptid=parent_relations_to[0].conceptidfrom)
                            term_schema = Entity.get_mapping_schema_to(grandparent[0].legacyoid)
                        
                        #this path begins at the root, and ends up at the node in question
                        term_path = term_schema['HERITAGE_RESOURCE_GROUP.E27']['steps']
                        
                        term_paths.append({
                            'term': term,
                            'path': term_path
                        })
                        
                    elif term['type'] == 'term':

                        concept = models.Concepts.objects.get(conceptid=term['context'])
                        term_schema = Entity.get_mapping_schema_to(concept.legacyoid)
                        term_path = term_schema['HERITAGE_RESOURCE_GROUP.E27']['steps']
                        
                        term_paths.append({
                            'term': term,
                            'path': term_path
                        })

                    elif term['type'] == 'string':
                        term_schema = Entity.get_mapping_schema_to(groupid)
                        term_path = term_schema['HERITAGE_RESOURCE_GROUP.E27']['steps']
                        
                        term_paths.append({
                            'term': term,
                            'path': term_path
                        })
                        
                if 'year_min_max' in temporal_filter[index] and len(temporal_filter[index]['year_min_max']) == 2:
                    start_date = date(temporal_filter[index]['year_min_max'][0], 1, 1)
                    end_date = date(temporal_filter[index]['year_min_max'][1], 12, 31)
                    if start_date:
                        start_date = start_date.isoformat()
                    if end_date:
                        end_date = end_date.isoformat()

                    if 'inverted' not in temporal_filter[index]:
                        inverted_temporal_filter = False
                    else:
                        if temporal_filter[index]['inverted']:
                            inverted_temporal_filter = True
                        else:
                            inverted_temporal_filter = False
                    
                    term_paths.append({
                        'term': {
                            'date_operator': '3',
                            'start_date': start_date,
                            'end_date': end_date,
                            'type': 'date',
                            'inverted': inverted_temporal_filter
                        },
                        'path': term_path
                    })
                    
                    
                if 'filters' in temporal_filter[index]:
                    term_schema = Entity.get_mapping_schema_to(groupid)
                    term_path = term_schema['HERITAGE_RESOURCE_GROUP.E27']['steps']

                    for temporal_filter_item in temporal_filter[index]['filters']:
                        date_type = ''
                        searchdate = ''
                        date_operator = ''
                        for node in temporal_filter_item['nodes']:
                            if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                                date_operator = node['value']
                            elif node['entitytypeid'] == 'date':
                                searchdate = node['value']
                            else:
                                date_type = node['value']
                
                        date_value = datetime.strptime(searchdate, '%Y-%m-%d').isoformat()
                        if 'inverted' not in temporal_filter[index]:
                            inverted_temporal_filter = False
                        else:
                            if temporal_filter[index]['inverted']:
                                inverted_temporal_filter = True
                            else:
                                inverted_temporal_filter = False
                                
                        term_paths.append({
                            'term': {
                                'date_operator': date_operator,
                                'date_value': date_value,
                                'type': 'date',
                                'inverted': inverted_temporal_filter
                            },
                            'path': term_path
                        })

                # combine the traced paths to build a nested query
                group_query = nested_query_from_pathed_values(term_paths, 'nested_entity.child_entities')

                
                # add nested query to overall query
                selectbox_boolfilter.must(group_query)
                
                # logging.warning("BOX QUERY - %s", JSONSerializer().serialize(selectbox_boolfilter, indent=2))

            else:    
                for term in select_box:
                    
                    if term['type'] == 'term':
                        entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                        boolfilter_nested = Bool()
                        boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                        boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                        nested = Nested(path='child_entities', query=boolfilter_nested)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                selectbox_boolfilter.should(nested)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(nested)
                            else:    
                                selectbox_boolfilter.must(nested)
                                
                    elif term['type'] == 'concept':
                        concept_ids = _get_child_concepts(term['value'])
                        terms = Terms(field='domains.conceptid', terms=concept_ids)
                        nested = Nested(path='domains', query=terms)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                selectbox_boolfilter.should(nested)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(nested)
                            else:
                                selectbox_boolfilter.must(nested)
                                
                    elif term['type'] == 'string':
                        boolquery2 = Bool() #This bool contains the subset of nested string queries on both domains and child_entities paths
                        boolfilter_folded = Bool() #This bool searches by string in child_entities, where free text strings get indexed
                        boolfilter_folded2 = Bool() #This bool searches by string in the domains path,where controlled vocabulary concepts get indexed
                        boolfilter_folded.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        nested = Nested(path='child_entities', query=boolfilter_folded)
                        boolfilter_folded2.should(Match(field='domains.label', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded2.should(Match(field='domains.label.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded2.should(Match(field='domains.label.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        nested2 = Nested(path='domains', query=boolfilter_folded2)
                        boolquery2.should(nested)
                        boolquery2.should(nested2)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                # use boolfilter here instead of boolquery because boolquery
                                # can't be combined with other boolfilters using boolean OR
                                selectbox_boolfilter.should(boolquery2)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(boolquery2)
                            else:    
                                selectbox_boolfilter.must(boolquery2)
                            
                if 'year_min_max' in temporal_filter[index] and len(temporal_filter[index]['year_min_max']) == 2:
                    start_date = date(temporal_filter[index]['year_min_max'][0], 1, 1)
                    end_date = date(temporal_filter[index]['year_min_max'][1], 12, 31)
                    if start_date:
                        start_date = start_date.isoformat()
                    if end_date:
                        end_date = end_date.isoformat()
                    range = Range(field='dates.value', gte=start_date, lte=end_date)
                    nested = Nested(path='dates', query=range)
            
                    if 'inverted' not in temporal_filter[index]:
                        temporal_filter[index]['inverted'] = False

                    if temporal_filter[index]['inverted']:
                        selectbox_boolfilter.must_not(nested)
                    else:
                        selectbox_boolfilter.must(nested)
                        
                if 'filters' in temporal_filter[index]:
                    for temporal_filter_item in temporal_filter[index]['filters']:
                        date_type = ''
                        searchdate = ''
                        date_operator = ''
                        for node in temporal_filter_item['nodes']:
                            if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                                date_operator = node['value']
                            elif node['entitytypeid'] == 'date':
                                searchdate = node['value']
                            else:
                                date_type = node['value']


                        date_value = datetime.strptime(searchdate, '%Y-%m-%d').isoformat()

                        if date_operator == '1': # equals query
                            range = Range(field='dates.value', gte=date_value, lte=date_value)
                        elif date_operator == '0': # greater than query 
                            range = Range(field='dates.value', lt=date_value)
                        elif date_operator == '2': # less than query
                            range = Range(field='dates.value', gt=date_value)
                        
                        nested = Nested(path='dates', query=range)
                        if 'inverted' not in temporal_filter[index]:
                            temporal_filter[index]['inverted'] = False

                        if temporal_filter[index]['inverted']:
                            selectbox_boolfilter.must_not(nested)
                        else:
                            selectbox_boolfilter.must(nested)


            terms_queries.append(selectbox_boolfilter)
            # if not selectbox_boolfilter.empty:
            #     if boolean_search == 'or':
            #         boolfilter.should(selectbox_boolfilter)
            #     else:
            #         boolfilter.must(selectbox_boolfilter)
        
        # We now have individual query terms for each of the search components. Combine into one group now
        # Start by building an array of groups which will be combined according to the global And/Or
        # Queries within one of these groups will be combined by the complement of the global And/Or
        # We may end up with [ [A,B], [C], [D,E] ], which would translate to either:
        #    (A || B) && C && (D || E)
        #       or
        #    (A && B) || C || (D && E)
        # for global AND or OR respectively
        
        # logging.warning("TERMS QUERIES %s", terms_queries)
        
        bool_components = []
        
        for i, term_query in enumerate(terms_queries):
            if i == 0:
                bool_components.append([term_query])
            else:
                should_group_with_previous = filter_combine_flags[i-1]
                if should_group_with_previous:
                    bool_components[-1].append(term_query)
                else:
                    bool_components.append([term_query])
            
        # logging.warning("BOOL COMPONENTS %s", bool_components)
        # Now build the ES queries
        for bool_component in bool_components:
            if len(bool_component) == 1:
                # just combine this on its own
                q = bool_component[0]
            else:
                q = Bool()
                for sub_component in bool_component:
                    if boolean_search == 'or':
                        #apply the OPPOSITE of the global boolean operator
                        q.must(sub_component)
                    else:
                        q.should(sub_component)
                        
            # combine to the overall query according to the global boolean operator
            if boolean_search == 'or':
                boolfilter.should(q)
            else:
                boolfilter.must(q)

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
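            # Elasticsearch envelope coordinates are [[minX, maxY], [maxX, minY]] (upper-left,
            # lower-right), assuming the incoming bbox is ordered [minx, miny, maxx, maxy]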
            coordinates = [[geojson['coordinates'][0],geojson['coordinates'][3]], [geojson['coordinates'][2],geojson['coordinates'][1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates )
            nested = Nested(path='geometries', query=geoshape)
        else:
            buffer = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson,buffer['width'],buffer['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'] )
            nested = Nested(path='geometries', query=geoshape)

        if 'inverted' not in spatial_filter:
            spatial_filter['inverted'] = False

        if spatial_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)
    
    # Sorting criterion added to query (AZ 10/08/16)
    query.dsl.update({'sort': sorting})
    # logging.warning("-=-==-=-===-=--=-==-=-===-=- query: -=-==-=-===-=--=-==-=-===-=-> %s", query)

    return query
Example #24
0
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        search_query = Bool()
        querystring_params = self.request.GET.get(details["componentname"], "")
        for term in JSONDeserializer().deserialize(querystring_params):
            if term["type"] == "term" or term["type"] == "string":
                string_filter = Bool()
                if term["type"] == "term":
                    string_filter.must(
                        Match(field="strings.string",
                              query=term["value"],
                              type="phrase"))
                elif term["type"] == "string":
                    string_filter.should(
                        Match(field="strings.string",
                              query=term["value"],
                              type="phrase_prefix"))
                    string_filter.should(
                        Match(field="strings.string.folded",
                              query=term["value"],
                              type="phrase_prefix"))

                if include_provisional is False:
                    string_filter.must_not(
                        Match(field="strings.provisional",
                              query="true",
                              type="phrase"))
                elif include_provisional == "only provisional":
                    string_filter.must_not(
                        Match(field="strings.provisional",
                              query="false",
                              type="phrase"))

                string_filter.filter(
                    Terms(field="strings.nodegroup_id",
                          terms=permitted_nodegroups))
                nested_string_filter = Nested(path="strings",
                                              query=string_filter)
                if term["inverted"]:
                    search_query.must_not(nested_string_filter)
                else:
                    search_query.must(nested_string_filter)
                    # need to set min_score because the query returns results with score 0 and those have to be removed, which I don't think it should be doing
                    search_results_object["query"].min_score("0.01")
            elif term["type"] == "concept":
                concept_ids = _get_child_concepts(term["value"])
                conceptid_filter = Bool()
                conceptid_filter.filter(
                    Terms(field="domains.conceptid", terms=concept_ids))
                conceptid_filter.filter(
                    Terms(field="domains.nodegroup_id",
                          terms=permitted_nodegroups))

                if include_provisional is False:
                    conceptid_filter.must_not(
                        Match(field="domains.provisional",
                              query="true",
                              type="phrase"))
                elif include_provisional == "only provisional":
                    conceptid_filter.must_not(
                        Match(field="domains.provisional",
                              query="false",
                              type="phrase"))

                nested_conceptid_filter = Nested(path="domains",
                                                 query=conceptid_filter)
                if term["inverted"]:
                    search_query.must_not(nested_conceptid_filter)
                else:
                    search_query.filter(nested_conceptid_filter)

        search_results_object["query"].add_query(search_query)
Example #25
0
def nested_query_from_pathed_values(pathed_values, stem):
    """
    Given an array of pathed values to query terms from the root, return a nested query
    pathed_values: e.g. 
    [
        {
            'term': {'type': 'concept', 'value': '29430-4955-...', 'inverted': False},
            'path': [a, b, c]
        }
    ]
    stem: the path into the index for the nested terms. This will be of the form 'nested_entity.child_entities.child_entities'
    """
    
    # f( [[A,B,C], [A,B,D]] )
    # = Nested( AND( f( [[B,C],[B,D]] ))
    # = Nested( AND( Nested( AND( f([[C],[D]]) ))  ))
    # = Nested( AND( Nested( AND( valueC, valueD))))
    
    # f( [[A,B,C], [A,B,D], [A,B,D]] )
    # = Nested( AND( f([[B,C],[B,D],[B,D]] ))
    # = Nested( AND( Nested( AND( f([[C],[D],[D]]) ))  ))
    # = Nested( AND( Nested( AND( valueC, valueD, valueD))))
    
    # group paths by their head: where the head of each path is the same, make a single nested query for that head and recurse on the tails
    
    branch_groups = {}      # those groups with a continuing tail, where we will recursively build a nested query
    leaf_groups = []        # those groups without a continuing tail, where we will use an ordinary term query
    
    
    # build the groups
    for v in pathed_values:
        path = v['path']
        if len(path) == 1:
            # this goes in its own group
            leaf_groups.append(v)
        else:
            # see if there is already a group using this head
            head = v['path'][0]['entitytyperange']
            if head not in branch_groups:
                branch_groups[head] = []
            branch_groups[head].append(v)
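    # e.g. for paths [[A,B,C], [A,D], [E]] (letters standing in for entitytyperange heads),
    # this grouping yields branch_groups == {'A': [<[A,B,C] value>, <[A,D] value>]} and
    # leaf_groups == [<[E] value>]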

    # We should now have a set of groups
    # create the bool query
    bool_term = Bool()
        
    # add terms for any leaf groups
    for leaf_group in leaf_groups:
        
        if leaf_group['term']['type'] == 'concept':
            if leaf_group['term']['inverted']:
                terms = Terms(field=stem+'.conceptid', terms=leaf_group['term']['value'])
                n_terms = Nested(path=stem, query=terms)
                bool_term.must_not(n_terms)
            else:
                terms = Terms(field=stem+'.conceptid', terms=leaf_group['term']['value'])
                n_terms = Nested(path=stem, query=terms)
                bool_term.must(n_terms)
                
        elif leaf_group['term']['type'] == 'term':
            
                # boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                # boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
            
            entitytype = models.EntityTypes.objects.get(conceptid_id=leaf_group['term']['context'])
            sub_bool = Bool()
            
            if leaf_group['term']['inverted']:
                sub_bool.must_not(Terms(field=stem+'.entitytypeid', terms=[entitytype.pk]))
                sub_bool.must_not(Match(field=stem+'.value', query=leaf_group['term']['value'], type='phrase'))
            else:
                sub_bool.must(Terms(field=stem+'.entitytypeid', terms=[entitytype.pk]))
                sub_bool.must(Match(field=stem+'.value', query=leaf_group['term']['value'], type='phrase'))
                
            nsub_bool = Nested(path=stem, query=sub_bool)
            bool_term.must(nsub_bool)
    
        elif leaf_group['term']['type'] == 'string':
            boolfilter_folded = Bool()
            boolfilter_folded.should(Match(field=stem+'.flat_child_entities.label', query=leaf_group['term']['value'], type='phrase_prefix', fuzziness='AUTO'))
            boolfilter_folded.should(Match(field=stem+'.flat_child_entities.label.folded', query=leaf_group['term']['value'], type='phrase_prefix', fuzziness='AUTO'))
            boolfilter_folded.should(Match(field=stem+'.flat_child_entities.label.folded', query=leaf_group['term']['value'], fuzziness='AUTO'))
            nested = Nested(path=stem+'.flat_child_entities', query=boolfilter_folded)
            if leaf_group['term']['inverted']:
                bool_term.must_not(nested)
            else:    
                bool_term.must(nested)
    
        elif leaf_group['term']['type'] == 'date':
            if leaf_group['term']['date_operator'] == '1': # equals query
                daterange = Range(field=stem+'.flat_child_entities.date', gte=leaf_group['term']['date_value'], lte=leaf_group['term']['date_value'])
            elif leaf_group['term']['date_operator'] == '0': # greater than query 
                daterange = Range(field=stem+'.flat_child_entities.date', lt=leaf_group['term']['date_value'])
            elif leaf_group['term']['date_operator'] == '2': # less than query
                daterange = Range(field=stem+'.flat_child_entities.date', gt=leaf_group['term']['date_value'])
            elif leaf_group['term']['date_operator'] == '3': # greater than and less than query
                daterange = Range(field=stem+'.flat_child_entities.date', gte=leaf_group['term']['start_date'], lte=leaf_group['term']['end_date'])
            
            nested_date = Nested(path=stem+'.flat_child_entities', query=daterange)
            if leaf_group['term']['inverted']:
                bool_term.must_not(nested_date)
            else:
                bool_term.must(nested_date)
    
    # add terms for any branch groups
    for key in branch_groups:
        # add a nested term for each group
        branch_group = branch_groups[key]
        
        #remove head from each path and recurse
        for value in branch_group:
            value['path'] = value['path'][1:]
        sub_query = nested_query_from_pathed_values(branch_group, stem+'.child_entities')
        
        nsub_query = Nested(path=stem, query=sub_query)
        
        bool_term.must(nsub_query)
    
    return bool_term
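# A minimal usage sketch (hypothetical ids and entity type names), combining a concept
# term two levels down the entity graph with a free-text string one level down:
#
#   pathed_values = [
#       {'term': {'type': 'concept', 'value': '1234-abcd-...', 'inverted': False},
#        'path': [{'entitytyperange': 'PRODUCTION.E12'}, {'entitytyperange': 'TYPE.E55'}]},
#       {'term': {'type': 'string', 'value': 'roman', 'inverted': False},
#        'path': [{'entitytyperange': 'NAME.E41'}]},
#   ]
#   group_query = nested_query_from_pathed_values(pathed_values, 'nested_entity.child_entities')
#   selectbox_boolfilter.must(group_query)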
Example #26
0
def build_search_results_dsl(request):
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', None)) 
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))

    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE  
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE
    
    query = Query(se, start=limit*int(page-1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()
    
    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term':
                entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                boolfilter_nested = Bool()
                boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                nested = Nested(path='child_entities', query=boolfilter_nested)
                if term['inverted']:
                    boolfilter.must_not(nested)
                else:    
                    boolfilter.must(nested)
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                terms = Terms(field='domains.conceptid', terms=concept_ids)
                nested = Nested(path='domains', query=terms)
                if term['inverted']:
                    boolfilter.must_not(nested)
                else:
                    boolfilter.must(nested)
            elif term['type'] == 'string':
                boolfilter_folded = Bool()
                boolfilter_folded.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix'))
                boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix'))
                nested = Nested(path='child_entities', query=boolfilter_folded)
                if term['inverted']:
                    boolquery.must_not(nested)
                else:    
                    boolquery.must(nested)

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            coordinates = [[geojson['coordinates'][0],geojson['coordinates'][3]], [geojson['coordinates'][2],geojson['coordinates'][1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates )
            nested = Nested(path='geometries', query=geoshape)
        else:
            buffer = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson,buffer['width'],buffer['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'] )
            nested = Nested(path='geometries', query=geoshape)

        if 'inverted' not in spatial_filter:
            spatial_filter['inverted'] = False

        if spatial_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if 'year_min_max' in temporal_filter and len(temporal_filter['year_min_max']) == 2:
        start_date = date(temporal_filter['year_min_max'][0], 1, 1)
        end_date = date(temporal_filter['year_min_max'][1], 12, 31)
        if start_date:
            start_date = start_date.isoformat()
        if end_date:
            end_date = end_date.isoformat()
        range = Range(field='dates.value', gte=start_date, lte=end_date)
        nested = Nested(path='dates', query=range)
        
        if 'inverted' not in temporal_filter:
            temporal_filter['inverted'] = False

        if temporal_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)
        
    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    return query
Example #27
0
def build_search_results_dsl(request):
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', None)) 
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))

    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE  
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE
    
    query = Query(se, start=limit*int(page-1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()
    
    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term':
                entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                boolfilter_nested = Bool()
                boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                nested = Nested(path='child_entities', query=boolfilter_nested)
                if term['inverted']:
                    boolfilter.must_not(nested)
                else:    
                    boolfilter.must(nested)
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                terms = Terms(field='domains.conceptid', terms=concept_ids)
                nested = Nested(path='domains', query=terms)
                if term['inverted']:
                    boolfilter.must_not(nested)
                else:
                    boolfilter.must(nested)
            elif term['type'] == 'string':
                boolfilter_folded = Bool()
                boolfilter_folded.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix'))
                boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix'))
                nested = Nested(path='child_entities', query=boolfilter_folded)
                if term['inverted']:
                    boolquery.must_not(nested)
                else:    
                    boolquery.must(nested)

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            coordinates = [[geojson['coordinates'][0],geojson['coordinates'][3]], [geojson['coordinates'][2],geojson['coordinates'][1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates )
            nested = Nested(path='geometries', query=geoshape)
        else:
            buffer = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson,buffer['width'],buffer['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'] )
            nested = Nested(path='geometries', query=geoshape)

        if 'inverted' not in spatial_filter:
            spatial_filter['inverted'] = False

        if spatial_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if 'year_min_max' in temporal_filter and len(temporal_filter['year_min_max']) == 2:
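        # multiplying the year bounds by 10000 assumes extendeddates.value is indexed as a
        # YYYYMMDD-style integer (cf. date_to_int below for single dates)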
        start = temporal_filter['year_min_max'][0]*10000
        end = temporal_filter['year_min_max'][1]*10000
        range = Range(field='extendeddates.value', gte=start, lte=end)
        nested = Nested(path='extendeddates', query=range)
        
        if 'inverted' not in temporal_filter:
            temporal_filter['inverted'] = False

        if temporal_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if 'filters' in temporal_filter:
        time_boolfilter = Bool()
        for temporal_filter_item in temporal_filter['filters']:
            date_type = ''
            searchdate = ''
            date_operator = ''
            for node in temporal_filter_item['nodes']:
                if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                    date_operator = node['value']
                elif node['entitytypeid'] == 'date':
                    searchdate = node['value']
                else:
                    date_type = node['value']

            terms = Terms(field='extendeddategroups.conceptid', terms=date_type)
            boolfilter.must(terms)

            date_value = date_to_int(searchdate)

            if date_operator == '1': # equals query
                range = Range(field='extendeddategroups.value', gte=date_value, lte=date_value)
            elif date_operator == '0': # greater than query
                range = Range(field='extendeddategroups.value', lt=date_value)
            elif date_operator == '2': # less than query
                range = Range(field='extendeddategroups.value', gt=date_value)

            if 'inverted' not in temporal_filter:
                temporal_filter['inverted'] = False

            if temporal_filter['inverted']:
                boolfilter.must_not(range)
            else:
                boolfilter.must(range)

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)
        
    return query
Example #28
0
def build_search_results_dsl(request):
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(
        request.GET.get('mapFilter', '{}'))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(
        request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(
        request.GET.get('temporalFilter', '{}'))

    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * int(page - 1), limit=limit)
    query.add_aggregation(
        GeoHashGridAgg(field='points',
                       name='grid',
                       precision=settings.HEX_BIN_PRECISION))
    query.add_aggregation(GeoBoundsAgg(field='points', name='bounds'))
    search_query = Bool()

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term':
                term_filter = Match(field='strings',
                                    query=term['value'],
                                    type='phrase')
                if term['inverted']:
                    search_query.must_not(term_filter)
                else:
                    search_query.must(term_filter)
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Terms(field='domains.conceptid',
                                         terms=concept_ids)
                if term['inverted']:
                    search_query.must_not(conceptid_filter)
                else:
                    search_query.must(conceptid_filter)
            elif term['type'] == 'string':
                string_filter = Bool()
                string_filter.should(
                    Match(field='strings',
                          query=term['value'],
                          type='phrase_prefix'))
                string_filter.should(
                    Match(field='strings.folded',
                          query=term['value'],
                          type='phrase_prefix'))
                if term['inverted']:
                    search_query.must_not(string_filter)
                else:
                    search_query.must(string_filter)

    if 'features' in spatial_filter:
        if len(spatial_filter['features']) > 0:
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = spatial_filter['features'][0]['properties']
            buffer = {'width': 0, 'unit': 'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            feature_geom = JSONDeserializer().deserialize(
                _buffer(feature_geom, buffer['width'], buffer['unit']).json)
            geoshape = GeoShape(field='geometries.features.geometry',
                                type=feature_geom['type'],
                                coordinates=feature_geom['coordinates'])

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            if invert_spatial_search == True:
                search_query.must_not(geoshape)
            else:
                search_query.must(geoshape)

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        now = str(datetime.utcnow())
        start_date = None
        end_date = None
        start_year = 'null'
        end_year = 'null'
        try:
            # start_date = parser.parse(temporal_filter['fromDate'])
            # start_date = start_date.isoformat()
            sd = FlexiDate.from_str(temporal_filter['fromDate'])
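            # 31556952 s is the average Gregorian year (365.2425 days * 86400 s), so this
            # converts a (possibly fractional) year into approximate milliseconds since 1970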
            start_date = int((sd.as_float() - 1970) * 31556952 * 1000)

            #start_year = parser.parse(start_date).year
            start_year = sd.year
        except:
            pass

        try:
            # end_date = parser.parse(temporal_filter['toDate'])
            # end_date = end_date.isoformat()
            ed = FlexiDate.from_str(temporal_filter['toDate'])
            end_date = int((ed.as_float() - 1970) * 31556952 * 1000)

            #end_year = parser.parse(end_date).year
            end_year = ed.year
        except:
            pass

        # add filter for concepts that define min or max dates
        sql = None
        basesql = """
            SELECT value.conceptid
            FROM (
                SELECT
                    {select_clause},
                    v.conceptid
                FROM
                    public."values" v,
                    public."values" v2
                WHERE
                    v.conceptid = v2.conceptid and
                    v.valuetype = 'min_year' and
                    v2.valuetype = 'max_year'
            ) as value
            WHERE overlap = true;
        """

        temporal_query = Bool()

        if 'inverted' not in temporal_filter:
            temporal_filter['inverted'] = False

        if temporal_filter['inverted']:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            select_clause = []
            inverted_date_filter = Bool()

            field = 'dates'
            if 'dateNodeId' in temporal_filter and temporal_filter[
                    'dateNodeId'] != '':
                field = 'tiles.data.%s' % (temporal_filter['dateNodeId'])

            if start_date is not None:
                inverted_date_filter.should(Range(field=field, lte=start_date))
                select_clause.append(
                    "(numrange(v.value::int, v2.value::int, '[]') && numrange(null,{start_year},'[]'))"
                )
            if end_date is not None:
                inverted_date_filter.should(Range(field=field, gte=end_date))
                select_clause.append(
                    "(numrange(v.value::int, v2.value::int, '[]') && numrange({end_year},null,'[]'))"
                )

            if 'dateNodeId' in temporal_filter and temporal_filter[
                    'dateNodeId'] != '':
                date_range_query = Nested(path='tiles',
                                          query=inverted_date_filter)
                temporal_query.should(date_range_query)
            else:
                temporal_query.should(inverted_date_filter)

                select_clause = " or ".join(select_clause) + " as overlap"
                sql = basesql.format(select_clause=select_clause).format(
                    start_year=start_year, end_year=end_year)

        else:
            if 'dateNodeId' in temporal_filter and temporal_filter[
                    'dateNodeId'] != '':
                range = Range(field='tiles.data.%s' %
                              (temporal_filter['dateNodeId']),
                              gte=start_date,
                              lte=end_date)
                date_range_query = Nested(path='tiles', query=range)
                temporal_query.should(date_range_query)
            else:
                date_range_query = Range(field='dates',
                                         gte=start_date,
                                         lte=end_date)
                temporal_query.should(date_range_query)

                select_clause = """
                    numrange(v.value::int, v2.value::int, '[]') && numrange({start_year},{end_year},'[]') as overlap
                """
                sql = basesql.format(select_clause=select_clause).format(
                    start_year=start_year, end_year=end_year)
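                # e.g. with fromDate 1800 and toDate 1900 the rendered inner select clause is:
                #   numrange(v.value::int, v2.value::int, '[]') && numrange(1800,1900,'[]') as overlap
                # i.e. match concepts whose min_year/max_year span overlaps the search window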

        # only run the concept-based date SQL when a dateNodeId is not specified
        if sql is not None:
            cursor = connection.cursor()
            cursor.execute(sql)
            ret = [str(row[0]) for row in cursor.fetchall()]

            if len(ret) > 0:
                conceptid_filter = Terms(field='domains.conceptid', terms=ret)
                temporal_query.should(conceptid_filter)

        search_query.must(temporal_query)

    query.add_query(search_query)
    return query