Пример #1
0
def _filters_faceted_query(facets, authz=None):
    filters = {}
    indexed = {}
    for (idx, alias, group, field, value) in facets:
        indexed[idx] = indexed.get(idx, {})
        indexed[idx][alias] = field_filter_query(field, value)
        filters[idx] = filters.get(idx, {})
        filters[idx][group] = filters[idx].get(group, [])
        filters[idx][group].append(value)

    queries = []
    for (idx, facets) in indexed.items():
        shoulds = []
        for field, values in filters[idx].items():
            shoulds.append(field_filter_query(field, values))
        query = []
        if authz is not None:
            query.append(authz_query(authz))
        query = {
            'bool': {
                'should': shoulds,
                'filter': query,
                'minimum_should_match': 1
            }
        }
        queries.append({'index': idx})
        queries.append({
            'size': 0,
            'query': query,
            'aggs': {
                'counters': {
                    'filters': {
                        'filters': facets
                    }
                }
            }
        })

    results = {}
    if not len(queries):
        return results

    res = es.msearch(body=queries)
    for resp in res.get('responses', []):
        aggs = resp.get('aggregations', {}).get('counters', {})
        for alias, value in aggs.get('buckets', {}).items():
            results[alias] = value.get('doc_count', results.get(alias, 0))
    return results
Пример #2
0
 def get_filters(self):
     """Return filter clauses for all user filters that are not facets."""
     return [
         field_filter_query(name, selected)
         for name, selected in self.parser.filters.items()
         if name not in self.parser.facet_names
     ]
Пример #3
0
def entity_tags(entity, authz):
    """Yield ``(field, value, total)`` for tag values found more than once.

    For each tag type with a group, every sufficiently specific value
    (specificity of at least 0.1) of the entity is turned into an aliased
    filter against the read index of the Thing schemata for that type. The
    counts come back from ``_filters_faceted_query``; only values whose
    total exceeds one are yielded.
    """
    proxy = model.get_proxy(entity)
    Thing = model.get(Entity.THING)
    tag_types = (registry.name, registry.email, registry.identifier,
                 registry.iban, registry.phone, registry.address)
    facets = []
    lookup = {}
    for tag_type in tag_types:
        group = tag_type.group
        if group is None:
            continue
        for position, value in enumerate(proxy.get_type_values(tag_type)):
            if tag_type.specificity(value) < 0.1:
                continue
            things = [
                schema for schema in model.get_type_schemata(tag_type)
                if schema.is_a(Thing)
            ]
            index = entities_read_index(things)
            # The alias ties a counter in the response back to its value.
            alias = '%s_%s' % (tag_type.name, position)
            facets.append((index, alias, field_filter_query(group, value)))
            lookup[alias] = (group, value)

    counts = _filters_faceted_query(authz, facets)
    for alias, (field, value) in lookup.items():
        total = counts.get(alias, 0)
        if total > 1:
            yield (field, value, total)
Пример #4
0
    def get_filters(self):
        """Build the list of filter clauses for this query.

        Adds an authorization clause when ``AUTHZ_FIELD`` is set and the
        parser carries a non-admin authz object, then one clause per user
        filter that is neither skipped nor a facet. Filters prefixed with
        ``gt:``, ``gte:``, ``lt:`` or ``lte:`` are merged into a single
        range clause per field, using the first value of each.
        """
        clauses = []
        if self.AUTHZ_FIELD is not None:
            # Enforce access control by comparing the collections the user
            # is authorized for with the one on the document.
            if self.parser.authz and not self.parser.authz.is_admin:
                clauses.append(
                    authz_query(self.parser.authz, field=self.AUTHZ_FIELD)
                )

        range_ops = {}
        for name, values in self.parser.filters.items():
            if name in self.SKIP_FILTERS or name in self.parser.facet_names:
                continue
            if name.startswith(("gt:", "gte:", "lt:", "lte:")):
                # Gather every range operator for a field into one mapping.
                op, target = name.split(":", 1)
                range_ops.setdefault(target, {})[op] = list(values)[0]
            else:
                clauses.append(field_filter_query(name, values))

        clauses.extend(
            range_filter_query(target, ops)
            for target, ops in range_ops.items()
        )
        return clauses
Пример #5
0
 def get_post_filters(self, exclude=None):
     """Return a bool query holding the facet filters.

     Only filters listed in ``parser.facet_filters`` are included; fields
     in ``SKIP_FILTERS`` and the field named by ``exclude`` are left out.
     """
     selected = [
         field_filter_query(name, values)
         for name, values in self.parser.filters.items()
         if name not in self.SKIP_FILTERS
         and name != exclude
         and name in self.parser.facet_filters
     ]
     return {'bool': {'filter': selected}}
Пример #6
0
 def __init__(self, query, node, prop=None, limit=0, count=False):
     """Set up a query for entities related to ``node``.

     The node's proxy is added to the graph of ``query``. With ``prop``
     given, the search targets the read index of the property's schema and
     filters on ``properties.<name>``; the query id becomes the property's
     qname. Without it, the search covers the schemata of the node's type
     and filters on the type's group field.
     """
     self.graph = query.graph
     self.graph.add(node.proxy)
     self.node = node
     self.id = node.id
     self.limit = limit or 0
     self.count = count
     self.entities = []
     self.prop = prop
     if prop is None:
         node_type = node.type
         type_schemata = model.get_type_schemata(node_type)
         self.index = entities_read_index(type_schemata)
         self.filter = field_filter_query(node_type.group, node.value)
     else:
         self.index = entities_read_index(prop.schema)
         self.filter = field_filter_query(
             'properties.%s' % prop.name, node.value
         )
         self.id = prop.qname
Пример #7
0
 def get_post_filters(self, exclude=None):
     """Collect the facet filters into a bool query.

     Fields in ``SKIP_FILTERS`` and the ``exclude`` field are ignored; of
     the rest, only those in ``parser.facet_filters`` are used.
     """
     clauses = []
     for name, values in self.parser.filters.items():
         ignored = name in self.SKIP_FILTERS or name == exclude
         if not ignored and name in self.parser.facet_filters:
             clauses.append(field_filter_query(name, values))
     return {'bool': {'filter': clauses}}
Пример #8
0
    def get_negative_filters(self):
        """Return the clauses used for negative filtering.

        Produces an ``exists`` clause for each field in ``parser.empties``,
        followed by a field filter for each entry of ``parser.excludes``.
        """
        present = [
            {"exists": {"field": name}}
            for name, _ in self.parser.empties.items()
        ]
        excluded = [
            field_filter_query(name, values)
            for name, values in self.parser.excludes.items()
        ]
        return present + excluded
Пример #9
0
 def __init__(self, graph, authz=None, collection_ids=None):
     """Create a query bound to ``graph``.

     Starts with no patterns. The filter list receives an authorization
     clause when ``authz`` is given and a ``collection_id`` clause when
     ``collection_ids`` is given.
     """
     self.graph, self.authz = graph, authz
     self.patterns = []
     self.filters = []
     if authz is not None:
         self.filters.append(authz_query(authz))
     if collection_ids is None:
         return
     self.filters.append(
         field_filter_query('collection_id', collection_ids)
     )
Пример #10
0
def entity_tags(entity, authz):
    """Do a search on tags of an entity.

    For every non-None value in the entity's tag fields, a count-only
    search (``size`` 0) is issued for documents carrying the same value,
    restricted by ``authz_query(authz)`` and excluding the entity's own id.
    All searches are sent in one ``es.msearch`` call.

    Returns a list of dicts with ``id``, ``value``, ``field`` and ``count``
    keys, sorted by ``count`` in descending order. Returns an empty list
    when the entity has no tag values.
    """
    # NOTE: This must also work for documents.
    FIELDS = [
        'names',
        'emails',
        'phones',
        'addresses',
        'identifiers'
    ]
    pivots = []
    queries = []
    # Go through all the tags which apply to this entity, and find how
    # often they've been mentioned in other entities.
    for field in FIELDS:
        for value in entity.get(field, []):
            if value is None:
                continue
            # Empty header line: the index is supplied to msearch() below.
            queries.append({})
            queries.append({
                'size': 0,
                'query': {
                    'bool': {
                        'filter': [
                            authz_query(authz),
                            field_filter_query(field, value)
                        ],
                        'must_not': [
                            {'ids': {'values': [entity.get('id')]}},
                        ]
                    }
                }
            })
            pivots.append((field, value))

    if not queries:
        return []

    res = es.msearch(index=entities_index(), body=queries)
    results = []
    for (field, value), resp in zip(pivots, res.get('responses', [])):
        total = resp.get('hits', {}).get('total')
        # A response without a hit count (e.g. an error entry) yields None;
        # skip it instead of comparing None with an int, which raises
        # TypeError on Python 3.
        if total is None or total <= 0:
            continue
        qvalue = quote(value.encode('utf-8'))
        key = ('filter:%s' % field, qvalue)
        results.append({
            'id': query_string([key]),
            'value': value,
            'field': field,
            'count': total
        })

    results.sort(key=lambda p: p['count'], reverse=True)
    return results
Пример #11
0
def expand_proxies(proxies, authz, properties=None, limit=0):
    """Expand the graph around ``proxies`` to find adjacent entities.

    Adjacent entities are those connected by a property (eg: Passport
    entity linked to a Person) or by an Entity type edge (eg: Person
    connected to Company through Directorship).

    properties: list of FtM Properties to expand as edges.
    limit: max number of entities to return

    Returns a list of dicts with ``property``, ``count`` and ``entities``
    keys, one per expanded property with a positive count.
    """
    graph = Graph(edge_types=(registry.entity,))
    for proxy in proxies:
        graph.add(proxy)

    source_ids = [proxy.id for proxy in proxies]
    reverse_queries = {}
    # Look up every entity that points at the given ones through a stub
    # property. Some of these are intermediate edge entities; the far end
    # of such edges is obtained with graph.resolve() below.
    for prop in _expand_properties(proxies, properties):
        if prop.stub:
            reverse = prop.reverse
            index = entities_read_index(reverse.schema)
            field = "properties.%s" % reverse.name
            reverse_queries[(index, prop.qname)] = field_filter_query(
                field, source_ids
            )

    found, counts = _counted_msearch(reverse_queries, authz, limit=limit)
    for entity in found:
        graph.add(model.get_proxy(entity))

    if limit > 0:
        graph.resolve()

    results = []
    for prop in _expand_properties(proxies, properties):
        if prop.stub:
            count = counts.get(prop.qname, 0)
        else:
            # Non-stub properties are counted from the proxies themselves.
            count = sum(len(proxy.get(prop)) for proxy in proxies)

        adjacent = set()
        for proxy in proxies:
            adjacent.update(_expand_adjacent(graph, proxy, prop))

        if count > 0:
            results.append({
                "property": prop.name,
                "count": count,
                "entities": adjacent,
            })

    return results
Пример #12
0
def _filters_faceted_query(authz, facets):
    filters = {}
    indexed = {}
    for (idx, alias, group, field, value) in facets:
        indexed[idx] = indexed.get(idx, {})
        indexed[idx][alias] = field_filter_query(field, value)
        filters[idx] = filters.get(idx, {})
        filters[idx][group] = filters[idx].get(group, [])
        filters[idx][group].append(value)

    queries = []
    for (idx, facets) in indexed.items():
        shoulds = []
        for field, values in filters[idx].items():
            shoulds.append(field_filter_query(field, values))
        query = {
            'bool': {
                'should': shoulds,
                'filter': [authz_query(authz)],
                'minimum_should_match': 1
            }
        }
        queries.append({'index': idx})
        queries.append({
            'size': 0,
            'query': query,
            'aggs': {'counters': {'filters': {'filters': facets}}}
        })

    results = {}
    if not len(queries):
        return results

    res = es.msearch(body=queries)
    for resp in res.get('responses', []):
        aggs = resp.get('aggregations', {}).get('counters', {})
        for alias, value in aggs.get('buckets', {}).items():
            results[alias] = value.get('doc_count', results.get(alias, 0))
    return results
Пример #13
0
    def get_filters(self):
        """Build the filter clauses for this query.

        A non-admin authz object on the parser contributes an authorization
        clause. After that comes one clause per user filter that is neither
        in ``SKIP_FILTERS`` nor a facet name.
        """
        clauses = []
        authz = self.parser.authz
        # Enforce access control by comparing the collections the user is
        # authorized for with the one on the document.
        if authz and not authz.is_admin:
            clauses.append(authz_query(authz))

        clauses.extend(
            field_filter_query(name, values)
            for name, values in self.parser.filters.items()
            if name not in self.SKIP_FILTERS
            and name not in self.parser.facet_names
        )
        return clauses
Пример #14
0
    def get_filters(self):
        """Return the filter clauses that apply to this query.

        Starts with an authorization clause when the parser's authz object
        is set and not an admin, then adds a clause for each user filter
        that is not skipped and not used as a facet.
        """
        clauses = []
        authz = self.parser.authz
        if authz and not authz.is_admin:
            # Access control: restrict results to the collections this
            # user is authorized for.
            clauses.append(authz_query(authz))

        for name, values in self.parser.filters.items():
            if name in self.SKIP_FILTERS or name in self.parser.facet_names:
                continue
            clauses.append(field_filter_query(name, values))
        return clauses
Пример #15
0
def convert_filters(filters):
    """Convert a mapping of field name to values into filter clauses.

    Values given for ``id`` and ``_id`` are merged into one ``ids`` clause,
    which is appended after all other clauses. Every other field becomes
    ``field_filter_query(field, list(values))``.

    Returns the list of clauses; empty when there is nothing to filter on.
    """
    ret = []
    id_values = []

    # ``items()`` exists on both Python 2 and 3, whereas ``iteritems()`` is
    # Python 2 only and raises AttributeError on Python 3 dicts.
    for field, values in filters.items():
        # Combine id or _id into one filter
        if field in ('id', '_id'):
            id_values.extend(values)
        else:
            ret.append(field_filter_query(field, list(values)))

    if id_values:
        ret.append({'ids': {'values': id_values}})

    return ret
Пример #16
0
def entity_tags(entity, authz):
    """Yield ``(field, value, total)`` for tag values found elsewhere.

    For each tag type with a group, every value of the entity whose
    specificity is at least 0.1 gets its own count-only search against the
    read index of the Thing schemata for that type. Each search is
    restricted by ``authz_query(authz)`` and excludes the entity's own id.
    All searches are sent in one ``es.msearch`` call; values with a
    positive hit total are yielded.
    """
    proxy = model.get_proxy(entity)
    Thing = model.get(Entity.THING)
    tag_types = (registry.name, registry.email, registry.identifier,
                 registry.iban, registry.phone, registry.address)
    pivots = []
    bodies = []
    for tag_type in tag_types:
        if tag_type.group is None:
            continue
        for value in proxy.get_type_values(tag_type):
            if tag_type.specificity(value) < 0.1:
                continue
            things = [
                schema for schema in model.get_type_schemata(tag_type)
                if schema.is_a(Thing)
            ]
            bodies.append({'index': entities_read_index(things)})
            matching = [
                authz_query(authz),
                field_filter_query(tag_type.group, value)
            ]
            # Leave the entity itself out of the count.
            excluded = [{'ids': {'values': [entity.get('id')]}}]
            bodies.append({
                'size': 0,
                'query': {'bool': {'filter': matching, 'must_not': excluded}}
            })
            pivots.append((tag_type.group, value))

    if not bodies:
        return

    answer = es.msearch(body=bodies)
    for (field, value), item in zip(pivots, answer.get('responses', [])):
        total = item.get('hits', {}).get('total')
        if total is not None and total > 0:
            yield (field, value, total)
Пример #17
0
def entity_tags(entity, authz):
    """Yield ``(field, value, total)`` for tag values found elsewhere.

    Each non-empty value in the entity's tag fields gets a count-only
    search, restricted by ``authz_query(authz)`` and excluding the entity's
    own id. The searches are sent in one ``es.msearch`` call against the
    entities read index; values with a positive hit total are yielded.
    """
    # NOTE: This must also work for documents.
    tag_fields = ('names', 'emails', 'phones', 'addresses', 'identifiers')
    pivots = []
    bodies = []
    for field in tag_fields:
        for value in entity.get(field, []):
            if value is None or len(value) == 0:
                continue
            matching = [
                authz_query(authz),
                field_filter_query(field, value)
            ]
            # Leave the entity itself out of the count.
            excluded = [{'ids': {'values': [entity.get('id')]}}]
            # Empty header line: the index is supplied to msearch() below.
            bodies.append({})
            bodies.append({
                'size': 0,
                'query': {'bool': {'filter': matching, 'must_not': excluded}}
            })
            pivots.append((field, value))

    if not bodies:
        return

    answer = es.msearch(index=entities_read_index(), body=bodies)
    for (field, value), item in zip(pivots, answer.get('responses', [])):
        total = item.get('hits', {}).get('total')
        if total is not None and total > 0:
            yield (field, value, total)
Пример #18
0
    def get_filters_list(self, skip):
        """Return filter clauses for all parser filters not in ``skip``.

        Filters prefixed with ``gt:``, ``gte:``, ``lt:`` or ``lte:`` are
        merged into a single range clause per field, using the first value
        of each. Range clauses come after the plain field clauses.
        """
        clauses = []
        range_ops = {}
        for name, values in self.parser.filters.items():
            if name in skip:
                continue
            if name.startswith(("gt:", "gte:", "lt:", "lte:")):
                # Gather every range operator for a field into one mapping.
                op, target = name.split(":", 1)
                range_ops.setdefault(target, {})[op] = list(values)[0]
            else:
                clauses.append(field_filter_query(name, values))

        clauses.extend(
            range_filter_query(target, ops)
            for target, ops in range_ops.items()
        )
        return clauses
Пример #19
0
def entity_tags(proxy, authz, prop_types=DEFAULT_TAGS):
    """Count other mentions of the proxy's property values.

    Only values whose property type is in ``prop_types`` and whose
    specificity exceeds 0.1 are considered. Each is counted across the read
    index of the Thing schemata for its type via ``_counted_msearch``.

    Returns a list of dicts with ``id``, ``field``, ``value`` and ``count``
    keys for every value counted more than once, sorted by ``count`` in
    descending order. The reported count is one less than the raw count.
    """
    values = set()
    for prop, value in proxy.itervalues():
        if prop.type in prop_types and prop.specificity(value) > 0.1:
            values.add((prop.type, value))

    type_names = [t.name for t in prop_types]
    log.debug("Tags[%s]: %s values", type_names, len(values))

    queries = {}
    lookup = {}
    for (type_, value) in values:
        key = type_.node_id(value)
        lookup[key] = (type_, value)
        # Only thing schemata can contain further mentions.
        things = [
            schema for schema in model.get_type_schemata(type_)
            if schema.is_a(Entity.THING)
        ]
        index = entities_read_index(things)
        queries[(index, key)] = field_filter_query(type_.group, value)

    _, counts = _counted_msearch(queries, authz)
    found = []
    for key, count in counts.items():
        if count <= 1:
            continue
        type_, value = lookup[key]
        found.append({
            "id": key,
            "field": type_.group,
            "value": value,
            "count": count - 1,
        })

    return sorted(found, key=lambda p: p["count"], reverse=True)
Пример #20
0
def convert_filters(filters):
    """Return one ``field_filter_query`` clause per entry of ``filters``."""
    return [
        field_filter_query(field, values)
        for field, values in filters.items()
    ]
Пример #21
0
def convert_filters(filters):
    """Turn a field-to-values mapping into a list of filter clauses."""
    clauses = [field_filter_query(*item) for item in filters.items()]
    return clauses