def _filters_faceted_query(facets, authz=None):
    """Count matching documents for a set of aliased filters, per index.

    ``facets`` is an iterable of ``(index, alias, group, field, value)``
    tuples. For every distinct index one multi-search sub-request is
    built: its query matches documents hitting at least one of the grouped
    values (optionally restricted by ``authz``), and a ``filters``
    aggregation named ``counters`` counts each alias separately.

    Returns a dict mapping each alias to the reported ``doc_count``; the
    dict is empty when no facets were supplied.
    """
    alias_filters = {}
    grouped_values = {}
    for idx, alias, group, field, value in facets:
        alias_filters.setdefault(idx, {})[alias] = field_filter_query(field, value)
        grouped_values.setdefault(idx, {}).setdefault(group, []).append(value)

    body = []
    for idx, named_filters in alias_filters.items():
        # Any one of the grouped values is enough for a document to match.
        shoulds = [
            field_filter_query(group, values)
            for group, values in grouped_values[idx].items()
        ]
        required = []
        if authz is not None:
            required.append(authz_query(authz))
        body.append({'index': idx})
        body.append({
            'size': 0,
            'query': {
                'bool': {
                    'should': shoulds,
                    'filter': required,
                    'minimum_should_match': 1
                }
            },
            'aggs': {
                'counters': {
                    'filters': {
                        'filters': named_filters
                    }
                }
            }
        })

    counts = {}
    if not body:
        return counts

    response = es.msearch(body=body)
    for resp in response.get('responses', []):
        counters = resp.get('aggregations', {}).get('counters', {})
        for alias, bucket in counters.get('buckets', {}).items():
            # Keep any earlier count when this bucket has no doc_count.
            counts[alias] = bucket.get('doc_count', counts.get(alias, 0))
    return counts
def get_filters(self):
    """Translate the parsed UI filters into query filter clauses.

    Filters whose field is one of the parser's facet names are left out.
    """
    return [
        field_filter_query(field, values)
        for field, values in self.parser.filters.items()
        if field not in self.parser.facet_names
    ]
def entity_tags(entity, authz):
    """Find how often the tag values of an entity occur elsewhere.

    Generator: yields ``(field, value, total)`` for each tag value whose
    reported count is greater than one.
    """
    proxy = model.get_proxy(entity)
    Thing = model.get(Entity.THING)
    tag_types = (
        registry.name,
        registry.email,
        registry.identifier,
        registry.iban,
        registry.phone,
        registry.address,
    )

    facets = []
    lookup = {}
    # Walk every tag value on this entity and prepare one aliased filter
    # per value so all of them can be counted in a single round trip.
    for type_ in tag_types:
        if type_.group is None:
            continue
        for position, value in enumerate(proxy.get_type_values(type_)):
            if type_.specificity(value) < 0.1:
                continue
            things = [
                schema for schema in model.get_type_schemata(type_)
                if schema.is_a(Thing)
            ]
            index = entities_read_index(things)
            alias = '%s_%s' % (type_.name, position)
            # NOTE(review): each facet here is a 3-tuple (index, alias,
            # query); confirm the _filters_faceted_query in scope accepts
            # that shape.
            facets.append((index, alias, field_filter_query(type_.group, value)))
            lookup[alias] = (type_.group, value)

    counts = _filters_faceted_query(authz, facets)
    for alias, (field, value) in lookup.items():
        total = counts.get(alias, 0)
        if total > 1:
            yield (field, value, total)
def get_filters(self):
    """Translate the parsed UI filters into query filter clauses.

    The returned list holds, in order: an authorization clause (only when
    ``AUTHZ_FIELD`` is set and the parser carries a non-admin authz), one
    clause per plain filter, and one range clause per field that had
    ``gt:``/``gte:``/``lt:``/``lte:`` prefixed filters. Fields listed in
    ``SKIP_FILTERS`` or in the parser's facet names are ignored.
    """
    clauses = []
    # Enforce access control by comparing the collections the user may
    # see with the one stored on the document.
    if (self.AUTHZ_FIELD is not None
            and self.parser.authz
            and not self.parser.authz.is_admin):
        clauses.append(authz_query(self.parser.authz, field=self.AUTHZ_FIELD))

    range_prefixes = ("gt:", "gte:", "lt:", "lte:")
    ranges = {}
    for field, values in self.parser.filters.items():
        if field in self.SKIP_FILTERS:
            continue
        if field in self.parser.facet_names:
            continue
        if field.startswith(range_prefixes):
            # Merge all operators for one field into a single range
            # query; only the first supplied value is used.
            op, target = field.split(":", 1)
            ranges.setdefault(target, {})[op] = list(values)[0]
            continue
        clauses.append(field_filter_query(field, values))

    clauses.extend(
        range_filter_query(target, ops) for target, ops in ranges.items()
    )
    return clauses
def get_post_filters(self, exclude=None):
    """Build the post-aggregation filter for the parsed facet filters.

    Only fields present in the parser's ``facet_filters`` contribute a
    clause; fields in ``SKIP_FILTERS`` and the field named by ``exclude``
    are left out. Returns a ``bool`` query wrapping the clauses.
    """
    clauses = [
        field_filter_query(field, values)
        for field, values in self.parser.filters.items()
        if not (field in self.SKIP_FILTERS or field == exclude)
        and field in self.parser.facet_filters
    ]
    return {'bool': {'filter': clauses}}
def __init__(self, query, node, prop=None, limit=0, count=False):
    """Prepare the index and filter used to look up matches for a node.

    The node's proxy is added to the query's graph. When ``prop`` is
    given, the lookup targets that property on the index for its schema
    and the instance id becomes the property's qualified name; otherwise
    the lookup targets the node type's group across the indexes for the
    schemata of that type, and the id stays the node's id.
    """
    self.graph = query.graph
    self.graph.add(node.proxy)
    self.node = node
    self.id = node.id
    self.limit = limit or 0
    self.count = count
    self.entities = []
    self.prop = prop

    if prop is None:
        type_schemata = model.get_type_schemata(self.node.type)
        self.index = entities_read_index(type_schemata)
        self.filter = field_filter_query(node.type.group, node.value)
    else:
        self.index = entities_read_index(prop.schema)
        prop_field = 'properties.%s' % prop.name
        self.filter = field_filter_query(prop_field, node.value)
        self.id = prop.qname
def get_post_filters(self, exclude=None):
    """Assemble the filters that are applied after aggregation.

    A clause is produced for every parsed filter whose field is one of
    the parser's ``facet_filters``, unless the field is in
    ``SKIP_FILTERS`` or equals ``exclude``. The clauses are returned
    wrapped in a ``bool`` query.
    """
    post = []
    for name, selected in self.parser.filters.items():
        ignored = name in self.SKIP_FILTERS or name == exclude
        if not ignored and name in self.parser.facet_filters:
            post.append(field_filter_query(name, selected))
    return {'bool': {'filter': post}}
def get_negative_filters(self):
    """Build the clauses used for negative filtering.

    Returns an ``exists`` clause for every field in the parser's
    ``empties``, followed by a field filter clause for every entry in the
    parser's ``excludes``.
    """
    clauses = [
        {"exists": {"field": field}}
        for field, _ignored in self.parser.empties.items()
    ]
    clauses.extend(
        field_filter_query(field, values)
        for field, values in self.parser.excludes.items()
    )
    return clauses
def __init__(self, graph, authz=None, collection_ids=None):
    """Store the graph and collect the filters shared by all patterns.

    An authorization filter is added when ``authz`` is given, and a
    ``collection_id`` filter when ``collection_ids`` is given.
    """
    self.graph = graph
    self.authz = authz
    self.patterns = []
    self.filters = shared = []

    if authz is not None:
        shared.append(authz_query(authz))

    if collection_ids is not None:
        shared.append(field_filter_query('collection_id', collection_ids))
def entity_tags(entity, authz):
    """Do a search on tags of an entity.

    For every non-None value in the entity's tag fields, count how many
    other entities (the entity's own id is excluded) carry the same
    value, restricted by ``authz``.

    Returns a list of dicts with the keys ``id`` (a query string that
    filters on the value), ``value``, ``field`` and ``count``, sorted by
    ``count`` in descending order. The list is empty when the entity has
    no tag values.
    """
    # NOTE: This must also work for documents.
    FIELDS = [
        'names',
        'emails',
        'phones',
        'addresses',
        'identifiers'
    ]
    pivots = []
    queries = []
    # Go through all the tags which apply to this entity, and find how
    # often they've been mentioned in other entities.
    for field in FIELDS:
        for value in entity.get(field, []):
            if value is None:
                continue
            # msearch bodies alternate a header line and a search line.
            queries.append({})
            queries.append({
                'size': 0,
                'query': {
                    'bool': {
                        'filter': [
                            authz_query(authz),
                            field_filter_query(field, value)
                        ],
                        'must_not': [
                            {'ids': {'values': [entity.get('id')]}},
                        ]
                    }
                }
            })
            pivots.append((field, value))

    if not queries:
        return []

    res = es.msearch(index=entities_index(), body=queries)
    results = []
    for (field, value), resp in zip(pivots, res.get('responses', [])):
        total = resp.get('hits', {}).get('total')
        # A response without a hit count yields None here; skip it rather
        # than comparing None with an integer.
        if total is not None and total > 0:
            qvalue = quote(value.encode('utf-8'))
            key = ('filter:%s' % field, qvalue)
            results.append({
                'id': query_string([key]),
                'value': value,
                'field': field,
                'count': total
            })

    results.sort(key=lambda p: p['count'], reverse=True)
    return results
def expand_proxies(proxies, authz, properties=None, limit=0):
    """Expand the graph around the given proxies to find adjacent entities.

    Adjacent entities are connected either by a property (eg: Passport
    entity linked to a Person) or by an Entity type edge (eg: Person
    connected to Company through Directorship).

    properties: list of FtM Properties to expand as edges.
    limit: max number of entities to return

    Returns a list of dicts with the keys ``property``, ``count`` and
    ``entities`` (a set), one per property with a count above zero.
    """
    graph = Graph(edge_types=(registry.entity,))
    for proxy in proxies:
        graph.add(proxy)

    entity_ids = [proxy.id for proxy in proxies]
    queries = {}
    # Stub properties are looked up through their reverse: find every
    # entity pointing at one of the given proxies. In some cases this
    # returns the intermediate edge entity; graph.resolve() is then used
    # to fetch the far end of the edge.
    for prop in _expand_properties(proxies, properties):
        if prop.stub:
            index = entities_read_index(prop.reverse.schema)
            reverse_field = "properties.%s" % prop.reverse.name
            queries[(index, prop.qname)] = field_filter_query(
                reverse_field, entity_ids
            )

    found, counts = _counted_msearch(queries, authz, limit=limit)
    for entity in found:
        graph.add(model.get_proxy(entity))

    if limit > 0:
        graph.resolve()

    results = []
    for prop in _expand_properties(proxies, properties):
        count = counts.get(prop.qname, 0)
        if not prop.stub:
            # Non-stub properties are counted from the proxies' own values.
            count = sum(len(p.get(prop)) for p in proxies)

        adjacent = set()
        for proxy in proxies:
            adjacent.update(_expand_adjacent(graph, proxy, prop))

        if count > 0:
            results.append({
                "property": prop.name,
                "count": count,
                "entities": adjacent,
            })
    return results
def _filters_faceted_query(authz, facets):
    """Count matching documents for a set of aliased filters, per index.

    ``facets`` is an iterable of ``(index, alias, group, field, value)``
    tuples. For every distinct index one multi-search sub-request is
    built: its query is restricted by ``authz`` and matches documents
    hitting at least one of the grouped values, while a ``filters``
    aggregation named ``counters`` counts each alias separately.

    Returns a dict mapping each alias to the reported ``doc_count``; the
    dict is empty when no facets were supplied.
    """
    by_alias = {}
    by_group = {}
    for idx, alias, group, field, value in facets:
        by_alias.setdefault(idx, {})[alias] = field_filter_query(field, value)
        by_group.setdefault(idx, {}).setdefault(group, []).append(value)

    requests = []
    for idx, named_filters in by_alias.items():
        # Any one of the grouped values is enough for a document to match.
        any_of = [
            field_filter_query(group, values)
            for group, values in by_group[idx].items()
        ]
        search = {
            'size': 0,
            'query': {
                'bool': {
                    'should': any_of,
                    'filter': [authz_query(authz)],
                    'minimum_should_match': 1
                }
            },
            'aggs': {'counters': {'filters': {'filters': named_filters}}}
        }
        requests.extend([{'index': idx}, search])

    totals = {}
    if not requests:
        return totals

    response = es.msearch(body=requests)
    for resp in response.get('responses', []):
        counters = resp.get('aggregations', {}).get('counters', {})
        for alias, bucket in counters.get('buckets', {}).items():
            # Keep any earlier count when this bucket has no doc_count.
            totals[alias] = bucket.get('doc_count', totals.get(alias, 0))
    return totals
def get_filters(self):
    """Translate the parsed UI filters into query filter clauses.

    An authorization clause comes first when the parser carries a
    non-admin authz. Fields in ``SKIP_FILTERS`` and fields that are facet
    names do not produce a clause.
    """
    clauses = []

    # Enforce access control by comparing the collections the user may
    # see with the one stored on the document.
    if self.parser.authz and not self.parser.authz.is_admin:
        clauses.append(authz_query(self.parser.authz))

    clauses.extend(
        field_filter_query(field, values)
        for field, values in self.parser.filters.items()
        if field not in self.SKIP_FILTERS
        and field not in self.parser.facet_names
    )
    return clauses
def get_filters(self):
    """Build the filter clauses for the filters requested by the user.

    The list starts with an authorization clause unless the parser has no
    authz or the authz is an admin. Filters on fields in ``SKIP_FILTERS``
    or on facet names are not included.
    """
    result = []

    # Access control: restrict results to what the (non-admin) user is
    # authorized to see.
    if self.parser.authz and not self.parser.authz.is_admin:
        result.append(authz_query(self.parser.authz))

    for name, selected in self.parser.filters.items():
        if name in self.SKIP_FILTERS:
            continue
        if name in self.parser.facet_names:
            continue
        result.append(field_filter_query(name, selected))
    return result
def convert_filters(filters):
    """Convert a mapping of field names to values into filter clauses.

    Values given for ``id`` and ``_id`` are merged into a single ``ids``
    clause, which is appended after all other clauses. Every other field
    becomes one field filter clause built from a list copy of its values.

    Returns the list of clauses (empty for an empty mapping).
    """
    ret = []
    id_values = []
    # ``items()`` rather than ``iteritems()``: the latter does not exist
    # on Python 3 dicts.
    for field, values in filters.items():
        # Combine id or _id into one filter
        if field in ('id', '_id'):
            id_values.extend(values)
        else:
            ret.append(field_filter_query(field, list(values)))
    if id_values:
        ret.append({'ids': {'values': id_values}})
    return ret
def entity_tags(entity, authz):
    """Find how often the tag values of an entity occur in other entities.

    Generator: yields ``(field, value, total)`` for every tag value that
    has at least one hit, where the entity's own id is excluded from the
    search and results are restricted by ``authz``.
    """
    proxy = model.get_proxy(entity)
    Thing = model.get(Entity.THING)
    tag_types = (
        registry.name,
        registry.email,
        registry.identifier,
        registry.iban,
        registry.phone,
        registry.address,
    )

    pivots = []
    body = []
    # One header/search pair per sufficiently specific tag value.
    for type_ in tag_types:
        if type_.group is None:
            continue
        for value in proxy.get_type_values(type_):
            if type_.specificity(value) < 0.1:
                continue
            things = [
                schema for schema in model.get_type_schemata(type_)
                if schema.is_a(Thing)
            ]
            header = {'index': entities_read_index(things)}
            search = {
                'size': 0,
                'query': {
                    'bool': {
                        'filter': [
                            authz_query(authz),
                            field_filter_query(type_.group, value)
                        ],
                        'must_not': [
                            {'ids': {'values': [entity.get('id')]}},
                        ]
                    }
                }
            }
            body.extend([header, search])
            pivots.append((type_.group, value))

    if not body:
        return

    response = es.msearch(body=body)
    for (field, value), resp in zip(pivots, response.get('responses', [])):
        total = resp.get('hits', {}).get('total')
        if total is None:
            continue
        if total > 0:
            yield (field, value, total)
def entity_tags(entity, authz):
    """Find how often the tag values of an entity occur in other entities.

    Generator: yields ``(field, value, total)`` for every non-empty tag
    value that has at least one hit, where the entity's own id is
    excluded from the search and results are restricted by ``authz``.
    """
    # NOTE: This must also work for documents.
    tag_fields = ('names', 'emails', 'phones', 'addresses', 'identifiers')

    pivots = []
    body = []
    # One header/search pair per usable tag value on the entity.
    for field in tag_fields:
        for value in entity.get(field, []):
            if value is None or len(value) == 0:
                continue
            search = {
                'size': 0,
                'query': {
                    'bool': {
                        'filter': [
                            authz_query(authz),
                            field_filter_query(field, value)
                        ],
                        'must_not': [
                            {'ids': {'values': [entity.get('id')]}},
                        ]
                    }
                }
            }
            body.extend([{}, search])
            pivots.append((field, value))

    if not body:
        return

    response = es.msearch(index=entities_read_index(), body=body)
    for (field, value), resp in zip(pivots, response.get('responses', [])):
        total = resp.get('hits', {}).get('total')
        if total is None:
            continue
        if total > 0:
            yield (field, value, total)
def get_filters_list(self, skip):
    """Build filter clauses for the parsed filters, except skipped fields.

    Plain filters become one field filter clause each, in iteration
    order. Filters prefixed with ``gt:``, ``gte:``, ``lt:`` or ``lte:``
    are merged per field into a single range clause; the range clauses
    follow the plain ones.
    """
    range_prefixes = ("gt:", "gte:", "lt:", "lte:")
    clauses = []
    ranges = {}
    for field, values in self.parser.filters.items():
        if field in skip:
            continue
        if not field.startswith(range_prefixes):
            clauses.append(field_filter_query(field, values))
            continue
        # Gather every operator for a field into one range query; only
        # the first supplied value is used.
        op, target = field.split(":", 1)
        ranges.setdefault(target, {})[op] = list(values)[0]

    clauses.extend(
        range_filter_query(target, ops) for target, ops in ranges.items()
    )
    return clauses
def entity_tags(proxy, authz, prop_types=DEFAULT_TAGS):
    """Count other mentions of a proxy's property values.

    Only values whose property type is in ``prop_types`` and whose
    specificity exceeds 0.1 are considered. Returns a list of dicts with
    the keys ``id``, ``field``, ``value`` and ``count`` for every value
    counted more than once, sorted by ``count`` in descending order. The
    reported ``count`` is one less than the raw count.
    """
    values = {
        (prop.type, value)
        for prop, value in proxy.itervalues()
        if prop.type in prop_types and prop.specificity(value) > 0.1
    }

    type_names = [t.name for t in prop_types]
    log.debug("Tags[%s]: %s values", type_names, len(values))

    queries = {}
    lookup = {}
    for type_, value in values:
        key = type_.node_id(value)
        lookup[key] = (type_, value)
        # Determine which indexes may contain further mentions (only things).
        things = [
            schema for schema in model.get_type_schemata(type_)
            if schema.is_a(Entity.THING)
        ]
        index = entities_read_index(things)
        queries[(index, key)] = field_filter_query(type_.group, value)

    _, counts = _counted_msearch(queries, authz)

    results = []
    for key, count in counts.items():
        if count <= 1:
            continue
        type_, value = lookup[key]
        results.append({
            "id": key,
            "field": type_.group,
            "value": value,
            "count": count - 1,
        })

    results.sort(key=lambda item: item["count"], reverse=True)
    return results
def convert_filters(filters):
    """Convert a mapping of field names to values into filter clauses.

    Returns one field filter clause per entry, in iteration order.
    """
    return [
        field_filter_query(field, values)
        for field, values in filters.items()
    ]
def convert_filters(filters):
    """Build one field filter clause for every entry of ``filters``.

    ``filters`` maps field names to the values to filter on; the clauses
    are returned as a list in the mapping's iteration order.
    """
    clauses = []
    clauses.extend(
        field_filter_query(name, selected)
        for name, selected in filters.items()
    )
    return clauses