def refresh_collection(collection_id):
    """Operations to execute after updating a collection-related domain
    object. This will refresh stats and flush cache."""
    # Drop both the cached collection object and its statistics entry.
    stale = [
        cache.object_key(Collection, collection_id),
        cache.object_key(Collection, collection_id, "stats"),
    ]
    cache.kv.delete(*stale)
def refresh_entity(entity, sync=False):
    """Flush the cached entity and its parent collection after a change.

    Accepts either a Document/Entity model instance or a plain mapping
    (e.g. an indexed entity payload)."""
    if not isinstance(entity, (Document, Entity)):
        # Plain mapping: IDs come straight from the payload.
        entity_id = entity.get('id')
        collection_id = entity.get('collection_id')
    else:
        # Model object: the cache key uses the namespace-signed ID.
        entity_id = entity.collection.ns.sign(entity.id)
        collection_id = entity.collection_id
    stale = (cache.object_key(Entity, entity_id),
             cache.object_key(Collection, collection_id))
    cache.kv.delete(*stale)
def refresh_entity(entity, sync=False):
    """Evict an entity and its parent collection from the object cache."""
    if is_mapping(entity):
        entity_id = entity.get('id')
        collection_id = entity.get('collection_id')
    else:
        entity_id, collection_id = entity.id, entity.collection_id
    cache.kv.delete(
        cache.object_key(Entity, entity_id),
        cache.object_key(Collection, collection_id),
    )
def refresh_entity(entity, sync=False):
    """Remove the cached copies of an entity and its parent collection."""
    if is_mapping(entity):
        # Dict-shaped entity payload.
        entity_id = entity.get('id')
        collection_id = entity.get('collection_id')
    else:
        # Model object with plain attributes.
        entity_id = entity.id
        collection_id = entity.collection_id
    stale_keys = [cache.object_key(Entity, entity_id),
                  cache.object_key(Collection, collection_id)]
    cache.kv.delete(*stale_keys)
def refresh_collection(collection_id, sync=True):
    """Operations to execute after updating a collection-related domain
    object. This will refresh stats and flush cache."""
    if collection_id is None:
        return
    stale = [
        cache.object_key(Collection, collection_id),
        cache.object_key(Collection, collection_id, 'stats'),
    ]
    if sync:
        # Also flush the cached schema entry when a sync is requested.
        stale.append(cache.object_key(Collection, collection_id, 'schema'))
    cache.kv.delete(*stale)
def get_entity(entity_id):
    """Return an entity via the object cache, loading it from the index
    on a cache miss."""
    key = cache.object_key(Entity, entity_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached
    entity = index.get_entity(entity_id)
    cache.set_complex(key, entity, expire=cache.EXPIRE)
    return entity
def resolve(stub):
    """Resolve all (class, key, schema) lookups queued on *stub*.

    Values are fetched from the object cache in one batch; cache misses
    fall back to the per-class LOADERS, and missing entities are fetched
    from the search index grouped by schema. Results land in
    ``stub._rx_cache`` keyed on ``(clazz, key)``.
    """
    _instrument_stub(stub)
    cache_keys = {}  # cache key string -> (clazz, key)
    schemata = {}  # cache key string -> schema hint for entity batching
    for clazz, key, schema in stub._rx_queue:
        # Skip lookups already resolved on this stub.
        if (clazz, key) in stub._rx_cache:
            continue
        cid = cache.object_key(clazz, key)
        cache_keys[cid] = (clazz, key)
        schemata[cid] = schema
    keys = list(cache_keys.keys())
    queries = defaultdict(list)
    for cid, value in cache.get_many_complex(keys):
        clazz, key = cache_keys.get(cid)
        if value is None:
            log.info("MISS [%s]: %s", clazz.__name__, key)
            if clazz == Entity:
                # Batch entity misses by schema for a single index query.
                queries[schemata.get(cid)].append(key)
            loader = LOADERS.get(clazz)
            if loader is not None:
                value = loader(key)
        # Store even None results so repeated lookups short-circuit.
        stub._rx_cache[(clazz, key)] = value
    for schema, ids in queries.items():
        for entity in entities_by_ids(ids, schemata=schema, cached=True):
            stub._rx_cache[(Entity, entity.get('id'))] = entity
def resolve(stub):
    """Resolve all (class, key, schema) lookups queued on *stub*.

    Values are fetched from the object cache in one batch; cache misses
    fall back to the per-class LOADERS, and missing entities are fetched
    from the search index grouped by schema. Results land in
    ``stub._rx_cache`` keyed on ``(clazz, key)``.
    """
    _instrument_stub(stub)
    cache_keys = {}  # cache key string -> (clazz, key)
    schemata = {}  # cache key string -> schema hint for entity batching
    for clazz, key, schema in stub._rx_queue:
        # Skip lookups already resolved on this stub.
        if (clazz, key) in stub._rx_cache:
            continue
        cid = cache.object_key(clazz, key)
        cache_keys[cid] = (clazz, key)
        schemata[cid] = schema
    keys = list(cache_keys.keys())
    queries = defaultdict(list)
    for cid, value in cache.get_many_complex(keys):
        clazz, key = cache_keys.get(cid)
        if value is None:
            # log.info("MISS [%s]: %s", clazz.__name__, key)
            if clazz == Entity:
                # Batch entity misses by schema for a single index query.
                queries[schemata.get(cid)].append(key)
            loader = LOADERS.get(clazz)
            if loader is not None:
                value = loader(key)
        # Store even None results so repeated lookups short-circuit.
        stub._rx_cache[(clazz, key)] = value
    for schema, ids in queries.items():
        for entity in entities_by_ids(ids, schemata=schema, cached=True):
            stub._rx_cache[(Entity, entity.get('id'))] = entity
def entities_by_ids(ids, schemata=None, cached=False,
                    includes=None, excludes=None):
    """Iterate over unpacked entities based on a search for the given
    entity IDs."""
    ids = ensure_list(ids)
    if not ids:
        return
    body = {
        'query': {'ids': {'values': ids}},
        '_source': _source_spec(includes, excludes),
        'size': MAX_PAGE
    }
    result = es.search(index=entities_read_index(schema=schemata), body=body)
    for doc in result.get('hits', {}).get('hits', []):
        entity = unpack_result(doc)
        if entity is None:
            continue
        if cached:
            # Cache entities only briefly to avoid filling up the cache:
            key = cache.object_key(Entity, entity.get('id'))
            cache.set_complex(key, entity, expire=60 * 60)
        yield entity
def get_collection(collection_id):
    """Fetch a collection from the index."""
    if collection_id is None:
        return
    key = cache.object_key(Collection, collection_id)
    data = cache.get_complex(key)
    if data is not None:
        return data
    collection = Collection.by_id(collection_id)
    if collection is None:
        return
    data = collection.to_dict()
    stats = get_collection_stats(collection.id)
    data['count'] = stats['count']
    data['schemata'] = stats['schemata']
    # if no countries or langs are given, take the most common from the data.
    countries = ensure_list(collection.countries) or stats['countries'].keys()
    data['countries'] = registry.country.normalize_set(countries)
    languages = ensure_list(collection.languages) or stats['languages'].keys()
    data['languages'] = registry.language.normalize_set(languages)
    cache.set_complex(key, data, expire=cache.EXPIRE)
    return data
def get_collection(collection_id):
    """Return a collection, preferring the cached copy over the index."""
    key = cache.object_key(Collection, collection_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached
    data = index.get_collection(collection_id)
    cache.set_complex(key, data, expire=cache.EXPIRE)
    return data
def get_collection(collection_id):
    """Fetch a collection from the index."""
    if collection_id is None:
        return
    key = cache.object_key(Collection, collection_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached
    collection = Collection.by_id(collection_id)
    if collection is None:
        return
    data = collection.to_dict()
    stats = get_collection_stats(collection.id)
    data['count'] = stats['count']
    data['schemata'] = stats['schemata']
    # When the collection metadata does not specify countries or
    # languages, fall back to the most common values found in the data.
    countries = ensure_list(collection.countries)
    if not countries:
        countries = stats['countries'].keys()
    data['countries'] = registry.country.normalize_set(countries)
    languages = ensure_list(collection.languages)
    if not languages:
        languages = stats['languages'].keys()
    data['languages'] = registry.language.normalize_set(languages)
    cache.set_complex(key, data, expire=cache.EXPIRE)
    return data
def get_collection_facet(collection_id, facet, refresh=False):
    """Compute some statistics on the content of a collection."""
    key = cache.object_key(Collection, collection_id, facet)
    data = cache.get_complex(key)
    if not refresh and data is not None:
        return data
    filter_ = {'term': {'collection_id': collection_id}}
    body = {
        'size': 0,
        'query': {'bool': {'filter': [filter_]}},
        'aggs': {
            'values': {'terms': {'field': facet, 'size': 300}},
            'total': {'cardinality': {'field': facet}}
        }
    }
    # Narrow the search to the indexes holding the facet's type, if any.
    schemata = set()
    facet_type = registry.groups.get(facet)
    if facet_type is not None:
        schemata = model.get_type_schemata(facet_type)
    result = es.search(index=entities_read_index(schema=schemata),
                       body=body, request_timeout=3600, timeout='20m')
    aggregations = result.get('aggregations')
    buckets = aggregations.get('values').get('buckets', [])
    values = {b['key']: b['doc_count'] for b in buckets}
    data = {
        'values': values,
        'total': aggregations.get('total').get('value', 0)
    }
    cache.set_complex(key, data, expires=cache.EXPIRE)
    return data
def compute_collection(collection, sync=False):
    """Recompute collection statistics and reindex, unless already
    marked as computed (and no sync was requested)."""
    marker = cache.object_key(Collection, collection.id, 'stats')
    if cache.get(marker) and not sync:
        return
    # Marker expires 60s before the standard TTL.
    cache.set(marker, 'computed', expires=cache.EXPIRE - 60)
    log.info("Collection [%s] changed, computing...", collection.id)
    index.update_collection_stats(collection.id)
    index.index_collection(collection, sync=sync)
def get_profile(entityset_id, authz=None):
    """A profile is an entityset having a party. The idea is to cache
    profile metadata for the API, and to generate a merged view of all
    the entities the current user has access to.

    Returns a dict with the cached entityset data plus computed keys:
    ``items`` (authz-filtered), ``proxies``, ``merged``, ``label`` and
    ``shallow``. Returns None when the entityset is missing.
    """
    if entityset_id is None:
        return
    key = cache.object_key(EntitySet, entityset_id)
    data = cache.get_complex(key)
    stub = Stub()
    if data is None:
        entityset = get_entityset(entityset_id)
        if entityset is None:
            return
        # Only the raw entityset + items are cached; the authz filtering
        # and merge below are recomputed per call.
        data = entityset.to_dict()
        data["items"] = []
        for item in entityset.items():
            data["items"].append(item.to_dict())
        cache.set_complex(key, data, expires=cache.EXPIRE)
    # Filter the subset of items the current user can access
    if authz is not None:
        items = [
            i for i in data["items"]
            if authz.can(i["collection_id"], authz.READ)
        ]
        data["items"] = items
    # Load the constituent entities for the profile and generate a
    # combined proxy with all of the given properties.
    for item in data["items"]:
        # Only positively-judged items contribute to the merged view.
        if Judgement(item["judgement"]) == Judgement.POSITIVE:
            resolver.queue(stub, Entity, item.get("entity_id"))
    resolver.resolve(stub)
    merged = None
    data["proxies"] = []
    for item in data["items"]:
        item["entity"] = resolver.get(stub, Entity, item.get("entity_id"))
        if item["entity"] is not None:
            proxy = model.get_proxy(item["entity"])
            proxy.context = {}
            data["proxies"].append(proxy)
            if merged is None:
                # First proxy seeds the merged view.
                merged = proxy.clone()
                merged.context["entities"] = [proxy.id]
            else:
                merged.merge(proxy)
                merged.context["entities"].append(proxy.id)
    if merged is None:
        # No accessible entities: fall back to an empty legal entity.
        merged = model.make_entity(Entity.LEGAL_ENTITY)
    # Polish it a bit:
    merged.id = data.get("id")
    merged = name_entity(merged)
    data["merged"] = merged
    data["label"] = merged.caption
    data["shallow"] = False
    return data
def compute_collection(collection, force=False, sync=False):
    """Update collection statistics and reindex, skipping collections
    whose stats marker is still fresh (unless forced)."""
    marker = cache.object_key(Collection, collection.id, "stats")
    if cache.get(marker) is not None and not force:
        return
    refresh_collection(collection.id)
    log.info("[%s] Computing statistics...", collection)
    index.update_collection_stats(collection.id)
    # Record when the stats were last computed.
    cache.set(marker, datetime.utcnow().isoformat())
    index.index_collection(collection, sync=sync)
def compute_collection(collection, force=False, sync=False):
    """Recompute stats and reindex the collection unless a fresh stats
    marker exists (override with ``force``)."""
    marker = cache.object_key(Collection, collection.id, "stats")
    if cache.get(marker) is not None and not force:
        return
    refresh_collection(collection.id)
    log.info("[%s] Computing statistics...", collection)
    index.update_collection_stats(collection.id)
    cache.set(marker, "computed", expires=cache.EXPIRE)
    index.index_collection(collection, sync=sync)
def entities_by_ids(ids, schemata=None, cached=False,
                    includes=PROXY_INCLUDES, excludes=None):
    """Iterate over unpacked entities based on a search for the given
    entity IDs.

    When ``cached`` is True and the default source spec is in use,
    entities are read from (and written back to) the object cache, and
    only the remaining IDs are fetched from the search index. Results
    are yielded in the order the IDs were requested.
    """
    ids = ensure_list(ids)
    if not len(ids):
        return
    # Caching is only valid with the default projection, since cached
    # copies always carry the default set of fields.
    cached = cached and excludes is None and includes == PROXY_INCLUDES
    entities = {}
    if cached:
        keys = [cache.object_key(Entity, i) for i in ids]
        for _, entity in cache.get_many_complex(keys):
            if entity is not None:
                entities[entity.get("id")] = entity
    # Fix: the original tested `entities.get(id)` — the `id` builtin,
    # not the loop variable — so every ID was treated as missing and the
    # cache never reduced the search query.
    missing = [i for i in ids if entities.get(i) is None]
    index = entities_read_index(schema=schemata)
    query = {
        "query": {
            "ids": {
                "values": missing
            }
        },
        "_source": _source_spec(includes, excludes),
        "size": MAX_PAGE,
    }
    result = es.search(index=index, body=query)
    for doc in result.get("hits", {}).get("hits", []):
        entity = unpack_result(doc)
        if entity is not None:
            entity_id = entity.get("id")
            entities[entity_id] = entity
            if cached:
                key = cache.object_key(Entity, entity_id)
                cache.set_complex(key, entity, expires=60 * 60 * 2)
    # Preserve the requested ordering; skip IDs that resolved to nothing.
    for i in ids:
        entity = entities.get(i)
        if entity is not None:
            yield entity
def get_entity(entity_id, **kwargs):
    """Fetch an entity from the index."""
    if entity_id is None:
        return
    key = cache.object_key(Entity, entity_id)
    hit = cache.get_complex(key)
    if hit is not None:
        return hit
    log.debug("Entity [%s]: object cache miss", entity_id)
    # Take the first result (if any) of the ID lookup.
    return next(entities_by_ids(entity_id, cached=True), None)
def get_entity(entity_id, **kwargs):
    """Fetch an entity from the index."""
    if entity_id is None:
        return
    key = cache.object_key(Entity, entity_id)
    entity = cache.get_complex(key)
    if entity is None:
        log.debug("Entity [%s]: object cache miss", entity_id)
        # Fall back to the search index; return the first match, if any.
        for entity in entities_by_ids(entity_id, cached=True):
            return entity
        return
    return entity
def get_role(role_id):
    """Return the serialized role, using the object cache."""
    if role_id is None:
        return
    key = cache.object_key(Role, role_id)
    data = cache.get_complex(key)
    if data is not None:
        return data
    role = Role.by_id(role_id)
    if role is None:
        return
    data = role.to_dict()
    cache.set_complex(key, data, expires=cache.EXPIRE)
    return data
def get_role(role_id):
    """Fetch a serialized role from the cache or the database."""
    if role_id is None:
        return
    key = cache.object_key(Role, role_id)
    data = cache.get_complex(key)
    if data is not None:
        return data
    log.debug("Role [%s]: object cache miss", role_id)
    role = Role.by_id(role_id)
    if role is None:
        return
    data = role.to_dict()
    cache.set_complex(key, data, expire=cache.EXPIRE)
    return data
def iter_value_entities(type_, value):
    """Yield ``(entity_id, prop)`` pairs for entities referencing *value*.

    Results are cached as a Redis set keyed on the value; a separate
    'deg1' counter marks the set as populated. On a miss, the underlying
    iterator is consumed and the cache repopulated through a pipeline
    that is only flushed after iteration completes.
    """
    value = stringify(value)
    # Only grouped property types are handled; bail out otherwise.
    if type_.group is None or value is None:
        return
    key = cache.object_key(type(type_), value)
    degree_key = cache.object_key(type(type_), value, 'deg1')
    degree = cache.get(degree_key)
    if degree is not None:
        # Cache hit: stream members straight out of the Redis set.
        for item in cache.kv.sscan_iter(key):
            # Items are stored as 'qname@entity_id'.
            qname, entity_id = item.decode('utf-8').split('@', 1)
            prop = model.get_qname(qname)
            yield entity_id, prop
    else:
        degree = 0
        pipe = cache.kv.pipeline()
        for entity_id, prop in _iter_value_entities(type_, value):
            yield entity_id, prop
            item = '@'.join((prop.qname, entity_id))
            pipe.sadd(key, item)
            degree += 1
        pipe.set(degree_key, degree, ex=cache.EXPIRE)
        # NOTE(review): if the consumer abandons this generator early,
        # the pipeline is never executed and nothing gets cached.
        pipe.execute()
def get_role(role_id):
    """Load a role as a dict, going through the object cache."""
    if role_id is None:
        return
    cache_key = cache.object_key(Role, role_id)
    serialized = cache.get_complex(cache_key)
    if serialized is None:
        log.debug("Role [%s]: object cache miss", role_id)
        role = Role.by_id(role_id)
        if role is None:
            return
        serialized = role.to_dict()
        cache.set_complex(cache_key, serialized, expire=cache.EXPIRE)
    return serialized
def get_export(export_id):
    """Return a serialized export object, via the object cache."""
    if export_id is None:
        return
    key = cache.object_key(Export, export_id)
    data = cache.get_complex(key)
    if data is not None:
        return data
    export = Export.by_id(export_id)
    if export is None:
        return
    log.debug("Export cache refresh: %r", export)
    data = export.to_dict()
    cache.set_complex(key, data, expires=cache.EXPIRE)
    return data
def get_collection_stats(collection_id):
    """Compute some statistics on the content of a collection."""
    key = cache.object_key(Collection, collection_id, 'stats')
    data = cache.get_complex(key)
    if data is not None:
        return data
    log.info("Generating collection stats: %s", collection_id)
    facets = {
        'schemata': {'terms': {'field': 'schema', 'size': 1000}},
        'countries': {'terms': {'field': 'countries', 'size': 500}},
        'languages': {'terms': {'field': 'languages', 'size': 10}},
    }
    body = {
        'size': 0,
        'query': {
            'bool': {
                'filter': [{'term': {'collection_id': collection_id}}]
            }
        },
        'aggs': facets,
    }
    result = search_safe(index=entities_read_index(), body=body)
    aggregations = result.get('aggregations', {})
    data = {'count': result['hits']['total']}
    for facet in facets.keys():
        buckets = aggregations[facet]['buckets']
        data[facet] = {b['key']: b['doc_count'] for b in buckets}
    cache.set_complex(key, data, expire=cache.EXPIRE)
    return data
def get_role_channels(role):
    """Generate the set of notification channels that the current user
    should listen to."""
    key = cache.object_key(Role, role.id, 'channels')
    channels = cache.get_list(key)
    if channels:
        return channels
    channels = [Notification.GLOBAL]
    # Active users also listen to their own channel and group channels.
    if role.deleted_at is None and role.type == Role.USER:
        channels.append(channel(role))
        channels.extend(channel(group) for group in role.roles)
    cache.set_list(key, channels)
    return channels
def get_role_channels(role):
    """Generate the set of notification channels that the current user
    should listen to."""
    key = cache.object_key(Role, role.id, 'channels')
    cached = cache.get_list(key)
    if len(cached):
        return cached
    channels = [Notification.GLOBAL]
    is_active_user = role.deleted_at is None and role.type == Role.USER
    if is_active_user:
        channels.append(channel(role))
        for group in role.roles:
            channels.append(channel(group))
    cache.set_list(key, channels)
    return channels
def get_role(role_id):
    """Return a minimal serialization of the given role, cached."""
    key = cache.object_key(Role, role_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached
    role = Role.by_id(role_id)
    if role is None:
        return
    data = {
        'id': role.id,
        'name': role.name,
        'label': role.label,
        'type': role.type
    }
    cache.set_complex(key, data, expire=cache.EXPIRE)
    return data
def get_role_channels(role):
    """Generate the set of notification channels that the current user
    should listen to."""
    key = cache.object_key(Role, role.id, 'channels')
    cached = cache.get_list(key)
    if len(cached):
        return cached
    channels = [Notification.GLOBAL]
    if role.deleted_at is None and role.type == Role.USER:
        # Expand the role's authz context into role and collection
        # channels.
        authz = Authz.from_role(role)
        channels.extend(channel(r, Role) for r in authz.roles)
        channels.extend(
            channel(c, Collection) for c in authz.collections(authz.READ)
        )
    cache.set_list(key, channels, expire=cache.EXPIRE)
    return channels
def get_alert(alert_id):
    """Return a serialized alert, via the object cache."""
    key = cache.object_key(Alert, alert_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached
    alert = Alert.by_id(alert_id)
    if alert is None:
        return
    data = {
        'id': alert.id,
        'query': alert.query,
        'role_id': alert.role_id,
        'notified_at': alert.notified_at,
        'created_at': alert.created_at,
        'updated_at': alert.updated_at
    }
    cache.set_complex(key, data, expire=cache.EXPIRE)
    return data
def get_collection(collection_id):
    """Fetch a collection from the index."""
    if collection_id is None:
        return
    key = cache.object_key(Collection, collection_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached
    collection = Collection.by_id(collection_id)
    if collection is None:
        return
    data = collection.to_dict()
    # Total entity count is the sum over the per-schema counts.
    things = get_collection_things(collection.id)
    data['count'] = sum(things.values())
    cache.set_complex(key, data, expires=cache.EXPIRE)
    return data
def get_collection(collection_id):
    """Fetch a collection from the index."""
    if collection_id is None:
        return
    key = cache.object_key(Collection, collection_id)
    data = cache.get_complex(key)
    if data is not None:
        return data
    collection = Collection.by_id(collection_id)
    if collection is None:
        return
    data = collection.to_dict()
    # Derive the entity count from the schema facet values.
    facet = get_facet_values(collection.id, 'schema')
    schemata = facet.get('values', {})
    data['count'] = sum(schemata.values())
    data['schemata'] = schemata
    cache.set_complex(key, data, expires=cache.EXPIRE)
    return data
def get_collection(collection_id):
    """Fetch a collection from the index."""
    if collection_id is None:
        return
    key = cache.object_key(Collection, collection_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached
    collection = Collection.by_id(collection_id)
    if collection is None:
        return
    data = collection.to_dict()
    # Count the collection's entities (THING schema) in the index.
    body = {"query": {"term": {"collection_id": collection_id}}}
    index = entities_read_index(schema=Entity.THING)
    result = es.count(index=index, body=body)
    data["count"] = result.get("count", 0)
    cache.set_complex(key, data, expires=cache.EXPIRE)
    return data
def entities_by_ids(ids, schemata=None, cached=False,
                    includes=None, excludes=None):
    """Iterate over unpacked entities based on a search for the given
    entity IDs."""
    entity_ids = ensure_list(ids)
    if not len(entity_ids):
        return
    search_index = entities_read_index(schema=schemata)
    body = {
        'query': {'ids': {'values': entity_ids}},
        '_source': _source_spec(includes, excludes),
        'size': MAX_PAGE
    }
    result = es.search(index=search_index, body=body)
    for doc in result.get('hits', {}).get('hits', []):
        entity = unpack_result(doc)
        if entity is not None:
            if cached:
                # Cache entities only briefly to avoid filling up the cache:
                cache_key = cache.object_key(Entity, entity.get('id'))
                cache.set_complex(cache_key, entity, expire=60 * 60)
            yield entity
def refresh_role(role, sync=False):
    """Flush the cached role object, its channels and its authz keys."""
    stale = [
        cache.object_key(Role, role.id),
        cache.object_key(Role, role.id, 'channels'),
        cache.key(Authz.PREFIX, Authz.READ, role.id),
        cache.key(Authz.PREFIX, Authz.WRITE, role.id),
    ]
    cache.kv.delete(*stale)
def refresh_collection(collection_id, sync=False):
    """Operations to execute after updating a collection-related domain
    object. This will refresh stats and re-index."""
    stale = cache.object_key(Collection, collection_id)
    cache.kv.delete(stale)
def refresh_role(role, sync=False):
    """Flush the cached authz keys and role object for *role*."""
    stale = [
        cache.key(Authz.PREFIX, Authz.READ, role.id),
        cache.key(Authz.PREFIX, Authz.WRITE, role.id),
        cache.object_key(Role, role.id),
    ]
    cache.kv.delete(*stale)
def refresh_entity(collection, entity_id):
    """Evict a cached entity, then refresh its parent collection."""
    stale = cache.object_key(Entity, entity_id)
    cache.kv.delete(stale)
    refresh_collection(collection.id)