示例#1
0
def refresh_collection(collection_id):
    """Flush the cached state of a collection.

    Deletes the cache entry of the collection object and the entry
    holding its ``stats`` from ``cache.kv`` in a single call.
    """
    stale_keys = (
        cache.object_key(Collection, collection_id),
        cache.object_key(Collection, collection_id, "stats"),
    )
    cache.kv.delete(*stale_keys)
示例#2
0
文件: entities.py 项目: wdsn/aleph
def refresh_entity(entity, sync=False):
    """Delete the cache entries of an entity and of its collection.

    ``entity`` is either a ``Document``/``Entity`` instance, whose ID is
    signed with the namespace of its collection before the cache key is
    built, or any other object offering ``.get()`` (e.g. a dict), whose
    ``id`` and ``collection_id`` values are used as they are.

    ``sync`` is accepted but not used by this function.
    """
    is_model = isinstance(entity, (Document, Entity))
    if not is_model:
        entity_id = entity.get('id')
        collection_id = entity.get('collection_id')
    else:
        entity_id = entity.collection.ns.sign(entity.id)
        collection_id = entity.collection_id
    stale_keys = [
        cache.object_key(Entity, entity_id),
        cache.object_key(Collection, collection_id),
    ]
    cache.kv.delete(*stale_keys)
示例#3
0
文件: __init__.py 项目: pudo/aleph
def refresh_entity(entity, sync=False):
    """Delete the cache entries of an entity and of its collection.

    Mappings are read through ``.get('id')`` / ``.get('collection_id')``;
    any other object is read through its ``id`` and ``collection_id``
    attributes. ``sync`` is accepted but not used by this function.
    """
    if not is_mapping(entity):
        entity_id, collection_id = entity.id, entity.collection_id
    else:
        entity_id = entity.get('id')
        collection_id = entity.get('collection_id')
    stale_keys = (
        cache.object_key(Entity, entity_id),
        cache.object_key(Collection, collection_id),
    )
    cache.kv.delete(*stale_keys)
示例#4
0
def refresh_entity(entity, sync=False):
    """Invalidate the cached entity and the cached collection it is in.

    The entity ID and collection ID come from dictionary-style lookups
    when ``entity`` is a mapping and from attributes otherwise.
    ``sync`` is accepted but not used by this function.
    """
    from_mapping = is_mapping(entity)
    entity_id = entity.get('id') if from_mapping else entity.id
    if from_mapping:
        collection_id = entity.get('collection_id')
    else:
        collection_id = entity.collection_id
    entity_key = cache.object_key(Entity, entity_id)
    collection_key = cache.object_key(Collection, collection_id)
    cache.kv.delete(entity_key, collection_key)
示例#5
0
def refresh_collection(collection_id, sync=True):
    """Flush the cache entries that belong to a collection.

    Does nothing when ``collection_id`` is ``None``. Otherwise the
    collection object key and its ``stats`` key are deleted; when
    ``sync`` is truthy the ``schema`` key is deleted as well.
    """
    if collection_id is not None:
        suffixes = ('stats', 'schema') if sync else ('stats',)
        stale_keys = [cache.object_key(Collection, collection_id)]
        for suffix in suffixes:
            stale_keys.append(
                cache.object_key(Collection, collection_id, suffix)
            )
        cache.kv.delete(*stale_keys)
示例#6
0
def get_entity(entity_id):
    """Return an entity, reading through the object cache.

    On a cache miss the entity is loaded with ``index.get_entity`` and
    whatever that returns is written back to the cache with the default
    expiry before being returned.
    """
    key = cache.object_key(Entity, entity_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached
    loaded = index.get_entity(entity_id)
    cache.set_complex(key, loaded, expire=cache.EXPIRE)
    return loaded
示例#7
0
def resolve(stub):
    """Load every object queued on ``stub`` into ``stub._rx_cache``.

    Queue items already present in ``stub._rx_cache`` are skipped. The
    rest are looked up in the object cache in one batch. For each miss
    the class-specific loader from ``LOADERS`` (if any) is called, and
    missing entities are additionally grouped by schema and fetched in
    bulk through ``entities_by_ids``.
    """
    _instrument_stub(stub)
    pending = {}        # cache key -> (class, object key)
    schema_by_cid = {}  # cache key -> schema given when queueing
    for clazz, key, schema in stub._rx_queue:
        if (clazz, key) not in stub._rx_cache:
            cid = cache.object_key(clazz, key)
            pending[cid] = (clazz, key)
            schema_by_cid[cid] = schema

    queries = defaultdict(list)
    for cid, value in cache.get_many_complex(list(pending)):
        clazz, key = pending.get(cid)
        if value is None:
            log.info("MISS [%s]: %s", clazz.__name__, key)
            if clazz == Entity:
                # Collect entity IDs per schema for the bulk fetch below.
                queries[schema_by_cid.get(cid)].append(key)
            loader = LOADERS.get(clazz)
            value = loader(key) if loader is not None else None
        stub._rx_cache[(clazz, key)] = value

    for schema, ids in queries.items():
        fetched = entities_by_ids(ids, schemata=schema, cached=True)
        for entity in fetched:
            stub._rx_cache[(Entity, entity.get('id'))] = entity
示例#8
0
文件: resolver.py 项目: pudo/aleph
def resolve(stub):
    """Fill ``stub._rx_cache`` with all objects queued on ``stub``.

    Items of ``stub._rx_queue`` that are not resolved yet are fetched
    from the object cache in a single batch. A cache miss falls back to
    the loader registered for the class in ``LOADERS``; entity misses
    are also collected per schema and loaded via ``entities_by_ids``.
    """
    _instrument_stub(stub)
    targets = {}
    schemata = {}
    unresolved = (
        item for item in stub._rx_queue
        if (item[0], item[1]) not in stub._rx_cache
    )
    for clazz, key, schema in unresolved:
        cid = cache.object_key(clazz, key)
        targets[cid] = (clazz, key)
        schemata[cid] = schema

    queries = defaultdict(list)
    for cid, value in cache.get_many_complex(list(targets)):
        clazz, key = targets.get(cid)
        missed = value is None
        if missed and clazz == Entity:
            queries[schemata.get(cid)].append(key)
        if missed:
            loader = LOADERS.get(clazz)
            if loader is not None:
                value = loader(key)
        stub._rx_cache[(clazz, key)] = value

    for schema, ids in queries.items():
        for entity in entities_by_ids(ids, schemata=schema, cached=True):
            stub._rx_cache[(Entity, entity.get('id'))] = entity
示例#9
0
def entities_by_ids(ids,
                    schemata=None,
                    cached=False,
                    includes=None,
                    excludes=None):
    """Yield the unpacked entities matching the given entity IDs.

    Runs one ``ids`` search (at most ``MAX_PAGE`` hits) against the read
    index selected by ``schemata``. Hits that ``unpack_result`` turns
    into ``None`` are skipped. When ``cached`` is true, each entity is
    stored in the object cache for one hour before it is yielded.
    """
    ids = ensure_list(ids)
    if len(ids) == 0:
        return
    index = entities_read_index(schema=schemata)
    body = {
        'query': {'ids': {'values': ids}},
        '_source': _source_spec(includes, excludes),
        'size': MAX_PAGE
    }
    result = es.search(index=index, body=body)
    hits = result.get('hits', {}).get('hits', [])
    for entity in map(unpack_result, hits):
        if entity is None:
            continue
        if cached:
            # Cache entities only briefly to avoid filling up the cache:
            key = cache.object_key(Entity, entity.get('id'))
            cache.set_complex(key, entity, expire=60 * 60)
        yield entity
示例#10
0
def get_collection(collection_id):
    """Return the cached dictionary view of a collection.

    Returns ``None`` for a ``None`` ID or an unknown collection. On a
    cache miss the collection is loaded via ``Collection.by_id``,
    enriched with ``count`` and ``schemata`` from the collection stats
    and with normalized ``countries`` / ``languages``, and then cached.
    """
    if collection_id is None:
        return
    key = cache.object_key(Collection, collection_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached

    collection = Collection.by_id(collection_id)
    if collection is None:
        return

    data = collection.to_dict()
    stats = get_collection_stats(collection.id)
    data.update(count=stats['count'], schemata=stats['schemata'])

    # Without explicit countries or languages on the collection, fall
    # back to the values found in its statistics.
    countries = ensure_list(collection.countries)
    if not countries:
        countries = stats['countries'].keys()
    data['countries'] = registry.country.normalize_set(countries)

    languages = ensure_list(collection.languages)
    if not languages:
        languages = stats['languages'].keys()
    data['languages'] = registry.language.normalize_set(languages)

    cache.set_complex(key, data, expire=cache.EXPIRE)
    return data
示例#11
0
def get_collection(collection_id):
    """Return a collection, reading through the object cache.

    On a cache miss the result of ``index.get_collection`` is stored in
    the cache with the default expiry and returned.
    """
    key = cache.object_key(Collection, collection_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached
    loaded = index.get_collection(collection_id)
    cache.set_complex(key, loaded, expire=cache.EXPIRE)
    return loaded
示例#12
0
文件: collections.py 项目: pudo/aleph
def get_collection(collection_id):
    """Fetch the dictionary representation of a collection.

    A cached copy is returned when there is one. Otherwise the
    collection is loaded from ``Collection.by_id``, combined with its
    statistics (``count``, ``schemata``, and ``countries`` / ``languages``
    as a fallback when the collection defines none) and cached.
    Returns ``None`` for a ``None`` ID or an unknown collection.
    """
    if collection_id is None:
        return
    key = cache.object_key(Collection, collection_id)
    data = cache.get_complex(key)
    if data is None:
        collection = Collection.by_id(collection_id)
        if collection is None:
            return

        data = collection.to_dict()
        stats = get_collection_stats(collection.id)
        for field in ('count', 'schemata'):
            data[field] = stats[field]

        # (collection attribute / stats key, registry type name)
        for field, type_name in (('countries', 'country'),
                                 ('languages', 'language')):
            values = ensure_list(getattr(collection, field))
            # Nothing set on the collection: use what the data contains.
            values = values or stats[field].keys()
            normalizer = getattr(registry, type_name)
            data[field] = normalizer.normalize_set(values)
        cache.set_complex(key, data, expire=cache.EXPIRE)
    return data
示例#13
0
def get_collection_facet(collection_id, facet, refresh=False):
    """Compute value statistics for one facet field of a collection.

    Runs a ``terms`` aggregation (up to 300 buckets) and a
    ``cardinality`` aggregation on ``facet`` over the documents whose
    ``collection_id`` matches, and stores the outcome in the cache under
    a per-collection, per-facet key.

    :param collection_id: ID of the collection to aggregate over.
    :param facet: name of the indexed field to aggregate on.
    :param refresh: when true, the cache is not consulted and the
        statistics are recomputed; the cache entry is overwritten.
    :return: dict with ``values`` (bucket key -> document count) and
        ``total`` (value reported by the cardinality aggregation).
    """
    key = cache.object_key(Collection, collection_id, facet)
    # A refresh never uses the cached value, so the cache lookup is
    # only worth doing when no refresh was requested.
    if not refresh:
        data = cache.get_complex(key)
        if data is not None:
            return data

    query = {
        'size': 0,
        'query': {
            'bool': {'filter': [{'term': {'collection_id': collection_id}}]}
        },
        'aggs': {
            'values': {'terms': {'field': facet, 'size': 300}},
            'total': {'cardinality': {'field': facet}}
        }
    }
    # If the facet names a registered property group, restrict the
    # search to the indexes of the schemata that use that type.
    schemata = set()
    facet_type = registry.groups.get(facet)
    if facet_type is not None:
        schemata = model.get_type_schemata(facet_type)
    result = es.search(index=entities_read_index(schema=schemata),
                       body=query,
                       request_timeout=3600,
                       timeout='20m')
    aggregations = result.get('aggregations')
    buckets = aggregations.get('values').get('buckets', [])
    data = {
        'values': {bucket['key']: bucket['doc_count'] for bucket in buckets},
        'total': aggregations.get('total').get('value', 0)
    }
    cache.set_complex(key, data, expires=cache.EXPIRE)
    return data
示例#14
0
def compute_collection(collection, sync=False):
    """Recompute the statistics of a collection and re-index it.

    A marker is kept in the cache under the collection's ``stats`` key.
    While that marker is set and ``sync`` is false the call returns
    without doing anything. Otherwise the marker is (re)written with an
    expiry of ``cache.EXPIRE - 60`` and the stats update and indexing
    are triggered.
    """
    key = cache.object_key(Collection, collection.id, 'stats')
    marker = cache.get(key)
    if sync or not marker:
        cache.set(key, 'computed', expires=cache.EXPIRE - 60)
        log.info("Collection [%s] changed, computing...", collection.id)
        index.update_collection_stats(collection.id)
        index.index_collection(collection, sync=sync)
示例#15
0
文件: profiles.py 项目: sunu/aleph
def get_profile(entityset_id, authz=None):
    """Build the profile view of an entity set.

    The entity set and its items are cached as a dictionary. On top of
    that this function filters the items by read access (when ``authz``
    is given), resolves the entities of positively judged items, and
    merges their proxies into one combined proxy.

    Returns ``None`` for a ``None`` ID or an unknown entity set,
    otherwise the dictionary extended with ``proxies``, ``merged``,
    ``label`` and ``shallow``.
    """
    if entityset_id is None:
        return
    key = cache.object_key(EntitySet, entityset_id)
    data = cache.get_complex(key)
    stub = Stub()
    if data is None:
        entityset = get_entityset(entityset_id)
        if entityset is None:
            return
        data = entityset.to_dict()
        data["items"] = [item.to_dict() for item in entityset.items()]
        cache.set_complex(key, data, expires=cache.EXPIRE)

    # Keep only the items in collections the current user may read.
    if authz is not None:
        data["items"] = [
            item for item in data["items"]
            if authz.can(item["collection_id"], authz.READ)
        ]

    # Queue the entities of all positively judged items, then resolve
    # them together.
    for item in data["items"]:
        if Judgement(item["judgement"]) == Judgement.POSITIVE:
            resolver.queue(stub, Entity, item.get("entity_id"))
    resolver.resolve(stub)

    # Attach the resolved entity to every item and fold the proxies of
    # the resolved ones into a single merged proxy.
    merged = None
    proxies = []
    data["proxies"] = proxies
    for item in data["items"]:
        entity = resolver.get(stub, Entity, item.get("entity_id"))
        item["entity"] = entity
        if entity is None:
            continue
        proxy = model.get_proxy(entity)
        proxy.context = {}
        proxies.append(proxy)
        if merged is not None:
            merged.merge(proxy)
            merged.context["entities"].append(proxy.id)
        else:
            merged = proxy.clone()
            merged.context["entities"] = [proxy.id]

    # No entity could be resolved: fall back to an empty entity.
    if merged is None:
        merged = model.make_entity(Entity.LEGAL_ENTITY)

    # Polish it a bit:
    merged.id = data.get("id")
    merged = name_entity(merged)
    data["merged"] = merged
    data["label"] = merged.caption
    data["shallow"] = False
    return data
示例#16
0
def compute_collection(collection, force=False, sync=False):
    """Recompute the statistics of a collection and re-index it.

    The cache entry under the collection's ``stats`` key records when
    the computation last ran. If that entry exists and ``force`` is
    false, nothing happens. Otherwise the collection cache is flushed,
    the stats are updated, the current UTC time (ISO format) is written
    to the entry, and the collection is indexed.
    """
    key = cache.object_key(Collection, collection.id, "stats")
    last_run = cache.get(key)
    if force or last_run is None:
        refresh_collection(collection.id)
        log.info("[%s] Computing statistics...", collection)
        index.update_collection_stats(collection.id)
        timestamp = datetime.utcnow().isoformat()
        cache.set(key, timestamp)
        index.index_collection(collection, sync=sync)
示例#17
0
def compute_collection(collection, force=False, sync=False):
    """Recompute the statistics of a collection and re-index it.

    A ``computed`` marker under the collection's ``stats`` cache key
    suppresses repeated runs: when it is present and ``force`` is false
    the call is a no-op. Otherwise the collection cache is flushed, the
    stats are updated, the marker is written with the default expiry,
    and the collection is indexed.
    """
    key = cache.object_key(Collection, collection.id, "stats")
    marker = cache.get(key)
    already_computed = marker is not None
    if already_computed and not force:
        return
    refresh_collection(collection.id)
    log.info("[%s] Computing statistics...", collection)
    index.update_collection_stats(collection.id)
    cache.set(key, "computed", expires=cache.EXPIRE)
    index.index_collection(collection, sync=sync)
示例#18
0
文件: entities.py 项目: mudsill/aleph
def entities_by_ids(ids,
                    schemata=None,
                    cached=False,
                    includes=PROXY_INCLUDES,
                    excludes=None):
    """Iterate over unpacked entities for the given entity IDs.

    Entities are yielded in the order of ``ids``; IDs for which nothing
    was found are skipped.

    :param ids: a single entity ID or a list of them.
    :param schemata: passed to ``entities_read_index`` to select the
        index to search.
    :param cached: read entities from, and write them to, the object
        cache. Only honoured for the default field selection
        (``includes == PROXY_INCLUDES`` and no ``excludes``).
    :param includes: source fields to include in the result.
    :param excludes: source fields to exclude from the result.
    """
    ids = ensure_list(ids)
    if not ids:
        return
    # The cache is only used for the default field selection, so a
    # cached entity always has the same shape as a freshly loaded one.
    cached = cached and excludes is None and includes == PROXY_INCLUDES
    entities = {}
    if cached:
        keys = [cache.object_key(Entity, i) for i in ids]
        for _, entity in cache.get_many_complex(keys):
            if entity is not None:
                entities[entity.get("id")] = entity

    # Only IDs that were not served from the cache need to be searched.
    # (This used to test the builtin ``id`` instead of the loop
    # variable, which made every ID count as missing.)
    missing = [i for i in ids if i not in entities]
    if missing:
        index = entities_read_index(schema=schemata)
        query = {
            "query": {
                "ids": {
                    "values": missing
                }
            },
            "_source": _source_spec(includes, excludes),
            "size": MAX_PAGE,
        }
        result = es.search(index=index, body=query)
        for doc in result.get("hits", {}).get("hits", []):
            entity = unpack_result(doc)
            if entity is not None:
                entity_id = entity.get("id")
                entities[entity_id] = entity
                if cached:
                    key = cache.object_key(Entity, entity_id)
                    cache.set_complex(key, entity, expires=60 * 60 * 2)

    for i in ids:
        entity = entities.get(i)
        if entity is not None:
            yield entity
示例#19
0
文件: entities.py 项目: pudo/aleph
def get_entity(entity_id, **kwargs):
    """Return one entity by ID, or ``None`` if it cannot be found.

    The object cache is tried first. On a miss the entity is looked up
    through ``entities_by_ids`` with caching enabled and the first
    result is returned. ``kwargs`` are accepted but not used.
    """
    if entity_id is None:
        return
    key = cache.object_key(Entity, entity_id)
    cached = cache.get_complex(key)
    if cached is None:
        log.debug("Entity [%s]: object cache miss", entity_id)
        results = entities_by_ids(entity_id, cached=True)
        return next(iter(results), None)
    return cached
示例#20
0
def get_entity(entity_id, **kwargs):
    """Fetch a single entity, preferring the object cache.

    Returns ``None`` when ``entity_id`` is ``None`` or when neither the
    cache nor ``entities_by_ids`` yields the entity. ``kwargs`` are
    accepted but not used.
    """
    if entity_id is None:
        return
    hit = cache.get_complex(cache.object_key(Entity, entity_id))
    if hit is not None:
        return hit
    log.debug("Entity [%s]: object cache miss", entity_id)
    for found in entities_by_ids(entity_id, cached=True):
        # Only the first match is of interest.
        return found
    return None
示例#21
0
文件: roles.py 项目: sunu/aleph
def get_role(role_id):
    """Return the dictionary form of a role, reading through the cache.

    Returns ``None`` when ``role_id`` is ``None`` or no such role
    exists. A role loaded via ``Role.by_id`` is converted with
    ``to_dict()`` and cached with the default expiry.
    """
    if role_id is None:
        return
    key = cache.object_key(Role, role_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached
    role = Role.by_id(role_id)
    if role is None:
        return
    data = role.to_dict()
    cache.set_complex(key, data, expires=cache.EXPIRE)
    return data
示例#22
0
文件: roles.py 项目: pudo/aleph
def get_role(role_id):
    """Return the dictionary form of a role, reading through the cache.

    A cache miss is logged at debug level. Returns ``None`` when
    ``role_id`` is ``None`` or ``Role.by_id`` finds nothing; otherwise
    the role's ``to_dict()`` result is cached and returned.
    """
    if role_id is None:
        return
    key = cache.object_key(Role, role_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached
    log.debug("Role [%s]: object cache miss", role_id)
    role = Role.by_id(role_id)
    if role is None:
        return
    data = role.to_dict()
    cache.set_complex(key, data, expire=cache.EXPIRE)
    return data
示例#23
0
文件: expand.py 项目: jbaehne/aleph
def iter_value_entities(type_, value):
    """Yield ``(entity_id, prop)`` pairs for entities holding ``value``.

    Results are memoised in the key-value store (``cache.kv``,
    presumably a Redis client): a set of ``"<qname>@<entity_id>"``
    members per type and value, plus a ``deg1`` marker holding the
    number of members. While the marker exists the pairs are read back
    from the set; otherwise they are computed by
    ``_iter_value_entities`` and written to the store once the
    generator has been consumed completely.

    Nothing is yielded when the type has no group or the value
    stringifies to ``None``.
    """
    value = stringify(value)
    if type_.group is None or value is None:
        return
    key = cache.object_key(type(type_), value)
    degree_key = cache.object_key(type(type_), value, 'deg1')
    degree = cache.get(degree_key)
    if degree is not None:
        for item in cache.kv.sscan_iter(key):
            qname, entity_id = item.decode('utf-8').split('@', 1)
            prop = model.get_qname(qname)
            yield entity_id, prop
        return

    degree = 0
    pipe = cache.kv.pipeline()
    # Rebuild the set from scratch so members left over from an earlier
    # computation are not mixed into the new result.
    pipe.delete(key)
    for entity_id, prop in _iter_value_entities(type_, value):
        yield entity_id, prop
        item = '@'.join((prop.qname, entity_id))
        pipe.sadd(key, item)
        degree += 1
    pipe.set(degree_key, degree, ex=cache.EXPIRE)
    # Give the member set the same lifetime as its marker instead of
    # keeping it forever. It is queued after the marker so that the set
    # should not expire before the marker does.
    pipe.expire(key, cache.EXPIRE)
    pipe.execute()
示例#24
0
def get_role(role_id):
    """Look up a role as a dictionary, using the object cache.

    ``None`` is returned for a ``None`` ID and for roles that
    ``Role.by_id`` cannot find. Cache misses are logged at debug level
    and the freshly built dictionary is cached with the default expiry.
    """
    if role_id is None:
        return
    key = cache.object_key(Role, role_id)
    data = cache.get_complex(key)
    if data is not None:
        return data

    log.debug("Role [%s]: object cache miss", role_id)
    role = Role.by_id(role_id)
    if role is not None:
        data = role.to_dict()
        cache.set_complex(key, data, expire=cache.EXPIRE)
        return data
示例#25
0
def get_export(export_id):
    """Return the dictionary form of an export, reading through the cache.

    Returns ``None`` when ``export_id`` is ``None`` or no such export
    exists. An export loaded via ``Export.by_id`` is logged, converted
    with ``to_dict()`` and cached with the default expiry.
    """
    if export_id is None:
        return
    key = cache.object_key(Export, export_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached
    export = Export.by_id(export_id)
    if export is None:
        return
    log.debug("Export cache refresh: %r", export)
    data = export.to_dict()
    cache.set_complex(key, data, expires=cache.EXPIRE)
    return data
示例#26
0
def get_collection_stats(collection_id):
    """Compute document counts for a collection, with caching.

    The result holds the total hit count under ``count`` and, for each
    of ``schemata``, ``countries`` and ``languages``, a mapping of
    bucket key to document count taken from a terms aggregation. It is
    cached under the collection's ``stats`` key.
    """
    key = cache.object_key(Collection, collection_id, 'stats')
    cached = cache.get_complex(key)
    if cached is not None:
        return cached

    log.info("Generating collection stats: %s", collection_id)
    # (aggregation name, indexed field, maximum number of buckets)
    facets = (
        ('schemata', 'schema', 1000),
        ('countries', 'countries', 500),
        ('languages', 'languages', 10),
    )
    collection_filter = {'term': {'collection_id': collection_id}}
    query = {
        'size': 0,
        'query': {'bool': {'filter': [collection_filter]}},
        'aggs': {
            name: {'terms': {'field': field, 'size': size}}
            for name, field, size in facets
        }
    }
    result = search_safe(index=entities_read_index(), body=query)
    aggregations = result.get('aggregations', {})
    data = {'count': result['hits']['total']}

    for name, _field, _size in facets:
        buckets = aggregations[name]['buckets']
        data[name] = {
            bucket['key']: bucket['doc_count'] for bucket in buckets
        }
    cache.set_complex(key, data, expire=cache.EXPIRE)
    return data
示例#27
0
def get_role_channels(role):
    """Return the notification channels a role should listen to.

    A non-empty cached list is returned as is. Otherwise the list
    starts with the global channel and, for a user role that is not
    deleted, also contains the role's own channel and one channel per
    entry in ``role.roles``. The list is cached before it is returned.
    """
    key = cache.object_key(Role, role.id, 'channels')
    cached = cache.get_list(key)
    if len(cached) > 0:
        return cached
    channels = [Notification.GLOBAL]
    is_active_user = role.deleted_at is None and role.type == Role.USER
    if is_active_user:
        channels.append(channel(role))
        channels.extend(channel(group) for group in role.roles)
    cache.set_list(key, channels)
    return channels
示例#28
0
def get_role_channels(role):
    """Build (or fetch from the cache) the channel list for a role.

    Every role listens to ``Notification.GLOBAL``. A user role that is
    not deleted additionally listens to its own channel and to the
    channels of all entries in ``role.roles``. A non-empty cached list
    takes precedence over recomputing.
    """
    key = cache.object_key(Role, role.id, 'channels')
    channels = cache.get_list(key)
    if not len(channels):
        channels = [Notification.GLOBAL]
        if role.deleted_at is None and role.type == Role.USER:
            subjects = [role]
            subjects.extend(role.roles)
            for subject in subjects:
                channels.append(channel(subject))
        cache.set_list(key, channels)
    return channels
示例#29
0
文件: roles.py 项目: jbaehne/aleph
def get_role(role_id):
    """Return a small dictionary describing a role, using the cache.

    On a cache miss the role is loaded via ``Role.by_id``; its ``id``,
    ``name``, ``label`` and ``type`` are copied into a dictionary that
    is cached and returned. Returns ``None`` if the role does not exist.
    """
    key = cache.object_key(Role, role_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached
    role = Role.by_id(role_id)
    if role is None:
        return
    fields = ('id', 'name', 'label', 'type')
    data = {field: getattr(role, field) for field in fields}
    cache.set_complex(key, data, expire=cache.EXPIRE)
    return data
示例#30
0
def get_role_channels(role):
    """Return the notification channels a role should listen to.

    A non-empty cached list is returned as is. Otherwise the list
    starts with the global channel; for a user role that is not deleted
    it also gets one channel per role ID in the role's ``Authz`` and one
    per collection that ``Authz`` can read. The list is cached with the
    default expiry.
    """
    key = cache.object_key(Role, role.id, 'channels')
    cached = cache.get_list(key)
    if len(cached) > 0:
        return cached
    channels = [Notification.GLOBAL]
    is_active_user = role.deleted_at is None and role.type == Role.USER
    if is_active_user:
        authz = Authz.from_role(role)
        channels += [channel(role_id, Role) for role_id in authz.roles]
        readable = authz.collections(authz.READ)
        channels += [channel(coll_id, Collection) for coll_id in readable]
    cache.set_list(key, channels, expire=cache.EXPIRE)
    return channels
示例#31
0
文件: alerts.py 项目: jbaehne/aleph
def get_alert(alert_id):
    """Return a dictionary describing an alert, using the object cache.

    On a cache miss the alert is loaded via ``Alert.by_id``; its ``id``,
    ``query``, ``role_id`` and the three timestamp attributes are copied
    into a dictionary that is cached and returned. Returns ``None`` if
    the alert does not exist.
    """
    key = cache.object_key(Alert, alert_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached
    alert = Alert.by_id(alert_id)
    if alert is None:
        return
    fields = ('id', 'query', 'role_id',
              'notified_at', 'created_at', 'updated_at')
    data = {field: getattr(alert, field) for field in fields}
    cache.set_complex(key, data, expire=cache.EXPIRE)
    return data
示例#32
0
def get_collection(collection_id):
    """Return the cached dictionary view of a collection.

    Returns ``None`` for a ``None`` ID or an unknown collection. On a
    cache miss the collection's ``to_dict()`` result gets a ``count``
    field, the sum of the values from ``get_collection_things``, and is
    cached with the default expiry.
    """
    if collection_id is None:
        return
    key = cache.object_key(Collection, collection_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached

    collection = Collection.by_id(collection_id)
    if collection is None:
        return

    data = collection.to_dict()
    counts = get_collection_things(collection.id).values()
    data['count'] = sum(counts)
    cache.set_complex(key, data, expires=cache.EXPIRE)
    return data
示例#33
0
def get_collection(collection_id):
    """Return the cached dictionary view of a collection.

    Returns ``None`` for a ``None`` ID or an unknown collection. On a
    cache miss the collection's ``to_dict()`` result is extended with
    ``schemata`` (the ``values`` of the ``schema`` facet) and ``count``
    (the sum of those values), then cached with the default expiry.
    """
    if collection_id is None:
        return
    key = cache.object_key(Collection, collection_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached

    collection = Collection.by_id(collection_id)
    if collection is None:
        return

    data = collection.to_dict()
    facet = get_facet_values(collection.id, 'schema')
    by_schema = facet.get('values', {})
    data['count'] = sum(by_schema.values())
    data['schemata'] = by_schema
    cache.set_complex(key, data, expires=cache.EXPIRE)
    return data
示例#34
0
def get_collection(collection_id):
    """Return the cached dictionary view of a collection.

    Returns ``None`` for a ``None`` ID or an unknown collection. On a
    cache miss the collection's ``to_dict()`` result gets a ``count``
    field from a count query on the ``Entity.THING`` read index,
    filtered by ``collection_id``, and is cached with the default expiry.
    """
    if collection_id is None:
        return
    key = cache.object_key(Collection, collection_id)
    cached = cache.get_complex(key)
    if cached is not None:
        return cached

    collection = Collection.by_id(collection_id)
    if collection is None:
        return

    data = collection.to_dict()

    index = entities_read_index(schema=Entity.THING)
    body = {"query": {"term": {"collection_id": collection_id}}}
    counted = es.count(index=index, body=body)
    data["count"] = counted.get("count", 0)
    cache.set_complex(key, data, expires=cache.EXPIRE)
    return data
示例#35
0
文件: entities.py 项目: pudo/aleph
def entities_by_ids(ids, schemata=None, cached=False,
                    includes=None, excludes=None):
    """Generate the unpacked entities found for the given entity IDs.

    Performs a single ``ids`` search limited to ``MAX_PAGE`` hits on the
    read index chosen by ``schemata``. Results for which
    ``unpack_result`` returns ``None`` are dropped. With ``cached`` set,
    every entity is written to the object cache (one hour expiry)
    before being yielded.
    """
    ids = ensure_list(ids)
    if len(ids) < 1:
        return
    index = entities_read_index(schema=schemata)
    source = _source_spec(includes, excludes)
    body = {
        'query': {'ids': {'values': ids}},
        '_source': source,
        'size': MAX_PAGE
    }
    response = es.search(index=index, body=body)
    for hit in response.get('hits', {}).get('hits', []):
        entity = unpack_result(hit)
        if entity is None:
            continue
        if cached:
            # Cache entities only briefly to avoid filling up the cache:
            cache_key = cache.object_key(Entity, entity.get('id'))
            cache.set_complex(cache_key, entity, expire=60 * 60)
        yield entity
示例#36
0
文件: roles.py 项目: pudo/aleph
def refresh_role(role, sync=False):
    """Delete every cache entry derived from a role.

    This covers the role object, its channel list and the read/write
    authorization entries for the role. ``sync`` is accepted but not
    used by this function.
    """
    stale_keys = (
        cache.object_key(Role, role.id),
        cache.object_key(Role, role.id, 'channels'),
        cache.key(Authz.PREFIX, Authz.READ, role.id),
        cache.key(Authz.PREFIX, Authz.WRITE, role.id),
    )
    cache.kv.delete(*stale_keys)
示例#37
0
文件: collections.py 项目: pudo/aleph
def refresh_collection(collection_id, sync=False):
    """Operation to execute after updating a collection-related
    domain object: delete the cached collection object.

    ``sync`` is accepted but not used by this function.
    """
    collection_key = cache.object_key(Collection, collection_id)
    cache.kv.delete(collection_key)
示例#38
0
文件: roles.py 项目: jbaehne/aleph
def refresh_role(role, sync=False):
    """Delete the cache entries derived from a role.

    Removes the read and write authorization entries for the role and
    the cached role object. ``sync`` is accepted but not used by this
    function.
    """
    stale_keys = [
        cache.key(Authz.PREFIX, access, role.id)
        for access in (Authz.READ, Authz.WRITE)
    ]
    stale_keys.append(cache.object_key(Role, role.id))
    cache.kv.delete(*stale_keys)
示例#39
0
def refresh_entity(collection, entity_id):
    """Delete the cached entity, then refresh the cache of the
    collection it belongs to via ``refresh_collection``."""
    entity_key = cache.object_key(Entity, entity_id)
    cache.kv.delete(entity_key)
    refresh_collection(collection.id)