Example #1
def load_mapping(collection, mapping_id, sync=False):
    """Flush and reload all entities generated by a mapping."""
    mapping = Mapping.by_id(mapping_id)
    if mapping is None:
        return log.error("Could not find mapping: %s", mapping_id)
    origin = mapping_origin(mapping.id)
    aggregator = get_aggregator(collection)
    aggregator.delete(origin=origin)
    delete_entities(collection.id, origin=origin, sync=True)
    if mapping.disabled:
        return log.info("Mapping is disabled: %s", mapping_id)
    publish(
        Events.LOAD_MAPPING,
        params={
            "collection": collection,
            "table": mapping.table_id
        },
        channels=[collection, mapping.role],
        actor_id=mapping.role_id,
    )
    try:
        map_to_aggregator(collection, mapping, aggregator)
        aggregate_model(collection, aggregator)
        index_aggregator(collection, aggregator, sync=sync)
        mapping.set_status(status=Status.SUCCESS)
        db.session.commit()
    except Exception as exc:
        mapping.set_status(status=Status.FAILED, error=str(exc))
        db.session.commit()
        aggregator.delete(origin=origin)
    finally:
        aggregator.close()
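
The control flow above follows a common pattern: write fragments under a dedicated origin, record success or failure on the mapping, discard the partial output on error, and always release the aggregator. The sketch below is a minimal, self-contained illustration of that pattern; FakeAggregator and run_mapping are invented stand-ins, not Aleph APIs.

class FakeAggregator:
    """Invented stand-in for the aggregator: keeps (origin, fragment) pairs."""

    def __init__(self):
        self.fragments = []
        self.closed = False

    def put(self, fragment, origin):
        self.fragments.append((origin, fragment))

    def delete(self, origin):
        # Drop everything written under the given origin.
        self.fragments = [f for f in self.fragments if f[0] != origin]

    def close(self):
        self.closed = True


def run_mapping(aggregator, origin, load):
    """Mirror load_mapping's error handling: try, record status, clean up."""
    try:
        load(aggregator, origin)
        status = "success"
    except Exception as exc:
        status = "failed: %s" % exc
        aggregator.delete(origin=origin)  # roll back partially written fragments
    finally:
        aggregator.close()
    return status


agg = FakeAggregator()
print(run_mapping(agg, "mapping:1", lambda a, o: a.put({"id": "x"}, origin=o)))
# -> success; a load function that raises would instead report the failure
#    and leave no fragments behind for that origin.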
Example #2
def bulk_write(collection, entities, unsafe=False, role_id=None, index=True):
    """Write a set of entities - given as dicts - to the index."""
    # This is called mainly by the /api/2/collections/X/_bulk API.
    now = datetime.utcnow().isoformat()
    aggregator = get_aggregator(collection)
    writer = aggregator.bulk()
    entity_ids = set()
    for data in entities:
        if not is_mapping(data):
            raise InvalidData("Failed to read input data", errors=data)
        entity = model.get_proxy(data)
        if entity.id is None:
            raise InvalidData("No ID for entity", errors=entity.to_dict())
        entity = collection.ns.apply(entity)
        if not unsafe:
            entity = remove_checksums(entity)
        entity.context = {
            'role_id': role_id,
            'created_at': now,
            'updated_at': now,
        }
        writer.put(entity, origin='bulk')
        if index and len(entity_ids) < MAX_PAGE:
            entity_ids.add(entity.id)
    writer.flush()
    if index:
        if len(entity_ids) >= MAX_PAGE:
            entity_ids = None
        index_aggregator(collection, aggregator, entity_ids=entity_ids)
        refresh_collection(collection.id)
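
A detail worth noting in bulk_write is the entity_ids cap: ids are collected only while the set stays below MAX_PAGE, and once the threshold is reached the set is replaced by None, which is passed on to index_aggregator without an id filter (a full reindex). A tiny sketch of just that logic, with a made-up MAX_PAGE value:

MAX_PAGE = 3  # deliberately small for demonstration; Aleph's constant is larger

def collect_ids(ids):
    """Cap the set of ids to reindex, as in bulk_write above."""
    entity_ids = set()
    for entity_id in ids:
        if len(entity_ids) < MAX_PAGE:
            entity_ids.add(entity_id)
    # Past the cap, signal "no id filter" by returning None.
    return None if len(entity_ids) >= MAX_PAGE else entity_ids

print(collect_ids(["a", "b"]))            # {'a', 'b'}: targeted reindex
print(collect_ids(["a", "b", "c", "d"]))  # None: fall back to a full run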
Example #3
def update_entity(collection, entity_id=None):
    """Update xref and aggregator after an entity has been edited."""
    from aleph.logic.xref import xref_entity
    from aleph.logic.profiles import profile_fragments

    log.info("[%s] Update entity: %s", collection, entity_id)
    entity = index.get_entity(entity_id)
    proxy = model.get_proxy(entity)
    if collection.casefile:
        xref_entity(collection, proxy)

    aggregator = get_aggregator(collection, origin=MODEL_ORIGIN)
    profile_fragments(collection, aggregator, entity_id=entity_id)

    # Inline name properties from adjacent entities. See the
    # docstring on `inline_names` for a more detailed discussion.
    prop = proxy.schema.get("namesMentioned")
    if prop is not None:
        entity_ids = proxy.get_type_values(registry.entity)
        names = set()
        for related in index.entities_by_ids(entity_ids):
            related = model.get_proxy(related)
            names.update(related.get_type_values(registry.name))

        if len(names) > 0:
            name_proxy = model.make_entity(proxy.schema)
            name_proxy.id = proxy.id
            name_proxy.add(prop, names)
            aggregator.put(name_proxy, fragment="names")

    index_aggregator(collection, aggregator, entity_ids=[entity_id])
    refresh_entity(collection, proxy.id)
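
The namesMentioned block above fans out to the entities referenced by the edited one, pools their names, and writes them back as a separate "names" fragment under the same entity id. The following framework-free sketch shows the same idea; the lookup table and names_fragment helper are invented for illustration and do not use followthemoney.

related_index = {
    "e2": {"names": ["ACME Ltd"]},
    "e3": {"names": ["John Doe", "J. Doe"]},
}

def names_fragment(entity_id, related_ids, lookup=related_index):
    """Pool the names of related entities into one fragment for entity_id."""
    names = set()
    for rid in related_ids:
        names.update(lookup.get(rid, {}).get("names", []))
    if not names:
        return None
    return {"id": entity_id, "fragment": "names", "namesMentioned": sorted(names)}

# The edited entity "e1" references "e2" and "e3"; their names end up inlined.
print(names_fragment("e1", ["e2", "e3"]))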
Example #4
def bulk_write(collection,
               entities,
               safe=False,
               role_id=None,
               mutable=True,
               index=True):
    """Write a set of entities - given as dicts - to the index."""
    # This is called mainly by the /api/2/collections/X/_bulk API.
    aggregator = get_aggregator(collection)
    writer = aggregator.bulk()
    entity_ids = set()
    for data in entities:
        entity = model.get_proxy(data, cleaned=False)
        if entity.id is None:
            raise InvalidData("No ID for entity", errors=entity.to_dict())
        entity = collection.ns.apply(entity)
        if safe:
            entity = remove_checksums(entity)
        entity.context = {"role_id": role_id, "mutable": mutable}
        for field in ("created_at", "updated_at"):
            timestamp = data.get(field)
            if timestamp is not None:
                dt = registry.date.to_datetime(timestamp)
                if dt is not None:
                    entity.context[field] = dt.isoformat()
        writer.put(entity, origin="bulk")
        if index and len(entity_ids) < MAX_PAGE:
            entity_ids.add(entity.id)
    writer.flush()
    if index:
        if len(entity_ids) >= MAX_PAGE:
            entity_ids = None
        index_aggregator(collection, aggregator, entity_ids=entity_ids)
        refresh_collection(collection.id)
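
Compared to the earlier variant, this bulk_write copies created_at/updated_at from the input only when the value actually parses as a date (via followthemoney's registry.date.to_datetime). The sketch below reproduces that guard with datetime.fromisoformat as a stand-in parser; build_context is an invented helper, not Aleph code.

from datetime import datetime

def build_context(data, role_id=None, mutable=True):
    """Assemble the entity context, keeping only parseable timestamps."""
    context = {"role_id": role_id, "mutable": mutable}
    for field in ("created_at", "updated_at"):
        value = data.get(field)
        if value is None:
            continue
        try:
            context[field] = datetime.fromisoformat(value).isoformat()
        except ValueError:
            pass  # unparseable values are dropped, mirroring the None check above
    return context

print(build_context({"created_at": "2021-05-01T10:00:00", "updated_at": "bogus"}))
# -> {'role_id': None, 'mutable': True, 'created_at': '2021-05-01T10:00:00'}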
Example #5
def index_many(stage, collection, sync=False, entity_ids=None, batch=BATCH_SIZE):
    """Project the contents of the collection's aggregator into the index."""
    if entity_ids is not None:
        entity_ids = ensure_list(entity_ids)
        # WEIRD: Instead of indexing a single entity, this will try to
        # pull a whole batch of them off the queue and do it at once.
        tasks = stage.get_tasks(limit=max(1, batch - len(entity_ids)))
        for task in tasks:
            entity_ids.extend(ensure_list(task.payload.get("entity_ids")))
        stage.mark_done(len(tasks))
    aggregator = get_aggregator(collection)
    index_aggregator(collection, aggregator, entity_ids=entity_ids, sync=sync)
    refresh_collection(collection.id)
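
The trick in index_many is to top up the batch from the work queue: instead of indexing one entity per task, it pulls additional queued tasks and merges their entity_ids into a single indexing run. A simplified, self-contained version of that batching follows (queue handling reduced to a deque; all names invented):

from collections import deque

BATCH_SIZE = 4  # small value for demonstration

def drain_batch(entity_ids, queue, batch=BATCH_SIZE):
    """Merge queued tasks' ids into one batch, roughly as index_many does."""
    entity_ids = list(entity_ids)
    while queue and len(entity_ids) < batch:
        entity_ids.extend(queue.popleft())
    return entity_ids

queue = deque([["b"], ["c", "d"], ["e"]])
print(drain_batch(["a"], queue))  # ['a', 'b', 'c', 'd']
print(list(queue))                # [['e']] stays queued for the next run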
Example #6
def save_entityset_item(entityset, collection, entity_id, **data):
    """Change the association between an entity and an entityset. In the case of
    a profile, this may require re-indexing of the entity to update the associated
    profile_id.
    """
    item = EntitySetItem.save(entityset,
                              entity_id,
                              collection_id=collection.id,
                              **data)
    if entityset.type == EntitySet.PROFILE and entityset.collection_id == collection.id:
        from aleph.logic.profiles import profile_fragments

        aggregator = get_aggregator(collection)
        profile_fragments(collection, aggregator, entity_id=entity_id)
        index_aggregator(collection, aggregator, entity_ids=[entity_id])
        refresh_entity(collection, entity_id)
    refresh_entityset(entityset.id)
    return item
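
The interesting branch here is the guard: the entity only needs re-indexing when the entity set is a profile and that profile belongs to the same collection the item was saved into. A trivial sketch of that condition (all names invented):

def needs_reindex(entityset_type, entityset_collection_id, collection_id,
                  profile_type="profile"):
    """Re-index only for profile membership changes within the same collection."""
    return (entityset_type == profile_type
            and entityset_collection_id == collection_id)

print(needs_reindex("profile", 7, 7))  # True: update profile_id in the index
print(needs_reindex("list", 7, 7))     # False: plain list membership change
print(needs_reindex("profile", 7, 9))  # False: profile lives in another collection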