Exemplo n.º 1
0
def delete_collection(collection, keep_metadata=False, sync=False):
    cancel_queue(collection)
    aggregator = get_aggregator(collection)
    try:
        aggregator.drop()
    finally:
        aggregator.close()
    flush_notifications(collection, sync=sync)
    index.delete_entities(collection.id, sync=sync)
    xref_index.delete_xref(collection, sync=sync)
    deleted_at = collection.deleted_at or datetime.utcnow()
    Entity.delete_by_collection(collection.id, deleted_at=deleted_at)
    Mapping.delete_by_collection(collection.id, deleted_at=deleted_at)
    Diagram.delete_by_collection(collection.id, deleted_at=deleted_at)
    Document.delete_by_collection(collection.id)
    if not keep_metadata:
        # Considering linkages metadata for now, might be wrong:
        Linkage.delete_by_collection(collection.id)
        Permission.delete_by_collection(collection.id, deleted_at=deleted_at)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()
    if not keep_metadata:
        index.delete_collection(collection.id, sync=True)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
Exemplo n.º 2
0
def prune_entity(collection, entity_id=None, job_id=None):
    """Prune handles the full deletion of an entity outside of the HTTP request
    cycle. This involves cleaning up adjacent entities like xref results, notifications
    and so on."""
    # This is recursive and will also delete any entities which
    # reference the given entity. Usually this is going to be child
    # documents, or directoships referencing a person. It's a pretty
    # dangerous operation, though.
    log.info("[%s] Prune entity: %s", collection, entity_id)
    for adjacent in index.iter_adjacent(collection.id, entity_id):
        log.warning("Recursive delete: %s", adjacent.get("id"))
        delete_entity(collection, adjacent, job_id=job_id)
    flush_notifications(entity_id, clazz=Entity)
    obj = Entity.by_id(entity_id, collection=collection)
    if obj is not None:
        obj.delete()
    doc = Document.by_id(entity_id, collection=collection)
    if doc is not None:
        doc.delete()
    EntitySetItem.delete_by_entity(entity_id)
    Mapping.delete_by_table(entity_id)
    xref_index.delete_xref(collection, entity_id=entity_id)
    aggregator = get_aggregator(collection)
    aggregator.delete(entity_id=entity_id)
    refresh_entity(collection, entity_id)
    collection.touch()
    db.session.commit()
Exemplo n.º 3
0
def cleanup_deleted():
    from aleph.model import Alert, Entity, Collection
    from aleph.model import Permission, Role, Document
    from aleph.model import Diagram, Mapping
    Mapping.cleanup_deleted()
    Diagram.cleanup_deleted()
    Document.cleanup_deleted()
    Alert.cleanup_deleted()
    Permission.cleanup_deleted()
    Entity.cleanup_deleted()
    Collection.cleanup_deleted()
    Role.cleanup_deleted()
    db.session.commit()
Exemplo n.º 4
0
def delete_collection(collection, keep_metadata=False, sync=False):
    reset_collection(collection, sync=False)
    deleted_at = collection.deleted_at or datetime.utcnow()
    Entity.delete_by_collection(collection.id, deleted_at=deleted_at)
    Mapping.delete_by_collection(collection.id, deleted_at=deleted_at)
    Diagram.delete_by_collection(collection.id, deleted_at=deleted_at)
    Document.delete_by_collection(collection.id)
    if not keep_metadata:
        Permission.delete_by_collection(collection.id, deleted_at=deleted_at)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()
    if not keep_metadata:
        index.delete_collection(collection.id, sync=sync)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
Exemplo n.º 5
0
def flush(collection_id, mapping_id):
    """Flush all entities loaded by mapping with id `mapping_id`.
    ---
    post:
      summary: Flush entities loaded by a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '202':
          description: No Content
      tags:
      - Collection
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    queue_task(collection,
               OP_FLUSH_MAPPING,
               job_id=get_session_id(),
               payload={'mapping_id': mapping.id})
    return ('', 202)
Exemplo n.º 6
0
def delete(collection_id, mapping_id):
    """Delete a mapping.
    ---
    delete:
      summary: Delete a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '204':
          description: No Content
      tags:
      - Collection
      - Mapping
    """
    get_db_collection(collection_id, request.authz.WRITE)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    mapping.delete()
    db.session.commit()
    return ('', 204)
Exemplo n.º 7
0
def view(collection_id, mapping_id):
    """Return the mapping with id `mapping_id`.
    ---
    get:
      summary: Fetch a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
      - Collection
    """
    get_db_collection(collection_id, request.authz.WRITE)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    return MappingSerializer.jsonify(mapping)
Exemplo n.º 8
0
def load_mapping(collection, mapping_id, sync=False):
    """Flush and reload all entities generated by a mapping."""
    mapping = Mapping.by_id(mapping_id)
    if mapping is None:
        return log.error("Could not find mapping: %s", mapping_id)
    origin = mapping_origin(mapping.id)
    aggregator = get_aggregator(collection)
    aggregator.delete(origin=origin)
    delete_entities(collection.id, origin=origin, sync=True)
    if mapping.disabled:
        return log.info("Mapping is disabled: %s", mapping_id)
    publish(
        Events.LOAD_MAPPING,
        params={
            "collection": collection,
            "table": mapping.table_id
        },
        channels=[collection, mapping.role],
        actor_id=mapping.role_id,
    )
    try:
        map_to_aggregator(collection, mapping, aggregator)
        aggregate_model(collection, aggregator)
        index_aggregator(collection, aggregator, sync=sync)
        mapping.set_status(status=Status.SUCCESS)
        db.session.commit()
    except Exception as exc:
        mapping.set_status(status=Status.FAILED, error=str(exc))
        db.session.commit()
        aggregator.delete(origin=origin)
    finally:
        aggregator.close()
Exemplo n.º 9
0
def get_deep_collection(collection):
    mappings = Mapping.by_collection(collection.id).count()
    entitysets = EntitySet.type_counts(collection_id=collection.id)
    return {
        "statistics": index.get_collection_stats(collection.id),
        "counts": {"mappings": mappings, "entitysets": entitysets},
        "status": get_status(collection),
        "shallow": False,
    }
Exemplo n.º 10
0
def delete_collection(collection, keep_metadata=False,
                      sync=False, reset_sync=False):
    reset_collection(collection, sync=reset_sync)
    deleted_at = collection.deleted_at or datetime.utcnow()
    Entity.delete_by_collection(collection.id, deleted_at=deleted_at)
    Mapping.delete_by_collection(collection.id, deleted_at=deleted_at)
    Diagram.delete_by_collection(collection.id, deleted_at=deleted_at)
    Document.delete_by_collection(collection.id)
    if not keep_metadata:
        # Considering this metadata for now, might be wrong:
        Linkage.delete_by_collection(collection.id)

        Permission.delete_by_collection(collection.id, deleted_at=deleted_at)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()
    if not keep_metadata:
        index.delete_collection(collection.id, sync=sync)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
Exemplo n.º 11
0
def load_mapping(stage, collection, mapping_id):
    """Flush and reload all entities generated by a mapping."""
    mapping = Mapping.by_id(mapping_id)
    if mapping is None:
        return log.error("Could not find mapping: %s", mapping_id)
    flush_mapping(stage, collection, mapping_id)
    publish(Events.LOAD_MAPPING,
            params={'collection': collection, 'table': mapping.table_id},
            channels=[collection, mapping.role],
            actor_id=mapping.role_id)
    mapper = make_mapper(collection, mapping)
    aggregator = get_aggregator(collection)
    try:
        writer = aggregator.bulk()
        entities_count = 0
        entity_ids = set()
        for idx, record in enumerate(mapper.source.records, 1):
            for entity in mapper.map(record).values():
                if entity.schema.is_a('Thing'):
                    entity.add('proof', mapping.table_id)
                entity = collection.ns.apply(entity)
                entity_ids.add(entity.id)
                entities_count += 1
                fragment = '%s-%s' % (mapping.id, idx)
                writer.put(entity, fragment=fragment)

            if idx > 0 and idx % 500 == 0:
                payload = {
                    'entity_ids': entity_ids,
                    'mapping_id': mapping.id
                }
                queue_task(collection, OP_INDEX,
                           job_id=stage.job.id,
                           payload=payload)
                entity_ids = set()
                stage.report_finished(500)
                log.info("[%s] Loaded %s records, %s entities...",
                         collection.foreign_id,
                         idx, entities_count)

        writer.flush()
        payload = {
            'entity_ids': entity_ids,
            'mapping_id': mapping.id
        }
        queue_task(collection, OP_INDEX,
                   job_id=stage.job.id,
                   payload=payload)
        mapping.set_status(status=Mapping.SUCCESS)
        log.info("[%s] Mapping done (%s entities)",
                 mapping.id, entities_count)
    except Exception as exc:
        mapping.set_status(status=Mapping.FAILED, error=str(exc))
    finally:
        aggregator.close()
Exemplo n.º 12
0
def trigger(collection_id, mapping_id):
    """Load entities by running the mapping with id `mapping_id`. Flushes
    previously loaded entities before loading new entities.
    ---
    post:
      summary: Load entities from a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '202':
          description: No Content
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    mapping.disabled = False
    mapping.set_status(Status.PENDING)
    db.session.commit()
    job_id = get_session_id()
    queue_task(collection,
               OP_LOAD_MAPPING,
               job_id=job_id,
               mapping_id=mapping.id)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    return MappingSerializer.jsonify(mapping, status=202)
Exemplo n.º 13
0
def delete_collection(collection, keep_metadata=False, sync=False):
    cancel_queue(collection)
    aggregator = get_aggregator(collection)
    aggregator.drop()
    flush_notifications(collection, sync=sync)
    index.delete_entities(collection.id, sync=sync)
    xref_index.delete_xref(collection, sync=sync)
    deleted_at = collection.deleted_at or datetime.utcnow()
    Mapping.delete_by_collection(collection.id)
    EntitySet.delete_by_collection(collection.id, deleted_at)
    Entity.delete_by_collection(collection.id)
    Document.delete_by_collection(collection.id)
    if not keep_metadata:
        Permission.delete_by_collection(collection.id)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()
    if not keep_metadata:
        index.delete_collection(collection.id, sync=True)
        Authz.flush()
    refresh_collection(collection.id)
Exemplo n.º 14
0
def delete_entity(collection, entity, deleted_at=None, sync=False):
    # This is recursive and will also delete any entities which
    # reference the given entity. Usually this is going to be child
    # documents, or directoships referencing a person. It's a pretty
    # dangerous operation, though.
    entity_id = collection.ns.sign(entity.get("id"))
    for adjacent in index.iter_adjacent(entity):
        log.warning("Recursive delete: %r", adjacent)
        delete_entity(collection, adjacent, deleted_at=deleted_at, sync=sync)
    flush_notifications(entity_id, clazz=Entity)
    obj = Entity.by_id(entity_id, collection=collection)
    if obj is not None:
        obj.delete()
    doc = Document.by_id(entity_id, collection=collection)
    if doc is not None:
        doc.delete()
    index.delete_entity(entity_id, sync=sync)
    EntitySetItem.delete_by_entity(entity_id)
    Mapping.delete_by_table(entity_id)
    xref_index.delete_xref(collection, entity_id=entity_id, sync=sync)
    delete_aggregator_entity(collection, entity_id)
    refresh_entity(collection, entity_id)
Exemplo n.º 15
0
def update(collection_id, mapping_id):
    """Update the mapping with id `mapping_id`.
    ---
    post:
      summary: Update a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/MappingCreate'
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
      - Collection
      - Mapping
    """
    get_db_collection(collection_id, request.authz.WRITE)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    data = parse_request("MappingCreate")
    mapping.update(
        query=load_query(),
        table_id=get_table_id(data),
        entityset_id=get_entityset_id(data),
    )
    db.session.commit()
    return MappingSerializer.jsonify(mapping)
Exemplo n.º 16
0
def update(collection_id, mapping_id):
    """Update the mapping with id `mapping_id`.
    ---
    post:
      summary: Update a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/MappingCreate'
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
      - Collection
    """
    get_db_collection(collection_id, request.authz.WRITE)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    data = parse_request('MappingCreate')
    entity_id = data.get('table_id')
    query = load_query()
    entity = get_index_entity(entity_id, request.authz.READ)
    mapping.update(query=query, table_id=entity.get('id'))
    return MappingSerializer.jsonify(mapping)
Exemplo n.º 17
0
def index(collection_id):
    """Returns a list of mappings for the collection and table.
    ---
    get:
      summary: List mappings
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      - description: The table id.
        in: query
        name: table
        schema:
          type: string
      requestBody:
      responses:
        '200':
          content:
            application/json:
              schema:
                type: object
                allOf:
                - $ref: '#/components/schemas/QueryResponse'
                properties:
                  results:
                    type: array
                    items:
                      $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
        - Collection
        - Mapping
    """
    collection = get_db_collection(collection_id)
    parser = QueryParser(request.args, request.authz)
    table_id = first(parser.filters.get("table"))
    q = Mapping.by_collection(collection.id, table_id=table_id)
    result = DatabaseQueryResult(request, q, parser=parser)
    return MappingSerializer.jsonify_result(result)
Exemplo n.º 18
0
def flush(collection_id, mapping_id):
    """Flush all entities loaded by mapping with id `mapping_id`.
    ---
    post:
      summary: Flush entities loaded by a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '202':
          description: No Content
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    mapping.disabled = True
    mapping.last_run_status = None
    mapping.last_run_err_msg = None
    db.session.add(mapping)
    db.session.commit()
    queue_task(
        collection,
        OP_FLUSH_MAPPING,
        job_id=get_session_id(),
        mapping_id=mapping_id,
    )
    return ("", 202)
Exemplo n.º 19
0
def create(collection_id):
    """Create a mapping.
    ---
    post:
      summary: Create a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/MappingCreate'
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    data = parse_request("MappingCreate")
    mapping = Mapping.create(
        load_query(),
        get_table_id(data),
        collection,
        request.authz.id,
        entityset_id=get_entityset_id(data),
    )
    db.session.commit()
    return MappingSerializer.jsonify(mapping)
Exemplo n.º 20
0
def delete(collection_id, mapping_id):
    """Delete a mapping.
    ---
    delete:
      summary: Delete a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '204':
          description: No Content
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    mapping.delete()
    db.session.commit()
    queue_task(
        collection,
        OP_FLUSH_MAPPING,
        job_id=get_session_id(),
        payload={"mapping_id": mapping_id},
    )
    return ("", 204)
Exemplo n.º 21
0
def create(collection_id):
    """Create a mapping.
    ---
    post:
      summary: Create a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/MappingCreate'
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    data = parse_request('MappingCreate')
    entity_id = data.get('table_id')
    query = load_query()
    entity = get_index_entity(entity_id, request.authz.READ)
    mapping = Mapping.create(query, entity.get('id'), collection,
                             request.authz.id)  # noqa
    return MappingSerializer.jsonify(mapping)
Exemplo n.º 22
0
def trigger(collection_id, mapping_id):
    """Load entities by running the mapping with id `mapping_id`. Flushes
    previously loaded entities before loading new entities.
    ---
    post:
      summary: Load entities from a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '202':
          description: No Content
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    job_id = get_session_id()
    payload = {'mapping_id': mapping.id}
    queue_task(collection, OP_LOAD_MAPPING, job_id=job_id, payload=payload)
    collection.touch()
    db.session.commit()
    return ('', 202)