def delete_collection(collection, keep_metadata=False, sync=False):
    """Delete a collection's data and, by default, the collection itself.

    Cancels the collection's queue, drops its aggregator, removes
    notifications, indexed entities and xref data, then deletes the related
    database rows and commits.

    :param collection: the collection to delete.
    :param keep_metadata: if true, linkages, permissions, the collection
        record and its index entry are kept, and ``Authz.flush()`` is not
        called.
    :param sync: forwarded to the notification, entity index and xref
        deletion calls.
    """
    purge_metadata = not keep_metadata

    cancel_queue(collection)
    store = get_aggregator(collection)
    try:
        store.drop()
    finally:
        # Close the aggregator whether or not the drop succeeded.
        store.close()

    flush_notifications(collection, sync=sync)
    index.delete_entities(collection.id, sync=sync)
    xref_index.delete_xref(collection, sync=sync)

    # Reuse an existing deletion timestamp if the collection already has one.
    timestamp = collection.deleted_at
    if not timestamp:
        timestamp = datetime.utcnow()

    for model in (Entity, Mapping, Diagram):
        model.delete_by_collection(collection.id, deleted_at=timestamp)
    Document.delete_by_collection(collection.id)

    if purge_metadata:
        # Considering linkages metadata for now, might be wrong:
        Linkage.delete_by_collection(collection.id)
        Permission.delete_by_collection(collection.id, deleted_at=timestamp)
        collection.delete(deleted_at=timestamp)
    db.session.commit()

    if purge_metadata:
        index.delete_collection(collection.id, sync=True)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
def reindex_collection(collection, skip_errors=True, sync=False, flush=False):
    """Re-index all entities from the model, mappings and aggregator cache.

    Every enabled mapping of the collection is run into the aggregator, the
    model and profile fragments are added, and the aggregator contents are
    then indexed. Finally the collection is recomputed with ``force=True``.

    :param collection: the collection to re-index.
    :param skip_errors: forwarded to ``index_aggregator``.
    :param sync: forwarded to ``index_aggregator``.
    :param flush: if true, the collection's indexed entities are deleted
        (synchronously) right before indexing.
    """
    from aleph.logic.mapping import map_to_aggregator
    from aleph.logic.profiles import profile_fragments

    aggregator = get_aggregator(collection)
    try:
        for mapping in collection.mappings:
            if mapping.disabled:
                log.debug("[%s] Skip mapping: %r", collection, mapping)
                continue
            try:
                map_to_aggregator(collection, mapping, aggregator)
            except Exception:
                # More or less ignore broken models.
                log.exception("Failed mapping: %r", mapping)
        aggregate_model(collection, aggregator)
        profile_fragments(collection, aggregator)
        if flush:
            log.debug("[%s] Flushing...", collection)
            index.delete_entities(collection.id, sync=True)
        index_aggregator(
            collection, aggregator, skip_errors=skip_errors, sync=sync
        )
    finally:
        # Always release the aggregator, as the other users of
        # get_aggregator() in this module do.
        aggregator.close()
    compute_collection(collection, force=True)
def xref_collection(stage, collection):
    """Cross-reference all the entities and documents in a collection.

    ``stage`` is accepted but not used by this function.
    """
    # Clear existing xref data and the entities indexed under ORIGIN.
    delete_xref(collection, sync=True)
    delete_entities(collection.id, origin=ORIGIN, sync=True)

    # Index matches for the entity query first, then for the mention query.
    for build_query in (_query_entities, _query_mentions):
        index_matches(collection, build_query(collection))

    reindex_collection(collection, sync=False)
def load_mapping(collection, mapping_id, sync=False):
    """Flush and reload all entities generated by a mapping.

    Entities previously produced by the mapping are removed from the
    aggregator and the index. Unless the mapping is disabled, a
    ``LOAD_MAPPING`` event is published and the mapping is run again. The
    outcome is stored on the mapping as ``Status.SUCCESS`` or
    ``Status.FAILED`` (with the error text) and committed.

    :param collection: the collection the mapping belongs to.
    :param mapping_id: ID of the mapping to load.
    :param sync: forwarded to ``index_aggregator``.
    :return: always ``None``; a missing or disabled mapping is only logged.
    """
    mapping = Mapping.by_id(mapping_id)
    if mapping is None:
        log.error("Could not find mapping: %s", mapping_id)
        return None

    origin = mapping_origin(mapping.id)
    aggregator = get_aggregator(collection)
    # The outer try/finally guarantees the aggregator is closed on every
    # exit path, including the early return for disabled mappings.
    try:
        aggregator.delete(origin=origin)
        delete_entities(collection.id, origin=origin, sync=True)
        if mapping.disabled:
            log.info("Mapping is disabled: %s", mapping_id)
            return None
        publish(
            Events.LOAD_MAPPING,
            params={"collection": collection, "table": mapping.table_id},
            channels=[collection, mapping.role],
            actor_id=mapping.role_id,
        )
        try:
            map_to_aggregator(collection, mapping, aggregator)
            aggregate_model(collection, aggregator)
            index_aggregator(collection, aggregator, sync=sync)
            mapping.set_status(status=Status.SUCCESS)
            db.session.commit()
        except Exception as exc:
            # Keep the traceback in the logs; the mapping itself only
            # stores the stringified error.
            log.exception("Failed to load mapping: %s", mapping_id)
            mapping.set_status(status=Status.FAILED, error=str(exc))
            db.session.commit()
            # Remove whatever the failed run wrote to the aggregator.
            aggregator.delete(origin=origin)
    finally:
        aggregator.close()
    return None
def delete_entities(collection_id, deleted_at=None):
    """Delete a collection's entities and its cross-referencing matches.

    :param collection_id: ID of the collection whose data is removed.
    :param deleted_at: timestamp passed to the ``delete_by_collection``
        calls; the current UTC time is used when it is not given.
    """
    when = deleted_at if deleted_at else datetime.utcnow()

    log.info("Deleting entities...")
    Entity.delete_by_collection(collection_id, deleted_at=when)
    index.delete_entities(collection_id)

    log.info("Deleting cross-referencing matches...")
    Match.delete_by_collection(collection_id, deleted_at=when)
def reset_collection(collection, sync=False):
    """Reset the collection by deleting any derived data.

    Drops the aggregator, cancels the queue, then removes notifications,
    indexed entities and xref data before refreshing the collection.

    :param collection: the collection to reset.
    :param sync: forwarded to every call below that accepts it.
    """
    collection_id = collection.id
    sync_opts = {"sync": sync}

    drop_aggregator(collection)
    cancel_queue(collection)
    flush_notifications(collection, **sync_opts)
    index.delete_entities(collection_id, **sync_opts)
    xref_index.delete_xref(collection, **sync_opts)
    refresh_collection(collection_id, **sync_opts)
def flush_mapping(stage, collection, mapping_id, sync=False):
    """Delete entities loaded by a mapping.

    ``stage`` and ``sync`` are accepted but not used here: the entity
    deletion always runs with ``sync=True``.
    """
    collection_id = collection.id
    log.debug("Flushing entities for mapping: %s", mapping_id)

    delete_entities(collection_id, mapping_id=mapping_id, sync=True)
    drop_aggregator(collection)

    # Touch the collection and commit before updating it.
    collection.touch()
    db.session.commit()
    update_collection(collection)
def reset_collection(collection, sync=False):
    """Reset the collection by deleting any derived data.

    :param collection: the collection to reset.
    :param sync: forwarded to the entity index deletion only.
    """
    collection_id = collection.id

    drop_aggregator(collection)
    Match.delete_by_collection(collection_id)
    cancel_queue(collection)
    index.delete_entities(collection_id, sync=sync)
    refresh_collection(collection_id)

    # The commit is the last step, after the collection has been refreshed.
    db.session.commit()
def flush_mapping(collection, mapping_id, sync=True):
    """Delete entities loaded by a mapping.

    Removes the mapping's origin from the aggregator and from the entity
    index, then updates the collection.

    :param collection: the collection the mapping belongs to.
    :param mapping_id: ID of the mapping whose entities are flushed.
    :param sync: forwarded to ``delete_entities`` and ``update_collection``.
    """
    log.debug("Flushing entities for mapping: %s", mapping_id)
    origin = mapping_origin(mapping_id)
    aggregator = get_aggregator(collection)
    try:
        aggregator.delete(origin=origin)
    finally:
        # Release the aggregator even if the delete fails.
        aggregator.close()
    delete_entities(collection.id, origin=origin, sync=sync)
    update_collection(collection, sync=sync)
def xref_collection(stage, collection):
    """Cross-reference all the entities and documents in a collection.

    ``stage`` is accepted but not used by this function.
    """
    log.info("[%s] Clearing previous xref state....", collection)
    delete_xref(collection, sync=True)
    delete_entities(collection.id, origin=ORIGIN, sync=True)

    # Index matches for the entity query first, then for the mention query.
    for build_query in (_query_entities, _query_mentions):
        index_matches(collection, build_query(collection))

    log.info("[%s] Xref done, re-indexing to reify mentions...", collection)
    reindex_collection(collection, sync=False)
def flush_mapping(stage, collection, mapping_id, sync=True):
    """Delete entities loaded by a mapping.

    Removes the mapping's origin from the aggregator and from the entity
    index, touches the collection, commits and updates the collection.

    :param stage: accepted but not used by this function.
    :param collection: the collection the mapping belongs to.
    :param mapping_id: ID of the mapping whose entities are flushed.
    :param sync: forwarded to ``delete_entities`` and ``update_collection``.
    """
    log.debug("Flushing entities for mapping: %s", mapping_id)
    origin = mapping_origin(mapping_id)
    aggregator = get_aggregator(collection)
    try:
        aggregator.delete(origin=origin)
    finally:
        # Close the aggregator even if the delete raises.
        aggregator.close()
    delete_entities(collection.id, origin=origin, sync=sync)
    collection.touch()
    db.session.commit()
    update_collection(collection, sync=sync)
def delete_collection(collection, sync=False):
    """Delete a collection together with its entities, matches and permissions.

    :param collection: the collection to delete.
    :param sync: forwarded to ``index.delete_collection`` only; the entity
        index deletion always runs with ``sync=False``.
    """
    flush_notifications(collection)
    drop_aggregator(collection)

    # Reuse an existing deletion timestamp if the collection already has one.
    timestamp = collection.deleted_at
    if not timestamp:
        timestamp = datetime.utcnow()

    for model in (Entity, Match, Permission):
        model.delete_by_collection(collection.id, deleted_at=timestamp)
    collection.delete(deleted_at=timestamp)
    db.session.commit()

    # Index cleanup and cache refresh run after the database commit.
    index.delete_collection(collection.id, sync=sync)
    index.delete_entities(collection.id, sync=False)
    refresh_collection(collection.id)
    Authz.flush()
def reindex_collection(collection, sync=False, flush=False):
    """Re-index all entities from the model, mappings and aggregator cache.

    :param collection: the collection to re-index.
    :param sync: forwarded to ``index_aggregator``.
    :param flush: if true, the collection's indexed entities are deleted
        (synchronously) before anything is re-indexed.
    """
    from aleph.logic.mapping import map_to_aggregator

    if flush:
        log.debug("[%s] Flushing...", collection)
        index.delete_entities(collection.id, sync=True)
    aggregator = get_aggregator(collection)
    try:
        for mapping in collection.mappings:
            try:
                map_to_aggregator(collection, mapping, aggregator)
            except Exception as ex:
                # More or less ignore broken models.
                # Logger.warn is a deprecated alias; use warning().
                log.warning("Failed mapping [%s]: %s", mapping.id, ex)
        aggregate_model(collection, aggregator)
        index_aggregator(collection, aggregator, sync=sync)
    finally:
        # Release the aggregator once indexing is done or has failed.
        aggregator.close()
    compute_collection(collection, sync=True)
def delete_collection_content(collection_id):
    """Delete the collection with the given ID and its associated objects.

    Logs an error and returns without changes when no such collection
    exists.
    """
    # Deleting a collection affects many associated objects and requires
    # checks, so this is done manually and in detail here.
    collection = (
        db.session.query(Collection)
        .filter(Collection.id == collection_id)
        .first()
    )
    if collection is None:
        log.error("No collection with ID: %r", collection_id)
        return

    log.info("Deleting collection [%r]: %r", collection.id, collection.label)

    # Reuse an existing deletion timestamp if the collection already has one.
    timestamp = collection.deleted_at
    if not timestamp:
        timestamp = datetime.utcnow()

    for model in (Entity, Match, Permission):
        model.delete_by_collection(collection_id, deleted_at=timestamp)

    index.delete_collection(collection_id)
    index.delete_entities(collection_id)

    collection.delete(deleted_at=timestamp)
    db.session.commit()
def delete_collection(collection, keep_metadata=False, sync=False):
    """Delete a collection's data and, by default, the collection itself.

    Cancels the collection's queue, drops its aggregator, removes
    notifications, indexed entities and xref data, then deletes the related
    database rows and commits.

    :param collection: the collection to delete.
    :param keep_metadata: if true, permissions, the collection record and
        its index entry are kept, and ``Authz.flush()`` is not called.
    :param sync: forwarded to the notification, entity index and xref
        deletion calls.
    """
    cancel_queue(collection)
    aggregator = get_aggregator(collection)
    try:
        aggregator.drop()
    finally:
        # Release the aggregator even if dropping it fails.
        aggregator.close()
    flush_notifications(collection, sync=sync)
    index.delete_entities(collection.id, sync=sync)
    xref_index.delete_xref(collection, sync=sync)
    # Reuse an existing deletion timestamp if the collection already has one.
    deleted_at = collection.deleted_at or datetime.utcnow()
    Mapping.delete_by_collection(collection.id)
    EntitySet.delete_by_collection(collection.id, deleted_at)
    Entity.delete_by_collection(collection.id)
    Document.delete_by_collection(collection.id)
    if not keep_metadata:
        Permission.delete_by_collection(collection.id)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()
    if not keep_metadata:
        index.delete_collection(collection.id, sync=True)
        Authz.flush()
    refresh_collection(collection.id)
def delete_bulk_entities(collection_id, deleted_at=None):
    """Delete a collection's bulk entities from the index, and its matches.

    :param collection_id: ID of the collection whose data is removed.
    :param deleted_at: timestamp passed to ``Match.delete_by_collection``;
        the current UTC time is used when it is not given.
    """
    when = deleted_at if deleted_at else datetime.utcnow()

    log.info("Deleting entities...")
    index.delete_entities(collection_id, bulk_only=True)
    Match.delete_by_collection(collection_id, deleted_at=when)