def load_mapping(collection, mapping_id, sync=False):
    """Flush and reload all entities generated by a mapping."""
    mapping = Mapping.by_id(mapping_id)
    if mapping is None:
        return log.error("Could not find mapping: %s", mapping_id)
    origin = mapping_origin(mapping.id)
    aggregator = get_aggregator(collection)
    aggregator.delete(origin=origin)
    delete_entities(collection.id, origin=origin, sync=True)
    if mapping.disabled:
        return log.info("Mapping is disabled: %s", mapping_id)
    publish(
        Events.LOAD_MAPPING,
        params={"collection": collection, "table": mapping.table_id},
        channels=[collection, mapping.role],
        actor_id=mapping.role_id,
    )
    try:
        map_to_aggregator(collection, mapping, aggregator)
        aggregate_model(collection, aggregator)
        index_aggregator(collection, aggregator, sync=sync)
        mapping.set_status(status=Status.SUCCESS)
        db.session.commit()
    except Exception as exc:
        mapping.set_status(status=Status.FAILED, error=str(exc))
        db.session.commit()
        aggregator.delete(origin=origin)
    finally:
        aggregator.close()

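# Usage sketch (an assumption, not part of the snippet above: the caller and
# payload names are hypothetical). load_mapping() reads like a task handler,
# so a driver would resolve the collection and pass the mapping's row ID:
#
#     collection = Collection.by_id(payload["collection_id"])
#     load_mapping(collection, payload["mapping_id"], sync=True)
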
def test_publish_event(self):
    role = self.create_user()
    email = '*****@*****.**'
    label = 'So public'
    recipient = self.create_user(foreign_id='rolex', email=email)
    update_role(recipient)
    collection = self.create_collection(foreign_id='NoNoNo', label=label)
    event = Events.PUBLISH_COLLECTION
    publish(event, role.id,
            params={'collection': collection},
            channels=[Notification.GLOBAL])
    db.session.commit()
    notifications = Notification.all().all()
    assert 1 == len(notifications), notifications
    not0 = notifications[0]
    assert not0._event == event.name, not0._event
    assert not0.params['collection'] == collection.id, not0.params
    with mail.record_messages() as outbox:
        assert len(outbox) == 0, outbox
        generate_digest()
        assert len(outbox) == 1, outbox
        msg = outbox[0]
        assert email in msg.recipients, msg.recipients
        assert label in msg.html, msg.html

def test_publish_event(self):
    role = self.create_user()
    email = '*****@*****.**'
    label = 'So public'
    recipient = self.create_user(foreign_id='rolex', email=email)
    update_role(recipient)
    collection = self.create_collection(foreign_id='NoNoNo', label=label)
    event = Events.PUBLISH_COLLECTION
    publish(event, role.id,
            params={'collection': collection},
            channels=[GLOBAL])
    db.session.commit()
    result = get_notifications(recipient)
    notifications = result.get('hits', {})
    assert 1 == notifications['total']['value'], notifications
    not0 = notifications['hits'][0]['_source']
    assert not0['event'] == event.name, not0['event']
    assert not0['params']['collection'] == str(collection.id), not0['params']  # noqa
    with mail.record_messages() as outbox:
        assert len(outbox) == 0, outbox
        generate_digest()
        assert len(outbox) == 1, outbox
        msg = outbox[0]
        assert email in msg.recipients, msg.recipients
        assert label in msg.html, msg.html

def create_entityset(collection, data, authz):
    """Create an entity set. This will create or update any entities
    that already exist in the entityset and sign their IDs into the
    collection.
    """
    old_to_new_id_map = {}
    entity_ids = []
    for entity in data.pop("entities", []):
        old_id = entity.get("id")
        new_id = upsert_entity(entity, collection, sync=True)
        old_to_new_id_map[old_id] = new_id
        entity_ids.append(new_id)
    layout = data.get("layout", {})
    data["layout"] = replace_layout_ids(layout, old_to_new_id_map)
    entityset = EntitySet.create(data, collection, authz)
    for entity_id in entity_ids:
        save_entityset_item(entityset, collection, entity_id)
    publish(
        Events.CREATE_ENTITYSET,
        params={"collection": collection, "entityset": entityset},
        channels=[collection, authz.role],
        actor_id=authz.id,
    )
    return entityset

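# Sketch of the `data` payload create_entityset() expects, inferred from the
# code above (field names and values are illustrative, not from the source):
#
#     data = {
#         "label": "Investigation graph",
#         "entities": [{"id": "local-id-1", "schema": "Person"}],
#         "layout": {"vertices": [{"entityId": "local-id-1"}]},
#     }
#
# upsert_entity() returns the collection-signed ID for each entity, and
# replace_layout_ids() rewrites the layout so it references the new IDs.
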
def check_alert(alert_id):
    alert = Alert.by_id(alert_id)
    if alert is None or alert.role is None:
        return
    log.info("Check alert [%s]: %s", alert.id, alert.query)
    authz = Authz.from_role(alert.role)
    query = alert_query(alert, authz)
    index = entities_read_index(schema=Entity.THING)
    result = es.search(index=index, body=query)
    for hit in result.get('hits').get('hits', []):
        entity = unpack_result(hit)
        if entity is None:
            continue
        log.info('Alert [%s]: %s', alert.query, entity.get('name'))
        params = {
            'alert': alert,
            'role': alert.role,
            'entity': entity.get('id')
        }
        publish(Events.MATCH_ALERT, params=params, channels=[alert.role])
        db.session.flush()
    alert.update()
    db.session.commit()
    db.session.close()

def check_alert(alert_id):
    alert = Alert.by_id(alert_id)
    if alert is None or alert.role is None:
        return
    log.info("Check alert [%s]: %s", alert.id, alert.query)
    authz = Authz.from_role(alert.role)
    try:
        query = alert_query(alert, authz)
        index = entities_read_index(schema=Entity.THING)
        result = es.search(index=index, body=query)
    except RequestError as re:
        log.error("Invalid query [%s]: %r", alert.query, re.error)
        alert.delete()
        db.session.commit()
        return
    for hit in result.get("hits").get("hits", []):
        entity = unpack_result(hit)
        if entity is None:
            continue
        log.info("Alert [%s]: %s", alert.query, entity.get("id"))
        params = {
            "alert": alert,
            "role": alert.role,
            "entity": entity.get("id"),
            "collection": entity.get("collection_id"),
        }
        channels = [alert.role]
        # channels.append(channel_tag(collection_id, Collection))
        publish(Events.MATCH_ALERT, params=params, channels=channels)
    alert.update()
    db.session.commit()

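# Usage sketch (assumption: a periodic driver walks all alerts; Alert.all_ids()
# is a guess at the model helper, by analogy with Alert.by_id() above):
#
#     def check_alerts():
#         for alert_id in Alert.all_ids():
#             check_alert(alert_id)
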
def test_publish_event(self):
    role = self.create_user()
    email = "*****@*****.**"
    label = "So public"
    recipient = self.create_user(foreign_id="rolex", email=email)
    update_role(recipient)
    collection = self.create_collection(foreign_id="NoNoNo", label=label)
    event = Events.PUBLISH_COLLECTION
    publish(event, role.id, params={"collection": collection}, channels=[GLOBAL])
    db.session.commit()
    result = get_notifications(recipient)
    notifications = result.get("hits", {})
    assert 1 == notifications["total"]["value"], notifications
    not0 = notifications["hits"][0]["_source"]
    assert not0["event"] == event.name, not0["event"]
    params = not0["params"]
    assert params["collection"] == str(collection.id), params
    with mail.record_messages() as outbox:
        assert len(outbox) == 0, outbox
        generate_digest()
        assert len(outbox) == 1, outbox
        msg = outbox[0]
        assert email in msg.recipients, msg.recipients
        assert label in msg.html, msg.html

def update_permission(role, collection, read, write, editor_id=None):
    """Update a role's permission to access a given collection."""
    pre = Permission.by_collection_role(collection, role)
    post = Permission.grant(collection, role, read, write)
    db.session.commit()
    refresh_role(role)
    if post is None:
        return
    params = {"role": role, "collection": collection}
    if pre is None or not pre.read:
        if role.foreign_id == Role.SYSTEM_GUEST:
            publish(
                Events.PUBLISH_COLLECTION,
                actor_id=editor_id,
                params=params,
                channels=[GLOBAL],
            )
        else:
            publish(
                Events.GRANT_COLLECTION,
                actor_id=editor_id,
                params=params,
                channels=[role],
            )
    return post

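# Usage sketch (hypothetical caller, following the request.authz pattern used
# by the view code later in this section):
#
#     post = update_permission(role, collection, read=True, write=False,
#                              editor_id=request.authz.id)
#
# Granting read access to the SYSTEM_GUEST role is what "publishes" a
# collection, hence PUBLISH_COLLECTION on the GLOBAL channel.
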
def check_alert(alert_id):
    alert = Alert.by_id(alert_id)
    if alert is None or alert.role is None:
        return
    if not alert.role.is_alertable:
        return
    authz = Authz.from_role(alert.role)
    query = alert_query(alert, authz)
    index = entities_read_index(schema=Entity.THING)
    result = es.search(index=index, body=query)
    for hit in result.get('hits').get('hits', []):
        entity = unpack_result(hit)
        if entity is None:
            continue
        log.info('Alert [%s]: %s', alert.query, entity.get('name'))
        params = {
            'alert': alert,
            'role': alert.role,
            'entity': entity
        }
        publish(Events.MATCH_ALERT,
                actor_id=entity.get('uploader_id'),
                params=params)
    alert.update()
    db.session.commit()
    db.session.close()

def complete_export(export_id, file_path):
    export = Export.by_id(export_id)
    file_path = ensure_path(file_path)
    export.file_name = safe_filename(file_path)
    export.file_size = file_path.stat().st_size
    export.content_hash = checksum(file_path)
    try:
        archive.archive_file(file_path,
                             content_hash=export.content_hash,
                             mime_type=export.mime_type)
        export.set_status(status=Status.SUCCESS)
    except Exception:
        log.exception("Failed to upload export: %s", export)
        export.set_status(status=Status.FAILED)
    db.session.commit()
    params = {"export": export}
    role = Role.by_id(export.creator_id)
    log.info("Export [%r] complete: %s", export, export.status)
    publish(
        Events.COMPLETE_EXPORT,
        params=params,
        channels=[role],
    )
    send_export_notification(export)

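# Usage sketch (assumption: complete_export() runs at the end of an export
# task, once the result archive has been written to a local path; `zip_path`
# is hypothetical):
#
#     complete_export(export.id, zip_path)
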
def test_publish_event(self):
    role = self.create_user()
    email = '*****@*****.**'
    label = 'So public'
    recipient = self.create_user(foreign_id='rolex', email=email)
    update_role(recipient)
    collection = self.create_collection(foreign_id='NoNoNo', label=label)
    event = Events.PUBLISH_COLLECTION
    publish(event, role.id,
            params={'collection': collection},
            channels=[Notification.GLOBAL])
    db.session.commit()
    notifications = Notification.all().all()
    assert 1 == len(notifications), notifications
    not0 = notifications[0]
    assert not0._event == event.name, not0._event
    assert not0.params['collection'] == str(collection.id), not0.params
    with mail.record_messages() as outbox:
        assert len(outbox) == 0, outbox
        generate_digest()
        assert len(outbox) == 1, outbox
        msg = outbox[0]
        assert email in msg.recipients, msg.recipients
        assert label in msg.html, msg.html

def create_collection(data, authz, sync=False):
    now = datetime.utcnow()
    collection = Collection.create(data, authz, created_at=now)
    if collection.created_at == now:
        publish(Events.CREATE_COLLECTION,
                params={'collection': collection},
                channels=[collection, authz.role],
                actor_id=authz.id)
    db.session.commit()
    return update_collection(collection, sync=sync)

def load_mapping(stage, collection, mapping_id):
    """Flush and reload all entities generated by a mapping."""
    mapping = Mapping.by_id(mapping_id)
    if mapping is None:
        return log.error("Could not find mapping: %s", mapping_id)
    flush_mapping(stage, collection, mapping_id)
    publish(Events.LOAD_MAPPING,
            params={'collection': collection, 'table': mapping.table_id},
            channels=[collection, mapping.role],
            actor_id=mapping.role_id)
    mapper = make_mapper(collection, mapping)
    aggregator = get_aggregator(collection)
    try:
        writer = aggregator.bulk()
        entities_count = 0
        entity_ids = set()
        for idx, record in enumerate(mapper.source.records, 1):
            for entity in mapper.map(record).values():
                if entity.schema.is_a('Thing'):
                    entity.add('proof', mapping.table_id)
                entity = collection.ns.apply(entity)
                entity_ids.add(entity.id)
                entities_count += 1
                fragment = '%s-%s' % (mapping.id, idx)
                writer.put(entity, fragment=fragment)
            if idx > 0 and idx % 500 == 0:
                payload = {
                    'entity_ids': entity_ids,
                    'mapping_id': mapping.id
                }
                queue_task(collection, OP_INDEX,
                           job_id=stage.job.id,
                           payload=payload)
                entity_ids = set()
                stage.report_finished(500)
                log.info("[%s] Loaded %s records, %s entities...",
                         collection.foreign_id, idx, entities_count)
        writer.flush()
        payload = {
            'entity_ids': entity_ids,
            'mapping_id': mapping.id
        }
        queue_task(collection, OP_INDEX,
                   job_id=stage.job.id,
                   payload=payload)
        mapping.set_status(status=Mapping.SUCCESS)
        log.info("[%s] Mapping done (%s entities)",
                 mapping.id, entities_count)
    except Exception as exc:
        mapping.set_status(status=Mapping.FAILED, error=str(exc))
    finally:
        aggregator.close()

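# Usage sketch (assumption: this stage-based variant is enqueued as a task
# rather than called directly; OP_LOAD_MAPPING is an illustrative constant,
# by analogy with the OP_INDEX tasks it queues above):
#
#     queue_task(collection, OP_LOAD_MAPPING, job_id=job_id,
#                payload={'mapping_id': mapping.id})
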
def create_collection(data, role=None):
    role = role or Role.load_cli_user()
    created_at = datetime.utcnow()
    collection = Collection.create(data, role=role, created_at=created_at)
    if collection.created_at == created_at:
        publish(Events.CREATE_COLLECTION,
                actor_id=role.id,
                params={'collection': collection})
    db.session.commit()
    index.index_collection(collection)
    return collection

def ingest_upload(id):
    collection = get_db_collection(id, request.authz.WRITE)
    meta, foreign_id = _load_metadata(collection)
    parent_id = _load_parent(collection, meta)
    upload_dir = mkdtemp(prefix='aleph.upload.')
    try:
        documents = []
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = os.path.join(upload_dir, path)
            storage.save(path)
            content_hash = checksum(path)
            document = Document.by_keys(collection=collection,
                                        parent_id=parent_id,
                                        foreign_id=foreign_id,
                                        content_hash=content_hash)
            document.update(meta)
            document.uploader_id = request.authz.id
            ingest_document(document, path)
            documents.append(document)
        if not len(request.files):
            # If no files were uploaded, create an empty directory instead.
            # Maybe this should be more explicit, but it seemed like the
            # simplest way of fitting it into the API.
            document = Document.by_keys(collection=collection,
                                        parent_id=parent_id,
                                        foreign_id=foreign_id)
            document.schema = Document.SCHEMA_FOLDER
            document.update(meta)
            document.uploader_id = request.authz.id
            ingest_document(document, None)
            documents.append(document)
    finally:
        shutil.rmtree(upload_dir)
    if collection.casefile:
        for document in documents:
            params = {'document': document, 'collection': collection}
            publish(Events.INGEST_DOCUMENT,
                    actor_id=document.uploader_id,
                    params=params)
    # Update child counts in the index.
    if parent_id is not None:
        index_document_id.apply_async([parent_id], priority=1)
    refresh_index(index=entities_index())
    return jsonify({
        'status': 'ok',
        'documents': [CombinedSchema().dump(d).data for d in documents]
    })

def create_collection(data, role=None, sync=False):
    role = role or Role.load_cli_user()
    created_at = datetime.utcnow()
    collection = Collection.create(data, creator=role, created_at=created_at)
    publish(Events.CREATE_COLLECTION,
            params={'collection': collection},
            actor_id=role.id)
    db.session.commit()
    Authz.flush()
    refresh_collection(collection.id)
    return index.index_collection(collection, sync=sync)

def create_collection(data, role=None, sync=False):
    role = role or Role.load_cli_user()
    created_at = datetime.utcnow()
    collection = Collection.create(data, role=role, created_at=created_at)
    if collection.created_at == created_at:
        publish(Events.CREATE_COLLECTION,
                actor_id=role.id,
                params={'collection': collection})
    db.session.commit()
    Authz.flush()
    refresh_collection(collection.id)
    return index.index_collection(collection, sync=sync)

def _notify(collection, document_id):
    if not collection.casefile:
        return
    channels = [
        channel_tag(document_id, Entity),
        channel_tag(collection),
    ]
    params = {'collection': collection, 'document': document_id}
    publish(Events.INGEST_DOCUMENT,
            params=params,
            channels=channels,
            actor_id=request.authz.id)

def test_publish_event(self):
    event = Events.PUBLISH_COLLECTION
    role = self.create_user()
    collection = self.create_collection(foreign_id='NoNoNo')
    publish(event, role.id, params={'collection': collection})
    db.session.commit()
    notifications = Notification.all().all()
    assert 1 == len(notifications), notifications
    not0 = notifications[0]
    assert not0._event == event.name, not0._event
    assert not0.params['collection'] == collection.id, not0.params

def ingest(document_id, file_path=None, refresh=False):
    """Process a given document by extracting its contents.
    This may include creating or updating child documents."""
    document = Document.by_id(document_id)
    if document is None:
        log.error("Could not find document: %s", document_id)
        return
    # The work path will be used by storagelayer to cache a local copy of
    # data from an S3-based archive, and by ingestors to perform processing
    # and generate intermediary files.
    work_path = mkdtemp(prefix="aleph.ingest.")
    if file_path is None:
        file_path = archive.load_file(document.content_hash,
                                      file_name=document.safe_file_name,
                                      temp_path=work_path)
    try:
        manager = get_manager()
        result = DocumentResult(manager, document, file_path=file_path)
        manager.ingest(file_path, result=result, work_path=work_path)
        document.status = Document.STATUS_SUCCESS
        log.debug('Ingested [%s:%s]: %s',
                  document.id, document.schema, document.name)
        if document.collection.casefile and not refresh:
            params = {
                'collection': document.collection,
                'document': document
            }
            publish(Events.INGEST_DOCUMENT,
                    actor_id=document.uploader_id,
                    params=params)
        db.session.commit()
        process_document(document)
    except Exception:
        db.session.rollback()
        document = Document.by_id(document_id)
        log.exception("Ingest failed [%s]: %s", document.id, document.name)
        document.status = Document.STATUS_FAIL
        db.session.commit()
        process_document(document)
    finally:
        # Removing the temp_path given to storagelayer makes it redundant
        # to also call cleanup on the archive.
        remove_directory(work_path)

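# Usage sketch (assumption: ingest() is dispatched asynchronously, mirroring
# the index_document_id.apply_async() call used elsewhere in this section;
# the priority value is illustrative):
#
#     ingest.apply_async([document.id], priority=5)
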
def complete_export(export_id, file_path=None):
    export = Export.by_id(export_id)
    if file_path:
        export.set_filepath(file_path)
    export.publish()
    db.session.commit()
    params = {"export": export}
    role = Role.by_id(export.creator_id)
    publish(
        Events.COMPLETE_EXPORT,
        params=params,
        channels=[role],
    )
    send_export_notification(export)

def check_alert(alert):
    authz = Authz(role=alert.role)
    query = alert_query(alert, authz)
    found = 0
    for result in scan(es, query=query, index=entities_index()):
        entity = unpack_result(result)
        found += 1
        params = {'alert': alert, 'role': authz.role, 'entity': entity}
        publish(Events.MATCH_ALERT,
                actor_id=entity.get('uploader_id'),
                params=params)
    alert.update()
    log.info('Found %d new results for: %s', found, alert.label)
    db.session.commit()

def check_alert(authz, alert):
    entity = get_entity(alert.entity_id) if alert.entity_id else None
    query = {'q': alert.query_text}
    state = SearchQueryParser(query, authz)
    query = AlertDocumentsQuery(state,
                                entity=entity,
                                since=alert.notified_at)
    results = query.search().get('hits')
    for result in results.get('hits', []):
        document = unpack_result(result)
        params = {'alert': alert, 'role': authz.role, 'entity': document}
        publish(Events.MATCH_ALERT,
                actor_id=document.get('uploader_id'),
                params=params)
    alert.update()
    log.info('Found %d new results for: %s', results['total'], alert.label)
    db.session.commit()

def ingest(document_id, role_id=None):
    """Process a given document by extracting its contents.
    This may include creating or updating child documents."""
    document = Document.by_id(document_id)
    if document is None:
        log.error("Could not find document: %s", document_id)
        return
    get_manager().ingest_document(document, role_id=role_id)

    # is this too often?
    from aleph.logic.collections import update_collection
    update_collection(document.collection)

    from aleph.logic.notifications import publish
    params = {'document': document, 'collection': document.collection}
    publish(Events.INGEST_DOCUMENT, actor_id=role_id, params=params)

def setUp(self):
    super(NotificationsApiTestCase, self).setUp()
    self.rolex = self.create_user(foreign_id='rolex')
    self.admin = self.create_user(foreign_id='admin')
    self.col = self.create_collection(creator=self.admin)
    update_role(self.rolex)
    update_role(self.admin)
    event = Events.PUBLISH_COLLECTION
    publish(event, self.admin.id,
            params={'collection': self.col},
            channels=[Notification.GLOBAL])
    event = Events.GRANT_COLLECTION
    publish(event, self.admin.id,
            params={'collection': self.col, 'role': self.rolex},
            channels=[self.col, self.rolex])
    db.session.commit()

def setUp(self):
    super(NotificationsApiTestCase, self).setUp()
    self.rolex = self.create_user(foreign_id='rolex')
    self.admin = self.create_user(foreign_id='admin')
    self.col = self.create_collection(creator=self.admin)
    update_role(self.rolex)
    update_role(self.admin)
    event = Events.PUBLISH_COLLECTION
    publish(event, self.admin.id,
            params={'collection': self.col},
            channels=[Notification.GLOBAL])
    event = Events.GRANT_COLLECTION
    publish(event, self.admin.id,
            params={'collection': self.col, 'role': self.rolex})
    db.session.commit()

def ingest(document_id, role_id=None):
    """Process a given document by extracting its contents.
    This may include creating or updating child documents."""
    document = Document.by_id(document_id)
    if document is None:
        log.error("Could not find document: %s", document_id)
        return
    get_manager().ingest_document(document, role_id=role_id)
    if document.collection.casefile:
        index_collection(document.collection)
        params = {
            'document': document,
            'collection': document.collection
        }
        publish(Events.INGEST_DOCUMENT, actor_id=role_id, params=params)

def update_permission(role, collection, read, write, editor_id=None):
    """Update a role's permission to access a given collection."""
    pre = Permission.by_collection_role(collection, role)
    post = Permission.grant(collection, role, read, write)
    params = {'role': role, 'collection': collection}
    if (pre is None or not pre.read) and post.read:
        if role.foreign_id == Role.SYSTEM_GUEST:
            publish(Events.PUBLISH_COLLECTION,
                    actor_id=editor_id,
                    params=params,
                    channels=[Notification.GLOBAL])
        else:
            publish(Events.GRANT_COLLECTION,
                    actor_id=editor_id,
                    params=params,
                    channels=[role])
    db.session.commit()
    Authz.flush()
    refresh_role(role)
    return post

def ingest_upload(collection_id):
    collection = get_db_collection(collection_id, request.authz.WRITE)
    job_id = get_session_id()
    sync = get_flag('sync', default=False)
    meta, foreign_id = _load_metadata()
    parent = _load_parent(collection, meta)
    upload_dir = ensure_path(mkdtemp(prefix='aleph.upload.'))
    try:
        content_hash = None
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = upload_dir.joinpath(path)
            storage.save(str(path))
            content_hash = archive.archive_file(path)
        document = Document.save(collection=collection,
                                 parent=parent,
                                 foreign_id=foreign_id,
                                 content_hash=content_hash,
                                 meta=meta,
                                 uploader_id=request.authz.id)
        collection.touch()
        db.session.commit()
        proxy = document.to_proxy()
        if proxy.schema.is_a(Document.SCHEMA_FOLDER) and sync:
            index_proxy(collection, proxy, sync=sync)
        ingest_entity(collection, proxy, job_id=job_id, sync=sync)
        document_id = collection.ns.sign(document.id)
        if collection.casefile:
            publish(Events.INGEST_DOCUMENT,
                    params={
                        'collection': collection,
                        'document': document_id
                    },
                    channels=[collection],
                    actor_id=request.authz.id)
            db.session.commit()
    finally:
        shutil.rmtree(upload_dir)
    return jsonify({'status': 'ok', 'id': document_id}, status=201)

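# Client-side usage sketch (the host, route, and auth header here are all
# assumptions; the actual URL rule lives in the blueprint, not this snippet):
#
#     import requests
#     resp = requests.post(
#         "https://aleph.example.org/api/2/collections/25/ingest",
#         headers={"Authorization": "ApiKey 1234"},  # hypothetical token
#         files={"file": open("report.pdf", "rb")},
#     )
#     document_id = resp.json()["id"]
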
def create_diagram(collection, data, authz):
    """Create a network diagram.
    This will create or update any entities that already exist in the
    diagram and sign their IDs into the collection.
    """
    old_to_new_id_map = {}
    entity_ids = []
    for entity in data.pop('entities', []):
        old_id = entity.get('id')
        new_id = upsert_entity(entity, collection, validate=False, sync=True)
        old_to_new_id_map[old_id] = new_id
        entity_ids.append(new_id)
    data['entities'] = entity_ids
    layout = data.get('layout', {})
    data['layout'] = replace_layout_ids(layout, old_to_new_id_map)
    diagram = Diagram.create(data, collection, authz.id)
    db.session.commit()
    publish(Events.CREATE_DIAGRAM,
            params={
                'collection': collection,
                'diagram': diagram
            },
            channels=[collection, authz.role],
            actor_id=authz.id)
    return diagram

def update_permission(role, collection, read, write, editor_id=None):
    """Update a role's permission to access a given collection."""
    pre = Permission.by_collection_role(collection, role)
    post = Permission.grant(collection, role, read, write)
    params = {'role': role, 'collection': collection}
    if (pre is None or not pre.read) and post.read:
        if role.is_public:
            publish(Events.PUBLISH_COLLECTION,
                    actor_id=editor_id,
                    params=params,
                    channels=[Notification.GLOBAL])
        else:
            publish(Events.GRANT_COLLECTION,
                    actor_id=editor_id,
                    params=params)
    elif pre is not None and pre.read and not post.read:
        publish(Events.REVOKE_COLLECTION,
                actor_id=editor_id,
                params=params)
    db.session.commit()
    Authz.flush()
    return post

def update_permission(role, collection, read, write, editor=None):
    """Update a role's permission to access a given collection."""
    pre = Permission.by_collection_role(collection, role)
    post = Permission.grant(collection, role, read, write)
    params = {'role': role, 'collection': collection}
    granted = pre is None or (pre.read != post.read and post.read)
    revoked = pre is not None and (pre.read != post.read and pre.read)
    if granted:
        if role.is_public:
            publish(Events.PUBLISH_COLLECTION,
                    actor_id=editor.id,
                    params=params,
                    channels=[Notification.GLOBAL])
        else:
            publish(Events.GRANT_COLLECTION,
                    actor_id=editor.id,
                    params=params)
    elif revoked:
        publish(Events.REVOKE_COLLECTION,
                actor_id=editor.id,
                params=params)
        cchannel = channel(collection)
        Subscription.unsubscribe(role=role, channel=cchannel)
    db.session.commit()
    return post

def update_permission(role, collection, read, write, editor_id=None):
    """Update a role's permission to access a given collection."""
    pre = Permission.by_collection_role(collection, role)
    post = Permission.grant(collection, role, read, write)
    params = {'role': role, 'collection': collection}
    if (pre is None or not pre.read) and post.read:
        if role.is_public:
            publish(Events.PUBLISH_COLLECTION,
                    actor_id=editor_id,
                    params=params,
                    channels=[Notification.GLOBAL])
        else:
            publish(Events.GRANT_COLLECTION,
                    actor_id=editor_id,
                    params=params)
    elif pre is not None and pre.read and not post.read:
        publish(Events.REVOKE_COLLECTION,
                actor_id=editor_id,
                params=params)
    db.session.commit()
    Authz.flush()
    refresh_role(role)
    return post