def retry():
    """Re-submit every document whose status is not successful for ingest.

    Selects the ids of all documents whose status differs from
    ``Document.STATUS_SUCCESS``, logs how many there are, and hands each id
    to ``ingest.apply_async`` with ``priority=1``. A progress line is logged
    after every 1000 submissions.
    """
    pending = Document.all_ids().filter(
        Document.status != Document.STATUS_SUCCESS
    )
    log.info("Retry: %s documents", pending.count())

    submitted = 0
    for row in pending.all():
        # Each result row is a one-element tuple holding the document id.
        (document_id,) = row
        ingest.apply_async([document_id], priority=1)
        submitted += 1
        if submitted % 1000 == 0:
            log.info("Process: %s documents...", submitted)
def retry(foreign_id=None):
    """Retry importing documents which were not successfully parsed.

    Args:
        foreign_id: Optional foreign ID of a collection. When given, only
            unsuccessful documents belonging to that collection are
            retried; when ``None``, all unsuccessful documents are retried.

    Every selected document id is handed to ``ingest.apply_async`` with
    ``priority=1``. If ``foreign_id`` is given but no matching collection
    is found, an error is logged and nothing is submitted.
    """
    q = Document.all_ids()
    q = q.filter(Document.status != Document.STATUS_SUCCESS)
    if foreign_id is not None:
        collection = Collection.by_foreign_id(foreign_id)
        # NOTE(review): presumably by_foreign_id returns None for an unknown
        # ID - confirm. Without this guard, `collection.id` below would
        # raise AttributeError instead of reporting the real problem.
        if collection is None:
            log.error("No collection with foreign ID: %r", foreign_id)
            return
        q = q.filter(Document.collection_id == collection.id)
    log.info("Retry: %s documents", q.count())
    for idx, (doc_id, ) in enumerate(q.all(), 1):
        ingest.apply_async([doc_id], priority=1)
        # Periodic progress output for long runs.
        if idx % 1000 == 0:
            log.info("Process: %s documents...", idx)
def process_collection(collection_id):
    """Re-analyze the elements of this collection, documents and entities.

    Args:
        collection_id: Primary key of the collection to re-process.

    Top-level documents (those without a parent) are handed to
    ``ingest.apply_async`` with ``priority=1``; each entity of the
    collection is passed to ``update_entity_full``; finally
    ``update_collection`` is called on the collection itself. If no
    collection with the given ID exists, an error is logged and nothing
    else is done.
    """
    q = db.session.query(Collection).filter(Collection.id == collection_id)
    collection = q.first()
    if collection is None:
        log.error("No collection with ID: %r", collection_id)
        # Stop here: continuing would queue documents for a collection that
        # does not exist and then fail on `collection.id` below.
        return

    # re-process the documents
    q = db.session.query(Document)
    q = q.filter(Document.collection_id == collection.id)
    # `== None` is deliberate: it builds a query expression rather than a
    # Python identity test, hence the noqa.
    q = q.filter(Document.parent_id == None)  # noqa
    for document in q:
        ingest.apply_async([document.id], priority=1)

    # re-process entities
    q = db.session.query(Entity)
    q = q.filter(Entity.collection_id == collection.id)
    for entity in q:
        update_entity_full(entity.id)

    update_collection(collection)