Пример #1
0
 def handle(self, task):
     """Ingest the entity described by ``task`` and dispatch follow-up work.

     A ``Manager`` is built from ``task.stage`` and ``task.context``, the
     entity proxy is created from ``task.payload`` and handed to the manager
     for ingest. Whatever the manager reports in ``manager.emitted`` is then
     passed to ``self.dispatch_next``.

     The manager is always closed, even when building the proxy or the
     ingest itself raises; in that case the exception propagates and
     nothing is dispatched.
     """
     manager = Manager(task.stage, task.context)
     try:
         entity = model.get_proxy(task.payload)
         log.debug("Ingest: %r", entity)
         manager.ingest_entity(entity)
     finally:
         # Release the manager on every path, not only on success.
         manager.close()
     self.dispatch_next(task, manager.emitted)
Пример #2
0
 def _ingest(self, dataset, task):
     """Run the ingest for one task and return what the manager emitted.

     The manager is created from ``dataset``, ``task.stage`` and
     ``task.context``; the entity proxy comes from ``task.payload``. The
     manager is closed whether or not ``ingest_entity`` succeeds, and
     ``manager.emitted`` is returned on success.
     """
     mgr = Manager(dataset, task.stage, task.context)
     proxy = model.get_proxy(task.payload)
     log.debug("Ingest: %r", proxy)
     try:
         mgr.ingest_entity(proxy)
     finally:
         # Closing happens on both the success and the failure path.
         mgr.close()
     emitted = mgr.emitted
     return emitted
Пример #3
0
def _ingest_path(db, conn, dataset, path, languages=None):
    """Queue a file, or crawl a directory, for ingest into ``dataset``.

    A job is created for ``dataset`` on ``conn`` and its ``OP_INGEST`` stage
    is used to build a ``Manager``. ``path`` is normalised with
    ``ensure_path``; when that yields ``None`` nothing is queued.

    * For a regular file a ``Document`` entity is created, the file is
      stored through the manager, and the resulting checksum is used both
      as ``contentHash`` and as the seed for the entity id before the
      entity is queued.
    * For a directory the work is delegated to ``DirectoryIngestor.crawl``.

    :param languages: language hints placed in the manager context under
        the ``languages`` key. ``None`` (the default) means an empty list.

    The manager is closed on every path, including when storing, queueing
    or crawling raises.
    """
    # Build a fresh list per call: a mutable default in the signature would
    # be one shared object across all calls.
    context = {'languages': [] if languages is None else languages}
    job = Job.create(conn, dataset)
    stage = job.get_stage(OP_INGEST)
    manager = Manager(db, stage, context)
    try:
        path = ensure_path(path)
        if path is not None:
            if path.is_file():
                entity = manager.make_entity('Document')
                checksum = manager.store(path)
                entity.set('contentHash', checksum)
                entity.make_id(checksum)
                entity.set('fileName', path.name)
                manager.queue_entity(entity)
            if path.is_dir():
                DirectoryIngestor.crawl(manager, path)
    finally:
        manager.close()
Пример #4
0
 def handle_task(cls, queue, payload, context):
     """Ingest the entity in ``payload`` and report the outcome on ``queue``.

     ``queue.task_done()`` is called before any work starts. A ``Manager``
     is then built from ``queue`` and ``context`` and asked to ingest the
     entity proxy created from ``payload``. The manager is closed whether
     or not the ingest succeeds.

     On success ``cls.handle_done(queue)`` is called. On any failure
     ``cls.handle_retry(queue, payload, context)`` and then
     ``cls.handle_done(queue)`` are called; ``KeyboardInterrupt``,
     ``SystemExit`` and ``RuntimeError`` are re-raised afterwards, while
     every other ``Exception`` is logged and swallowed.
     """
     queue.task_done()
     try:
         manager = Manager(queue, context)
         try:
             entity = model.get_proxy(payload)
             log.debug("Ingest: %r", entity)
             manager.ingest_entity(entity)
         finally:
             # Close before the retry handlers run so a failed ingest does
             # not leave the manager open.
             manager.close()
         cls.handle_done(queue)
     except (KeyboardInterrupt, SystemExit, RuntimeError):
         # These must reach the caller after the task has been handed to
         # the retry handler.
         cls.handle_retry(queue, payload, context)
         cls.handle_done(queue)
         raise
     except Exception:
         cls.handle_retry(queue, payload, context)
         cls.handle_done(queue)
         log.exception("Processing failed.")
Пример #5
0
def ingest(path, dataset, languages=None):
    """Queue a set of files for ingest.

    A ``ServiceQueue`` for the ``OP_INGEST`` operation on ``dataset`` is
    opened on the connection returned by ``get_redis()`` and wrapped in a
    ``Manager`` whose context carries ``languages``. ``path`` is normalised
    with ``ensure_path``; when that yields ``None`` nothing is queued.

    * For a regular file a ``Document`` entity is created, the file is
      stored through the manager, and the resulting checksum is used both
      as ``contentHash`` and as the seed for the entity id before the
      entity is queued.
    * For a directory the work is delegated to ``DirectoryIngestor.crawl``.

    The manager is closed on every path, including when storing, queueing
    or crawling raises.
    """
    context = {'languages': languages}
    conn = get_redis()
    queue = ServiceQueue(conn, ServiceQueue.OP_INGEST, dataset)
    manager = Manager(queue, context)
    try:
        path = ensure_path(path)
        if path is not None:
            if path.is_file():
                entity = manager.make_entity('Document')
                checksum = manager.store(path)
                entity.set('contentHash', checksum)
                entity.make_id(checksum)
                entity.set('fileName', path.name)
                manager.queue_entity(entity)
            if path.is_dir():
                DirectoryIngestor.crawl(manager, path)
    finally:
        manager.close()
Пример #6
0
def _ingest_path(db, conn, dataset, path, languages=None):
    """Queue a file, or crawl a directory, for ingest into ``dataset``.

    A job is created for ``dataset`` on ``conn`` and its ``OP_INGEST`` stage
    is used to build a ``Manager``. ``path`` is normalised with
    ``ensure_path``; when that yields ``None`` nothing is queued.

    * For a regular file a ``Document`` entity is created, the file is
      stored through the manager, and the resulting checksum is used both
      as ``contentHash`` and as the seed for the entity id. The entity is
      logged at info level and then queued.
    * For a directory the work is delegated to ``DirectoryIngestor.crawl``.

    :param languages: language hints placed in the manager context under
        the ``languages`` key. ``None`` (the default) means an empty list.

    The manager is closed on every path, including when storing, queueing
    or crawling raises.
    """
    # Build a fresh list per call: a mutable default in the signature would
    # be one shared object across all calls.
    context = {"languages": [] if languages is None else languages}
    job = Job.create(conn, dataset)
    stage = job.get_stage(OP_INGEST)
    manager = Manager(db, stage, context)
    try:
        path = ensure_path(path)
        if path is not None:
            if path.is_file():
                entity = manager.make_entity("Document")
                checksum = manager.store(path)
                entity.set("contentHash", checksum)
                entity.make_id(checksum)
                entity.set("fileName", path.name)
                log.info("Queue: %r", entity.to_dict())
                manager.queue_entity(entity)
            if path.is_dir():
                DirectoryIngestor.crawl(manager, path)
    finally:
        manager.close()