示例#1
0
def test_get_processor(dummy_app):
    """Test processor lookup by name.

    Looking up ``DummyProcessor.id`` must return a ``FilesProcessor``
    instance, and looking up an unknown name must raise
    ``UnsupportedProcessor``.
    """
    processor = current_processors.get_processor(name=DummyProcessor.id)
    assert isinstance(processor, FilesProcessor)

    # The lookup is expected to raise, so there is no result worth binding.
    with pytest.raises(UnsupportedProcessor):
        current_processors.get_processor(name="invalid")
示例#2
0
def test_process(dummy_app, object_version):
    """Run the dummy processor through failing and succeeding scenarios."""
    dummy = current_processors.get_processor(name=DummyProcessor.id)

    # Each scenario lists the object handed to ``process``, the
    # ``can_process`` flag forwarded with it, and the exception type the
    # call is expected to raise (``None`` means it must complete normally).
    scenarios = [
        {
            'name': "Invalid File Case",
            'obj': "file.pdf",
            'exception': FileNotFoundError,
            'can_process': True,
        },
        {
            'name': "Invalid Processor Case",
            'obj': object_version,
            'exception': InvalidProcessor,
            'can_process': False,
        },
        {
            'name': "Valid Processor Case",
            'obj': object_version,
            'exception': None,
            'can_process': True,
        },
    ]

    for scenario in scenarios:
        target = scenario['obj']
        allowed = scenario['can_process']
        expected_error = scenario['exception']

        if expected_error is not None:
            with pytest.raises(expected_error):
                dummy.process(target, can_process=allowed)
        else:
            dummy.process(target, can_process=allowed)
示例#3
0
def test_register_unregister_processor(appctx):
    """Check the register, duplicate-register, unregister, lookup sequence."""
    processor_id = DummyProcessor.id

    # The first registration under this id is expected to succeed.
    current_processors.register_processor(processor_id, DummyProcessor)

    # A second registration under the same id must be refused.
    with pytest.raises(DuplicatedProcessor):
        current_processors.register_processor(processor_id, DummyProcessor)

    # Once unregistered, the id must no longer resolve to a processor.
    current_processors.unregister_processor(processor_id)
    with pytest.raises(UnsupportedProcessor):
        current_processors.get_processor(processor_id)
示例#4
0
def process_file_async(self, bucket_id, key_id):
    """Process a stored file with the Tika processor.

    Fetches the object version for ``bucket_id`` / ``key_id`` and hands it
    to the processor registered under ``TikaProcessor.id()``.

    :param self: Object providing ``retry()``; presumably the bound Celery
        task instance (the task decorator is not visible here -- confirm).
    :param bucket_id: First argument passed to ``ObjectVersion.get``.
    :param key_id: Second argument passed to ``ObjectVersion.get``.
    :raises Reject: With ``requeue=False`` when ``self.retry()`` raises
        ``MaxRetriesExceededError``.
    """
    try:
        current_app.logger.debug(f"Processing file {bucket_id}:{key_id}")

        obj = ObjectVersion.get(bucket_id, key_id)  # type: ObjectVersion
        processor = current_processors.get_processor(
            name=TikaProcessor.id())  # type: TikaProcessor
        processor.process(obj)

        current_app.logger.debug(f"Processed file {bucket_id}:{key_id}")
    except InvalidProcessor:
        # Because we use reject_on_worker_lost, an already processed file
        # may occasionally be requeued; log it and treat it as done.
        current_app.logger.debug(
            f"Requeued file {bucket_id}:{key_id} already processed")
    except Exception:
        # Any other failure asks for a retry. ``retry()`` is deliberately
        # called without ``exc`` so that exhausting the retries surfaces as
        # MaxRetriesExceededError, which is turned into a non-requeued
        # rejection below.
        try:
            raise self.retry()
        except MaxRetriesExceededError as e:
            # Chain explicitly so the rejection records its direct cause.
            raise Reject(str(e), requeue=False) from e
def test_signals(dummy_app, object_version):
    """Verify that processing a file emits ``file_processed`` exactly once."""
    received = []

    def on_file_processed(base_app, processor_id, file, data):
        # Check the payload delivered with the signal before recording it.
        assert processor_id == DummyProcessor.id
        assert object_version == file
        assert data['content'] == 'dummy'

        received.append('file-processed')

    # The listener is a local closure, so it is connected with weak=False.
    file_processed.connect(on_file_processed, weak=False)

    try:
        dummy = current_processors.get_processor(name=DummyProcessor.id)
        dummy.process(object_version=object_version)

        # One processed file must have produced exactly one notification.
        assert received == ['file-processed']
    finally:
        # Always detach the listener so it cannot leak into other tests.
        file_processed.disconnect(on_file_processed)