def test_simple_items(self):
    """Run four items through a single-task pipeline and validate them.

    The items come from ``self._new_items(4)`` and are verified with
    ``self._check_item_values`` once ``pipeline.process()`` has finished.
    """
    work_items = self._new_items(4)
    source = MySource(work_items)
    tasks = [MyItemTask()]
    pipeline = Pipeline(source, tasks)

    yield from pipeline.process()

    self._check_item_values(work_items)
def test_pipeline_skipping(self):
    """Stop the application from the first pipeline's task callback.

    Three pipelines are chained in a series and only the second one is
    explicitly marked ``skippable``.  The first task's callback calls
    ``app.stop()``.  After the run the test asserts that the first and
    second sources still hold values and that the third source does not.
    """
    first_source = MyItemSource([1, 2, 3])
    skipped_source = MyItemSource([4, 5, 6])
    last_source = MyItemSource([7, 8, 9])

    stopping_task = MyItemTask()
    first_pipeline = Pipeline(first_source, [stopping_task])
    skipped_pipeline = Pipeline(skipped_source, [MyItemTask()])
    last_pipeline = Pipeline(last_source, [MyItemTask()])
    skipped_pipeline.skippable = True

    series = PipelineSeries(
        [first_pipeline, skipped_pipeline, last_pipeline]
    )
    app = Application(series)

    def stop_application(work_item):
        # The work item itself is irrelevant; any callback invocation
        # requests the application to stop.
        app.stop()

    stopping_task.callback = stop_application

    yield from app.run()

    self.assertTrue(first_source.values, 'unprocessed')
    self.assertTrue(skipped_source.values, 'skipped')
    self.assertFalse(last_source.values, 'processed')
def test_concurrency_under(self):
    """Process 100 items with the pipeline concurrency set to 2.

    After processing, the item values are checked and ``task.peak_work``
    is expected to equal 2.
    """
    work_items = self._new_items(100)
    queue = ItemQueue()
    item_task = MyItemTask()
    source = MySource(work_items)

    pipeline = Pipeline(source, [item_task], queue)
    pipeline.concurrency = 2

    yield from pipeline.process()

    self._check_item_values(work_items)
    self.assertEqual(2, item_task.peak_work)
def _build_pipelines(self) -> PipelineSeries:
    """Assemble the application's pipelines into one series.

    Five pipelines are created around a shared ``AppSession`` and handed
    to ``self._factory.new('PipelineSeries', ...)`` in this order:
    start, download, download-stop, conversion, stop.  The download-stop
    and conversion pipelines are marked ``skippable``, and the download
    pipeline is added to the series' ``concurrency_pipelines``.

    Returns:
        The series object produced by the factory.
    """
    app_session = AppSession(self._factory, self._args, self.get_stderr())

    # Each source is created before its task list so that objects are
    # constructed in the same order as a single inline Pipeline(...) call.
    start_source = AppSource(app_session)
    start_tasks = [
        LoggingSetupTask(),
        DatabaseSetupTask(),
        ParserSetupTask(),
        WARCVisitsTask(),
        SSLContextTask(),
        ResmonSetupTask(),
        StatsStartTask(),
        URLFiltersSetupTask(),
        NetworkSetupTask(),
        ClientSetupTask(),
        WARCRecorderSetupTask(),
        FileWriterSetupTask(),
        ProcessorSetupTask(),
        ProxyServerSetupTask(),
        CoprocessorSetupTask(),
        LinkConversionSetupTask(),
        PluginSetupTask(),
        InputURLTask(),
        URLFiltersPostURLImportSetupTask(),
    ]
    app_start_pipeline = Pipeline(start_source, start_tasks)

    url_item_source = URLItemSource(app_session)
    download_tasks = [
        ProcessTask(),
        ResmonSleepTask(),
        BackgroundAsyncTask(),
        CheckQuotaTask(),
    ]
    download_pipeline = Pipeline(url_item_source, download_tasks)

    download_stop_source = AppSource(app_session)
    download_stop_tasks = [StatsStopTask()]
    download_stop_pipeline = Pipeline(
        download_stop_source, download_stop_tasks
    )
    download_stop_pipeline.skippable = True

    queued_file_source = QueuedFileSource(app_session)
    conversion_tasks = [LinkConversionTask()]
    conversion_pipeline = Pipeline(queued_file_source, conversion_tasks)
    conversion_pipeline.skippable = True

    stop_source = AppSource(app_session)
    stop_tasks = [
        BackgroundAsyncCleanupTask(),
        AppStopTask(),
        WARCRecorderTeardownTask(),
        CookieJarTeardownTask(),
        LoggingShutdownTask(),
    ]
    app_stop_pipeline = Pipeline(stop_source, stop_tasks)

    ordered_pipelines = (
        app_start_pipeline,
        download_pipeline,
        download_stop_pipeline,
        conversion_pipeline,
        app_stop_pipeline,
    )
    pipeline_series = self._factory.new('PipelineSeries', ordered_pipelines)
    pipeline_series.concurrency_pipelines.add(download_pipeline)

    return pipeline_series
def test_simple(self):
    """Run a two-pipeline application and expect an exit code of 0."""
    first_source = MyItemSource([1, 2, 3])
    second_source = MyItemSource([4, 5, 6])

    first_pipeline = Pipeline(first_source, [MyItemTask()])
    second_pipeline = Pipeline(second_source, [MyItemTask()])

    series = PipelineSeries([first_pipeline, second_pipeline])
    app = Application(series)

    exit_code = yield from app.run()

    self.assertEqual(0, exit_code)
def test_stopping(self):
    """Stop the pipeline part-way and check the last item is untouched.

    The task callback calls ``pipeline.stop()`` when ``task.item_count``
    equals 5; afterwards the final item's ``processed_value`` is expected
    to be ``None``.
    """
    work_items = self._new_items(10)
    item_task = MyItemTask()
    source = MySource(work_items)
    pipeline = Pipeline(source, [item_task])

    def stop_at_fifth_item():
        if item_task.item_count != 5:
            return
        pipeline.stop()

    item_task.callback = stop_at_fifth_item

    yield from pipeline.process()

    last_item = work_items[-1]
    self.assertIsNone(last_item.processed_value)
def test_concurrency_step_up(self):
    """Raise the pipeline concurrency to 10 while it is processing.

    The task callback sets ``pipeline.concurrency = 10`` when
    ``task.item_count`` equals 20.  After all 100 items are processed the
    item values are checked and ``task.peak_work`` is expected to be 10.
    """
    work_items = self._new_items(100)
    item_task = MyItemTask()
    source = MySource(work_items)
    pipeline = Pipeline(source, [item_task], ItemQueue())

    def raise_concurrency():
        if item_task.item_count != 20:
            return
        _logger.debug('Set concurrency 10')
        pipeline.concurrency = 10

    item_task.callback = raise_concurrency

    yield from pipeline.process()

    self._check_item_values(work_items)
    self.assertEqual(10, item_task.peak_work)
def test_pipeline_series(self):
    """Check how a series' concurrency setting reaches its pipelines.

    Two pipelines share one task and one item queue; only the second is
    added to ``series.concurrency_pipelines``.  The test asserts that the
    series starts at concurrency 1 and that, after setting it to 2, the
    first pipeline reports 1 while the second reports 2.
    """
    work_items = self._new_items(100)
    shared_queue = ItemQueue()
    shared_task = MyItemTask()

    first = Pipeline(MySource(work_items), [shared_task], shared_queue)
    second = Pipeline(MySource(work_items), [shared_task], shared_queue)

    series = PipelineSeries((first, second))
    series.concurrency_pipelines.add(second)

    # Value before any explicit assignment.
    self.assertEqual(1, series.concurrency)

    series.concurrency = 2

    self.assertEqual(2, series.concurrency)
    self.assertEqual(1, first.concurrency)
    self.assertEqual(2, second.concurrency)
def test_concurrency_zero(self):
    """Drop the concurrency to 0 mid-run, then restore it to 10.

    Starting at concurrency 5, the task callback sets the pipeline's
    concurrency to 0 when ``task.item_count`` equals 10 and schedules a
    timer on the event loop that sets it to 10 half a second later.
    After processing, the item values are checked and ``task.peak_work``
    is expected to be 10.
    """
    work_items = self._new_items(100)
    item_task = MyItemTask()
    source = MySource(work_items)
    pipeline = Pipeline(source, [item_task], ItemQueue())
    pipeline.concurrency = 5

    def pause_then_resume():
        if item_task.item_count != 10:
            return

        _logger.debug('Set concurrency to 0')
        pipeline.concurrency = 0

        def resume():
            _logger.debug('Set concurrency to 10')
            pipeline.concurrency = 10

        # Schedule the resume only once, at the moment of pausing.
        loop = asyncio.get_event_loop()
        loop.call_later(0.5, resume)

    item_task.callback = pause_then_resume

    yield from pipeline.process()

    self._check_item_values(work_items)
    self.assertEqual(10, item_task.peak_work)
def test_exit_codes(self):
    """Check each mapped error class against its expected exit code.

    For every ``(error class, exit code)`` pair in
    ``Application.ERROR_CODE_MAP`` a sub-test builds a one-pipeline
    application whose task callback raises that error class, then
    compares the value returned by ``app.run()`` with the mapped code.
    """
    code_map = Application.ERROR_CODE_MAP

    for error_class, expected_exit_code in code_map.items():
        with self.subTest(error_class):
            source = MyItemSource([1, 2, 3])

            def raise_error(work_item):
                raise error_class(work_item)

            failing_task = MyItemTask(callback=raise_error)
            pipeline = Pipeline(source, [failing_task])
            series = PipelineSeries([pipeline])
            app = Application(series)

            exit_code = yield from app.run()

            self.assertEqual(expected_exit_code, exit_code)
def test_item_task_error(self):
    """Expect ``MyItemTaskError`` from processing with a failing task.

    The task is created with ``test_error=True`` and the error is
    expected to surface from ``pipeline.process()``.
    """
    work_items = self._new_items(4)
    source = MySource(work_items)
    failing_task = MyItemTask(test_error=True)
    pipeline = Pipeline(source, [failing_task])

    with self.assertRaises(MyItemTaskError):
        yield from pipeline.process()
def _build_pipelines(self) -> PipelineSeries:
    """Create the ordered pipeline series for the application.

    All pipelines share one ``AppSession``.  The stages, in order, are:
    application start, download, download stop, link conversion and
    application stop.  The download-stop and conversion stages are
    flagged ``skippable``; the download stage is added to the series'
    ``concurrency_pipelines``.

    Returns:
        The object returned by ``self._factory.new('PipelineSeries', ...)``.
    """
    session = AppSession(self._factory, self._args, self.get_stderr())

    # Stage 1: application start-up.
    start_stage = Pipeline(AppSource(session), [
        LoggingSetupTask(),
        DatabaseSetupTask(),
        ParserSetupTask(),
        WARCVisitsTask(),
        SSLContextTask(),
        ResmonSetupTask(),
        StatsStartTask(),
        URLFiltersSetupTask(),
        NetworkSetupTask(),
        ClientSetupTask(),
        WARCRecorderSetupTask(),
        FileWriterSetupTask(),
        ProcessorSetupTask(),
        ProxyServerSetupTask(),
        CoprocessorSetupTask(),
        LinkConversionSetupTask(),
        PluginSetupTask(),
        InputURLTask(),
        URLFiltersPostURLImportSetupTask(),
    ])

    # Stage 2: download.
    download_stage = Pipeline(URLItemSource(session), [
        ProcessTask(),
        ResmonSleepTask(),
        BackgroundAsyncTask(),
        CheckQuotaTask(),
    ])

    # Stage 3: download stop.
    download_stop_stage = Pipeline(AppSource(session), [StatsStopTask()])
    download_stop_stage.skippable = True

    # Stage 4: link conversion.
    conversion_stage = Pipeline(
        QueuedFileSource(session), [LinkConversionTask()]
    )
    conversion_stage.skippable = True

    # Stage 5: application shutdown.
    stop_stage = Pipeline(AppSource(session), [
        BackgroundAsyncCleanupTask(),
        AppStopTask(),
        WARCRecorderTeardownTask(),
        CookieJarTeardownTask(),
        LoggingShutdownTask(),
    ])

    stages = (
        start_stage,
        download_stage,
        download_stop_stage,
        conversion_stage,
        stop_stage,
    )
    series = self._factory.new('PipelineSeries', stages)
    series.concurrency_pipelines.add(download_stage)
    return series