def _build_pipelines(self) -> PipelineSeries:
    """Assemble the series of pipelines for one application run.

    One ``AppSession`` is created from this object's factory, arguments and
    stderr stream and is shared by the source of every pipeline. Five
    pipelines are built and handed to the factory in this order:
    application start, download, download stop, link conversion and
    application stop.

    Returns:
        The ``'PipelineSeries'`` instance produced by the factory, with the
        download pipeline added to its ``concurrency_pipelines``.
    """
    session = AppSession(self._factory, self._args, self.get_stderr())

    # Application start: setup tasks, then URL input.
    start_source = AppSource(session)
    start_tasks = [
        LoggingSetupTask(),
        DatabaseSetupTask(),
        ParserSetupTask(),
        WARCVisitsTask(),
        SSLContextTask(),
        ResmonSetupTask(),
        StatsStartTask(),
        URLFiltersSetupTask(),
        NetworkSetupTask(),
        ClientSetupTask(),
        WARCRecorderSetupTask(),
        FileWriterSetupTask(),
        ProcessorSetupTask(),
        ProxyServerSetupTask(),
        CoprocessorSetupTask(),
        LinkConversionSetupTask(),
        PluginSetupTask(),
        InputURLTask(),
        URLFiltersPostURLImportSetupTask(),
    ]
    start_stage = Pipeline(start_source, start_tasks)

    # Download: fed by URL items rather than by the app session itself.
    download_source = URLItemSource(session)
    download_tasks = [
        ProcessTask(),
        ResmonSleepTask(),
        BackgroundAsyncTask(),
        CheckQuotaTask(),
    ]
    download_stage = Pipeline(download_source, download_tasks)

    # Download stop: a single stats task; flagged as skippable.
    download_stop_source = AppSource(session)
    download_stop_stage = Pipeline(download_stop_source, [StatsStopTask()])
    download_stop_stage.skippable = True

    # Link conversion: fed by queued files; flagged as skippable.
    conversion_source = QueuedFileSource(session)
    conversion_stage = Pipeline(conversion_source, [LinkConversionTask()])
    conversion_stage.skippable = True

    # Application stop: cleanup and teardown tasks.
    stop_source = AppSource(session)
    stop_tasks = [
        BackgroundAsyncCleanupTask(),
        AppStopTask(),
        WARCRecorderTeardownTask(),
        CookieJarTeardownTask(),
        LoggingShutdownTask(),
    ]
    stop_stage = Pipeline(stop_source, stop_tasks)

    stages = (
        start_stage,
        download_stage,
        download_stop_stage,
        conversion_stage,
        stop_stage,
    )
    series = self._factory.new('PipelineSeries', stages)

    # Only the download pipeline is registered in concurrency_pipelines.
    series.concurrency_pipelines.add(download_stage)
    return series
def new_mock_item_session():
    """Create an ``ItemSession`` populated with fixed placeholder values.

    The session wraps an ``AppSession`` whose first and third constructor
    arguments are ``None`` and whose arguments namespace carries only
    ``directory_prefix='/tmp/'``, together with the record returned by
    ``new_mock_url_record()``. Its ``request`` is a fresh ``BaseRequest``
    whose ``url`` is ``'http://example.com'``.

    Returns:
        The configured ``ItemSession``.
    """
    namespace = argparse.Namespace(directory_prefix='/tmp/')
    mock_session = ItemSession(
        AppSession(None, namespace, None),
        new_mock_url_record(),
    )

    # Assign the request first, then set the URL through the session
    # attribute, so the attribute is written and read in the usual order.
    mock_session.request = BaseRequest()
    mock_session.request.url = 'http://example.com'
    return mock_session