예제 #1
0
    def _build_pipelines(self) -> PipelineSeries:
        """Construct the application's pipelines and bundle them in a series.

        Five pipelines are created around one shared ``AppSession`` and are
        handed to the factory in this order: app start, download,
        download stop, conversion, app stop.  The download-stop and
        conversion pipelines are flagged ``skippable``, and the download
        pipeline is added to the series' ``concurrency_pipelines``.

        Returns:
            The ``PipelineSeries`` object created by ``self._factory``.
        """
        session = AppSession(self._factory, self._args, self.get_stderr())

        # --- Application start-up -------------------------------------
        startup_source = AppSource(session)
        startup_tasks = [
            LoggingSetupTask(),
            DatabaseSetupTask(),
            ParserSetupTask(),
            WARCVisitsTask(),
            SSLContextTask(),
            ResmonSetupTask(),
            StatsStartTask(),
            URLFiltersSetupTask(),
            NetworkSetupTask(),
            ClientSetupTask(),
            WARCRecorderSetupTask(),
            FileWriterSetupTask(),
            ProcessorSetupTask(),
            ProxyServerSetupTask(),
            CoprocessorSetupTask(),
            LinkConversionSetupTask(),
            PluginSetupTask(),
            InputURLTask(),
            URLFiltersPostURLImportSetupTask(),
        ]
        startup = Pipeline(startup_source, startup_tasks)

        # --- Downloading (one item per URL) ----------------------------
        url_items = URLItemSource(session)
        download_tasks = [
            ProcessTask(),
            ResmonSleepTask(),
            BackgroundAsyncTask(),
            CheckQuotaTask(),
        ]
        download = Pipeline(url_items, download_tasks)

        # --- Download wrap-up -------------------------------------------
        download_stop = Pipeline(AppSource(session), [StatsStopTask()])
        download_stop.skippable = True

        # --- Link conversion over queued files --------------------------
        queued_files = QueuedFileSource(session)
        conversion = Pipeline(queued_files, [LinkConversionTask()])
        conversion.skippable = True

        # --- Application shutdown ---------------------------------------
        shutdown_source = AppSource(session)
        shutdown_tasks = [
            BackgroundAsyncCleanupTask(),
            AppStopTask(),
            WARCRecorderTeardownTask(),
            CookieJarTeardownTask(),
            LoggingShutdownTask(),
        ]
        shutdown = Pipeline(shutdown_source, shutdown_tasks)

        # The tuple order is the order passed to the series.
        ordered_pipelines = (
            startup,
            download,
            download_stop,
            conversion,
            shutdown,
        )
        series = self._factory.new('PipelineSeries', ordered_pipelines)
        series.concurrency_pipelines.add(download)

        return series
예제 #2
0
def new_mock_item_session():
    """Build an ``ItemSession`` for tests with a preset request URL.

    The session wraps an ``AppSession`` created with no factory and no
    stderr, using arguments whose ``directory_prefix`` is ``/tmp/``, plus a
    mock URL record.  Its ``request`` is a ``BaseRequest`` whose ``url`` is
    ``http://example.com``.

    Returns:
        The prepared ``ItemSession``.
    """
    mock_app_session = AppSession(
        None,
        argparse.Namespace(directory_prefix='/tmp/'),
        None,
    )
    session = ItemSession(mock_app_session, new_mock_url_record())

    # Attach the request first, then set its URL through the session.
    session.request = BaseRequest()
    session.request.url = 'http://example.com'

    return session