def process(self, session: AppSession):
    '''Collect plugins, hand them the session and activate them.

    A ``PluginManager`` is created on ``session.plugin_manager`` using a
    ``PluginLocator`` that is given the packaged ``application/plugins``
    path and, when ``session.args.plugin_script`` is set, that script
    as an extra filename.

    Every collected plugin gets ``app_session`` assigned. Only plugins
    whose ``should_activate()`` returns a true value are activated and
    connected to the hooks.
    '''
    self._debug_log_registered_hooks(session)

    internal_plugin_path = get_package_filename(
        os.path.join('application', 'plugins')
    )
    plugin_locations = [internal_plugin_path]
    plugin_filenames = []

    if session.args.plugin_script:
        plugin_filenames.append(session.args.plugin_script)

    session.plugin_manager = PluginManager(
        plugin_locator=PluginLocator(plugin_locations, plugin_filenames)
    )
    session.plugin_manager.collectPlugins()

    for plugin_info in session.plugin_manager.getAllPlugins():
        # Plugins found under the packaged plugin path are announced at
        # debug level only; anything else is announced at info level.
        is_internal = plugin_info.path.startswith(internal_plugin_path)
        announce = _logger.debug if is_internal else _logger.info
        announce(__(
            _('Found plugin {name} from {filename}.'),
            filename=plugin_info.path, name=plugin_info.name
        ))

        plugin_info.plugin_object.app_session = session

        if not plugin_info.plugin_object.should_activate():
            continue

        session.plugin_manager.activatePluginByName(plugin_info.name)
        self._connect_plugin_hooks(session, plugin_info.plugin_object)
def process(self, session: AppSession):
    '''Build the MITM proxy server when a feature asks for it.

    Nothing is done unless at least one of ``args.phantomjs``,
    ``args.youtube_dl`` or ``args.proxy_server`` is set.

    Otherwise an ``HTTPProxyServer`` is created through the session
    factory, a listening socket is bound with
    ``tornado.netutil.bind_sockets`` and served with
    ``asyncio.start_server``. The server is appended to
    ``session.async_servers`` and the port reported by the bound socket
    is stored in ``session.proxy_server_port``.

    This is a generator-based coroutine (it uses ``yield from``).
    '''
    args = session.args
    proxy_needed = args.phantomjs or args.youtube_dl or args.proxy_server

    if not proxy_needed:
        return

    proxy_server = session.factory.new(
        'HTTPProxyServer', session.factory['HTTPClient']
    )

    # NOTE(review): neither value is used again in this function; the
    # factory calls are kept for whatever they do inside the factory.
    cookie_jar = session.factory.get('CookieJarWrapper')
    proxy_coprocessor = session.factory.new('ProxyCoprocessor', session)

    bound_sockets = tornado.netutil.bind_sockets(
        session.args.proxy_server_port,
        address=session.args.proxy_server_address
    )
    listen_socket = bound_sockets[0]

    # Read the port back from the socket instead of reusing the
    # requested value.
    actual_port = listen_socket.getsockname()[1]

    async_server = yield from asyncio.start_server(
        proxy_server, sock=listen_socket
    )

    session.async_servers.append(async_server)
    session.proxy_server_port = actual_port
def process(self, session: AppSession):
    '''Set up the plugin manager and activate the plugins that want it.

    The packaged ``application/plugins`` path is always given to the
    ``PluginLocator``; ``session.args.plugin_script`` is added as an
    extra filename when it is set. The resulting ``PluginManager`` is
    stored on ``session.plugin_manager``.

    Each collected plugin receives the session as ``app_session`` and is
    activated and connected to the hooks if its ``should_activate()``
    returns a true value.
    '''
    self._debug_log_registered_hooks(session)

    bundled_dir = get_package_filename(
        os.path.join('application', 'plugins'))
    user_script = session.args.plugin_script
    script_files = [user_script] if user_script else []

    manager = PluginManager(
        plugin_locator=PluginLocator([bundled_dir], script_files))
    session.plugin_manager = manager
    manager.collectPlugins()

    for info in manager.getAllPlugins():
        is_bundled = info.path.startswith(bundled_dir)
        found_message = __(
            _('Found plugin {name} from {filename}.'),
            filename=info.path,
            name=info.name)

        # Plugins under the packaged path are logged at debug level,
        # all others at info level.
        if is_bundled:
            _logger.debug(found_message)
        else:
            _logger.info(found_message)

        info.plugin_object.app_session = session

        if info.plugin_object.should_activate():
            manager.activatePluginByName(info.name)
            self._connect_plugin_hooks(session, info.plugin_object)
def _build_pipelines(self) -> PipelineSeries:
    '''Create the application session and the pipelines that run on it.

    Five pipelines are built, in this order: application start,
    download, download stop, link conversion and application stop.
    The download-stop and conversion pipelines are flagged
    ``skippable``. The download pipeline is added to the series'
    ``concurrency_pipelines``.

    Returns:
        The ``PipelineSeries`` created through ``self._factory``.
    '''
    app_session = AppSession(self._factory, self._args, self.get_stderr())

    # Application start: the setup tasks, in the order listed.
    start_source = AppSource(app_session)
    start_tasks = [
        LoggingSetupTask(),
        DatabaseSetupTask(),
        ParserSetupTask(),
        WARCVisitsTask(),
        SSLContextTask(),
        ResmonSetupTask(),
        StatsStartTask(),
        URLFiltersSetupTask(),
        NetworkSetupTask(),
        ClientSetupTask(),
        WARCRecorderSetupTask(),
        FileWriterSetupTask(),
        ProcessorSetupTask(),
        ProxyServerSetupTask(),
        CoprocessorSetupTask(),
        LinkConversionSetupTask(),
        PluginSetupTask(),
        InputURLTask(),
        URLFiltersPostURLImportSetupTask(),
    ]
    start_pipeline = Pipeline(start_source, start_tasks)

    # Download: fed from the URL item source.
    url_source = URLItemSource(app_session)
    download_tasks = [
        ProcessTask(),
        ResmonSleepTask(),
        BackgroundAsyncTask(),
        CheckQuotaTask(),
    ]
    download_pipeline = Pipeline(url_source, download_tasks)

    # Download stop.
    download_stop_pipeline = Pipeline(
        AppSource(app_session), [StatsStopTask()]
    )
    download_stop_pipeline.skippable = True

    # Link conversion: fed from the queued file source.
    queued_source = QueuedFileSource(app_session)
    conversion_pipeline = Pipeline(queued_source, [LinkConversionTask()])
    conversion_pipeline.skippable = True

    # Application stop: the teardown tasks, in the order listed.
    stop_source = AppSource(app_session)
    stop_tasks = [
        BackgroundAsyncCleanupTask(),
        AppStopTask(),
        WARCRecorderTeardownTask(),
        CookieJarTeardownTask(),
        LoggingShutdownTask(),
    ]
    stop_pipeline = Pipeline(stop_source, stop_tasks)

    ordered_pipelines = (
        start_pipeline,
        download_pipeline,
        download_stop_pipeline,
        conversion_pipeline,
        stop_pipeline,
    )
    series = self._factory.new('PipelineSeries', ordered_pipelines)
    series.concurrency_pipelines.add(download_pipeline)

    return series
def new_mock_item_session():
    '''Return an ``ItemSession`` filled with fixed placeholder values.

    The session wraps an ``AppSession`` built with no factory, no stderr
    and arguments containing only ``directory_prefix='/tmp/'``, together
    with the record from ``new_mock_url_record()``. Its request is a
    ``BaseRequest`` whose URL is ``http://example.com``.
    '''
    mock_args = argparse.Namespace(directory_prefix='/tmp/')
    mock_session = ItemSession(
        AppSession(None, mock_args, None),
        new_mock_url_record(),
    )

    mock_session.request = BaseRequest()
    mock_session.request.url = 'http://example.com'

    return mock_session
def process(self, session: AppSession):
    '''Start the MITM proxy server if any proxy-using option is set.

    Returns immediately when none of ``args.phantomjs``,
    ``args.youtube_dl`` and ``args.proxy_server`` is set.

    Otherwise: creates ``HTTPProxyServer`` (given the factory's
    ``HTTPClient``) and ``ProxyCoprocessor`` through the session
    factory, binds a socket on ``proxy_server_port`` /
    ``proxy_server_address``, starts an asyncio server on the first
    bound socket, appends it to ``session.async_servers`` and writes
    the socket's port to ``session.proxy_server_port``.

    The function is a generator-based coroutine because of the
    ``yield from`` on ``asyncio.start_server``.
    '''
    args = session.args
    wants_proxy = (
        args.phantomjs
        or args.youtube_dl
        or args.proxy_server
    )
    if not wants_proxy:
        return

    factory = session.factory
    server_handler = factory.new('HTTPProxyServer', factory['HTTPClient'])

    # NOTE(review): both results are unused below; the calls are kept
    # in case the factory relies on them having been made.
    cookie_jar = factory.get('CookieJarWrapper')
    proxy_coprocessor = factory.new('ProxyCoprocessor', session)

    # Only the first socket returned by bind_sockets is served.
    server_socket = tornado.netutil.bind_sockets(
        session.args.proxy_server_port,
        address=session.args.proxy_server_address,
    )[0]
    bound_port = server_socket.getsockname()[1]

    running_server = yield from asyncio.start_server(
        server_handler, sock=server_socket)

    session.async_servers.append(running_server)
    session.proxy_server_port = bound_port
def _close_file_logger(cls, session: AppSession):
    '''Detach and close the session's file log handler, if there is one.

    The handler stored in ``session.file_log_handler`` is removed from
    the root logger, the attribute is reset to ``None`` and the handler
    is closed. Nothing happens when no handler is set, so calling this
    more than once is harmless.

    Args:
        session: Session whose ``file_log_handler`` is to be released.
    '''
    handler = session.file_log_handler

    if not handler:
        return

    logging.getLogger().removeHandler(handler)
    session.file_log_handler = None

    # removeHandler() only detaches the handler; close() is what
    # releases the resources it holds (e.g. an open log file).
    handler.close()
def process(self, session: AppSession):
    '''Build the SSL context and store it on ``session.ssl_context``.'''
    ssl_context = self._build_ssl_context(session)
    session.ssl_context = ssl_context