def apply_plugin():
    """Plugin entry point: switch the crawl into simulation mode.

    Every plugin module is expected to define this function. It does
    three things, of which only the first is mandatory for a plugin:

    1. Register the required hook functions.
    2. Fetch the config object and override the settings it needs.
    3. Print an informational message.
    """
    config = objects.config

    # Simulation flags on the shared configuration object.
    config.simulate = True
    config.localise = 0

    # Swap in a dummy save function that does not really write the
    # mirrored files.
    hooks.register_plugin_function('connector:save_url_plugin', save_url)

    # Post-callback giving access to the downloaded data once the
    # fetcher's process_url step has been called.
    hooks.register_post_callback_method(
        'crawler:fetcher_process_url_callback',
        process_url,
    )

    # No files are saved, so page caching is switched off ...
    config.pagecache = 0
    # ... and so is URL header dumping.
    config.urlheaders = 0

    logconsole(
        'Simulation mode turned on. Crawl will be simulated and no files will be saved.'
    )
def apply_plugin():
    """Activate this plugin (required entry point of every plugin module).

    Steps performed, in order:

    * override the configuration settings needed for a simulated crawl;
    * register the hook functions (the only mandatory step for a plugin);
    * log a message telling the user that simulation mode is active.
    """
    settings = objects.config
    settings.simulate = True
    settings.localise = 0

    # save_url is a dummy that does not really write the mirrored files.
    save_hook = 'connector:save_url_plugin'
    hooks.register_plugin_function(save_hook, save_url)

    # process_url is hooked in as a post-callback so it can get at the
    # downloaded data after the fetcher's process_url has been called.
    fetch_callback = 'crawler:fetcher_process_url_callback'
    hooks.register_post_callback_method(fetch_callback, process_url)

    # Nothing is written to disk, so neither the page cache nor the
    # URL header dump is needed.
    settings.pagecache = 0
    settings.urlheaders = 0

    message = 'Simulation mode turned on. Crawl will be simulated and no files will be saved.'
    logconsole(message)
def apply_plugin():
    """Entry point that every plugin module must provide.

    A plugin's ``apply_plugin`` is expected to:

    1. register the required hook function (mandatory);
    2. get the config object and set/override settings (optional);
    3. print informational messages (optional).

    This one turns on simulation mode.
    """
    conf = objects.config

    conf.simulate = True
    conf.localise = 0

    # Route the connector's save-url plugin hook to save_url.
    hooks.register_plugin_function('connector:save_url_plugin', save_url)

    # No files are saved, so caching and header dumping are disabled.
    conf.pagecache = 0
    conf.urlheaders = 0

    # The simulator never saves files, so data has to be kept in memory.
    conf.datamode = CONNECTOR_DATA_MODE_INMEM

    logconsole(
        'Simulation mode turned on. Crawl will be simulated and no files will be saved.'
    )