Exemplo n.º 1
0
def apply_plugin():
    """Entry point that every plugin module has to provide.

    A plugin's apply step normally does three things:

    1. registers its hook function(s)            (required)
    2. overrides settings on the config object   (optional)
    3. prints an informational message           (optional)

    This plugin switches on simulation mode: save_url is registered
    under 'connector:save_url_plugin', process_url is registered as a
    post-callback on 'crawler:fetcher_process_url_callback', and the
    settings related to writing files are switched off.
    """
    config = objects.config

    config.simulate = True
    config.localise = 0

    # save_url is a dummy that does not really write the mirrored files.
    hooks.register_plugin_function(
        'connector:save_url_plugin',
        save_url,
    )

    # The post-callback gives access to the downloaded data once
    # process_url has been called.
    hooks.register_post_callback_method(
        'crawler:fetcher_process_url_callback',
        process_url,
    )

    # No files are saved, so caching is switched off ...
    config.pagecache = 0
    # ... and so is header dumping.
    config.urlheaders = 0

    notice = 'Simulation mode turned on. Crawl will be simulated and no files will be saved.'
    logconsole(notice)
def apply_plugin():
    """Plugin entry point; all plugin modules need to define it.

    Expected steps of an apply function:

    * register the required hook function (mandatory);
    * fetch the config object and set/override settings (optional);
    * print informational messages (optional).

    Here the crawl is put into simulation mode, so nothing is written
    to disk.
    """
    conf = objects.config

    # Simulation on, localisation off.
    conf.simulate = True
    conf.localise = 0

    # Dummy save function: it does not really write the mirrored files.
    hooks.register_plugin_function('connector:save_url_plugin',
                                   save_url)

    # Post-callback used to reach the downloaded data after process_url
    # has been called.
    hooks.register_post_callback_method(
        'crawler:fetcher_process_url_callback', process_url)

    # Since no files are saved, neither caching nor header dumping is
    # wanted.
    conf.pagecache = 0
    conf.urlheaders = 0

    status_message = 'Simulation mode turned on. Crawl will be simulated and no files will be saved.'
    logconsole(status_message)
Exemplo n.º 3
0
def apply_plugin():
    """Apply the plugin - overrideable method.

    An apply function is expected to:

    1. register the required hook/plugin function (mandatory);
    2. get the config object and set/override settings (optional);
    3. print any informational messages (optional).

    This one registers create_index as a post-callback on
    'datamgr:post_download_setup_callback' and switches off the
    session-saver, localising, image downloading and caching settings.
    """
    settings = objects.config

    hooks.register_post_callback_method(
        'datamgr:post_download_setup_callback',
        create_index,
    )

    # NOTE: console logging and interrupt handling are left untouched;
    # the overrides for them (logger.disableConsoleLogging() and
    # cfg.ignoreinterrupts = True) are intentionally not applied.

    settings.savesessions = False   # session-saver feature off
    settings.localise = 0           # no need for localising
    settings.images = 0             # image downloading off
    settings.pagecache = 0          # caching off
Exemplo n.º 4
0
def apply_plugin():
    """Apply the plugin - overrideable method.

    Steps an apply function is expected to perform:

    * register the required hook/plugin function (the only mandatory
      step);
    * get the config object and set/override any required settings;
    * print any informational messages.

    The hook registered here is create_index, attached as a
    post-callback to 'datamgr:post_download_setup_callback'.
    """
    conf = objects.config

    hooks.register_post_callback_method(
        'datamgr:post_download_setup_callback', create_index)

    # Settings this plugin switches off.
    # (Disabling console logging and ignoring interrupts were considered
    # but are not applied.)

    # Session-saver feature
    conf.savesessions = False
    # Localising is not needed
    conf.localise = 0
    # Image downloading
    conf.images = 0
    # Caching
    conf.pagecache = 0
Exemplo n.º 5
0
def apply_plugin():
    """Apply the plugin - overrideable method.

    An apply function is expected to perform the following steps:

    1. Register the required hook/plugin function (required).
    2. Get the config object and set/override any required settings
       (optional).
    3. Print any informational messages (optional).

    This plugin registers process_url as a post-callback on
    'crawler:fetcher_process_url_callback' and then overrides a set of
    crawl settings on the config object.
    """
    cfg = objects.config

    # Makes sense to activate the callback only if swish-integration
    # is enabled.
    hooks.register_post_callback_method('crawler:fetcher_process_url_callback',
                                        process_url)

    # Turn off caching, since no files are saved
    cfg.pagecache = 0
    # Console logging is left enabled: the disableConsoleLogging() call
    # was commented out, so the logger object is no longer fetched here.
    # Turn off session-saver feature
    cfg.savesessions = False
    # Interrupt handling is left at its configured value
    # (the cfg.ignoreinterrupts override is not applied).
    # No need for localising
    cfg.localise = 0
    # Turn off image downloading
    cfg.images = 0
    # Increase sleep time
    cfg.sleeptime = 1.0
    # Not applied: redirecting sys.stderr to 'swish-errors.txt' and
    # limiting cfg.maxtrackers to 2.
    cfg.usethreads = 0
Exemplo n.º 6
0
def apply_plugin():
    """Apply the plugin - overrideable method.

    Expected steps of an apply function:

    1. Register the required hook/plugin function (required).
    2. Get the config object and set/override any required settings
       (optional).
    3. Print any informational messages (optional).

    Here process_url is attached as a post-callback to
    'crawler:fetcher_process_url_callback', after which several crawl
    settings are overridden on the config object.
    """
    cfg = objects.config

    # Makes sense to activate the callback only if swish-integration
    # is enabled.
    hooks.register_post_callback_method('crawler:fetcher_process_url_callback',
                                        process_url)

    # Turn off caching, since no files are saved
    cfg.pagecache = 0
    # The logger is not looked up any more: its only use
    # (disableConsoleLogging()) was commented out, so console logging
    # stays on.
    # Turn off session-saver feature
    cfg.savesessions = False
    # cfg.ignoreinterrupts is deliberately not overridden.
    # No need for localising
    cfg.localise = 0
    # Turn off image downloading
    cfg.images = 0
    # Increase sleep time
    cfg.sleeptime = 1.0
    # Also not applied: sending sys.stderr to 'swish-errors.txt' and
    # setting cfg.maxtrackers = 2.
    cfg.usethreads = 0