def activate(self):
    """Initialise per-job state and bring the wpull plugin online.

    Reads ``ITEM_IDENT``, ``REDIS_URL`` and ``LOG_KEY`` from the process
    environment (a missing variable raises ``KeyError``), builds the
    control connection, starts the settings listener, activates
    ``archivebot.wpull.plugin`` with this object's ``app_session``, and
    finally delegates to the parent class's ``activate()``.
    """
    environ = os.environ

    # Job identity and connection details are supplied via the environment.
    self.ident = environ['ITEM_IDENT']
    self.redis_url = environ['REDIS_URL']
    self.log_key = environ['LOG_KEY']

    # Channel names come from the shared configuration module.
    self.log_channel = shared_config.log_channel()
    self.pipeline_channel = shared_config.pipeline_channel()
    self.control = Control(
        self.redis_url,
        self.log_channel,
        self.pipeline_channel,
    )

    # Settings object plus the listener that is started for this job.
    self.settings = mod_settings.Settings()
    self.settings_listener = mod_settings.Listener(
        self.redis_url,
        self.settings,
        self.control,
        self.ident,
    )
    self.settings_listener.start()

    self.last_age = 0

    self.logger = logging.getLogger('archivebot.pipeline.wpull_plugin')
    self.logger.info(
        'wpull plugin initialization complete for job ID {}'.format(self.ident)
    )

    archivebot.wpull.plugin.activate(self.app_session)
    self.logger.info(
        'wpull dupespotter subsystem loaded for job ID {}'.format(self.ident)
    )

    super().activate()
    self.logger.info('wpull plugin activated')
# Equals 5 * 1024**3; kept as a string. Presumably a byte count -- confirm
# against whatever consumes it.
WARC_MAX_SIZE = '5368709120'

# Start-up guard: require a tmux/screen session unless NO_SCREEN=1 is set.
assert (
    'TMUX' in env or 'STY' in env or env.get('NO_SCREEN') == "1"
), "Refusing to start outside of screen or tmux, set NO_SCREEN=1 to override"

# Start-up guard: seesaw must be at least 0.1.8b1.
if StrictVersion(seesaw.__version__) < StrictVersion("0.1.8b1"):
    raise Exception(
        "Needs seesaw@python3/development version 0.1.8b1 or higher. "
        "You have version {0}".format(seesaw.__version__)
    )

# Start-up guard: reject the placeholder nicknames.
assert downloader not in (
    'ignorednick',
    'YOURNICKHERE',
), 'please use a real nickname'

# Endpoints are read from the environment (KeyError if unset); channel names
# come from the shared configuration module.
RSYNC_URL = env['RSYNC_URL']
REDIS_URL = env['REDIS_URL']
LOG_CHANNEL = shared_config.log_channel()
PIPELINE_CHANNEL = shared_config.pipeline_channel()

# ------------------------------------------------------------------------------
# CONTROL CONNECTION
# ------------------------------------------------------------------------------

# NOTE: this rebinds the name ``control`` from the module to the instance.
control = control.Control(REDIS_URL, LOG_CHANNEL, PIPELINE_CHANNEL)

# ------------------------------------------------------------------------------
# SEESAW EXTENSIONS
# ------------------------------------------------------------------------------

extensions.install_stdout_extension(control)

# ------------------------------------------------------------------------------
# Start-up guard: the finished-WARC directory must be configured.
assert 'FINISHED_WARCS_DIR' in env, 'FINISHED_WARCS_DIR not set.'

# Start-up guard: require a tmux/screen session unless NO_SCREEN=1 is set.
assert (
    'TMUX' in env or 'STY' in env or env.get('NO_SCREEN') == "1"
), "Refusing to start outside of screen or tmux, set NO_SCREEN=1 to override"

# Start-up guard: seesaw must be at least 0.1.8b1.
if StrictVersion(seesaw.__version__) < StrictVersion("0.1.8b1"):
    raise Exception(
        "Needs seesaw@python3/development version 0.1.8b1 or higher. "
        "You have version {0}".format(seesaw.__version__)
    )

# Start-up guard: reject the placeholder nicknames.
assert downloader not in (
    'ignorednick',
    'YOURNICKHERE',
), 'please use a real nickname'

# Endpoints are read from the environment (KeyError if unset); channel names
# come from the shared configuration module.
RSYNC_URL = env['RSYNC_URL']
REDIS_URL = env['REDIS_URL']
LOG_CHANNEL = shared_config.log_channel()
PIPELINE_CHANNEL = shared_config.pipeline_channel()

# ------------------------------------------------------------------------------
# CONTROL CONNECTION
# ------------------------------------------------------------------------------

# NOTE: this rebinds the name ``control`` from the module to the instance.
control = control.Control(REDIS_URL, LOG_CHANNEL, PIPELINE_CHANNEL)

# ------------------------------------------------------------------------------
# SEESAW EXTENSIONS
# ------------------------------------------------------------------------------

extensions.install_stdout_extension(control)

# ------------------------------------------------------------------------------
import json import logging import os import random import time import re import sys from archivebot import shared_config from archivebot.control import Control from archivebot.wpull import settings as mod_settings ident = os.environ['ITEM_IDENT'] redis_url = os.environ['REDIS_URL'] log_key = os.environ['LOG_KEY'] log_channel = shared_config.log_channel() pipeline_channel = shared_config.pipeline_channel() control = Control(redis_url, log_channel, pipeline_channel) settings = mod_settings.Settings() settings_listener = mod_settings.Listener(redis_url, settings, control, ident) settings_listener.start() last_age = 0 logger = logging.getLogger('archivebot.pipeline.wpull_hooks') def log_ignore(url, pattern): packet = dict(