Exemplo n.º 1
0
    def activate(self):
        """Set up per-job state for this plugin, then activate the parent.

        Reads ``ITEM_IDENT``, ``REDIS_URL`` and ``LOG_KEY`` from the process
        environment (a missing variable raises ``KeyError``), builds the
        ``Control`` connection, starts the settings listener, calls
        ``archivebot.wpull.plugin.activate`` with this plugin's
        ``app_session`` and finally delegates to ``super().activate()``.
        """
        environ = os.environ
        self.ident = environ['ITEM_IDENT']
        self.redis_url = environ['REDIS_URL']
        self.log_key = environ['LOG_KEY']

        # Channel names come from the shared configuration, not the
        # environment.
        self.log_channel = shared_config.log_channel()
        self.pipeline_channel = shared_config.pipeline_channel()
        self.control = Control(
            self.redis_url,
            self.log_channel,
            self.pipeline_channel,
        )

        # The listener receives the Settings object, the control connection
        # and the job ID, and is started right away.
        self.settings = mod_settings.Settings()
        self.settings_listener = mod_settings.Listener(
            self.redis_url,
            self.settings,
            self.control,
            self.ident,
        )
        self.settings_listener.start()

        self.last_age = 0
        self.logger = logging.getLogger('archivebot.pipeline.wpull_plugin')

        self.logger.info(
            'wpull plugin initialization complete for job ID {}'.format(
                self.ident))

        # Reported in the log below as the "dupespotter subsystem".
        archivebot.wpull.plugin.activate(self.app_session)
        self.logger.info(
            'wpull dupespotter subsystem loaded for job ID {}'.format(
                self.ident))

        super().activate()
        self.logger.info('wpull plugin activated')
Exemplo n.º 2
0
    WARC_MAX_SIZE = '5368709120'

# ------------------------------------------------------------------------------
# STARTUP GUARDS
# ------------------------------------------------------------------------------
# These checks are raised explicitly instead of being written as ``assert``
# statements: asserts are removed when Python runs with ``-O``, which would
# silently disable the guards. AssertionError and the original messages are
# kept so the failure reported at startup is unchanged.

# ``env`` is defined earlier in the file (presumably os.environ -- not visible
# here). TMUX / STY are taken as evidence of running inside tmux / screen.
if not ('TMUX' in env or 'STY' in env or env.get('NO_SCREEN') == "1"):
    raise AssertionError(
        "Refusing to start outside of screen or tmux, set NO_SCREEN=1 to override")

if StrictVersion(seesaw.__version__) < StrictVersion("0.1.8b1"):
    raise Exception(
        "Needs seesaw@python3/development version 0.1.8b1 or higher. "
        "You have version {0}".format(seesaw.__version__))

# Reject the placeholder nicknames.
if downloader in ('ignorednick', 'YOURNICKHERE'):
    raise AssertionError('please use a real nickname')

# Both URLs are mandatory: a missing variable raises KeyError here.
RSYNC_URL = env['RSYNC_URL']
REDIS_URL = env['REDIS_URL']
LOG_CHANNEL = shared_config.log_channel()
PIPELINE_CHANNEL = shared_config.pipeline_channel()

# ------------------------------------------------------------------------------
# CONTROL CONNECTION
# ------------------------------------------------------------------------------

# NOTE: this rebinds the name ``control``. Whatever it referred to before
# (presumably the imported ``control`` module -- import not visible here) is no
# longer reachable under this name after this line.
control = control.Control(REDIS_URL, LOG_CHANNEL, PIPELINE_CHANNEL)

# ------------------------------------------------------------------------------
# SEESAW EXTENSIONS
# ------------------------------------------------------------------------------

# Receives the Control instance created above.
extensions.install_stdout_extension(control)

# ------------------------------------------------------------------------------
Exemplo n.º 3
0
# Startup guards. They are raised explicitly instead of being written as
# ``assert`` statements: asserts are removed when Python runs with ``-O``,
# which would silently disable the guards. AssertionError and the original
# messages are kept so the failure reported at startup is unchanged.
# ``env`` is defined earlier in the file (presumably os.environ -- not visible
# here).
if 'FINISHED_WARCS_DIR' not in env:
    raise AssertionError('FINISHED_WARCS_DIR not set.')

# TMUX / STY are taken as evidence of running inside tmux / screen.
if not ('TMUX' in env or 'STY' in env or env.get('NO_SCREEN') == "1"):
    raise AssertionError(
        "Refusing to start outside of screen or tmux, set NO_SCREEN=1 to override")

if StrictVersion(seesaw.__version__) < StrictVersion("0.1.8b1"):
    raise Exception(
        "Needs seesaw@python3/development version 0.1.8b1 or higher. "
        "You have version {0}".format(seesaw.__version__)
    )

# Reject the placeholder nicknames.
if downloader in ('ignorednick', 'YOURNICKHERE'):
    raise AssertionError('please use a real nickname')

# Both URLs are mandatory: a missing variable raises KeyError here.
RSYNC_URL = env['RSYNC_URL']
REDIS_URL = env['REDIS_URL']
LOG_CHANNEL = shared_config.log_channel()
PIPELINE_CHANNEL = shared_config.pipeline_channel()

# ------------------------------------------------------------------------------
# CONTROL CONNECTION
# ------------------------------------------------------------------------------

# NOTE: this rebinds the name ``control``. Whatever it referred to before
# (presumably the imported ``control`` module -- import not visible here) is no
# longer reachable under this name after this line.
control = control.Control(REDIS_URL, LOG_CHANNEL, PIPELINE_CHANNEL)

# ------------------------------------------------------------------------------
# SEESAW EXTENSIONS
# ------------------------------------------------------------------------------

# Receives the Control instance created above.
extensions.install_stdout_extension(control)

# ------------------------------------------------------------------------------
Exemplo n.º 4
0
import json
import logging
import os
import random
import time
import re
import sys

from archivebot import shared_config
from archivebot.control import Control
from archivebot.wpull import settings as mod_settings

# Job identity and connection details are injected through the environment;
# a missing variable raises KeyError at import time. Lookups happen in the
# order listed.
ident, redis_url, log_key = (
    os.environ[name] for name in ('ITEM_IDENT', 'REDIS_URL', 'LOG_KEY')
)

# Channel names come from the shared configuration.
log_channel, pipeline_channel = (
    shared_config.log_channel(),
    shared_config.pipeline_channel(),
)

control = Control(redis_url, log_channel, pipeline_channel)

# The listener receives the Settings object, the control connection and the
# job ID, and is started as soon as this module is loaded.
settings = mod_settings.Settings()
settings_listener = mod_settings.Listener(
    redis_url,
    settings,
    control,
    ident,
)
settings_listener.start()

logger = logging.getLogger('archivebot.pipeline.wpull_hooks')

last_age = 0


def log_ignore(url, pattern):
  packet = dict(