Пример #1
0
    def __init__(self, config, quiet, debug, test=False):
        """Set up logging, the REST connection and the pool of workers.

        :arg WMCore.Configuration config: input TaskWorker configuration
        :arg bool quiet: it tells if a quiet logger is needed
        :arg bool debug: it tells if needs a verbose logger
        :arg bool test: if True, log to the console and use TestWorker
                        instead of Worker
        :raises ConfigException: if config.TaskWorker.mode is missing or is
            not a key of MODEURL, if no REST host can be determined, or if
            len(retry_interval) differs from max_retry."""
        def getLogging(quiet, debug):
            """Attach a handler to the root logger and set its level.

            :arg bool quiet: it tells if a quiet logger is needed
            :arg bool debug: it tells if needs a verbose logger
            :return logger: the root logger with the appropriate level."""
            rootLogger = logging.getLogger()
            if self.TEST:
                # when testing, logging to the console is easier
                rootLogger.addHandler(logging.StreamHandler())
            else:
                logHandler = MultiProcessingLog('twlog.log', when="midnight")
                logHandler.setFormatter(
                    logging.Formatter("%(asctime)s:%(levelname)s:%(module)s:%(message)s"))
                rootLogger.addHandler(logHandler)
            # debug takes precedence over quiet when both are requested
            if debug:
                loglevel = logging.DEBUG
            elif quiet:
                loglevel = logging.WARNING
            else:
                loglevel = logging.INFO
            rootLogger.setLevel(loglevel)
            rootLogger.debug("Logging level initialized to %s.", loglevel)
            return rootLogger

        self.STOP = False
        # self.TEST must be set before getLogging runs: the helper reads it
        self.TEST = test
        self.logger = getLogging(quiet, debug)
        self.config = config
        self.restURInoAPI = None

        # A missing 'mode' attribute is reported with the same ConfigException
        # as an unknown mode, instead of leaking an AttributeError.
        mode = getattr(self.config.TaskWorker, 'mode', None)
        if mode not in MODEURL:
            raise ConfigException(
                "No mode provided: need to specify config.TaskWorker.mode in the configuration"
            )
        modeInfo = MODEURL[mode]
        resthost = modeInfo['host']
        if resthost is None:
            # this mode has no predefined host: take it from the configuration
            resthost = self.config.TaskWorker.resturl  #this should be called resthost in the TaskWorkerConfig -_-
        self.restURInoAPI = '/crabserver/' + modeInfo['instance']
        if resthost is None:
            raise ConfigException(
                "No correct mode provided: need to specify config.TaskWorker.mode in the configuration"
            )

        self.server = HTTPRequests(resthost, self.config.TaskWorker.cmscert,
                                   self.config.TaskWorker.cmskey)
        self.logger.debug("Hostcert: %s, hostkey: %s",
                          str(self.config.TaskWorker.cmscert),
                          str(self.config.TaskWorker.cmskey))

        # Retries for any failures: fill in defaults, then check consistency
        if not hasattr(self.config.TaskWorker, 'max_retry'):
            self.config.TaskWorker.max_retry = 0
        if not hasattr(self.config.TaskWorker, 'retry_interval'):
            self.config.TaskWorker.retry_interval = [
                retry * 20 * 2
                for retry in range(self.config.TaskWorker.max_retry)
            ]
        if len(self.config.TaskWorker.retry_interval) != self.config.TaskWorker.max_retry:
            raise ConfigException(
                "No correct max_retry and retry_interval specified; len of retry_interval must be equal to max_retry."
            )

        # same constructor arguments in both cases, only the class differs
        workerClass = TestWorker if self.TEST else Worker
        self.slaves = workerClass(self.config, resthost,
                                  self.restURInoAPI + '/workflowdb')
        self.slaves.begin()

        # recurringActions is optional in the configuration
        recurringActionsNames = getattr(self.config.TaskWorker,
                                        'recurringActions', [])
        self.recurringActions = [
            self.getRecurringActionInst(name) for name in recurringActionsNames
        ]
Пример #2
0
    def __init__(self,
                 config,
                 logWarning,
                 logDebug,
                 sequential=False,
                 console=False,
                 name='master'):
        """Set up log directories, logging, the REST connection and the workers.

        :arg WMCore.Configuration config: input TaskWorker configuration
        :arg bool logWarning: it tells if a quiet logger is needed
        :arg bool logDebug: it tells if needs a verbose logger
        :arg bool sequential: it tells if to run in sequential (no subprocesses) mode.
        :arg bool console: it tells if to log to console.
        :arg string name: defines a name for the log of this master process
        :raises ConfigException: if config.TaskWorker.instance is missing or
            is not a key of SERVICE_INSTANCES, if instance is 'other' and
            restHost/dbInstance are missing, or if len(retry_interval)
            differs from max_retry."""
        def createLogdir(dirname):
            """ Create the directory dirname ignoring errors in case it exists. Exit if
                the directory cannot be created.
            """
            try:
                os.mkdir(dirname)
            except OSError as ose:
                if ose.errno != 17:  #ignore the "Directory already exists error"
                    print(str(ose))
                    print("The task worker need to access the '%s' directory" %
                          dirname)
                    sys.exit(1)

        def createAndCleanLogDirectories(logsDir):
            """ Create the logs tree and put away the process logs of the previous run.

                Creates logsDir, logsDir/tasks and logsDir/processes, renames the
                most recent 'Latest*' directory in processes to 'Old*', moves the
                'proc.c3id*' files there and creates a fresh LatestLogs-<timestamp>
                directory for this run.
            """
            # it can be named with the time stamp a TW started
            createLogdir(logsDir)
            createLogdir(logsDir + '/tasks')
            currentProcessesDir = logsDir + '/processes/'
            createLogdir(currentProcessesDir)
            # when running inside a container process logs will start with same
            # process numbers, i.e. same name, at any container restart.
            # to avoid clashes and confusion, we will put away all previous processes
            # logs when a TW instance starts. To this goal each TW which runs
            # creates a directory where new containers will move its logs, so
            # identify LastLogs_timestamp directory

            # reverse sort: if there are multiple Latest*, will hit the latest first
            files = sorted(os.listdir(currentProcessesDir), reverse=True)
            # None when the logs directory is empty or has no Latest* entry
            latestName = next((f for f in files if f.startswith('Latest')), None)
            if latestName is not None:
                # rename from Latest to Old; only the entry name is rewritten so
                # that a 'Latest' inside the logsDir path itself is left alone
                oldLogsDir = currentProcessesDir + latestName.replace('Latest', 'Old')
                shutil.move(currentProcessesDir + latestName, oldLogsDir)
            else:
                print(
                    "LatestLogDir not found in logs/processes, create a dummy dir to store old files"
                )
                oldLogsDir = currentProcessesDir + 'OldLog-Unknwown'
                createLogdir(oldLogsDir)
            # move process logs for latest TW run to old directory
            for f in files:
                if f.startswith('proc.c3id'):
                    shutil.move(currentProcessesDir + f, oldLogsDir)

            # create a new LatestLogs directory where to store logs from this TaskWorker
            YYMMDD_HHMMSS = time.strftime('%y%m%d_%H%M%S', time.localtime())
            myDir = currentProcessesDir + 'LatestLogs-' + YYMMDD_HHMMSS
            createLogdir(myDir)

        def setRootLogger(logWarning, logDebug, console, name):
            """Sets the root logger with the desired verbosity level
               The root logger logs to logsDir/twlog.txt and every single
               logging instruction is propagated to it (not really nice
               to read)

            :arg bool logWarning: it tells if a quiet logger is needed
            :arg bool logDebug: it tells if needs a verbose logger
            :arg bool console: it tells if to log to console
            :arg string name: define a name for the log file of this master process
            :return logger: the logger returned by setProcessLogger(name, logsDir)."""

            # this must only done for real Master, not when it is used by TapeRecallStatus
            logsDir = config.TaskWorker.logsDir
            if name == 'master':
                createAndCleanLogDirectories(logsDir)

            rootLogger = logging.getLogger()
            if console:
                rootLogger.addHandler(logging.StreamHandler())
            else:
                logHandler = MultiProcessingLog(logsDir + '/twlog.txt',
                                                when='midnight')
                logHandler.setFormatter(
                    logging.Formatter("%(asctime)s:%(levelname)s:%(module)s,%(lineno)d:%(message)s"))
                rootLogger.addHandler(logHandler)
            # logDebug takes precedence over logWarning when both are requested
            if logDebug:
                loglevel = logging.DEBUG
            elif logWarning:
                loglevel = logging.WARNING
            else:
                loglevel = logging.INFO
            rootLogger.setLevel(loglevel)
            logger = setProcessLogger(name, logsDir)
            logger.info("PID %s.", os.getpid())
            logger.info("Logging level initialized to %s.", loglevel)
            return logger

        def logVersionAndConfig(config=None, logger=None):
            """
            log version number and major config. parameters
            args: config : a configuration object loaded from file
            args: logger : the logger instance to use

            This runs before the configuration is validated, so the parameters
            that are validated later (instance, restHost, dbInstance) or that
            are optional (recurringActions) are read with a default rather
            than being allowed to raise AttributeError from here.
            """
            taskWorker = config.TaskWorker
            instance = getattr(taskWorker, 'instance', None)
            twstartDict = {}
            twstartDict['version'] = __version__
            twstartDict['DBSHostName'] = config.Services.DBSHostName
            twstartDict['name'] = taskWorker.name
            twstartDict['instance'] = instance
            if instance == 'other':
                twstartDict['restHost'] = getattr(taskWorker, 'restHost', None)
                twstartDict['dbInstance'] = getattr(taskWorker, 'dbInstance', None)
            twstartDict['nslaves'] = taskWorker.nslaves
            twstartDict['recurringActions'] = getattr(taskWorker,
                                                      'recurringActions', [])
            # one line for automatic parsing
            logger.info('TWSTART: %s', json.dumps(twstartDict))
            # multiple lines for humans to read
            for k, v in twstartDict.items():
                logger.info('%s: %s', k, v)

        self.STOP = False
        self.TEST = sequential
        self.logger = setRootLogger(logWarning, logDebug, console, name)
        self.config = config
        self.restHost = None

        logVersionAndConfig(self.config, self.logger)

        try:
            instance = self.config.TaskWorker.instance
        except AttributeError:
            msg = "No instance provided: need to specify config.TaskWorker.instance in the configuration"
            raise ConfigException(msg)

        if instance not in SERVICE_INSTANCES:
            msg = "Invalid instance value '%s'" % instance
            raise ConfigException(msg)
        self.logger.info('Will connect to CRAB service: %s', instance)
        self.restHost = SERVICE_INSTANCES[instance]['restHost']
        dbInstance = SERVICE_INSTANCES[instance]['dbInstance']
        if instance == 'other':
            # for 'other' the values from SERVICE_INSTANCES are overridden
            self.logger.info(
                'Will use restHost and dbInstance from config file')
            try:
                self.restHost = self.config.TaskWorker.restHost
                dbInstance = self.config.TaskWorker.dbInstance
            except AttributeError:
                msg = "Need to specify config.TaskWorker.restHost and dbInstance in the configuration"
                raise ConfigException(msg)
        self.dbInstance = dbInstance

        self.logger.info('Will connect via URL: https://%s/%s', self.restHost,
                         self.dbInstance)

        #Let's increase the server's retries for recoverable errors in the MasterWorker
        #retry=20 means we'll keep retrying for roughly 1 hour: per the original note retry
        #number N waits 20*N seconds (cumulative: 20s, 60s, 120s, 200s, 300s ...)
        self.crabserver = CRABRest(self.restHost,
                                   self.config.TaskWorker.cmscert,
                                   self.config.TaskWorker.cmskey,
                                   retry=20,
                                   logger=self.logger,
                                   userAgent='CRABTaskWorker')
        self.crabserver.setDbInstance(self.dbInstance)
        self.logger.debug("Hostcert: %s, hostkey: %s",
                          str(self.config.TaskWorker.cmscert),
                          str(self.config.TaskWorker.cmskey))
        # Retries for any failures: fill in defaults, then check consistency
        if not hasattr(self.config.TaskWorker, 'max_retry'):
            self.config.TaskWorker.max_retry = 0
        if not hasattr(self.config.TaskWorker, 'retry_interval'):
            self.config.TaskWorker.retry_interval = [
                retry * 20 * 2
                for retry in range(self.config.TaskWorker.max_retry)
            ]
        if len(self.config.TaskWorker.retry_interval) != self.config.TaskWorker.max_retry:
            raise ConfigException(
                "No correct max_retry and retry_interval specified; len of retry_interval must be equal to max_retry."
            )
        # use the config to pass some useful global stuff to all workers
        # will use TaskWorker.cmscert/key to talk with CMSWEB
        self.config.TaskWorker.envForCMSWEB = newX509env(
            X509_USER_CERT=self.config.TaskWorker.cmscert,
            X509_USER_KEY=self.config.TaskWorker.cmskey)

        # same constructor arguments in both cases, only the class differs
        workerClass = TestWorker if self.TEST else Worker
        self.slaves = workerClass(self.config, self.restHost, self.dbInstance)
        self.slaves.begin()
        # recurringActions is optional in the configuration
        recurringActionsNames = getattr(self.config.TaskWorker,
                                        'recurringActions', [])
        self.recurringActions = [
            self.getRecurringActionInst(name) for name in recurringActionsNames
        ]
Пример #3
0
    def __init__(self, config, quiet, debug, test=False):
        """Set up log directories, logging, the REST connection and the workers.

        :arg WMCore.Configuration config: input TaskWorker configuration
        :arg bool quiet: it tells if a quiet logger is needed
        :arg bool debug: it tells if needs a verbose logger
        :arg bool test: it tells if to run in test (no subprocesses) mode.
        :raises ConfigException: if config.TaskWorker.mode is missing or is
            not a key of MODEURL, if no REST host can be determined, or if
            len(retry_interval) differs from max_retry."""


        def createLogdir(dirname):
            """ Create the directory dirname ignoring errors in case it exists. Exit if
                the directory cannot be created.
            """
            try:
                os.mkdir(dirname)
            except OSError as ose:
                if ose.errno != 17: #ignore the "Directory already exists error"
                    print(str(ose))
                    print("The task worker need to access the '%s' directory" % dirname)
                    sys.exit(1)


        def setRootLogger(quiet, debug):
            """Sets the root logger with the desired verbosity level
               The root logger logs to logs/twlog.txt and every single
               logging instruction is propagated to it (not really nice
               to read)

            :arg bool quiet: it tells if a quiet logger is needed
            :arg bool debug: it tells if needs a verbose logger
            :return logger: the logger returned by setProcessLogger("master")."""

            for dirname in ('logs', 'logs/processes', 'logs/tasks'):
                createLogdir(dirname)

            rootLogger = logging.getLogger()
            if self.TEST:
                #if we are testing log to the console is easier
                rootLogger.addHandler(logging.StreamHandler())
            else:
                logHandler = MultiProcessingLog('logs/twlog.txt', when='midnight')
                logHandler.setFormatter(
                    logging.Formatter("%(asctime)s:%(levelname)s:%(module)s,%(lineno)d:%(message)s"))
                rootLogger.addHandler(logHandler)
            # debug takes precedence over quiet when both are requested
            if debug:
                loglevel = logging.DEBUG
            elif quiet:
                loglevel = logging.WARNING
            else:
                loglevel = logging.INFO
            rootLogger.setLevel(loglevel)
            logger = setProcessLogger("master")
            logger.debug("PID %s.", os.getpid())
            logger.debug("Logging level initialized to %s.", loglevel)
            return logger


        self.STOP = False
        # self.TEST must be set before setRootLogger runs: the helper reads it
        self.TEST = test
        self.logger = setRootLogger(quiet, debug)
        self.config = config
        self.restURInoAPI = None

        # A missing 'mode' attribute is reported with the same ConfigException
        # as an unknown mode, instead of leaking an AttributeError.
        mode = getattr(self.config.TaskWorker, 'mode', None)
        if mode not in MODEURL:
            raise ConfigException("No mode provided: need to specify config.TaskWorker.mode in the configuration")
        modeInfo = MODEURL[mode]
        resthost = modeInfo['host']
        if resthost is None:
            # this mode has no predefined host: take it from the configuration
            resthost = self.config.TaskWorker.resturl #this should be called resthost in the TaskWorkerConfig -_-
        self.restURInoAPI = '/crabserver/' + modeInfo['instance']
        if resthost is None:
            raise ConfigException("No correct mode provided: need to specify config.TaskWorker.mode in the configuration")

        #Let's increase the server's retries for recoverable errors in the MasterWorker
        #retry=20 means we'll keep retrying for roughly 1 hour: per the original note retry
        #number N waits 20*N seconds (cumulative: 20s, 60s, 120s, 200s, 300s ...)
        self.server = HTTPRequests(resthost, self.config.TaskWorker.cmscert, self.config.TaskWorker.cmskey, retry = 20,
                                   logger = self.logger)
        self.logger.debug("Hostcert: %s, hostkey: %s", str(self.config.TaskWorker.cmscert), str(self.config.TaskWorker.cmskey))

        # Retries for any failures: fill in defaults, then check consistency
        if not hasattr(self.config.TaskWorker, 'max_retry'):
            self.config.TaskWorker.max_retry = 0
        if not hasattr(self.config.TaskWorker, 'retry_interval'):
            self.config.TaskWorker.retry_interval = [retry*20*2 for retry in range(self.config.TaskWorker.max_retry)]
        if len(self.config.TaskWorker.retry_interval) != self.config.TaskWorker.max_retry:
            raise ConfigException("No correct max_retry and retry_interval specified; len of retry_interval must be equal to max_retry.")
        # use the config to pass some useful global stuff to all workers
        # will use TaskWorker.cmscert/key to talk with CMSWEB
        self.config.TaskWorker.envForCMSWEB = newX509env(X509_USER_CERT = self.config.TaskWorker.cmscert,
                                                         X509_USER_KEY  = self.config.TaskWorker.cmskey)

        # same constructor arguments in both cases, only the class differs
        workerClass = TestWorker if self.TEST else Worker
        self.slaves = workerClass(self.config, resthost, self.restURInoAPI + '/workflowdb')
        self.slaves.begin()
        # recurringActions is optional in the configuration
        recurringActionsNames = getattr(self.config.TaskWorker, 'recurringActions', [])
        self.recurringActions = [self.getRecurringActionInst(name) for name in recurringActionsNames]
Пример #4
0
    def __init__(self, config, quiet, debug, test=False):
        """Set up logging, the REST connection and the pool of workers.

        :arg WMCore.Configuration config: input TaskWorker configuration
        :arg bool quiet: it tells if a quiet logger is needed
        :arg bool debug: it tells if needs a verbose logger
        :arg bool test: if True, log to the console and use TestWorker
                        instead of Worker
        :raises ConfigException: if config.TaskWorker.mode is missing or is
            not a key of MODEURL, or if no REST host can be determined."""
        def getLogging(quiet, debug):
            """Attach a handler to the root logger and set its level.

            :arg bool quiet: it tells if a quiet logger is needed
            :arg bool debug: it tells if needs a verbose logger
            :return logger: the root logger with the appropriate level."""
            rootLogger = logging.getLogger()
            if self.TEST:
                # when testing, logging to the console is easier
                rootLogger.addHandler(logging.StreamHandler())
            else:
                logHandler = MultiProcessingLog('twlog.log', when="midnight")
                logHandler.setFormatter(
                    logging.Formatter("%(asctime)s:%(levelname)s:%(module)s:%(message)s"))
                rootLogger.addHandler(logHandler)
            # debug takes precedence over quiet when both are requested
            if debug:
                loglevel = logging.DEBUG
            elif quiet:
                loglevel = logging.WARNING
            else:
                loglevel = logging.INFO
            rootLogger.setLevel(loglevel)
            rootLogger.debug("Logging level initialized to %s.", loglevel)
            return rootLogger

        # self.TEST must be set before getLogging runs: the helper reads it
        self.TEST = test
        self.logger = getLogging(quiet, debug)
        self.config = config
        # template URI: 'prod' is swapped for the instance of the selected mode
        self.resturl = '/crabserver/prod/workflowdb'

        # A missing 'mode' attribute is reported with the same ConfigException
        # as an unknown mode, instead of leaking an AttributeError.
        mode = getattr(self.config.TaskWorker, 'mode', None)
        if mode not in MODEURL:
            raise ConfigException(
                "No mode provided: need to specify config.TaskWorker.mode in the configuration"
            )
        modeInfo = MODEURL[mode]
        restinstance = modeInfo['host']
        if restinstance is None:
            # this mode has no predefined host: take it from the configuration
            restinstance = self.config.TaskWorker.resturl
        self.resturl = self.resturl.replace('prod', modeInfo['instance'])
        # self.resturl is always a string at this point (str.replace result),
        # so only the host needs to be checked
        if restinstance is None:
            raise ConfigException(
                "No correct mode provided: need to specify config.TaskWorker.mode in the configuration"
            )

        self.server = HTTPRequests(restinstance,
                                   self.config.TaskWorker.cmscert,
                                   self.config.TaskWorker.cmskey,
                                   version=__version__)
        self.logger.debug("Hostcert: %s, hostkey: %s",
                          str(self.config.TaskWorker.cmscert),
                          str(self.config.TaskWorker.cmskey))

        # same constructor arguments in both cases, only the class differs
        workerClass = TestWorker if self.TEST else Worker
        self.slaves = workerClass(self.config, restinstance, self.resturl)
        self.slaves.begin()