def __init__(self, config, quiet, debug, test=False):
    """Initializer

    Configures the root logger, resolves the REST host and URI from
    config.TaskWorker.mode, creates the HTTP client, fills in retry
    defaults, starts the worker pool and instantiates recurring actions.

    :arg WMCore.Configuration config: input TaskWorker configuration
    :arg bool quiet: it tells if a quiet logger is needed
    :arg bool debug: it tells if needs a verbose logger
    :arg bool test: if true, log to the console and use TestWorker
                    instead of Worker
    :raises ConfigException: if config.TaskWorker.mode is missing or not
                    a key of MODEURL, if no REST host can be determined,
                    or if len(retry_interval) differs from max_retry."""

    def getLogging(quiet, debug):
        """Retrieves a logger and set the proper level

        :arg bool quiet: it tells if a quiet logger is needed
        :arg bool debug: it tells if needs a verbose logger
        :return logger: a logger with the appropriate logger level."""
        if self.TEST:
            # if we are testing log to the console is easier
            logging.getLogger().addHandler(logging.StreamHandler())
        else:
            logHandler = MultiProcessingLog('twlog.log', when="midnight")
            logFormatter = logging.Formatter("%(asctime)s:%(levelname)s:%(module)s:%(message)s")
            logHandler.setFormatter(logFormatter)
            logging.getLogger().addHandler(logHandler)
        # debug wins over quiet when both are requested
        loglevel = logging.INFO
        if quiet:
            loglevel = logging.WARNING
        if debug:
            loglevel = logging.DEBUG
        logging.getLogger().setLevel(loglevel)
        logger = logging.getLogger()
        # pass the argument lazily so formatting only happens if emitted
        logger.debug("Logging level initialized to %s.", loglevel)
        return logger

    self.STOP = False
    # self.TEST must be set before getLogging() runs: the helper reads it
    self.TEST = test
    self.logger = getLogging(quiet, debug)
    self.config = config
    resthost = None
    self.restURInoAPI = None

    # A missing 'mode' attribute is reported as a ConfigException (as the
    # message below promises) instead of leaking an AttributeError.
    mode = getattr(self.config.TaskWorker, 'mode', None)
    if mode not in MODEURL:
        raise ConfigException(
            "No mode provided: need to specify config.TaskWorker.mode in the configuration"
        )
    self.restURInoAPI = '/crabserver/' + MODEURL[mode]['instance']
    if MODEURL[mode]['host'] is not None:
        resthost = MODEURL[mode]['host']
    else:
        # this should be called resthost in the TaskWorkerConfig -_-
        resthost = getattr(self.config.TaskWorker, 'resturl', None)
    if resthost is None:
        raise ConfigException(
            "No correct mode provided: need to specify config.TaskWorker.mode in the configuration"
        )

    self.server = HTTPRequests(resthost, self.config.TaskWorker.cmscert,
                               self.config.TaskWorker.cmskey)
    self.logger.debug("Hostcert: %s, hostkey: %s",
                      str(self.config.TaskWorker.cmscert),
                      str(self.config.TaskWorker.cmskey))

    # Retries for any failures
    if not hasattr(self.config.TaskWorker, 'max_retry'):
        self.config.TaskWorker.max_retry = 0
    if not hasattr(self.config.TaskWorker, 'retry_interval'):
        # default intervals: 0, 40, 80, ... one entry per allowed retry
        self.config.TaskWorker.retry_interval = [
            retry * 20 * 2
            for retry in range(self.config.TaskWorker.max_retry)
        ]
    if len(self.config.TaskWorker.retry_interval) != self.config.TaskWorker.max_retry:
        raise ConfigException(
            "No correct max_retry and retry_interval specified; len of retry_interval must be equal to max_retry."
        )

    if self.TEST:
        self.slaves = TestWorker(self.config, resthost,
                                 self.restURInoAPI + '/workflowdb')
    else:
        self.slaves = Worker(self.config, resthost,
                             self.restURInoAPI + '/workflowdb')
    self.slaves.begin()

    # recurringActions is optional in the configuration
    recurringActionsNames = getattr(self.config.TaskWorker,
                                    'recurringActions', [])
    self.recurringActions = [
        self.getRecurringActionInst(name) for name in recurringActionsNames
    ]
def __init__(self, config, logWarning, logDebug, sequential=False,
             console=False, name='master'):
    """Initializer

    Sets up log directories and the root logger, logs version and main
    configuration values, resolves the REST host and DB instance from
    config.TaskWorker.instance, creates the CRAB REST client, fills in
    retry defaults, starts the worker pool and instantiates recurring
    actions.

    :arg WMCore.Configuration config: input TaskWorker configuration
    :arg bool logWarning: it tells if a quiet logger is needed
    :arg bool logDebug: it tells if needs a verbose logger
    :arg bool sequential: it tells if to run in sequential (no subprocesses) mode.
    :arg bool console: it tells if to log to console.
    :arg string name: defines a name for the log of this master process
    :raises ConfigException: if config.TaskWorker.instance is missing or
                    not a key of SERVICE_INSTANCES, if instance is 'other'
                    and restHost/dbInstance are missing, or if
                    len(retry_interval) differs from max_retry."""

    def createLogdir(dirname):
        """ Create the directory dirname ignoring errors in case it exists.
            Exit if the directory cannot be created.
        """
        import errno  # local import: only this helper needs it
        try:
            os.mkdir(dirname)
        except OSError as ose:
            if ose.errno != errno.EEXIST:  # ignore the "Directory already exists error"
                print(str(ose))
                print("The task worker need to access the '%s' directory" % dirname)
                sys.exit(1)

    def createAndCleanLogDirectories(logsDir):
        """ Create the logs tree under logsDir and put away process logs
            left by a previous run into an 'Old' directory.
        """
        # it can be named with the time stamp a TW started
        createLogdir(logsDir)
        createLogdir(logsDir + '/tasks')
        currentProcessesDir = logsDir + '/processes/'
        createLogdir(currentProcessesDir)
        # when running inside a container process logs will start with same
        # process numbers, i.e. same name, at any container restart.
        # to avoid clashes and confusion, we will put away all previous processes
        # logs when a TW instance starts. To this goal each TW which runs
        # creates a directory where new containers will move its logs, so
        # identify LastLogs_timestamp directory
        latestLogDir = None  # the logs directory could be empty
        files = os.listdir(currentProcessesDir)
        files.sort(reverse=True)  # if there are multiple Latest*, will hit the latest first
        for f in files:
            if f.startswith('Latest'):
                latestLogDir = currentProcessesDir + f
                break
        # latestLogDir can only be set when files is non-empty
        if latestLogDir:
            # rename from Latest to Old
            oldLogsDir = latestLogDir.replace('Latest', 'Old')
            shutil.move(latestLogDir, oldLogsDir)
        else:
            print(
                "LatestLogDir not found in logs/processes, create a dummy dir to store old files"
            )
            oldLogsDir = currentProcessesDir + 'OldLog-Unknwown'
            createLogdir(oldLogsDir)
        # move process logs for latest TW run to old directory
        for f in files:
            if f.startswith('proc.c3id'):
                shutil.move(currentProcessesDir + f, oldLogsDir)

        # create a new LateastLogs directory where to store logs from this TaskWorker
        YYMMDD_HHMMSS = time.strftime('%y%m%d_%H%M%S', time.localtime())
        myDir = currentProcessesDir + 'LatestLogs-' + YYMMDD_HHMMSS
        createLogdir(myDir)

    def setRootLogger(logWarning, logDebug, console, name):
        """Sets the root logger with the desired verbosity level
           The root logger logs to logsDir/twlog.txt and every single
           logging instruction is propagated to it (not really nice
           to read)

        :arg bool logWarning: it tells if a quiet logger is needed
        :arg bool logDebug: it tells if needs a verbose logger
        :arg bool console: it tells if to log to console
        :arg string name: define a name for the log file of this master process
        :return logger: a logger with the appropriate logger level."""

        logsDir = config.TaskWorker.logsDir
        # this must only done for real Master, not when it is used by TapeRecallStatus
        if name == 'master':
            createAndCleanLogDirectories(logsDir)

        if console:
            logging.getLogger().addHandler(logging.StreamHandler())
        else:
            logHandler = MultiProcessingLog(logsDir + '/twlog.txt', when='midnight')
            logFormatter = logging.Formatter(
                "%(asctime)s:%(levelname)s:%(module)s,%(lineno)d:%(message)s")
            logHandler.setFormatter(logFormatter)
            logging.getLogger().addHandler(logHandler)
        # logDebug wins over logWarning when both are requested
        loglevel = logging.INFO
        if logWarning:
            loglevel = logging.WARNING
        if logDebug:
            loglevel = logging.DEBUG
        logging.getLogger().setLevel(loglevel)
        logger = setProcessLogger(name, logsDir)
        logger.info("PID %s.", os.getpid())
        logger.info("Logging level initialized to %s.", loglevel)
        return logger

    def logVersionAndConfig(config=None, logger=None):
        """
        log version number and major config. parameters
        args: config : a configuration object loaded from file
        args: logger : the logger instance to use
        """
        # instance, restHost, dbInstance and recurringActions are read with
        # getattr because this runs before they are validated below: a
        # missing value must reach the ConfigException checks rather than
        # fail here with an AttributeError.
        instance = getattr(config.TaskWorker, 'instance', None)
        twstartDict = {}
        twstartDict['version'] = __version__
        twstartDict['DBSHostName'] = config.Services.DBSHostName
        twstartDict['name'] = config.TaskWorker.name
        twstartDict['instance'] = instance
        if instance == 'other':
            twstartDict['restHost'] = getattr(config.TaskWorker, 'restHost', None)
            twstartDict['dbInstance'] = getattr(config.TaskWorker, 'dbInstance', None)
        twstartDict['nslaves'] = config.TaskWorker.nslaves
        twstartDict['recurringActions'] = getattr(config.TaskWorker,
                                                  'recurringActions', [])
        # one line for automatic parsing
        logger.info('TWSTART: %s', json.dumps(twstartDict))
        # multiple lines for humans to read
        for k, v in twstartDict.items():
            logger.info('%s: %s', k, v)

    self.STOP = False
    self.TEST = sequential
    self.logger = setRootLogger(logWarning, logDebug, console, name)
    self.config = config
    self.restHost = None
    dbInstance = None

    logVersionAndConfig(self.config, self.logger)

    # Only a missing attribute means "not provided"; a bare except would
    # also swallow KeyboardInterrupt/SystemExit.
    try:
        instance = self.config.TaskWorker.instance
    except AttributeError:
        msg = "No instance provided: need to specify config.TaskWorker.instance in the configuration"
        raise ConfigException(msg)

    if instance in SERVICE_INSTANCES:
        self.logger.info('Will connect to CRAB service: %s', instance)
        self.restHost = SERVICE_INSTANCES[instance]['restHost']
        dbInstance = SERVICE_INSTANCES[instance]['dbInstance']
    else:
        msg = "Invalid instance value '%s'" % instance
        raise ConfigException(msg)
    if instance == 'other':
        self.logger.info('Will use restHost and dbInstance from config file')
        try:
            self.restHost = self.config.TaskWorker.restHost
            dbInstance = self.config.TaskWorker.dbInstance
        except AttributeError:
            msg = "Need to specify config.TaskWorker.restHost and dbInstance in the configuration"
            raise ConfigException(msg)
    self.dbInstance = dbInstance

    self.logger.info('Will connect via URL: https://%s/%s', self.restHost,
                     self.dbInstance)

    # Let's increase the server's retries for recoverable errors in the MasterWorker
    # NOTE(review): the earlier note here said "60 means we'll keep retrying
    # for 1 hour basically (we retry at 20*NUMRETRY seconds, so at: 20s, 60s,
    # 120s, 200s, 300s ...)" but the value passed is retry=20 - confirm the
    # intended total retry time.
    self.crabserver = CRABRest(self.restHost,
                               self.config.TaskWorker.cmscert,
                               self.config.TaskWorker.cmskey,
                               retry=20,
                               logger=self.logger,
                               userAgent='CRABTaskWorker')
    self.crabserver.setDbInstance(self.dbInstance)
    self.logger.debug("Hostcert: %s, hostkey: %s",
                      str(self.config.TaskWorker.cmscert),
                      str(self.config.TaskWorker.cmskey))

    # Retries for any failures
    if not hasattr(self.config.TaskWorker, 'max_retry'):
        self.config.TaskWorker.max_retry = 0
    if not hasattr(self.config.TaskWorker, 'retry_interval'):
        # default intervals: 0, 40, 80, ... one entry per allowed retry
        self.config.TaskWorker.retry_interval = [
            retry * 20 * 2
            for retry in range(self.config.TaskWorker.max_retry)
        ]
    if len(self.config.TaskWorker.retry_interval) != self.config.TaskWorker.max_retry:
        raise ConfigException(
            "No correct max_retry and retry_interval specified; len of retry_interval must be equal to max_retry."
        )

    # use the config to pass some useful global stuff to all workers
    # will use TaskWorker.cmscert/key to talk with CMSWEB
    self.config.TaskWorker.envForCMSWEB = newX509env(
        X509_USER_CERT=self.config.TaskWorker.cmscert,
        X509_USER_KEY=self.config.TaskWorker.cmskey)

    if self.TEST:
        self.slaves = TestWorker(self.config, self.restHost, self.dbInstance)
    else:
        self.slaves = Worker(self.config, self.restHost, self.dbInstance)
    self.slaves.begin()

    # recurringActions is optional in the configuration
    recurringActionsNames = getattr(self.config.TaskWorker,
                                    'recurringActions', [])
    self.recurringActions = [
        self.getRecurringActionInst(name) for name in recurringActionsNames
    ]
def __init__(self, config, quiet, debug, test=False):
    """Initializer

    Creates the log directories, configures the root logger, resolves the
    REST host and URI from config.TaskWorker.mode, creates the HTTP
    client, fills in retry defaults, starts the worker pool and
    instantiates recurring actions.

    :arg WMCore.Configuration config: input TaskWorker configuration
    :arg bool quiet: it tells if a quiet logger is needed
    :arg bool debug: it tells if needs a verbose logger
    :arg bool test: it tells if to run in test (no subprocesses) mode.
    :raises ConfigException: if config.TaskWorker.mode is missing or not
                    a key of MODEURL, if no REST host can be determined,
                    or if len(retry_interval) differs from max_retry."""

    def createLogdir(dirname):
        """ Create the directory dirname ignoring errors in case it exists.
            Exit if the directory cannot be created.
        """
        import errno  # local import: only this helper needs it
        try:
            os.mkdir(dirname)
        except OSError as ose:
            if ose.errno != errno.EEXIST:  # ignore the "Directory already exists error"
                print(str(ose))
                print("The task worker need to access the '%s' directory" % dirname)
                sys.exit(1)

    def setRootLogger(quiet, debug):
        """Sets the root logger with the desired verbosity level
           The root logger logs to logs/twlog.txt and every single
           logging instruction is propagated to it (not really nice
           to read)

        :arg bool quiet: it tells if a quiet logger is needed
        :arg bool debug: it tells if needs a verbose logger
        :return logger: a logger with the appropriate logger level."""

        createLogdir('logs')
        createLogdir('logs/processes')
        createLogdir('logs/tasks')

        if self.TEST:
            # if we are testing log to the console is easier
            logging.getLogger().addHandler(logging.StreamHandler())
        else:
            logHandler = MultiProcessingLog('logs/twlog.txt', when='midnight')
            logFormatter = logging.Formatter(
                "%(asctime)s:%(levelname)s:%(module)s,%(lineno)d:%(message)s")
            logHandler.setFormatter(logFormatter)
            logging.getLogger().addHandler(logHandler)
        # debug wins over quiet when both are requested
        loglevel = logging.INFO
        if quiet:
            loglevel = logging.WARNING
        if debug:
            loglevel = logging.DEBUG
        logging.getLogger().setLevel(loglevel)
        logger = setProcessLogger("master")
        logger.debug("PID %s.", os.getpid())
        logger.debug("Logging level initialized to %s.", loglevel)
        return logger

    self.STOP = False
    # self.TEST must be set before setRootLogger() runs: the helper reads it
    self.TEST = test
    self.logger = setRootLogger(quiet, debug)
    self.config = config
    resthost = None
    self.restURInoAPI = None

    # A missing 'mode' attribute is reported as a ConfigException (as the
    # message below promises) instead of leaking an AttributeError.
    mode = getattr(self.config.TaskWorker, 'mode', None)
    if mode not in MODEURL:
        raise ConfigException("No mode provided: need to specify config.TaskWorker.mode in the configuration")
    self.restURInoAPI = '/crabserver/' + MODEURL[mode]['instance']
    if MODEURL[mode]['host'] is not None:
        resthost = MODEURL[mode]['host']
    else:
        # this should be called resthost in the TaskWorkerConfig -_-
        resthost = getattr(self.config.TaskWorker, 'resturl', None)
    if resthost is None:
        raise ConfigException("No correct mode provided: need to specify config.TaskWorker.mode in the configuration")

    # Let's increase the server's retries for recoverable errors in the MasterWorker
    # NOTE(review): the earlier note here said "60 means we'll keep retrying
    # for 1 hour basically (we retry at 20*NUMRETRY seconds, so at: 20s, 60s,
    # 120s, 200s, 300s ...)" but the value passed is retry=20 - confirm the
    # intended total retry time.
    self.server = HTTPRequests(resthost, self.config.TaskWorker.cmscert,
                               self.config.TaskWorker.cmskey,
                               retry=20, logger=self.logger)
    self.logger.debug("Hostcert: %s, hostkey: %s",
                      str(self.config.TaskWorker.cmscert),
                      str(self.config.TaskWorker.cmskey))

    # Retries for any failures
    if not hasattr(self.config.TaskWorker, 'max_retry'):
        self.config.TaskWorker.max_retry = 0
    if not hasattr(self.config.TaskWorker, 'retry_interval'):
        # default intervals: 0, 40, 80, ... one entry per allowed retry
        self.config.TaskWorker.retry_interval = [
            retry * 20 * 2
            for retry in range(self.config.TaskWorker.max_retry)
        ]
    if len(self.config.TaskWorker.retry_interval) != self.config.TaskWorker.max_retry:
        raise ConfigException("No correct max_retry and retry_interval specified; len of retry_interval must be equal to max_retry.")

    # use the config to pass some useful global stuff to all workers
    # will use TaskWorker.cmscert/key to talk with CMSWEB
    self.config.TaskWorker.envForCMSWEB = newX509env(
        X509_USER_CERT=self.config.TaskWorker.cmscert,
        X509_USER_KEY=self.config.TaskWorker.cmskey)

    if self.TEST:
        self.slaves = TestWorker(self.config, resthost,
                                 self.restURInoAPI + '/workflowdb')
    else:
        self.slaves = Worker(self.config, resthost,
                             self.restURInoAPI + '/workflowdb')
    self.slaves.begin()

    # recurringActions is optional in the configuration
    recurringActionsNames = getattr(self.config.TaskWorker,
                                    'recurringActions', [])
    self.recurringActions = [
        self.getRecurringActionInst(name) for name in recurringActionsNames
    ]
def __init__(self, config, quiet, debug, test=False):
    """Initializer

    Configures the root logger, resolves the REST host and the workflowdb
    URL from config.TaskWorker.mode, creates the HTTP client and starts
    the worker pool.

    :arg WMCore.Configuration config: input TaskWorker configuration
    :arg bool quiet: it tells if a quiet logger is needed
    :arg bool debug: it tells if needs a verbose logger
    :arg bool test: if true, log to the console and use TestWorker
                    instead of Worker
    :raises ConfigException: if config.TaskWorker.mode is missing or not
                    a key of MODEURL, or if no REST host can be
                    determined."""

    def getLogging(quiet, debug):
        """Retrieves a logger and set the proper level

        :arg bool quiet: it tells if a quiet logger is needed
        :arg bool debug: it tells if needs a verbose logger
        :return logger: a logger with the appropriate logger level."""
        if self.TEST:
            # if we are testing log to the console is easier
            logging.getLogger().addHandler(logging.StreamHandler())
        else:
            logHandler = MultiProcessingLog('twlog.log', when="midnight")
            logFormatter = logging.Formatter("%(asctime)s:%(levelname)s:%(module)s:%(message)s")
            logHandler.setFormatter(logFormatter)
            logging.getLogger().addHandler(logHandler)
        # debug wins over quiet when both are requested
        loglevel = logging.INFO
        if quiet:
            loglevel = logging.WARNING
        if debug:
            loglevel = logging.DEBUG
        logging.getLogger().setLevel(loglevel)
        logger = logging.getLogger()
        # pass the argument lazily so formatting only happens if emitted
        logger.debug("Logging level initialized to %s.", loglevel)
        return logger

    # self.TEST must be set before getLogging() runs: the helper reads it
    self.TEST = test
    self.logger = getLogging(quiet, debug)
    self.config = config
    restinstance = None
    self.resturl = '/crabserver/prod/workflowdb'

    # A missing 'mode' attribute is reported as a ConfigException (as the
    # message below promises) instead of leaking an AttributeError.
    mode = getattr(self.config.TaskWorker, 'mode', None)
    if mode not in MODEURL:
        raise ConfigException(
            "No mode provided: need to specify config.TaskWorker.mode in the configuration"
        )
    # swap the default 'prod' path component for the instance of this mode
    self.resturl = self.resturl.replace('prod', MODEURL[mode]['instance'])
    if MODEURL[mode]['host'] is not None:
        restinstance = MODEURL[mode]['host']
    else:
        restinstance = getattr(self.config.TaskWorker, 'resturl', None)
    # self.resturl is always a string at this point, so only the host
    # needs checking
    if restinstance is None:
        raise ConfigException(
            "No correct mode provided: need to specify config.TaskWorker.mode in the configuration"
        )

    self.server = HTTPRequests(restinstance,
                               self.config.TaskWorker.cmscert,
                               self.config.TaskWorker.cmskey,
                               version=__version__)
    self.logger.debug("Hostcert: %s, hostkey: %s",
                      str(self.config.TaskWorker.cmscert),
                      str(self.config.TaskWorker.cmskey))

    if self.TEST:
        self.slaves = TestWorker(self.config, restinstance, self.resturl)
    else:
        self.slaves = Worker(self.config, restinstance, self.resturl)
    self.slaves.begin()