class UploadLogFile(ModuleBase):
    """Handle log file uploads in the production jobs.

    The module selects log files from the current working directory, asks
    ``populateLogDirectory`` to fill a temporary log directory with them and
    uploads that directory to the log storage element (``self.logSE``).
    When that upload fails, or when ``self.failoverTest`` is set, the log
    directory is packed into a gzipped tar archive which is handed to a
    ``FailoverTransfer`` client, and ``createLogUploadRequest`` is called.
    """

    #############################################################################
    def __init__(self):
        """Module initialization."""
        super(UploadLogFile, self).__init__()
        self.version = __RCSID__
        self.log = gLogger.getSubLogger("UploadLogFile")
        self.PRODUCTION_ID = None
        self.JOB_ID = None
        self.workflow_commons = None
        self.request = None
        self.logFilePath = ""
        self.logLFNPath = ""
        self.logdir = ""
        self.logSE = self.ops.getValue("/LogStorage/LogSE", "LogSE")
        self.root = gConfig.getValue("/LocalSite/Root", os.getcwd())
        self.logSizeLimit = self.ops.getValue("/LogFiles/SizeLimit", 20 * 1024 * 1024)
        self.logExtensions = []
        self.failoverSEs = gConfig.getValue("/Resources/StorageElementGroups/Tier1-Failover", [])
        self.diracLogo = self.ops.getValue(
            "/SAM/LogoURL", "https://lhcbweb.pic.es/DIRAC/images/logos/DIRAC-logo-transp.png"
        )
        self.rm = ReplicaManager()
        self.experiment = "CLIC"
        self.enable = True
        self.failoverTest = False  # flag to put log files to failover by default
        self.jobID = ""

    ######################################################################
    def applicationSpecificInputs(self):
        """Resolve the module parameters from the step/workflow dictionaries.

        Reads the ``Enable`` and ``TestFailover`` control flags from
        ``self.step_commons``, the WMS job ID from the ``JOBID`` environment
        variable, and the log paths and request object from
        ``self.workflow_commons``.

        :return: ``S_OK("Parameters resolved")``, or the failed result of
                 ``getLogPath`` when the log paths cannot be created.
        """
        if "Enable" in self.step_commons:
            self.enable = self.step_commons["Enable"]
            if not isinstance(self.enable, bool):
                self.log.warn("Enable flag set to non-boolean value %s, setting to False" % self.enable)
                self.enable = False

        if "TestFailover" in self.step_commons:
            # The value belongs to the failover test flag; it must not
            # overwrite the enable flag.
            self.failoverTest = self.step_commons["TestFailover"]
            if not isinstance(self.failoverTest, bool):
                self.log.warn("Test failover flag set to non-boolean value %s, setting to False" % self.failoverTest)
                self.failoverTest = False

        if "JOBID" in os.environ:
            self.jobID = os.environ["JOBID"]
            self.log.verbose("Found WMS JobID = %s" % self.jobID)
        else:
            self.log.info("No WMS JobID found, disabling module via control flag")
            self.enable = False

        if "LogFilePath" in self.workflow_commons and "LogTargetPath" in self.workflow_commons:
            self.logFilePath = self.workflow_commons["LogFilePath"]
            self.logLFNPath = self.workflow_commons["LogTargetPath"]
        else:
            self.log.info("LogFilePath parameter not found, creating on the fly")
            result = getLogPath(self.workflow_commons)
            if not result["OK"]:
                self.log.error("Could not create LogFilePath", result["Message"])
                return result
            self.logFilePath = result["Value"]["LogFilePath"][0]
            self.logLFNPath = result["Value"]["LogTargetPath"][0]

        # The paths may still be wrapped in a sequence: keep the first entry.
        if not isinstance(self.logFilePath, str):
            self.logFilePath = self.logFilePath[0]
        if not isinstance(self.logLFNPath, str):
            self.logLFNPath = self.logLFNPath[0]

        # The experiment is deduced from the log file path; it selects the
        # configuration entries used for the failover SEs and log extensions.
        example_file = self.logFilePath
        if "/ilc/prod/clic" in example_file:
            self.experiment = "CLIC"
        elif "/ilc/prod/ilc/sid" in example_file:
            self.experiment = "ILC_SID"
        elif "/ilc/prod/ilc/mc-dbd" in example_file:
            self.experiment = "ILC_ILD"
        else:
            self.log.warn("Failed to determine experiment, reverting to default: %s" % self.experiment)

        if "Request" in self.workflow_commons:
            self.request = self.workflow_commons["Request"]
        else:
            self.request = RequestContainer()
            self.request.setRequestName("job_%s_request.xml" % self.jobID)
            self.request.setJobID(self.jobID)
            self.request.setSourceComponent("Job_%s" % self.jobID)

        return S_OK("Parameters resolved")

    ######################################################################
    def execute(self):
        """Main execution method.

        Logs the content of the working directory, sets ``self.logdir`` and
        delegates to :meth:`finalize`. The (possibly modified) request is
        stored back in ``self.workflow_commons["Request"]``.

        :return: the result of :meth:`finalize`
        """
        self.log.info("Initializing %s" % self.version)
        # Add global reporting tool
        self.resolveInputVariables()

        res = shellCall(0, "ls -al")
        if res["OK"] and res["Value"][0] == 0:
            self.log.info("The contents of the working directory...")
            self.log.info(str(res["Value"][1]))
        elif res["OK"]:
            # The command ran but returned a non-zero status: report its
            # third output field.
            self.log.error("Failed to list the log directory", str(res["Value"][2]))
        else:
            # A failed call carries no "Value" entry, only a message.
            self.log.error("Failed to list the log directory", str(res["Message"]))

        self.log.info("Job root is found to be %s" % (self.root))
        self.log.info("PRODUCTION_ID = %s, JOB_ID = %s " % (self.PRODUCTION_ID, self.JOB_ID))
        self.logdir = os.path.realpath("./job/log/%s/%s" % (self.PRODUCTION_ID, self.JOB_ID))
        self.log.info("Selected log files will be temporarily stored in %s" % self.logdir)

        res = self.finalize()
        self.workflow_commons["Request"] = self.request
        return res

    #############################################################################
    def finalize(self):
        """Select, stage and upload the log files.

        Losing the logs is not considered fatal: most failures are logged
        and still answered with ``S_OK()``.

        :return: ``S_OK`` in all cases except when the modified request
                 cannot be retrieved from the failover transfer client,
                 which yields ``S_ERROR``.
        """
        self.log.verbose("Starting UploadLogFile finalize")

        ##########################################
        # First determine the files which should be saved
        self.log.info("Determining the files to be saved in the logs.")
        res = self.determineRelevantFiles()
        if not res["OK"]:
            self.log.error("Completely failed to select relevant log files.", res["Message"])
            return S_OK()  # because if the logs are lost, it's not the end of the world.
        selectedFiles = res["Value"]
        self.log.info(
            "The following %s files were selected to be saved:\n%s" % (len(selectedFiles), "\n".join(selectedFiles))
        )

        #########################################
        # Create a temporary directory containing these files
        self.log.info("Populating a temporary directory for selected files.")
        res = self.populateLogDirectory(selectedFiles)
        if not res["OK"]:
            self.log.error("Completely failed to populate temporary log file directory.", res["Message"])
            self.setApplicationStatus("Failed To Populate Log Dir")
            return S_OK()  # because if the logs are lost, it's not the end of the world.
        self.log.info("%s populated with log files." % self.logdir)

        # NOTE: creation of a tailored index page for the logs is currently
        # disabled.

        if not self.enable:
            self.log.info("Module is disabled by control flag")
            return S_OK("Module is disabled by control flag")

        #########################################
        # Make sure all the files in the log directory have the correct permissions
        result = self.__setLogFilePermissions(self.logdir)
        if not result["OK"]:
            self.log.error("Could not set permissions of log files to 0755 with message:\n%s" % (result["Message"]))

        #########################################
        # Attempt to upload logs to the LogSE
        self.log.info("Transferring log files to the %s" % self.logSE)
        # Starts as an error so that the failover branch is taken when the
        # direct upload is skipped for a failover test.
        res = S_ERROR()
        if not self.failoverTest:
            self.log.info("PutDirectory %s %s %s" % (self.logFilePath, os.path.realpath(self.logdir), self.logSE))
            res = self.rm.putStorageDirectory(
                {self.logFilePath: os.path.realpath(self.logdir)}, self.logSE, singleDirectory=True
            )
            self.log.verbose(res)
            if res["OK"]:
                self.log.info("Successfully upload log directory to %s" % self.logSE)
                # TODO: The logURL should be constructed using the LogSE and StorageElement()
                logURL = "%s" % self.logFilePath
                self.setJobParameter("Log LFN", logURL)
                self.log.info("Logs for this job may be retrieved with dirac-ilc-get-prod-log -F %s" % logURL)
                return S_OK()

        #########################################
        # Recover the logs to a failover storage element
        self.log.error(
            "Completely failed to upload log files to %s, will attempt upload to failover SE" % self.logSE,
            res["Message"],
        )

        tarFileDir = os.path.dirname(self.logdir)
        self.logLFNPath = "%s.gz" % self.logLFNPath
        tarFileName = os.path.basename(self.logLFNPath)
        # The archive is written at the very path that is handed to the
        # transfer client below.
        tarFilePath = "%s/%s" % (tarFileDir, tarFileName)

        # A dedicated result is used here: the failed upload result above
        # must not decide whether the archive creation succeeded.
        tarResult = self._createLogTarFile(tarFilePath)
        if not tarResult["OK"]:
            self.log.error(
                "Failed to create tar file from directory", "%s %s" % (self.logdir, tarResult["Message"])
            )
            self.setApplicationStatus("Failed To Create Log Tar Dir")
            return S_OK()  # because if the logs are lost, it's not the end of the world.

        ############################################################
        # Instantiate the failover transfer client with the global request object
        failoverTransfer = FailoverTransfer(self.request)
        # The failover SEs depend on the experiment
        self.failoverSEs = self.ops.getValue("Production/%s/FailOverSE" % self.experiment, self.failoverSEs)
        random.shuffle(self.failoverSEs)
        self.log.info(
            "Attempting to store file %s to the following SE(s):\n%s" % (tarFileName, ", ".join(self.failoverSEs))
        )
        result = failoverTransfer.transferAndRegisterFile(
            tarFileName,
            tarFilePath,
            self.logLFNPath,
            self.failoverSEs,
            fileGUID=None,
            fileCatalog=["FileCatalog", "LcgFileCatalog"],
        )
        if not result["OK"]:
            self.log.error("Failed to upload logs to all destinations")
            self.setApplicationStatus("Failed To Upload Logs")
            return S_OK()  # because if the logs are lost, it's not the end of the world.

        # Now after all operations, retrieve potentially modified request object
        result = failoverTransfer.getRequestObject()
        if not result["OK"]:
            self.log.error(result)
            return S_ERROR("Could not retrieve modified request")
        self.request = result["Value"]

        res = self.createLogUploadRequest(self.logSE, self.logLFNPath)
        if not res["OK"]:
            self.log.error("Failed to create failover request", res["Message"])
            self.setApplicationStatus("Failed To Upload Logs To Failover")
        else:
            self.log.info("Successfully created failover request")

        self.workflow_commons["Request"] = self.request
        return S_OK()

    #############################################################################
    def _createLogTarFile(self, tarFilePath):
        """Pack the content of ``self.logdir`` into a gzipped tar archive.

        Entries are stored under their name relative to ``self.logdir``. The
        current working directory is not changed.

        :param str tarFilePath: path of the archive to create
        :return: ``S_OK()`` when the archive exists afterwards, ``S_ERROR``
                 with a message otherwise.
        """
        try:
            tfile = tarfile.open(tarFilePath, "w:gz")
            try:
                for item in os.listdir(self.logdir):
                    tfile.add(os.path.join(self.logdir, item), arcname=item)
            finally:
                # Always release the file handle, even on a partial archive.
                tfile.close()
        except (IOError, OSError, tarfile.TarError) as x:
            return S_ERROR("Could not write tar file: %s" % x)
        if not os.path.exists(tarFilePath):
            return S_ERROR("File was not created")
        return S_OK()

    #############################################################################
    def determineRelevantFiles(self):
        """Select the log files of the working directory to be saved.

        Files matching the configured (or default) wildcards are candidates;
        those smaller than ``self.logSizeLimit`` bytes are selected, larger
        ones are reported and skipped.

        :return: ``S_OK(list of file names)``, or ``S_ERROR`` when the file
                 sizes cannot be determined.
        """
        logFileExtensions = ["*.txt", "*.log", "*.out", "*.output", "*.xml", "*.sh", "*.info", "*.err", "*.root"]
        self.logExtensions = self.ops.getValue("/LogFiles/%s/Extensions" % self.experiment, [])

        if self.logExtensions:
            self.log.info("Using list of log extensions from CS:\n%s" % (", ".join(self.logExtensions)))
            logFileExtensions = self.logExtensions
        else:
            self.log.info("Using default list of log extensions:\n%s" % (", ".join(logFileExtensions)))

        candidateFiles = []
        for ext in logFileExtensions:
            self.log.debug("Looking at log file wildcard: %s" % ext)
            for check in glob.glob(ext):
                if os.path.isfile(check):
                    self.log.debug("Found locally existing log file: %s" % check)
                    candidateFiles.append(check)

        selectedFiles = []
        try:
            for candidate in candidateFiles:
                fileSize = os.path.getsize(candidate)
                if fileSize < self.logSizeLimit:
                    selectedFiles.append(candidate)
                else:
                    self.log.error(
                        "Log file found to be greater than maximum of %s bytes" % self.logSizeLimit, candidate
                    )
            return S_OK(selectedFiles)
        except Exception as x:
            self.log.exception("Exception while determining files to save.", "", str(x))
            return S_ERROR("Could not determine log files")