def getFileTransferInfo(self, transferType, buildJob):
    """ Get all relevant fields related to file transfer """

    copysetup = readpar('copysetupin')

    # create the direct access dictionary
    fileTransferInfo = getDirectAccessDic(copysetup)

    # if copysetupin did not contain direct access info, try the copysetup instead
    if not fileTransferInfo:
        copysetup = readpar('copysetup')
        fileTransferInfo = getDirectAccessDic(copysetup)

    # should the copytool be used?
    useCopyTool = False
    useFileStager = False
    useDirectAccess = False
    oldPrefix = ""
    newPrefix = ""
    dInfo = None
    if fileTransferInfo:
        dInfo = True
        # no direct access / remote I/O, use standard copytool (copy-to-scratch)
        if fileTransferInfo['useCopyTool']:
            useCopyTool = True
        # do not set the LFC host for file stager
        if fileTransferInfo['useFileStager']:
            useFileStager = True
        if fileTransferInfo['directIn']:
            useDirectAccess = True
        oldPrefix = fileTransferInfo['oldPrefix']
        newPrefix = fileTransferInfo['newPrefix']

    # override settings for transferType direct
    if transferType == 'direct':
        useCopyTool = False
        useFileStager = False
        useDirectAccess = True

    # should pilot create TURL based PFC? (not done here, but setup needs to be aware of it)
    # if dInfo and useDirectAccess and oldPrefix == "" and newPrefix == "":
    if (transferType == 'direct' or (useFileStager and useDirectAccess)) and (oldPrefix == "" and newPrefix == "") and not buildJob:
    # if (transferType == 'direct' or (not useFileStager and useDirectAccess)) and (oldPrefix == "" and newPrefix == ""):
        usePFCTurl = True
    else:
        usePFCTurl = False

    # force usePFCTurl for all jobs
    if not buildJob and useDirectAccess:
        tolog("Forced usePFCTurl (reset old/newPrefix)")
        usePFCTurl = True
        oldPrefix = ""
        newPrefix = ""

    if os.environ.get("TestXRootD", 'False') == 'True':
        import re
        # assign the substitution result (the original called re.sub without assigning it, a no-op)
        copysetup = re.sub(r'\/xrootdsetup\.sh', '/xrootdsetup-dev.sh', copysetup)

    return dInfo, useCopyTool, useDirectAccess, useFileStager, oldPrefix, newPrefix, copysetup, usePFCTurl
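# --- Illustration (not part of the pilot source): a minimal standalone sketch of the
# TURL-based-PFC condition in getFileTransferInfo() above, so the branch logic can be
# tested in isolation. Inputs are hypothetical; the real values come from readpar()
# and getDirectAccessDic(). Note that getFileTransferInfo() may still force
# usePFCTurl afterwards for non-build direct-access jobs.
def _use_pfc_turl(transferType, useFileStager, useDirectAccess, oldPrefix, newPrefix, buildJob):
    """Mirror of the usePFCTurl condition in getFileTransferInfo()."""
    return (transferType == 'direct' or (useFileStager and useDirectAccess)) \
        and (oldPrefix == "" and newPrefix == "") and not buildJob

# transferType='direct' with empty prefixes and a non-build job enables a TURL-based PFC
assert _use_pfc_turl('direct', False, True, "", "", False) == True
# a build job never gets a TURL-based PFC
assert _use_pfc_turl('direct', False, True, "", "", True) == False
# explicit old/new prefixes disable it (prefix replacement is used instead)
assert _use_pfc_turl('direct', False, True, "srm://x", "root://y", False) == False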
def updateCopysetups(cmd3, transferType=None, useCT=None, directIn=None, useFileStager=None): """ Update the relevant copysetup fields for remote I/O or file stager """ si = SiteInformation() _copysetupin = readpar('copysetupin') _copysetup = readpar('copysetup') if _copysetupin != "": si.updateCopysetup(cmd3, 'copysetupin', _copysetupin, transferType=transferType, useCT=useCT, directIn=directIn, useFileStager=useFileStager) else: si.updateCopysetup(cmd3, 'copysetup', _copysetup, transferType=transferType, useCT=useCT, directIn=directIn, useFileStager=useFileStager)
def getFAXRedirectors(computingSite, sourceSite, jobId):
    """ Get the FAX redirectors primarily from the google server, fall back to schedconfig.faxredirector value """

    fax_redirectors_dictionary = {}

    # Is the sourceSite set?
    if sourceSite and sourceSite.lower() != 'null':
        # Get the FAX redirectors (if the method returns an empty dictionary, the keys and values will be set below)
        fax_redirectors_dictionary = _getFAXRedirectors(computingSite, sourceSite, jobId)

        # Verify the dictionary
        if fax_redirectors_dictionary.has_key('computingsite') and fax_redirectors_dictionary['computingsite'] != None:
            if fax_redirectors_dictionary['computingsite'] == "" or fax_redirectors_dictionary['computingsite'].lower() == "null":
                fax_redirectors_dictionary['computingsite'] = readpar('faxredirector')
                tolog("!!WARNING!!5555!! FAX computingsite is unknown, using default AGIS value (%s)" % fax_redirectors_dictionary['computingsite'])
        else:
            fax_redirectors_dictionary['computingsite'] = readpar('faxredirector')
            tolog("!!WARNING!!5556!! FAX computingsite is unknown, using default AGIS value (%s)" % fax_redirectors_dictionary['computingsite'])
        if fax_redirectors_dictionary.has_key('sourcesite') and fax_redirectors_dictionary['sourcesite'] != None:
            if fax_redirectors_dictionary['sourcesite'] == "" or fax_redirectors_dictionary['sourcesite'].lower() == "null":
                fax_redirectors_dictionary['sourcesite'] = readpar('faxredirector')
                tolog("!!WARNING!!5555!! FAX sourcesite is unknown, using default AGIS value (%s)" % fax_redirectors_dictionary['sourcesite'])
        else:
            fax_redirectors_dictionary['sourcesite'] = readpar('faxredirector')
            tolog("!!WARNING!!5556!! FAX sourcesite is unknown, using default AGIS value (%s)" % fax_redirectors_dictionary['sourcesite'])
    else:
        tolog("sourceSite is not set, use faxredirector value from AGIS")
        _faxredirector = readpar('faxredirector')
        _faxredirector = updateRedirector(_faxredirector)
        fax_redirectors_dictionary['computingsite'] = _faxredirector
        fax_redirectors_dictionary['sourcesite'] = _faxredirector

    return fax_redirectors_dictionary
def extractSingularityOptions(): """ Extract any singularity options from catchall """ # e.g. catchall = "somestuff singularity_options=\'-B /etc/grid-security/certificates,/var/spool/slurmd,/cvmfs,/ceph/grid,/data0,/sys/fs/cgroup\'" #catchall = "singularity_options=\'-B /etc/grid-security/certificates,/cvmfs,${workdir} --contain\'" #readpar("catchall") # ${workdir} should be there, otherwise the pilot cannot add the current workdir # if not there, add it # First try with reading new parameters from schedconfig container_options = readpar("container_options") if container_options == "": tolog("container_options either does not exist in queuedata or is empty, trying with catchall instead") catchall = readpar("catchall") #catchall = "singularity_options=\'-B /etc/grid-security/certificates,/cvmfs,${workdir} --contain\'" pattern = re.compile(r"singularity\_options\=\'?\"?(.+)\'?\"?") found = re.findall(pattern, catchall) if len(found) > 0: container_options = found[0] if container_options != "": if container_options.endswith("'") or container_options.endswith('"'): container_options = container_options[:-1] # add the workdir if missing if not "${workdir}" in container_options and " --contain" in container_options: container_options = container_options.replace(" --contain", ",${workdir} --contain") tolog("Note: added missing ${workdir} to singularity_options") return container_options
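# --- Illustration (not part of the pilot source): how the singularity_options regexp in
# extractSingularityOptions() behaves on a sample catchall string. The catchall value is
# made up. Note that the trailing quote is swallowed by the greedy (.+) group, which is
# why the function strips a trailing ' or " by hand.
import re

catchall = "somestuff singularity_options='-B /etc/grid-security/certificates,/cvmfs,${workdir} --contain'"
pattern = re.compile(r"singularity\_options\=\'?\"?(.+)\'?\"?")
found = re.findall(pattern, catchall)
if found:
    options = found[0]
    if options.endswith("'") or options.endswith('"'):
        options = options[:-1]
    print(options)  # -> -B /etc/grid-security/certificates,/cvmfs,${workdir} --contain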
def surls2metalink(self, replicas, metalinkFile): """ Convert list of replicas (of multiple files) to metalink Input argument, replicas, is dict with guid as key, and a list of surls Mappings from surl to https turl will come from ddm eventually to cover surls from remote SEs. For now just add the mapping for the local SE from copysetup. """ site_name = self.site_name local_se_token = site_name + "_DATADISK" tolog("local SE token: %s" % (local_se_token)) # self.surl2https_map has key is srm hostname, then tuple of (from,to) regexp replace dirAcc = getDirectAccessDic(readpar('copysetupin')) if not dirAcc: dirAcc = getDirectAccessDic(readpar('copysetup')) # extract srm host for key srmhost = None if dirAcc: srmhost = self.hostFromSurl(dirAcc['oldPrefix']) for guid in replicas.keys(): reps = replicas[guid] tolog("Got replicas=%s for guid=%s" % (str(reps), guid)) try: token_file = open('token_file', 'r') except IOError, e: tolog("!!WARNING!! Failed to open file: %s" % (e)) raise Exception("!!FAILED!!1099!! Cannot open file with token!")
def _check_space(self, ub): """Checking space of a local directory""" # "source setup.sh" if self._setup: _setup_str = "source %s; " % self._setup else: _setup_str = '' fail = 0 ret = '' if ub == "" or ub == "None" or ub == None: # seprodpath can have a complex structure in case of space tokens # although currently not supported in this site mover, prepare the code anyway # (use the first list item only) dst_loc_se = self.getDirList(readpar('seprodpath'))[0] if dst_loc_se == "": dst_loc_se = readpar('sepath') if dst_loc_se == "": tolog("WARNING: Can not perform alternative space check since sepath is not set") return -1 else: tolog("Attempting to use df for checking SE space: %s" % (dst_loc_se)) return self.check_space_df(dst_loc_se) else: try: f = urllib.urlopen(ub + '/storages/default') except Exception, e: tolog('!!WARNING!!2999!! Fetching default storage failed!') return -1 else:
def getFileTransferInfo(self, transferType, buildJob): """ Get all relevant fields related to file transfer """ copysetup = readpar('copysetupin') # create the direct access dictionary fileTransferInfo = getDirectAccessDic(copysetup) # if copysetupin did not contain direct access info, try the copysetup instead if not fileTransferInfo: copysetup = readpar('copysetup') fileTransferInfo = getDirectAccessDic(copysetup) # should the copytool be used? useCopyTool = False useFileStager = False useDirectAccess = False lfcHost = readpar('lfchost') oldPrefix = "" newPrefix = "" dInfo = None if fileTransferInfo: dInfo = True # no direct access / remote I/O, use standard copytool (copy-to-scratch) if fileTransferInfo['useCopyTool']: useCopyTool = True # do not set the LFC host for file stager if fileTransferInfo['useFileStager']: useFileStager = True if fileTransferInfo['directIn']: useDirectAccess = True oldPrefix = fileTransferInfo['oldPrefix'] newPrefix = fileTransferInfo['newPrefix'] # override settings for transferType direct if transferType == 'direct': useCopyTool = False useFileStager = False useDirectAccess = True if oldPrefix == "" and newPrefix == "": lfcHost = "" # should pilot create TURL based PFC? (not done here, but setup needs to be aware of it) # if dInfo and useDirectAccess and oldPrefix == "" and newPrefix == "": if (transferType == 'direct' or (useFileStager and useDirectAccess)) and (oldPrefix == "" and newPrefix == "") and not buildJob: # if (transferType == 'direct' or (not useFileStager and useDirectAccess)) and (oldPrefix == "" and newPrefix == ""): usePFCTurl = True else: usePFCTurl = False return dInfo, useCopyTool, useDirectAccess, useFileStager, oldPrefix, newPrefix, copysetup, usePFCTurl, lfcHost
def surls2metalink(self,replicas,metalinkFile): """ Convert list of replicas (of multiple files) to metalink Input argument, replicas, is dict with guid as key, and a list of surls Mappings from surl to https turl will come from ddm eventually to cover surls from remote SEs. For now just add the mapping for the local SE from copysetup. """ # self.surl2https_map has key is srm hostname, then tuple of (from,to) regexp replace dirAcc = getDirectAccessDic(readpar('copysetupin')) if not dirAcc: dirAcc = getDirectAccessDic(readpar('copysetup')) # extract srm host for key if dirAcc: srmhost = self.hostFromSurl(dirAcc['oldPrefix']) if srmhost: self.surl2https_map[srmhost] = (dirAcc['oldPrefix'],dirAcc['newPrefix']) # Start building metalink metalink='<?xml version="1.0" encoding="utf-8"?>\n' metalink+='<metalink version="3.0" generator="Pilot" xmlns="http://www.metalinker.org/">\n' metalink+='<files>\n' for guid in replicas.keys(): reps = replicas[guid] # surl can have __DQ2blah at the end - strip it name = reps[0].sfn.split('/')[-1] extindex = name.rfind('__DQ2-') if extindex > 0: name = name[:extindex] metalink+='<file name="%s">\n'%name metalink+='<size>%s</size>'%reps[0].filesize metalink+='<verification><hash type="adler32">%s</hash></verification>\n'%reps[0].csumvalue metalink+='<resources>\n' # if the surl matches a list of https sites, then add a url for rep in reps: srmhost = self.hostFromSurl(rep.sfn) if srmhost in self.surl2https_map.keys(): pair = self.surl2https_map[srmhost] metalink+='<url type="https" >%s</url>\n'% \ re.sub(pair[0],pair[1],rep.sfn) else: tolog("Not found: %s"%rep.sfn) metalink+='</resources></file>\n' metalink+='</files></metalink>\n' print metalink mlfile = open(metalinkFile,'w') mlfile.write(metalink) mlfile.close()
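# --- Illustration (not part of the pilot source): the metalink document that
# surls2metalink() above writes for a single replica, assuming a hypothetical replica
# with filesize 1024, adler32 checksum '0a1b2c3d' and an srm host present in
# self.surl2https_map. Only the structure matters; names and URLs are invented.
expected_metalink = (
    '<?xml version="1.0" encoding="utf-8"?>\n'
    '<metalink version="3.0" generator="Pilot" xmlns="http://www.metalinker.org/">\n'
    '<files>\n'
    '<file name="EVNT.01234._000001.pool.root.1">\n'
    '<size>1024</size>'
    '<verification><hash type="adler32">0a1b2c3d</hash></verification>\n'
    '<resources>\n'
    '<url type="https" >https://host.example.org/path/EVNT.01234._000001.pool.root.1</url>\n'
    '</resources></file>\n'
    '</files></metalink>\n'
)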
def getProdSourceLabel(self): """ determine the job type """ prodSourceLabel = None # not None value; can be user (user analysis job), ddm (panda mover job, sitename should contain DDM) # test will return a testEvgen/testReco job, ptest will return a job sent with prodSourceLabel ptest if self.__env['uflag']: if self.__env['uflag'] == 'self' or self.__env['uflag'] == 'ptest': if self.__env['uflag'] == 'ptest': prodSourceLabel = self.__env['uflag'] elif self.__env['uflag'] == 'self': prodSourceLabel = 'user' else: prodSourceLabel = self.__env['uflag'] # for PandaMover jobs the label must be ddm if "DDM" in self.__env['thisSite'].sitename or (self.__env['uflag'] == 'ddm' and self.__env['thisSite'].sitename == 'BNL_ATLAS_test'): prodSourceLabel = 'ddm' elif "Install" in self.__env['thisSite'].sitename: # old, now replaced with prodSourceLabel=install prodSourceLabel = 'software' if pUtil.readpar('status').lower() == 'test' and self.__env['uflag'] != 'ptest' and self.__env['uflag'] != 'ddm': prodSourceLabel = 'test' # override for release candidate pilots if self.__env['pilot_version_tag'] == "RC": prodSourceLabel = "rc_test" if self.__env['pilot_version_tag'] == "DDM": prodSourceLabel = "ddm" return prodSourceLabel
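# --- Illustration (not part of the pilot source): the uflag/sitename -> prodSourceLabel
# mapping implemented by getProdSourceLabel() above, as a standalone sketch. Site names
# are hypothetical; the queuedata 'status' and pilot_version_tag overrides are omitted.
def _prod_source_label(uflag, sitename):
    label = None
    if uflag:
        if uflag in ('self', 'ptest'):
            label = 'ptest' if uflag == 'ptest' else 'user'
        else:
            label = uflag
    if "DDM" in sitename or (uflag == 'ddm' and sitename == 'BNL_ATLAS_test'):
        label = 'ddm'
    elif "Install" in sitename:
        label = 'software'
    return label

assert _prod_source_label('user', 'ANALY_SOMESITE') == 'user'
assert _prod_source_label('self', 'ANALY_SOMESITE') == 'user'
assert _prod_source_label('ddm', 'SOMESITE_DDM') == 'ddm'
assert _prod_source_label(None, 'SOMESITE_Install') == 'software'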
def getGlobalFilePaths(self, dsname): """ Get the global file paths using to_native_lfn """ tolog("Guessing the global path using to_native_lfn()..") # this method will in fact only ever return a single path, but keep 'paths' as a list for consistency with getGlobalFilePathsDQ2() paths = [] # get the global redirector redirector = readpar("faxredirector") # 'root://glrd.usatlas.org/' # correct the redirector in case the protocol and/or trailing slash are missing redirector = self.updateRedirector(redirector) # get the pre-path native_path = self.to_native_lfn(dsname, "DUMMYLFN") native_path = native_path.replace("DUMMYLFN", "") # the real lfn will be added by the caller # remove the /grid substring native_path = native_path.replace("/grid", "") # construct the global path paths.append(redirector + native_path) tolog("Will use global path: %s" % (paths[0])) return paths
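# --- Illustration (not part of the pilot source): how getGlobalFilePaths() composes the
# global FAX path. The redirector and dataset path are hypothetical; to_native_lfn() is
# assumed to return an LFC-style path containing '/grid'.
redirector = 'root://glrd.usatlas.org/'          # from readpar("faxredirector"), after updateRedirector()
native_path = '/grid/atlas/rucio/mc15_13TeV/'    # to_native_lfn() output minus the dummy LFN
native_path = native_path.replace('/grid', '')   # -> '/atlas/rucio/mc15_13TeV/'
print(redirector + native_path)                  # -> 'root://glrd.usatlas.org//atlas/rucio/mc15_13TeV/'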
def getObjectstoresListXXX(queuename): """ Get the objectstores list from the proper queuedata for the relevant queue """ # queuename is needed as long as objectstores field is not available in normal queuedata (temporary) objectstores = None # First try to get the objectstores field from the normal queuedata try: from pUtil import readpar _objectstores = readpar("objectstores") except: # tolog("Field \'objectstores\' not yet available in queuedata") _objectstores = None # Get the field from AGIS if not _objectstores: s = True # Download the new queuedata in case it has not been downloaded already if not os.path.exists(getNewQueuedataFilename()): s = getNewQueuedata(queuename) if s: _objectstores = getField("objectstores") if _objectstores: objectstores = _objectstores return objectstores
def addMD5sum(self, lfn, md5sum): """ add md5sum to lfn """ if os.environ.has_key('LD_LIBRARY_PATH'): tolog("LD_LIBRARY_PATH prior to lfc import: %s" % os.environ['LD_LIBRARY_PATH']) else: tolog("!!WARNING!!2999!! LD_LIBRARY_PATH not set prior to lfc import") import lfc os.environ['LFC_HOST'] = readpar('lfchost') # b="." # buffer = b.zfill(200) # ret = lfc.lfc_seterrbuf(buffer, len(buffer)) stat = lfc.lfc_filestatg() exitcode = lfc.lfc_statg(lfn, "", stat) if exitcode != 0: # print "error:",buffer err_num = lfc.cvar.serrno tolog("!!WARNING!!2999!! lfc.lfc_statg: %d %s" % (err_num, lfn)) return exitcode exitcode = lfc.lfc_setfsizeg(stat.guid, stat.filesize, 'MD', md5sum) if exitcode != 0: # print "error:",buffer err_num = lfc.cvar.serrno tolog("[Non-fatal] ERROR: lfc.lfc_setfsizeg: %d %s %s" % (err_num, lfn, md5sum)) return exitcode tolog("Successfully set md5sum for %s" % (lfn)) return exitcode
def extractSingularityOptions(): """ Extract any singularity options from catchall """ # e.g. catchall = "somestuff singularity_options=\'-B /etc/grid-security/certificates,/var/spool/slurmd,/cvmfs,/ceph/grid,/data0,/sys/fs/cgroup\'" #catchall = "singularity_options=\'-B /etc/grid-security/certificates,/cvmfs,${workdir} --contain\'" #readpar("catchall") # ${workdir} should be there, otherwise the pilot cannot add the current workdir # if not there, add it catchall = readpar("catchall") #catchall = "singularity_options=\'-B /etc/grid-security/certificates,/cvmfs,${workdir} --contain\'" tolog("catchall: %s" % catchall) pattern = re.compile(r"singularity\_options\=\'?\"?(.+)\'?\"?") found = re.findall(pattern, catchall) if len(found) > 0: singularity_options = found[0] if singularity_options.endswith("'") or singularity_options.endswith( '"'): singularity_options = singularity_options[:-1] # add the workdir if missing if not "${workdir}" in singularity_options and " --contain" in singularity_options: singularity_options = singularity_options.replace( " --contain", ",${workdir} --contain") tolog("Note: added missing ${workdir} to singularity_options") else: singularity_options = "" return singularity_options
def getSubprocessName(self, eventService): """ Select which subprocess is to be run by the Monitor """ # The default subprocess is RunJob (name='Normal', which performs payload setup, stage-in, payload execution and stage-out). # An alternative subprocess is the runEvent module which downloads events from an Event Server, executes a payload # and stages ou output files asynchronously as they are ready. # Note: send the entire job object to this method since there might be other subprocesses created at a later time which # will be identified by this method using some other job data member # Default subprocess name name = "RunJob" # Select alternative subprocess names for HPCs isHPC, _name = extractHPCInfo(readpar('catchall')) if isHPC: name = "RunJob" + _name # e.g. "RunJobTitan" is the proper subprocess name for the Titan plug-in # for es merge jobs if _name and _name.startswith("Hpc"): name = "RunJob" # Are we going to run an event service job? if eventService: tolog("Encountered an event service job") if isHPC: name = "RunJob%sEvent" % (_name) else: name = "RunJobEvent" tolog("Selected subprocess: %s" % (name)) return name
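# --- Illustration (not part of the pilot source): subprocess names produced by the
# selection logic in getSubprocessName() above. extractHPCInfo() is assumed to return
# (isHPC, suffix) parsed from the catchall field, e.g. (True, 'Titan').
def _subprocess_name(isHPC, suffix, eventService):
    name = "RunJob"
    if isHPC:
        name = "RunJob" + suffix
    if suffix and suffix.startswith("Hpc"):
        name = "RunJob"   # es merge case
    if eventService:
        name = "RunJob%sEvent" % (suffix) if isHPC else "RunJobEvent"
    return name

assert _subprocess_name(False, "", False) == "RunJob"
assert _subprocess_name(True, "Titan", False) == "RunJobTitan"
assert _subprocess_name(True, "Hpc", False) == "RunJob"
assert _subprocess_name(True, "Titan", True) == "RunJobTitanEvent"
assert _subprocess_name(False, "", True) == "RunJobEvent"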
def fixStageInPath(self, path):
    """Fix the path"""

    if path[:3] == "srm" and '?SFN=' in path:
        self.log("Found SFN part in file path: %s" % (path))
    elif path[:3] == "srm":
        try:
            hostname = path.split('/', 3)[2]
        except Exception as e:
            self.log("!!WARNING!!2999!! Could not extract srm protocol for replacement, keeping path variable as it is: %s (%s)" % (path, str(e)))
        else:
            # srm = 'srm://head01.aglt2.org'
            srm = 'srm://' + hostname

            # extract the SE up front: the original only derived 'se' in the else branch
            # below, leaving it undefined for the addPortToPath() call after a seopt match
            se = readpar('se').split(",")[0]
            _dummytoken, se = self.extractSE(se)
            tolog("Using SE: %s" % (se))

            # does seopt contain any matching srm's?
            sematch = self.getSEMatchFromSEOpt(srm)
            if sematch != "":
                path = path.replace(srm, sematch)
                self.log("Replaced %s with %s (from seopt) in path: %s" % (srm, sematch, path))
            else:
                path = path.replace(srm, se)
                self.log("Replaced %s with %s (from se) in path: %s" % (srm, se, path))

            # add port number from se to getfile if necessary
            path = self.addPortToPath(se, path)

    return path
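# --- Illustration (not part of the pilot source): the kind of rewrite fixStageInPath()
# performs. Host names are hypothetical; getSEMatchFromSEOpt() is assumed to yield the
# seopt string shown.
path = 'srm://head01.aglt2.org/pnfs/aglt2.org/atlasdatadisk/some/file'
srm = 'srm://' + path.split('/', 3)[2]                       # -> 'srm://head01.aglt2.org'
sematch = 'srm://head01.aglt2.org:8443/srm/managerv2?SFN='   # say, from seopt
print(path.replace(srm, sematch))
# -> 'srm://head01.aglt2.org:8443/srm/managerv2?SFN=/pnfs/aglt2.org/atlasdatadisk/some/file'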
def getPreDestination(self, sitemover, analJob, token, prodSourceLabel, alt=False): """ get the pre destination """ destination = "" if not analJob: # process the destination path with getDirList since it can have a complex structure # as well as be a list of destination paths matching a corresponding space token if prodSourceLabel == 'ddm' and readpar('seprodpath') == '': sepath = readpar('sepath', alt=alt) else: sepath = readpar('seprodpath', alt=alt) destinationList = sitemover.getDirList(sepath) # decide which destination path to use depending on the space token for the current file if token: # find the proper path destination = sitemover.getMatchingDestinationPath(token, destinationList, alt=alt) if destination == "": tolog("!!WARNING!!2990!! seprodpath not properly defined: seprodpath = %s, destinationList = %s, using sepath instead" %\ (sepath, str(destinationList))) sepath = readpar('sepath', alt=alt) destinationList = sitemover.getDirList(sepath) destination = sitemover.getMatchingDestinationPath(token, destinationList, alt=alt) if destination == "": tolog("!!WARNING!!2990!! sepath not properly defined: sepath = %s, destinationList = %s" %\ (sepath, str(destinationList))) else: # space tokens are not used destination = destinationList[0] else: sepath = readpar('sepath', alt=alt) destinationList = sitemover.getDirList(sepath) # decide which destination path to use depending on the space token for the current file if token: # find the proper path destination = sitemover.getMatchingDestinationPath(token, destinationList, alt=alt) if destination == "": tolog("!!WARNING!!2990!! sepath not properly defined: sepath = %s, destinationList = %s" %\ (sepath, str(destinationList))) else: # space tokens are not used destination = destinationList[0] return destination
def isTier3(self): """ Is the given site a Tier-3? """ # Note: defined by DB if readpar('ddm') == "local": status = True else: status = False return status
def setRecoveryDirs(self): """ Set the recovery directories """ dirs = [] if self.__site: _dir = self.__site.wntmpdir else: _dir = "" if _dir == "": if os.environ.has_key('TMPDIR'): _dir = os.environ['TMPDIR'] elif os.environ.has_key('OSG_WN_TMP'): _dir = os.environ['OSG_WN_TMP'] elif os.path.exists("/tmp"): _dir = "/tmp" elif os.path.exists("/scratch"): _dir = "/scratch" else: self.__pilotErrorDiag = "Could not locate any scratch dirs" tolog(self.__errorString % self.__pilotErrorDiag) _dir = "" if _dir != "": dirs.append(_dir) extradir = readpar('wntmpdir') if extradir != "" and extradir != "None" and extradir not in dirs: dirs.append(extradir) # check queuedata for external recovery directory # an empty externalRecoveryDir means that recovery should only search local WN disk for lost jobs # make sure the recovery directory actually exists (will not be added to dir list if empty) externalRecoveryDir = self.verifyRecoveryDir(readpar('recoverdir')) if externalRecoveryDir != "": dirs.append(externalRecoveryDir) if dirs != []: tolog("Job recovery will probe: %s" % str(dirs)) self.__recoveryDirs = dirs else: self.__pilotErrorDiag = "Did not identify any base recovery directories" tolog(self.__errorString % self.__pilotErrorDiag)
def forceAlternativeStageOut(self, flag=False): """ Force stage-out to use alternative SE """ # See allowAlternativeStageOut() # For ATLAS, flag=isAnalysisJob(). Alt stage-out is currently disabled for user jobs, so do not allow alt stage-out to be forced. tolog("ATLAS") if "force_alt_stageout" in readpar('catchall') and not flag: status = True else: status = False return status
def setEnvVars(sitename): """ Set ATLAS_CONDDB if necessary """ if not os.environ.has_key('ATLAS_CONDDB'): atlas_conddb = readpar('gatekeeper') if atlas_conddb != "to.be.set": os.environ["ATLAS_CONDDB"] = atlas_conddb tolog("Note: ATLAS_CONDDB was not set by the pilot wrapper script") tolog("The pilot has set ATLAS_CONDDB to: %s" % (atlas_conddb)) else: tolog("Warning: ATLAS_CONDDB was not set by the pilot wrapper and schedconfig.gatekeeper value is to.be.set (pilot will take no action)") # set specially requested env vars os.environ["PANDA_SITE_NAME"] = sitename tolog("Set PANDA_SITE_NAME = %s" % (sitename)) copytool = readpar("copytoolin") if copytool == "": copytool = readpar("copytool") if "^" in copytool: copytool = copytool.split("^")[0] os.environ["COPY_TOOL"] = copytool tolog("Set COPY_TOOL = %s" % (copytool))
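# --- Illustration (not part of the pilot source): the copytool parsing done by
# setEnvVars() above. Queuedata values of the form 'tool^setup' keep only the tool name;
# the value below is hypothetical.
copytool = 'lcg-cp2^/path/to/setup.sh'   # e.g. readpar('copytoolin')
if '^' in copytool:
    copytool = copytool.split('^')[0]
print(copytool)   # -> 'lcg-cp2'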
def core_get_data(self, envsetup, token, source_surl, local_fullname, experiment):
    """ special get function developed for storm sites """

    error = PilotErrors()

    # Transform the surl into a full surl
    full_se_endpoint = self.extractSE(readpar('se').split(",")[0])[1]
    prefix = os.path.commonprefix([source_surl, full_se_endpoint])
    if prefix:
        # Can use the bdii-free form
        source_surl = full_se_endpoint + source_surl[len(prefix):]
        _cmd_str = '%s lcg-gt --nobdii --setype srmv2 "%s" file' % (envsetup, source_surl)
    else:
        # Fallback solution, use old lcg-gt form
        # get the TURL using the SURL
        tolog("!!WARNING!!1234!! Source surl does not match %s, cannot use the bdii-independent lcg-gt" % full_se_endpoint)
        _cmd_str = '%s lcg-gt "%s" file' % (envsetup, source_surl)

    tolog("Executing command: %s" % (_cmd_str))
    t0 = os.times()
    s, o = commands.getstatusoutput(_cmd_str)
    t1 = os.times()
    t = t1[4] - t0[4]
    tolog("Command finished after %f s" % (t))
    if s == 0:
        # get the experiment object
        thisExperiment = getExperiment(experiment)

        # add the full stage-out command to the job setup script
        to_script = _cmd_str
        to_script = to_script.lstrip(' ')  # remove any initial spaces
        if to_script.startswith('/'):
            to_script = 'source ' + to_script
        thisExperiment.updateJobSetupScript(os.path.dirname(local_fullname), to_script=to_script)

        source_turl, req_token = o.split('\n')
        source_turl = source_turl.replace('file://', '')
        tolog("Creating link from %s to %s" % (source_turl, local_fullname))
        try:
            os.symlink(source_turl, local_fullname)
            _cmd_str = '%s lcg-sd %s %s 0' % (envsetup, source_surl, req_token)
            tolog("Executing command: %s" % (_cmd_str))
            s, o = commands.getstatusoutput(_cmd_str)
            # Do we need to check the exit status of lcg-sd? What do we do if it fails?
            tolog("get_data succeeded")
        except Exception, e:
            pilotErrorDiag = "Exception caught: %s" % str(e)
            tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
            tolog("get_data failed")
            return error.ERR_STAGEINFAILED, pilotErrorDiag
def executePayload(runCommandList, job):
    """ execute the payload """

    # do not hide the proxy for PandaMover since it needs it or for sites that have sc.proxy = donothide
    #if 'DDM' not in jobSite.sitename and readpar('proxy') != 'donothide':
    #    # create the proxy guard object (must be created here before the sig2exc())
    #    proxyguard = ProxyGuard()
    #
    #    # hide the proxy
    #    hP_ret = proxyguard.hideProxy()
    #    if not hP_ret:
    #        tolog("Warning: Proxy exposed to payload")

    # run the payload process, which could take days to finish
    t0 = os.times()
    res_tuple = (0, 'Undefined')

    # loop over all run commands (only >1 for multi-trfs)
    current_job_number = 0
    getstatusoutput_was_interrupted = False
    number_of_jobs = len(runCommandList)
    for cmd in runCommandList:
        current_job_number += 1
        try:
            # add the full job command to the job_setup.sh file
            to_script = cmd.replace(";", ";\n")
            addToJobSetupScript(to_script, job.workdir)

            tolog("Executing job command %d/%d: %s" % (current_job_number, number_of_jobs, cmd))
            if readpar('glexec').lower() in ['true', 'uid']:
                # execute trf under glexec
                res_tuple = executePayloadGLExec(cmd, job)
            else:
                # execute trf normally
                res_tuple = commands.getstatusoutput(cmd)
        except Exception, e:
            tolog("!!FAILED!!3000!! Failed to run command %s" % str(e))
            getstatusoutput_was_interrupted = True
            if failureCode:  # note: failureCode is expected to be a module-level global in the original pilot
                job.result[2] = failureCode
                tolog("!!FAILED!!3000!! Failure code: %d" % (failureCode))
                break
        else:
            if res_tuple[0] == 0:
                tolog("Job command %d/%d finished" % (current_job_number, number_of_jobs))
            else:
                tolog("Job command %d/%d failed: res = %s" % (current_job_number, number_of_jobs, str(res_tuple)))
                break
def allowAlternativeStageOut(self, flag=False): """ Is alternative stage-out allowed? """ # E.g. if stage-out to primary SE (at Tier-2) fails repeatedly, is it allowed to attempt stage-out to secondary SE (at Tier-1)? # For ATLAS, flag=isAnalysisJob(). Alt stage-out is currently disabled for user jobs, so do not allow alt stage-out to be forced. if "allow_alt_stageout" in readpar('catchall') and not flag: status = True else: status = False # if enableT1stageout.lower() == "true" or enableT1stageout.lower() == "retry": # status = True # else: # status = False return status
def setNumberOfCores(self): """ Report the number of cores in the WN """ # 1. Grab corecount from queuedata # 2. If corecount is number and corecount > 1, set ATHENA_PROC_NUMBER env variable to this value # 3. If corecount is 0, null, or doesn't exist, then don't set the env. variable # 4. If corecount is '-1', then get number of cores from /proc/cpuinfo, and set the env. variable accordingly. cores = [] nCores = None # grab the schedconfig value try: nCores = int(readpar('corecount')) except ValueError: # covers the case 'NULL' tolog("corecount not an integer in queuedata") except Exception, e: tolog("corecount not set in queuedata: %s" % str(e))
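# --- Illustration (not part of the pilot source): the corecount parsing used in
# setNumberOfCores() above. 'NULL' and missing values leave nCores unset; '-1' is a
# sentinel meaning "count the cores from /proc/cpuinfo". Sample values are hypothetical.
for corecount in ('8', '0', 'NULL', '-1'):
    try:
        nCores = int(corecount)
    except ValueError:
        nCores = None   # covers the 'NULL' case
    print("corecount=%s -> nCores=%s" % (corecount, nCores))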
def forceAlternativeStageOut(self, **pdict): """ Force stage-out to use alternative SE """ # See allowAlternativeStageOut() # For ATLAS, flag=isAnalysisJob(). Alt stage-out is currently disabled for user jobs, so do not allow alt stage-out to be forced. status = False flag = pdict.get('flag', False) altStageOut = pdict.get('altStageOut', False) objectstore = pdict.get('objectstore', False) if not objectstore: if altStageOut == "force": status = True elif "force_alt_stageout" in readpar('catchall') and not flag: status = True else: status = False return status
def getContainerName(user="******"): # E.g. container_type = 'singularity:pilot;docker:wrapper' # getContainerName(user='******') -> return 'singularity' container_name = "" container_type = readpar('container_type') if container_type != "" and user in container_type: try: container_names = container_type.split(';') for name in container_names: t = name.split(':') if user == t[1]: container_name = t[0] except Exception as e: tolog("Failed to parse the container name: %s, %s" % (container_type, e)) else: tolog("Container type not specified in queuedata") return container_name
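# --- Illustration (not part of the pilot source): parsing a container_type queuedata
# value the way getContainerName() does. The value is the example from the function's
# own comment; a real call also guards against malformed entries with try/except.
container_type = 'singularity:pilot;docker:wrapper'
user = 'pilot'
container_name = ''
for entry in container_type.split(';'):
    t = entry.split(':')
    if user == t[1]:
        container_name = t[0]
print(container_name)   # -> 'singularity'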
def _useDirectAccess(LAN=True, WAN=False): """ Should direct i/o be used over LAN or WAN? """ useDA = False if LAN: par = 'direct_access_lan' elif WAN: par = 'direct_access_wan' else: tolog("!!WARNING!!3443!! Bad LAN/WAN combination: LAN=%s, WAN=%s" % (str(LAN), str(WAN))) par = '' if par != '': da = readpar(par) if da: da = da.lower() if da == "true": useDA = True return useDA
def getSpecialSetupCommand(self): """ Set special_setup_cmd if necessary """ # Note: this special setup command is hardly used and could probably be removed # in case any special setup should be added to the setup string before the trf is executed, the command defined in this method # could be added to the run command by using method addSPSetupToCmd(). # the special command is also forwarded to the get and put functions (currently not used) special_setup_cmd = "" # add envsetup to the special command setup on tier-3 sites # (unknown if this is still needed) si = getSiteInformation(self.__experiment) if si.isTier3(): _envsetup = readpar('envsetup') if _envsetup != "": special_setup_cmd += _envsetup if not special_setup_cmd.endswith(';'): special_setup_cmd += ";" return special_setup_cmd
def updateQueuedataFromJobParameters(self, jobParameters): """ Extract queuedata overwrite command from job parameters and update queuedata """ tolog("called updateQueuedataFromJobParameters with: %s" % (jobParameters)) # extract and remove queuedata overwrite command from job parameters if "--overwriteQueuedata" in jobParameters: tolog("Encountered an --overwriteQueuedata command in the job parameters") # (jobParameters might be updated [queuedata overwrite command should be removed if present], so they needs to be returned) jobParameters, queuedataUpdateDictionary = self.extractQueuedataOverwrite(jobParameters) # update queuedata if queuedataUpdateDictionary != {}: tolog("Queuedata will be updated from job parameters") for field in queuedataUpdateDictionary.keys(): ec = self.replaceQueuedataField(field, queuedataUpdateDictionary[field]) tolog("Updated %s in queuedata: %s (read back from file)" % (field, self.readpar(field))) # disable FAX if set in schedconfig if "--disableFAX" in jobParameters: tolog("Encountered a --disableFAX command in the job parameters") # remove string from jobParameters jobParameters = jobParameters.replace(" --disableFAX", "") # update queuedata if necessary if readpar("allowfax").lower() == "true": field = "allowfax" ec = self.replaceQueuedataField(field, "False") tolog("Updated %s in queuedata: %s (read back from file)" % (field, self.readpar(field))) else: tolog("No need to update queuedata for --disableFAX (allowfax is not set to True)") return jobParameters
def updatePandaServer(self, job, site, workerNode, port, xmlstr=None, spaceReport=False, log=None, ra=0, jr=False, useCoPilot=False, stdout_tail="", stdout_path="", additionalMetadata=None): """ Update the job status with the jobdispatcher web server. State is a tuple of (jobId, ["jobstatus", transExitCode, pilotErrorCode], timestamp) log = log extracts xmlstr is set in postJobTask for finished jobs (all files). Failed jobs will only send xml for log (created in this function) jr = job recovery mode """ tolog("Updating job status in updatePandaServer(): PandaId=%s, result=%s, time=%s" % (job.getState())) # set any holding job to failed for sites that do not use job recovery (e.g. sites with LSF, that immediately # removes any work directory after the LSF job finishes which of course makes job recovery impossible) if not self.__jobrec: if job.result[0] == 'holding' and site.sitename != "CERNVM": job.result[0] = 'failed' tolog("This site does not support job recovery: HOLDING state reset to FAILED") # note: any changed job state above will be lost for fake server updates, does it matter? # get the node structure expected by the server node = self.getNodeStructure(job, site, workerNode, spaceReport=spaceReport, log=log) # skip the server update (e.g. on NG) if not self.__updateServer: tolog("(fake server update)") return 0, node tolog("xmlstr = %s" % (xmlstr)) # get the xml node['xml'] = self.getXML(job, site.sitename, site.workdir, xmlstr=xmlstr, jr=jr) # stdout tail in case job.debug == 'true' if job.debug.lower() == "true" and stdout_tail != "": # protection for potentially large tails stdout_tail = stdout_tail[-2048:] node['stdout'] = stdout_tail tolog("Will send stdout tail:\n%s (length = %d)" % (stdout_tail, len(stdout_tail))) # also send the full stdout to a text indexer if required if stdout_path != "": if "stdout_to_text_indexer" in readpar('catchall') and os.path.exists(stdout_path): tolog("Will send payload stdout to text indexer") # get the user name, which we will use to create a proper filename from SiteMover import SiteMover s = SiteMover() username = s.extractUsername(job.prodUserID) # get setup path for xrdcp try: si = getSiteInformation(job.experiment) setup_path = si.getLocalROOTSetup() filename = "PanDA_payload_stdout-%s.txt" % (job.jobId) dateDirs = self.getDateDirs() remotePath = os.path.join(os.path.join(username, dateDirs), filename) url = "root://faxbox.mwt2.org//group/logs/pilot/%s" % (remotePath) cmd = "%sxrdcp -f %s %s" % (setup_path, stdout_path, url) tolog("Executing command: %s" % (cmd)) rc, rs = getstatusoutput(cmd) tolog("rc=%d, rs=%s" % (rc, rs)) except Exception, e: tolog("!!WARNING!!3322!! Failed with text indexer: %s" % (e)) else: tolog("stdout_path not set")
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """ copy output file from disk to local SE """
    # function is based on dCacheSiteMover put function

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    scope = pdict.get('scope', '')
    dsname = pdict.get('dsname', '')
    testLevel = pdict.get('testLevel', '0')
    extradirs = pdict.get('extradirs', '')
    proxycheck = pdict.get('proxycheck', False)
    experiment = pdict.get('experiment', '')
    analysisJob = pdict.get('analJob', False)
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
    if prodSourceLabel == 'ddm' and analysisJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analysisJob = False

    # get the DQ2 tracing report
    try:
        report = pdict['report']
    except:
        report = {}
    else:
        # set the proper protocol
        report['protocol'] = 'curl'
        # mark the relative start
        report['catStart'] = time()
        # the current file
        report['filename'] = lfn
        # guid
        report['guid'] = guid.replace('-', '')

    # preparing variables
    if fsize == 0 or fchecksum == 0:
        ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype="adler32")
        if ec != 0:
            self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

    # now that the file size is known, add it to the tracing report
    report['filesize'] = fsize

    # get the checksum type
    if fchecksum != 0 and fchecksum != "":
        csumtype = self.getChecksumType(fchecksum)
    else:
        csumtype = "default"

    # get a proper envsetup
    envsetup = self.getEnvsetup()

    #if proxycheck:
    #    s, pilotErrorDiag = self.verifyProxy(envsetup=envsetup, limit=2)
    #    if s != 0:
    #        self.prepareReport('NO_PROXY', report)
    #        return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
    #else:
    #    tolog("Proxy verification turned off")
    tolog("Proxy verification turned off")

    filename = os.path.basename(source)

    # get all the proper paths
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope, sitemover=self)  # quick workaround
    if ec != 0:
        self.prepareReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # here begins the new magic... from Vincenzo Lavorini
    sitemover = SiteMover.SiteMover()
    v_path = sitemover.getPathFromScope(scope, filename)
    rucio_c = Client()
    if "ATLAS" in token:
        token_ok = token[5:]  # strip the leading 'ATLAS' substring
    else:
        token_ok = token
    local_se_token = self.site_name + "_" + token_ok
    v_hostname = [j['hostname'] for j in rucio_c.get_protocols(local_se_token)]
    v_port = [j['port'] for j in rucio_c.get_protocols(local_se_token)]
    v_prefix = [j['prefix'] for j in rucio_c.get_protocols(local_se_token)]
    v_address = "https://%s:%s%s" % (v_hostname[0], v_port[0], v_prefix[0])
    tolog("test1: address is %s" % (v_address))
    if "rucio/" in v_address and "/rucio" in v_path:
        v_address = v_address[:-7]
        tolog("test2: address is %s" % (v_address))
    elif "rucio" in v_address and "rucio" in v_path:
        v_address = v_address[:-6]
        tolog("test3: address is %s" % (v_address))
    full_http_surl = v_address + v_path
    tolog("test3: full_http_surl is %s" % (full_http_surl))

    full_surl = surl
    if full_surl[:len('token:')] == 'token:':
        # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
        full_surl = full_surl[full_surl.index('srm://'):]

    # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
    # testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
    # 86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz
    #putfile = surl
    #tolog("putfile: %s" % (putfile))
    #tolog("full_surl: %s" % (full_surl))

    # get https surl
    #full_http_surl = full_surl.replace("srm://", "https://")

    # get the DQ2 site name from ToA ---why? Is it needed?
    #try:
    #    _dq2SiteName = self.getDQ2SiteName(surl=putfile)
    #except Exception, e:
    #    tolog("Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)" % str(e))
    #else:
    #    report['localSite'], report['remoteSite'] = (_dq2SiteName, _dq2SiteName)
    #    tolog("DQ2 site name: %s" % (_dq2SiteName))

    if testLevel == "1":
        source = "thisisjustatest"

    # determine which timeout option to use
    #commented by Lavorini timeout_option = "--connect-timeout 300 --max-time %d" % (self.timeout)
    timeout_option = "--connect-timeout 300"

    sslCert = self.sslCert
    sslKey = self.sslKey
    sslCertDir = self.sslCertDir

    # check htcopy if it is existed or env is set properly
    #_cmd_str = 'which htcopy'
    #try:
    #    s, o = commands.getstatusoutput(_cmd_str)
    #except Exception, e:
    #    tolog("!!WARNING!!2990!! Exception caught: %s (%d, %s)" % (str(e), s, o))
    #    o = str(e)
    #if s != 0:
    #    tolog("!!WARNING!!2990!! Command failed: %s" % (_cmd_str))
    #    o = o.replace('\n', ' ')
    #    tolog("!!WARNING!!2990!! check PUT command failed. Status=%s Output=%s" % (str(s), str(o)))
    #    #return 999999

    # cleanup the SURL if necessary (remove port and srm substring)
    #if token:
    # used lcg-cp options:
    # --srcsetype: specify SRM version
    #   --verbose: verbosity on
    #        --vo: specifies the Virtual Organization the user belongs to
    #          -s: space token description
    #          -b: BDII disabling
    #          -t: time-out
    #  (lcg-cr) -l: specifies the Logical File Name associated with the file. If this option is present, an entry is added to the LFC
    #          -g: specifies the Grid Unique IDentifier. If this option is not present, a GUID is generated internally
    #          -d: specifies the destination. It can be the Storage Element fully qualified hostname or an SURL. In the latter case,
    #              the scheme can be sfn: for a classical SE or srm:. If only the fully qualified hostname is given, a filename is
    #              generated in the same format as with the Replica Manager
    # _cmd_str = '%s lcg-cr --verbose --vo atlas -T srmv2 -s %s -b -t %d -l %s -g %s -d %s file:%s' %\
    #            (envsetup, token, self.timeout, lfclfn, guid, surl, fppfn)
    # usage: lcg-cp [-h,--help] [-i,--insecure] [-c,--config config_file]
    #               [-n nbstreams] [-s,--sst src_spacetokendesc] [-S,--dst dest_spacetokendesc]
    #               [-D,--defaultsetype se|srmv1|srmv2] [-T,--srcsetype se|srmv1|srmv2] [-U,--dstsetype se|srmv1|srmv2]
    #               [-b,--nobdii] [-t timeout] [-v,--verbose] [-V,--vo vo] [--version] src_file dest_file
    #surl = putfile[putfile.index('srm://'):]
    #_cmd_str = '%s htcopy --ca-path %s --user-cert %s --user-key %s "%s?spacetoken=%s"' % (envsetup, sslCertDir, sslCert, sslKey, full_http_surl, token)
    #_cmd_str = '%s lcg-cp --verbose --vo atlas -b %s -U srmv2 -S %s file://%s %s' % (envsetup, timeout_option, token, source, full_surl)
    #else:
    #    # surl is the same as putfile
    #    _cmd_str = '%s htcopy --ca-path %s --user-cert %s --user-key %s "%s"' % (envsetup, sslCertDir, sslCert, sslKey, full_http_surl)
    #    _cmd_str = '%s lcg-cp --vo atlas --verbose -b %s -U srmv2 file://%s %s' % (envsetup, timeout_option, source, full_surl)

    # note: the original passed self.sslKey for --cert and --cacert as well; since cert, key
    # and CA argument normally all point to the same grid proxy file this worked, but the
    # intent is clearer with the certificate arguments spelled out
    _cmd_str = 'curl -1 --verbose --cert %s --key %s --cacert %s --capath %s -L %s -T %s' % (self.sslCert, self.sslKey, self.sslCert, self.sslCertDir, full_http_surl, source)

    tolog("Executing command: %s" % (_cmd_str))

    t0 = os.times()
    _cmd = Popen(_cmd_str, stdout=PIPE, stderr=PIPE, shell=True)
    _cmd_out, _cmd_stderr = _cmd.communicate()
    report['relativeStart'] = time()
    report['transferStart'] = time()
    report['validateStart'] = time()
    t1 = os.times()
    t = t1[4] - t0[4]
    tolog("Curl command output = %s" % (_cmd_out))
    tolog("Command finished after %f s" % (t))
    if "bytes uploaded" not in _cmd_out:
        tolog("!!WARNING!!1137!! Command failed: %s" % (_cmd_str))
    '''
    # check if file was partially transferred, if so, remove it
    _ec = self.removeFile(envsetup, self.timeout, dst_gpfn)
    if _ec == -2:
        pilotErrorDiag += "(failed to remove file) "  # i.e. do not retry stage-out

    if "Could not establish context" in o:
        pilotErrorDiag += "Could not establish context: Proxy / VO extension of proxy has probably expired"
        tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
        self.prepareReport('CONTEXT_FAIL', report)
        return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
    elif "No such file or directory" in o:
        pilotErrorDiag += "No such file or directory: %s" % (o)
        tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
        self.prepareReport('NO_FILE_DIR', report)
        return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)
    elif "globus_xio: System error" in o:
        pilotErrorDiag += "Globus system error: %s" % (o)
        tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
        self.prepareReport('GLOBUS_FAIL', report)
        return self.put_data_retfail(error.ERR_PUTGLOBUSSYSERR, pilotErrorDiag)
    else:
        if len(o) == 0 and t >= self.timeout:
            pilotErrorDiag += "Copy command self timed out after %d s" % (t)
            tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
            self.prepareReport('CP_TIMEOUT', report)
            return self.put_data_retfail(error.ERR_PUTTIMEOUT, pilotErrorDiag)
        else:
            if len(o) == 0:
                pilotErrorDiag += "Copy command returned error code %d but no output" % (ec)
            else:
                pilotErrorDiag += o
            self.prepareReport('CP_ERROR', report)
            return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)
    '''
    verified = False

    # getting the remote checksum from Rucio:
    token_file = open('token_file', 'r')  # fixed typo: the original opened 'token_fle'
    token_rucio = token_file.readline()
    pos2print = token_rucio.find("CN")
    token_rucio2print = token_rucio[:pos2print] + '(Hidden token)'
    tolog("Token I am using: %s" % (token_rucio2print))

    httpredirector = readpar('httpredirector')

    trial_n = 1
    remote_checksum = "none"
    while (remote_checksum == "none" and trial_n < 8):
        trial_n += 1
        if not httpredirector:
            #cmd = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip |awk \'{FS=\"hash type=\"}; {print $2}\' |awk \'{FS=\">\"}; {print $2}\' |awk \'{FS=\"<\"} {print $1}\'| grep -v \'^$\'" % (token_rucio, scope, filename)
            cmd = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip " % (token_rucio, scope, filename)
            cmd2print = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip " % (token_rucio2print, scope, filename)
        else:
            if "http" in httpredirector:
                tolog("HTTP redirector I am using: %s" % (httpredirector))
                cmd = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem %s/replicas/%s/%s?select=geoip " % (token_rucio, httpredirector, scope, filename)
                cmd2print = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem %s/replicas/%s/%s?select=geoip " % (token_rucio2print, httpredirector, scope, filename)
            else:
                tolog("HTTP redirector I am using: %s" % (httpredirector))
                # note: the original referenced reps[0].scope/reps[0].filename here, but 'reps'
                # is undefined in put_data; scope/filename match the sibling branches
                cmd = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://%s/replicas/%s/%s?select=geoip " % (token_rucio, httpredirector, scope, filename)
                cmd2print = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://%s/replicas/%s/%s?select=geoip " % (token_rucio2print, httpredirector, scope, filename)
        tolog("Getting remote checksum: command to be executed: %s" % (cmd2print))
        checksum_cmd = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        remote_checksum, stderr = checksum_cmd.communicate()
        tolog("Remote checksum as given by rucio %s" % (remote_checksum))
        if (remote_checksum == "none"):
            tolog("In checking checksum: command std error: %s" % (stderr))
            pilotErrorDiag = "Cannot get the checksum of file on SE"
            tolog("!!WARNING!!1137!! %s" % (pilotErrorDiag))
            tolog("!!WARNING!!1137!! trial number %s" % (trial_n))
            time.sleep(3)

    # try to get the remote checksum with lcg-get-checksum
    #remote_checksum = self.lcgGetChecksum(envsetup, self.timeout, full_surl)
    #if not remote_checksum:
    #    # try to grab the remote file info using lcg-ls command
    #    remote_checksum, remote_fsize = self.getRemoteFileInfo(envsetup, self.timeout, full_surl)
    #else:
    #    tolog("Setting remote file size to None (not needed)")
    #    remote_fsize = None

    # compare the checksums if the remote checksum was extracted
    tolog("Remote checksum: %s" % str(remote_checksum))
    tolog("Local checksum: %s" % (fchecksum))

    if remote_checksum:
        if remote_checksum != fchecksum:
            pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                             (csumtype, os.path.basename(dst_gpfn), remote_checksum, fchecksum)
            tolog("!!WARNING!!1800!! %s" % (pilotErrorDiag))
            if csumtype == "adler32":
                self.prepareReport('AD_MISMATCH', report)
                return self.put_data_retfail(error.ERR_PUTADMISMATCH, pilotErrorDiag, surl=full_surl)
            else:
                self.prepareReport('MD5_MISMATCH', report)
                return self.put_data_retfail(error.ERR_PUTMD5MISMATCH, pilotErrorDiag, surl=full_surl)
        else:
            tolog("Remote and local checksums verified")
            verified = True
    else:
        tolog("Skipped primary checksum verification (remote checksum not known)")

    # if lcg-ls could not be used
    if "/pnfs/" in surl and not remote_checksum:
        # for dCache systems we can test the checksum with the use method
        tolog("Detected dCache system: will verify local checksum with the local SE checksum")

        # gpfn = srm://head01.aglt2.org:8443/srm/managerv2?SFN=/pnfs/aglt2.org/atlasproddisk/mc08/EVNT/mc08.109270.J0....
        path = surl[surl.find('/pnfs/'):]
        # path = /pnfs/aglt2.org/atlasproddisk/mc08/EVNT/mc08.109270.J0....
        tolog("File path: %s" % (path))

        _filename = os.path.basename(path)
        _dir = os.path.dirname(path)

        # get the remote checksum
        tolog("Local checksum: %s" % (fchecksum))
        try:
            remote_checksum = self.getdCacheChecksum(_dir, _filename)
        except Exception, e:
            pilotErrorDiag = "Could not get checksum from dCache: %s (test will be skipped)" % str(e)
            tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
        else:
            if remote_checksum == "NOSUCHFILE":
                pilotErrorDiag = "The pilot will fail the job since the remote file does not exist"
                tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
                self.prepareReport('NOSUCHFILE', report)
                return self.put_data_retfail(error.ERR_NOSUCHFILE, pilotErrorDiag, surl=full_surl)
            elif remote_checksum:
                tolog("Remote checksum: %s" % (remote_checksum))
            else:
                tolog("Could not get remote checksum")

        if remote_checksum:
            if remote_checksum != fchecksum:
                pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                                 (csumtype, _filename, remote_checksum, fchecksum)
                if csumtype == "adler32":
                    self.prepareReport('AD_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTADMISMATCH, pilotErrorDiag, surl=full_surl)
                else:
                    self.prepareReport('MD5_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTMD5MISMATCH, pilotErrorDiag, surl=full_surl)
            else:
                tolog("Remote and local checksums verified")
                verified = True
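# --- Illustration (not part of the pilot source): the shape of the stage-out command
# built in put_data() above, with placeholder credential paths and a hypothetical
# https SURL (the real values come from the site mover configuration).
sslCert = '/tmp/x509up_u1234'      # grid proxy, typically used for both cert and key
sslKey = sslCert
sslCertDir = '/etc/grid-security/certificates'
full_http_surl = 'https://host.example.org:443/rucio/path/file.root'
source = '/scratch/job/file.root'
cmd = 'curl -1 --verbose --cert %s --key %s --cacert %s --capath %s -L %s -T %s' % (
    sslCert, sslKey, sslCert, sslCertDir, full_http_surl, source)
print(cmd)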
def getJobExecutionCommandObsolete(self, job, jobSite, pilot_initdir):
    """ Define and test the command(s) that will be used to execute the payload """

    # Input tuple: (method is called from RunJob*)
    #   job: Job object
    #   jobSite: Site object
    #   pilot_initdir: launch directory of pilot.py
    #
    # Return tuple:
    #   pilot_error_code, pilot_error_diagnostics, job_execution_command, special_setup_command, JEM, cmtconfig
    # where
    #   pilot_error_code       : self.__error.<PILOT ERROR CODE as defined in PilotErrors class> (value should be 0 for successful setup)
    #   pilot_error_diagnostics: any output from problematic command or explanatory error diagnostics
    #   job_execution_command  : command to execute payload, e.g. cmd = "source <path>/setup.sh; <path>/python trf.py [options]"
    #   special_setup_command  : any special setup command that can be inserted into job_execution_command and is sent to stage-in/out methods
    #   JEM                    : Job Execution Monitor activation state (default value "NO", meaning JEM is not to be used. See JEMstub.py)
    #   cmtconfig              : cmtconfig symbol from the job def or schedconfig, e.g. "x86_64-slc5-gcc43-opt" [NOT USED IN THIS CLASS]

    pilotErrorDiag = ""
    cmd = ""
    special_setup_cmd = ""
    pysiteroot = ""
    siteroot = ""
    JEM = "NO"
    cmtconfig = ""

    # Is it an analysis job or not?
    analysisJob = isAnalysisJob(job.trf)

    # Set the INDS env variable (used by runAthena)
    if analysisJob:
        self.setINDS(job.realDatasetsIn)

    # Command used to download runAthena or runGen
    wgetCommand = 'wget'

    # special setup for NG
    status, pilotErrorDiag, cmd = self.setupNordugridTrf(job, analysisJob, wgetCommand, pilot_initdir)
    if status != 0:
        return status, pilotErrorDiag, "", special_setup_cmd, JEM, cmtconfig

    # add FRONTIER debugging and RUCIO env variables
    cmd = self.addEnvVars2Cmd(cmd, job.jobId, job.taskID, job.processingType, jobSite.sitename, analysisJob)

    if readpar('cloud') == "DE":
        # Should JEM be used?
        metaOut = {}
        try:
            import sys
            from JEMstub import updateRunCommand4JEM
            # If JEM should be used, the command will get updated by the JEMstub automatically.
            cmd = updateRunCommand4JEM(cmd, job, jobSite, tolog, metaOut=metaOut)
        except:
            # On failure, cmd stays the same
            tolog("Failed to update run command for JEM - will run unmonitored.")

        # Is JEM to be used?
        if metaOut.has_key("JEMactive"):
            JEM = metaOut["JEMactive"]

        tolog("Use JEM: %s (dictionary = %s)" % (JEM, str(metaOut)))

    elif '--enable-jem' in cmd:
        tolog("!!WARNING!!1111!! JEM can currently only be used on certain sites in DE")

    # Pipe stdout/err for payload to files
    cmd += " 1>%s 2>%s" % (job.stdout, job.stderr)

    tolog("\nCommand to run the job is: \n%s" % (cmd))
    tolog("ATLAS_PYTHON_PILOT = %s" % (os.environ['ATLAS_PYTHON_PILOT']))

    if special_setup_cmd != "":
        tolog("Special setup command: %s" % (special_setup_cmd))

    return 0, pilotErrorDiag, cmd, special_setup_cmd, JEM, cmtconfig
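# --- Illustrative sketch (not part of the class): how a RunJob* caller might
# unpack the return tuple documented above. The variable names mirror the
# docstring; 'thisExperiment' is an assumed handle to an instance of this class.
#
# ec, pilotErrorDiag, cmd, special_setup_cmd, JEM, cmtconfig = \
#     thisExperiment.getJobExecutionCommandObsolete(job, jobSite, pilot_initdir)
# if ec != 0:
#     tolog("!!WARNING!! Could not define job execution command: %s" % (pilotErrorDiag))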
    self.prepareReport('DONE', report)
    return (0, pilotErrorDiag, r_gpfn, fsize, fchecksum, 'P')

def getPnfsid(self, pnfs, guid):
    """ Get the PNFSID from the BNL LFC """

    try:
        import lfc
    except Exception, e:
        pilotErrorDiag = "getPnfsid() could not import lfc module: %s" % str(e)
        tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
        return None

    os.environ['LFC_HOST'] = readpar('lfchost')
    s, replicas = lfc.lfc_getreplicax('', guid, '')
    if s != 0:
        pilotErrorDiag = "Failed to get PNFSID for guid: %s" % guid
        tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
        return None
    else:
        for replica in replicas:
            if pnfs in replica.sfn:
                pnfsid = replica.setname
                if pnfsid == "-1":
                    pilotErrorDiag = "getPnfsid() returned -1: File does not exist in dCache"
                    tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
                    return None
                elif pnfsid == "":
                    pilotErrorDiag = "getPnfsid() returned nothing: PNFSID will not be used."
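# --- Illustrative sketch (not part of the mover): the replica loop above in
# isolation. 'replicas' is the list returned by lfc.lfc_getreplicax(), whose
# entries carry the replica SFN in .sfn and (for dCache) the PNFSID in
# .setname; the helper name is an assumption made for this example.
def _pick_pnfsid_sketch(replicas, pnfs):
    """ Return the PNFSID of the first replica whose SFN matches 'pnfs' """
    for replica in replicas:
        if pnfs in replica.sfn:
            return replica.setname
    return None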
    srmhost = self.hostFromSurl(dirAcc['oldPrefix'])

    for guid in replicas.keys():
        reps = replicas[guid]
        tolog("Got replicas=%s for guid=%s" % (str(reps), guid))

        try:
            token_file = open('token_file', 'r')
        except IOError, e:
            tolog("!!WARNING!! Failed to open file: %s" % (e))
            raise Exception("!!FAILED!!1099!! Cannot open file with token!")
        else:
            token_rucio = token_file.readline()
            pos2print = token_rucio.find("CN")
            token_rucio2print = token_rucio[:pos2print] + '(Hidden token)'
            tolog("Token I am using: %s" % (token_rucio2print))

        httpredirector = readpar('httpredirector')

        if not httpredirector:
            cmd = "curl -1 -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip" % (token_rucio, reps[0].scope, reps[0].filename)
            cmd2print = "curl -1 -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip" % (token_rucio2print, reps[0].scope, reps[0].filename)
        else:
            if "http" in httpredirector:
                tolog("HTTP redirector I am using: %s" % (httpredirector))
                cmd = "curl -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem %s/replicas/%s/%s?select=geoip" % (token_rucio, httpredirector, reps[0].scope, reps[0].filename)
                cmd2print = "curl -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem %s/replicas/%s/%s?select=geoip" % (token_rucio2print, httpredirector, reps[0].scope, reps[0].filename)
            else:
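# --- Illustrative sketch (not part of the original code): the curl commands
# above ask Rucio for an application/metalink4+xml document; a hash can be
# pulled out of it with a standard XML parser instead of the awk pipeline
# seen elsewhere in this file. The function name is an assumption made for
# this example.
import xml.dom.minidom

def _checksum_from_metalink_sketch(metalink_xml):
    """ Return the first adler32 <hash> value in a metalink4 document, or None """
    dom = xml.dom.minidom.parseString(metalink_xml)
    for node in dom.getElementsByTagName('hash'):
        if node.getAttribute('type') == 'adler32' and node.firstChild:
            return node.firstChild.data.strip()
    return None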
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """
    The local file (local access to the dCache file) is assumed to have a relative path
    that is the same as the relative path in the 'gpfn'.
    loc_... are the variables used to access the file in the locally exported file system.
    """

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    useCT = pdict.get('usect', True)
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    analJob = pdict.get('analJob', False)
    timeout = pdict.get('timeout', 5 * 3600)
    prodDBlockToken = pdict.get('access', '')

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict['report'], 'BNLdCache', lfn, guid)

    # get a proper envsetup
    envsetup = self.getEnvsetup(get=True)

    if self._setup:
        _setup_str = "source %s; " % self._setup
    else:
        _setup_str = envsetup

    ec, pilotErrorDiag = verifySetupCommand(error, _setup_str)
    if ec != 0:
        self.prepareReport('RFCP_FAIL', report)
        return ec, pilotErrorDiag

    # remove any host and SFN info from PFN path
    loc_pfn = self.extractPathFromPFN(gpfn)

    copyprefixin = readpar('copyprefixin')
    if copyprefixin != '':
        # Extract the copy prefix
        pfrom, pto = copyprefixin.split('^')
        loc_pfn = pfrom + loc_pfn
        tolog("Added copyprefixin to file: %s" % (loc_pfn))
    else:
        copyprefix = readpar('copyprefix')
        if copyprefix != '':
            # Extract the copy prefix
            pfrom, pto = copyprefix.split('^')
            loc_pfn = pfrom + loc_pfn
            tolog("Added copyprefix to file: %s" % (loc_pfn))

    report['relativeStart'] = time.time()
    pnfsid = self.getPnfsid(loc_pfn, guid)

    # for analysis jobs, skip input file if on tape or if lib file
    if analJob:
        if not self.isLibFile(loc_pfn):
            if pnfsid == None:
                isStaged = self.isFileStaged(_setup_str, loc_pfn)
            else:
                _com = "/cacheinfos/isFileInPool?pnfsid=%s" % (pnfsid)
                isStaged = self.isFileStaged(_setup_str, loc_pfn, url="ddmv02.usatlas.bnl.gov:8000", com=_com)
            if not isStaged:
                pilotErrorDiag = "File %s is not staged and will be skipped for analysis job" % (loc_pfn)
                self.prepareReport('FILE_ON_TAPE', report)
                return error.ERR_FILEONTAPE, pilotErrorDiag
        else:
            tolog("Skipping file stage check for lib file")

    # should the root file be copied or read directly by athena?
    directIn, useFileStager = self.getTransferModes()
    if directIn:
        if useCT:
            directIn = False
            tolog("Direct access mode is switched off (file will be transferred with the copy tool)")
            updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input")
        else:
            # determine if the file is a root file according to its name
            rootFile = self.isRootFileName(lfn)

            if prodDBlockToken == 'local' or not rootFile:
                directIn = False
                tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)")
                updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input")
            elif rootFile:
                tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn))
                report['relativeStart'] = None
                report['transferStart'] = None
                self.prepareReport('FOUND_ROOT', report)
                if useFileStager:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", ftype="input")
                else:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", ftype="input")
                return error.ERR_DIRECTIOFILE, pilotErrorDiag
    else:
        tolog("Normal file transfer")

    dest_path = os.path.join(path, lfn)
    if pnfsid == None:
        _cmd_str = '%sdccp %s %s' % (_setup_str, loc_pfn, dest_path)
    else:
        _cmd_str = '%sdccp pnfs://dcdcap.usatlas.bnl.gov:22125/%s %s' % (_setup_str, pnfsid, dest_path)

    tolog("Executing command: %s" % (_cmd_str))
    report['transferStart'] = time.time()
    try:
        s, telapsed, cout, cerr = timed_command(_cmd_str, timeout)
    except Exception, e:
        tolog("!!WARNING!!2999!! timed_command() threw an exception: %s" % str(e))
        s = 1
        o = str(e)
        telapsed = timeout
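# --- Illustrative sketch (not part of the pilot): the timed_command() helper
# used above returns (exit status, elapsed seconds, stdout, stderr) and kills
# the command if it exceeds 'timeout'. A minimal stand-in under those assumed
# semantics, using only the standard library:
import time
import threading
import subprocess

def _timed_command_sketch(cmd, timeout):
    start = time.time()
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    timer = threading.Timer(timeout, p.kill)  # kill the command on timeout
    timer.start()
    try:
        cout, cerr = p.communicate()
    finally:
        timer.cancel()
    return p.returncode, time.time() - start, cout, cerr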
    # Get runJob object
    runJob = RunJobEdison()

    # Setup HPC specific parameters for Edison
    runJob.cpu_number_per_node = 24
    runJob.walltime = 120
    runJob.max_nodes = 10
    runJob.number_of_threads = 1
    runJob.min_walltime = 10
    runJob.waittime = 15
    runJob.nodes = 2
    runJob.partition_comp = 'edison'
    runJob.project_id = ""
    runJob.executed_queue = readpar('localqueue')

    # Define a new parent group
    os.setpgrp()

    # Protect the runJob code with exception handling
    hP_ret = False
    try:
        # always use this filename as the new jobDef module name
        import newJobDef

        jobSite = Site.Site()

        return_tuple = runJob.argumentParser()
        tolog("argumentParser returned: %s" % str(return_tuple))
        jobSite.setSiteInfo(return_tuple)
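# --- Illustrative note (not part of the script): os.setpgrp() above makes the
# process a process-group leader, so the payload and all of its children can
# later be signalled as one unit, e.g. by a watchdog. A hedged sketch of that
# cleanup step, assuming a stored group id 'pgrp':
#
# import os, signal
# pgrp = os.getpgrp()                 # recorded right after os.setpgrp()
# os.killpg(pgrp, signal.SIGTERM)     # terminate every process in the group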
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """ copy input file from SE to local dir """

    # try to get the direct reading control variable (False for direct reading mode; file should not be copied)
    useCT = pdict.get('usect', True)
    prodDBlockToken = pdict.get('access', '')

    # get the DQ2 tracing report
    try:
        report = pdict['report']
    except:
        report = {}
    else:
        # set the proper protocol
        report['protocol'] = 'local'
        # mark the relative start
        report['relativeStart'] = time()
        # the current file
        report['filename'] = lfn
        # guid
        report['guid'] = guid.replace('-', '')

    if not path:
        tolog('path is empty, using current directory')
        path = os.getcwd()

    # build setup string
    envsetup = self.getEnvsetup(get=True)

    # should the root file be copied or read directly by athena?
    directIn = False
    dInfo = getDirectAccessDic(readpar('copysetupin'))
    # if copysetupin did not contain direct access info, try the copysetup instead
    if not dInfo:
        dInfo = getDirectAccessDic(readpar('copysetup'))
    tolog("dInfo: %s" % str(dInfo))

    # check if we should use the copytool
    if dInfo:
        directIn = dInfo['directIn']

    if directIn:
        if useCT:
            directIn = False
            tolog("Direct access mode is switched off (file will be transferred with the copy tool)")
        else:
            # determine if the file is a root file according to its name
            rootFile = self.isRootFileName(lfn)

            if prodDBlockToken == 'local' or not rootFile:
                directIn = False
                tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)")
            elif rootFile:
                tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn))
                report['relativeStart'] = None
                report['transferStart'] = None
                self.prepareReport('FOUND_ROOT', report)
                return 0, self.__pilotErrorDiag
    else:
        tolog("not directIn")

    # build the get command
    _params = ""
    if fchecksum and fchecksum != 'None' and fchecksum != 0 and fchecksum != "0" and not self.isDummyChecksum(fchecksum):
        csumtype = self.getChecksumType(fchecksum)
        # special case for md5sum (command only understands 'md5' and 'adler32', and not 'ad' and 'md5sum')
        if csumtype == 'md5sum':
            csumtype = 'md5'

    execStr = self.__localget % (envsetup, _params, gpfn, os.path.join(path, lfn))
    tolog("Executing command: %s" % (execStr))

    report['transferStart'] = time()
    try:
        status, telapsed, cout, cerr = timed_command(execStr, self.__timeout)
    except Exception, e:
        self.__pilotErrorDiag = 'timed_command() threw an exception: %s' % str(e)
        tolog(self.__warningStr % self.__pilotErrorDiag)
        status = 1
        output = str(e)
        telapsed = self.__timeout
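# --- Illustrative sketch (not part of the mover): getChecksumType() used
# above is not shown in this fragment; a common heuristic distinguishes
# checksum types by value length (adler32 values are 8 hex characters, md5
# values are 32). The function name and the length heuristic are assumptions
# made for this example.
def _guess_checksum_type_sketch(value):
    """ Guess the checksum type from the length of its hex value """
    if len(value) == 8:
        return 'adler32'
    elif len(value) == 32:
        return 'md5'
    return 'unknown'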