def initFromEnv(self):
    """
    Init the class taking the required information from the environment
    """
    self.command = 'scram'
    # SCRAM_ARCH normally comes from the environment; as a fallback
    # (possibly not needed any more) ask the scram executable itself.
    self["SCRAM_ARCH"] = os.environ.get("SCRAM_ARCH")
    if self["SCRAM_ARCH"] is None:
        scramProc = subprocess.Popen([self.command, 'arch'], stdout=subprocess.PIPE)
        self["SCRAM_ARCH"] = scramProc.communicate()[0].strip()
    try:
        self["CMSSW_BASE"] = os.environ["CMSSW_BASE"]
        self["CMSSW_VERSION"] = os.environ["CMSSW_VERSION"]
    except KeyError as ke:
        # Without cmsenv there is no usable CMSSW area: reset and bail out.
        self["CMSSW_BASE"] = None
        self["CMSSW_VERSION"] = None
        msg = "Please make sure you have setup the CMS enviroment (cmsenv). Cannot find %s in your env" % str(
            ke)
        msg += "\nPlease refer to https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookCRAB3Tutorial#CMS_environment for how to setup the CMS enviroment."
        raise EnvironmentException(msg)
def proxy(self):
    """
    Build and return a Proxy object configured from self.defaultDelegation.

    Raises EnvironmentException when the Grid credentials cannot be used.
    """
    try:
        proxy = Proxy(self.defaultDelegation)
    # `except CredentialException, ex` is Python-2-only syntax; use `as`.
    except CredentialException as ex:
        self.logger.debug(ex)
        # str(ex) instead of the private (and not always present) ex._message.
        raise EnvironmentException('Problem with Grid environment: %s ' % str(ex))
    # The created proxy was silently discarded before; return it as the
    # sibling (corrected) version of this method does.
    return proxy
def initFromEnv(self):
    """
    Init the class taking the required information from the environment
    """
    # Prefer the exported SCRAM_ARCH; fall back to asking scram itself.
    self["SCRAM_ARCH"] = os.environ.get('SCRAM_ARCH')
    if self["SCRAM_ARCH"] is None:
        stdout, _, _ = execute_command(command='scram arch')
        self["SCRAM_ARCH"] = stdout
    try:
        self["CMSSW_BASE"] = os.environ["CMSSW_BASE"]
        self["CMSSW_VERSION"] = os.environ["CMSSW_VERSION"]
    except KeyError as ke:
        # No cmsenv: reset the keys and point the user at the twiki.
        self["CMSSW_BASE"] = None
        self["CMSSW_VERSION"] = None
        msg = "Please make sure you have setup the CMS enviroment (cmsenv). Cannot find %s in your env" % str(
            ke)
        msg += "\nPlease refer to https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookCRAB3Tutorial#CMS_environment for how to setup the CMS enviroment."
        raise EnvironmentException(msg)
def __init__(self, logger=None):
    """
    Discover the SCRAM/CMSSW environment of the current shell.

    :param logger: logger object used for the final debug message
                   (assumed non-None by the last line — the default is
                   only usable if callers never reach it; TODO confirm).
    :raises EnvironmentException: when the CMSSW_* variables are not set
                                  (i.e. cmsenv was not run).
    """
    self.logger = logger
    self.command = 'scram'
    self.scramArch = None
    # dict.has_key() was removed in Python 3; use the `in` operator,
    # consistent with the other initFromEnv implementations in this file.
    if "SCRAM_ARCH" in os.environ:
        self.scramArch = os.environ["SCRAM_ARCH"]
    else:
        # subprocess.check_output([self.command, 'arch']).strip() # Python 2.7 and later
        self.scramArch = subprocess.Popen([self.command, 'arch'], stdout=subprocess.PIPE)\
            .communicate()[0].strip()
    try:
        self.cmsswBase = os.environ["CMSSW_BASE"]
        self.cmsswReleaseBase = os.environ["CMSSW_RELEASE_BASE"]
        self.cmsswVersion = os.environ["CMSSW_VERSION"]
        self.localRT = os.environ["LOCALRT"]
    except KeyError:
        self.cmsswBase = None
        self.cmsswReleaseBase = None
        self.cmsswVersion = None
        self.localRT = None
        msg = "Please make sure you have setup the CMS enviroment (cmsenv)."
        msg += "\nPlease refer to https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookCRAB3Tutorial#CMS_environment for how to setup the CMS enviroment."
        raise EnvironmentException(msg)
    self.logger.debug("Found %s for %s with base %s" % (self.cmsswVersion, self.scramArch, self.cmsswBase))
def proxy(self):
    """
    Return a Proxy object built from self.defaultDelegation.

    Raises EnvironmentException if the Grid environment is unusable.
    """
    try:
        gridProxy = Proxy(self.defaultDelegation)
    except CredentialException as ex:
        self.logger.debug(ex)
        raise EnvironmentException('Problem with Grid environment: %s ' % str(ex))
    return gridProxy
def moveCfgFile(self, cfgOutputName):
    """
    Move the configuration files produced by the bootstrap script into the
    directory of cfgOutputName.

    Raises EnvironmentException if the expected bootstrap files are missing
    or cannot be moved.
    """
    bootstrapDir = os.environ['CRAB3_BOOTSTRAP_DIR']
    bootCfgname = os.path.join(bootstrapDir, BOOTSTRAP_CFGFILE)
    bootCfgPklname = os.path.join(bootstrapDir, BOOTSTRAP_CFGFILE_PKL)
    bootCfgDumpname = os.path.join(bootstrapDir, BOOTSTRAP_CFGFILE_DUMP)
    # Guard clause: both the cfg and its pickle must exist.
    if not (os.path.isfile(bootCfgname) and os.path.isfile(bootCfgPklname)):
        msg = "The CRAB3_BOOTSTRAP_DIR environment variable is set, but I could not find %s or %s" % (bootCfgname, bootCfgPklname)
        raise EnvironmentException(msg)
    try:
        destination = os.path.dirname(cfgOutputName)
        for requiredFile in (bootCfgname, bootCfgPklname):
            shutil.move(requiredFile, destination)
        # The dump file is optional.
        if os.path.isfile(bootCfgDumpname):
            shutil.move(bootCfgDumpname, destination)
    except Exception as ex:
        msg = "Cannot move either %s or %s to %s. Error is: %s" % (bootCfgname, bootCfgPklname, destination, ex)
        raise EnvironmentException(msg)
def getCfgInfo(self):
    """
    Load and return the JSON content of the bootstrap info file.

    Raises EnvironmentException when CRAB3_BOOTSTRAP_DIR is set but the
    expected file is not there.
    """
    infoFile = os.path.join(os.environ['CRAB3_BOOTSTRAP_DIR'], BOOTSTRAP_INFOFILE)
    if not os.path.isfile(infoFile):
        raise EnvironmentException(
            "The CRAB3_BOOTSTRAP_DIR environment variable is set, but I could not find %s" % infoFile)
    with open(infoFile) as fd:
        return json.load(fd)
def checkdirectory(self, dir_):
    """
    Walk dir_ following symlinks and stat every file, so that an infinite
    symbolic-link loop surfaces as an OSError and is reported to the user.
    """
    try:
        for dirpath, _, filenames in os.walk(dir_, followlinks=True):
            for filename in filenames:
                os.stat(os.path.join(dirpath, filename))
    except OSError as msg:
        err = '%sError%s: Infinite directory loop found in: %s \nStderr: %s' % \
            (colors.RED, colors.NORMAL, dir_, msg)
        raise EnvironmentException(err)
def crabcachepath(self):
    """
    Return the CRAB cache file location: CRAB3_CACHE_FILE if exported as an
    absolute path, otherwise ~/.crab3. An exported relative path is an error.
    """
    cachePath = os.environ.get('CRAB3_CACHE_FILE')
    if cachePath is None:
        return str(os.path.expanduser('~')) + '/.crab3'
    if os.path.isabs(cachePath):
        return cachePath
    msg = "%sError%s:" % (colors.RED, colors.NORMAL)
    msg += " Invalid path in environment variable CRAB3_CACHE_FILE: %s" % (os.environ['CRAB3_CACHE_FILE'])
    msg += " Please export a valid full path."
    raise EnvironmentException(msg)
def initFromFile(self):
    """
    Init the class taking the required information from the bootstrap file.
    """
    envFilePath = os.path.join(os.environ['CRAB3_BOOTSTRAP_DIR'], BOOTSTRAP_ENVFILE)
    if not os.path.isfile(envFilePath):
        raise EnvironmentException(
            "The CRAB3_BOOTSTRAP_DIR environment variable is set, but I could not find %s" % envFilePath)
    # Merge the JSON key/values into this (dict-like) object.
    with open(envFilePath) as fd:
        self.update(json.load(fd))
def crabcachepath(self):
    """
    Return the CRAB cache file location: CRAB3_CACHE_FILE if exported as an
    absolute path, otherwise ~/.crab3. An exported relative path is an error.
    """
    # Single environment lookup instead of the previous four; same outcome.
    cacheFile = os.environ.get('CRAB3_CACHE_FILE')
    if cacheFile is None:
        return str(os.path.expanduser('~')) + '/.crab3'
    if os.path.isabs(cacheFile):
        return cacheFile
    # Typo fixed in the user-facing message ("is use" -> "is used").
    msg = '%sError%s: An invalid path is used for CRAB3_CACHE_FILE, please export a valid full path' % (
        colors.RED, colors.NORMAL)
    raise EnvironmentException(msg)
def createNewVomsProxy(self, timeLeftThreshold=0, proxyCreatedByCRAB=False, proxyOptsSetPlace=None):
    """
    Handle the VOMS proxy creation:
    - check if a valid proxy still exists;
    - create a new one if it is expired (or if self.proxyChanged is set).

    :param timeLeftThreshold: minimum remaining lifetime in seconds below
        which a new proxy is created.
    :param proxyCreatedByCRAB: not used in this method body.
    :param proxyOptsSetPlace: not used in this method body.
    :return: dictionary with keys 'filename' and 'timeleft'.
    :raises EnvironmentException: if the VomsProxy object cannot be built.
    :raises ProxyCreationException: if the newly created proxy has no lifetime.
    """
    proxyInfo = {}
    ## TODO add the change to have user-cert/key defined in the config.
    #proxy = self.vomsProxy()
    try:
        proxy = VomsProxy(logger=self.defaultDelegation['logger'])
        proxy.setVOGroupVORole(group=self.defaultDelegation['group'], role=self.defaultDelegation['role'])
    except ProxyCreationException as ex:
        self.logger.debug(ex)
        raise EnvironmentException('Problem with Grid environment: %s ' % str(ex))
    self.logger.debug("Checking credentials")
    proxyFileName = proxy.getFilename()
    proxyInfo['filename'] = proxyFileName
    if not os.path.isfile(proxyFileName):
        # No proxy file at all: treat it like an expired proxy.
        self.logger.debug("Proxy file %s not found" % (proxyFileName))
        proxyTimeLeft = 0
    else:
        self.logger.debug("Found proxy file %s" % (proxyFileName))
        self.logger.debug("Getting proxy life time left")
        proxyTimeLeft = proxy.getTimeLeft()
        # Break the remaining seconds into H:M:S for the log message only.
        hours, minutes, seconds = int(proxyTimeLeft / 3600), int(
            (proxyTimeLeft % 3600) / 60), int((proxyTimeLeft % 3600) % 60)
        self.logger.debug("Proxy valid for %02d:%02d:%02d hours" % (hours, minutes, seconds))
    ## Create a new proxy if the current one is expired
    proxyInfo['timeleft'] = proxyTimeLeft
    if proxyTimeLeft < timeLeftThreshold or self.proxyChanged:
        msg = "Creating new proxy for %s hours" % (
            self.defaultDelegation['proxyValidity'])
        self.logger.debug(msg)
        ## Create the proxy.
        proxy.create()
        # Re-read the lifetime to verify the creation actually worked.
        proxyTimeLeft = proxy.getTimeLeft()
        proxyInfo['timeleft'] = proxyTimeLeft
        if proxyTimeLeft > 0:
            self.logger.debug("Proxy created.")
        else:
            raise ProxyCreationException("Problems creating proxy.")
    return proxyInfo
def run(self, filecacheurl=None):
    """
    Override run() for JobType.

    Validate the CRAB/CMSSW configuration, build and upload the user
    sandbox, and collect the output-file / lumi-mask information.

    :param filecacheurl: URL of the cache where the sandbox is uploaded.
    :return: tuple (tarFilename, configArguments).
    :raises ConfigurationException, EnvironmentException, ClientException.
    """
    configArguments = {'addoutputfiles': [],
                       'adduserfiles': [],
                       'tfileoutfiles': [],
                       'edmoutfiles': [],
                      }

    if getattr(self.config.Data, 'useParent', False) and getattr(
            self.config.Data, 'secondaryInputDataset', None):
        msg = "Invalid CRAB configuration: Parameters Data.useParent and Data.secondaryInputDataset cannot be used together."
        raise ConfigurationException(msg)

    # Get SCRAM environment
    scram = ScramEnvironment(logger=self.logger)
    configArguments.update({'jobarch': scram.getScramArch(),
                            'jobsw': scram.getCmsswVersion()})

    # Build tarball
    if self.workdir:
        tarUUID = PandaInterface.wrappedUuidGen()
        self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
        if len(tarUUID):
            tarFilename = os.path.join(self.workdir, tarUUID + 'default.tgz')
            cfgOutputName = os.path.join(self.workdir, BOOTSTRAP_CFGFILE)
        else:
            raise EnvironmentException(
                'Problem with uuidgen while preparing for Sandbox upload.')
    else:
        _dummy, tarFilename = tempfile.mkstemp(suffix='.tgz')
        _dummy, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')

    if getattr(self.config.Data, 'inputDataset', None):
        configArguments['inputdata'] = self.config.Data.inputDataset

    ## Create CMSSW config.
    self.logger.debug("self.config: %s" % (self.config))
    self.logger.debug("self.config.JobType.psetName: %s" % (self.config.JobType.psetName))
    ## The loading of a CMSSW pset in the CMSSWConfig constructor is not idempotent
    ## in the sense that a second loading of the same pset may not produce the same
    ## result. Therefore there is a cache in CMSSWConfig to avoid loading any CMSSW
    ## pset twice. However, some "complicated" psets seem to evade the caching.
    ## Thus, to be safe, keep the CMSSWConfig instance in a class variable, so that
    ## it can be reused later if wanted (for example, in PrivateMC when checking if
    ## the pset has an LHE source) instead of having to load the pset again.
    ## As for what does "complicated" psets mean, Daniel Riley said that there are
    ## some psets where one module modifies the configuration from another module.
    self.cmsswCfg = CMSSWConfig(config=self.config, logger=self.logger,
                                userConfig=self.config.JobType.psetName)

    ## If there is a CMSSW pset, do a basic validation of it.
    if not bootstrapDone() and self.config.JobType.psetName:
        valid, msg = self.cmsswCfg.validateConfig()
        if not valid:
            raise ConfigurationException(msg)

    ## We need to put the pickled CMSSW configuration in the right place.
    ## Here, we determine if the bootstrap script already run and prepared everything
    ## for us. In such case we move the file, otherwise we pickle.dump the pset
    if not bootstrapDone():
        # Write out CMSSW config
        self.cmsswCfg.writeFile(cfgOutputName)
    else:
        # Move the pickled and the configuration files created by the bootstrap script
        self.moveCfgFile(cfgOutputName)

    ## Interrogate the CMSSW pset for output files (only output files produced by
    ## PoolOutputModule or TFileService are identified automatically). Do this
    ## automatic detection even if JobType.disableAutomaticOutputCollection = True,
    ## so that we can still classify the output files in EDM, TFile and additional
    ## output files in the Task DB (and the job ad).
    ## TODO: Do we really need this classification at all? cmscp and PostJob read
    ## the FJR to know if an output file is EDM, TFile or other.
    edmfiles, tfiles = self.cmsswCfg.outputFiles()
    ## If JobType.disableAutomaticOutputCollection = True, ignore the EDM and TFile
    ## output files that are not listed in JobType.outputFiles.
    ## ('fname' instead of 'file' to avoid shadowing the builtin.)
    if getattr(self.config.JobType, 'disableAutomaticOutputCollection',
               getParamDefaultValue('JobType.disableAutomaticOutputCollection')):
        outputFiles = [re.sub(r'^file:', '', fname)
                       for fname in getattr(self.config.JobType, 'outputFiles', [])]
        edmfiles = [fname for fname in edmfiles if fname in outputFiles]
        tfiles = [fname for fname in tfiles if fname in outputFiles]
    ## Get the list of additional output files that have to be collected as given
    ## in JobType.outputFiles, but remove duplicates listed already as EDM files or
    ## TFiles.
    addoutputFiles = [re.sub(r'^file:', '', fname)
                      for fname in getattr(self.config.JobType, 'outputFiles', [])
                      if re.sub(r'^file:', '', fname) not in edmfiles + tfiles]
    self.logger.debug("The following EDM output files will be collected: %s" % edmfiles)
    self.logger.debug("The following TFile output files will be collected: %s" % tfiles)
    self.logger.debug("The following user output files will be collected: %s" % addoutputFiles)
    configArguments['edmoutfiles'] = edmfiles
    configArguments['tfileoutfiles'] = tfiles
    configArguments['addoutputfiles'].extend(addoutputFiles)
    ## Give warning message in case no output file was detected in the CMSSW pset
    ## nor was any specified in the CRAB configuration.
    if not configArguments['edmoutfiles'] and not configArguments['tfileoutfiles'] \
            and not configArguments['addoutputfiles']:
        msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
        if getattr(self.config.JobType, 'disableAutomaticOutputCollection',
                   getParamDefaultValue('JobType.disableAutomaticOutputCollection')):
            msg += " Automatic detection of output files in the CMSSW configuration is disabled from the CRAB configuration"
            msg += " and no output file was explicitly specified in the CRAB configuration."
        else:
            msg += " CRAB could not detect any output file in the CMSSW configuration"
            msg += " nor was any explicitly specified in the CRAB configuration."
        msg += " Hence CRAB will not collect any output file from this task."
        self.logger.warning(msg)

    ## UserTarball calls ScramEnvironment which can raise EnvironmentException.
    ## Since ScramEnvironment is already called above and the exception is not
    ## handled, we are sure that if we reached this point it will not raise EnvironmentException.
    ## But otherwise we should take this into account.
    with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
        inputFiles = [re.sub(r'^file:', '', fname)
                      for fname in getattr(self.config.JobType, 'inputFiles', [])]
        tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
        configArguments['adduserfiles'] = [os.path.basename(f) for f in inputFiles]
        try:
            uploadResult = tb.upload(filecacheurl=filecacheurl)
        except HTTPException as hte:
            if 'X-Error-Info' in hte.headers:
                reason = hte.headers['X-Error-Info']
                reason_re = re.compile(
                    r'\AFile size is ([0-9]*)B\. This is bigger than the maximum allowed size of ([0-9]*)B\.$')
                re_match = reason_re.match(reason)
                if re_match:
                    ISBSize = int(re_match.group(1))
                    ISBSizeLimit = int(re_match.group(2))
                    reason = "%sError%s:" % (colors.RED, colors.NORMAL)
                    # Message typo fixed ("sanbox" -> "sandbox"); broken
                    # line-continuation of the original reconstructed.
                    reason += " Input sandbox size is ~%sMB. This is bigger than the maximum allowed size of %sMB." % (
                        ISBSize / 1024 / 1024, ISBSizeLimit / 1024 / 1024)
                    ISBContent = sorted(tb.content, reverse=True)
                    biggestFileSize = ISBContent[0][0]
                    # Width of the size column = digits of the biggest file size.
                    ndigits = int(math.ceil(math.log(biggestFileSize + 1, 10)))
                    reason += "\nInput sandbox content sorted by size[Bytes]:"
                    for (size, name) in ISBContent:
                        reason += ("\n%" + str(ndigits) + "s\t%s") % (size, name)
                    raise ClientException(reason)
            raise hte
        except Exception as e:
            msg = ("Impossible to calculate the checksum of the sandbox tarball.\nError message: %s.\n"
                   "More details can be found in %s" % (e, self.logger.logfile))
            LOGGERS['CRAB3'].exception(msg)  # the traceback is only printed into the logfile
            raise ClientException(msg)

    configArguments['cacheurl'] = filecacheurl
    configArguments['cachefilename'] = "%s.tar.gz" % uploadResult
    self.logger.debug("Result uploading input files: %(cachefilename)s " % configArguments)

    # Upload list of user-defined input files to process as the primary input
    userFilesList = getattr(self.config.Data, 'userInputFiles', None)
    if userFilesList:
        self.logger.debug("Attaching list of user-specified primary input files.")
        # list comprehension instead of map(string.strip, ...): string.strip
        # does not exist in Python 3 and map() there is lazy.
        userFilesList = [fname.strip() for fname in userFilesList]
        userFilesList = [fname for fname in userFilesList if fname]
        if len(userFilesList) != len(set(userFilesList)):
            msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
            msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
            msg += " Duplicated entries will be removed."
            self.logger.warning(msg)
        configArguments['userfiles'] = set(userFilesList)
        configArguments['primarydataset'] = getattr(
            self.config.Data, 'outputPrimaryDataset', 'CRAB_UserFiles')

    lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
    lumi_list = None
    if lumi_mask_name:
        self.logger.debug("Attaching lumi mask %s to the request" % (lumi_mask_name))
        try:
            lumi_list = getLumiList(lumi_mask_name, logger=self.logger)
        except ValueError as ex:
            msg = "%sError%s:" % (colors.RED, colors.NORMAL)
            msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name, ex)
            raise ConfigurationException(msg)
    run_ranges = getattr(self.config.Data, 'runRange', None)
    if run_ranges:
        # Raw string for the regex (avoids invalid-escape warnings).
        run_ranges_is_valid = re.match(r'^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
        if run_ranges_is_valid:
            run_list = getRunList(run_ranges)
            if lumi_list:
                lumi_list.selectRuns(run_list)
                if not lumi_list:
                    msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
                    raise ConfigurationException(msg)
            else:
                if len(run_list) > 50000:
                    msg = "CRAB configuration parameter Data.runRange includes %s runs." % str(len(run_list))
                    msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                    raise ConfigurationException(msg)
                lumi_list = LumiList(runs=run_list)
        else:
            msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
            raise ConfigurationException(msg)
    if lumi_list:
        configArguments['runs'] = lumi_list.getRuns()
        ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
        lumi_mask = lumi_list.getCompactList()
        # sum(..., []) flattens the [[start, end], ...] pairs; equivalent to
        # the previous reduce(lambda x, y: x + y, ...) without needing
        # functools.reduce under Python 3.
        configArguments['lumis'] = [str(sum(lumi_mask[run], []))[1:-1].replace(' ', '')
                                    for run in configArguments['runs']]
    configArguments['jobtype'] = 'Analysis'
    return tarFilename, configArguments
def __init__(self, logger, cmdargs=None, disable_interspersed_args=False):
    """
    Initialize common client parameters.

    :param logger: client logger; must expose .logfile and the usual methods.
    :param cmdargs: list of command-line arguments to parse (default []).
    :param disable_interspersed_args: forwarded to the option parser.
    :raises EnvironmentException: for unsupported OpenSSL versions.
    :raises RuntimeError: if the command has no entry in commandsConfiguration.
    """
    if not hasattr(self, 'name'):
        self.name = self.__class__.__name__
    ConfigCommand.__init__(self)

    ## The command logger.
    self.logger = logger
    self.logfile = self.logger.logfile

    # Best-effort discovery of the local system / OS description, for debug only.
    # NOTE(review): check_output returns bytes on Python 3, so .strip('\n')
    # would fail there — this code presumably runs under Python 2; confirm.
    localSystem = subprocess.check_output(['uname', '-a']).strip('\n')
    try:
        localOS = subprocess.check_output(
            ['grep', 'PRETTY_NAME', '/etc/os-release'],
            stderr=subprocess.STDOUT).strip('\n')
        localOS = localOS.split('=')[1].strip('"')
    # `except Exception` instead of a bare `except:` so that
    # KeyboardInterrupt/SystemExit still propagate.
    except Exception:
        try:
            localOS = subprocess.check_output(['lsb_release', '-d']).strip('\n')
            localOS = localOS.split(':')[1].strip()
        except Exception:
            localOS = "Unknown Operating System"
    self.logger.debug("Running on: " + localSystem + " - " + localOS)

    opensslInfo = subprocess.check_output(["openssl", "version"]).strip('\n')
    self.logger.debug("OpenSSl version: %s", opensslInfo)
    opensslVersion = opensslInfo.split()[1]
    # Keep only the leading "major.minor" for the numeric comparison.
    nDots = opensslVersion.count(".")
    if float(opensslVersion.rsplit(".", nDots - 1)[0]) > 1:
        raise EnvironmentException(
            "Your OpenSSl version (%s) is not supported. Supported versions are < 1.1" % opensslVersion)

    self.logger.debug("Executing command: '%s'" % str(self.name))
    self.proxy = None
    self.restClass = CRABClient.Emulator.getEmulator('rest')

    ## Get the command configuration.
    self.cmdconf = commandsConfiguration.get(self.name)
    if not self.cmdconf:
        # Fixed message: the two adjacent literals used to concatenate
        # without a space ("developertrying"); typos also corrected.
        raise RuntimeError(
            "Cannot find command %s in commandsConfiguration inside ClientMapping. Are you a developer "
            "trying to add a command without its corresponding configuration?" % self.name)

    ## Get the CRAB cache file.
    self.cachedinfo = None
    self.crab3dic = self.getConfiDict()

    ## The options parser.
    self.parser = CRABCmdOptParser(self.name, self.__doc__, disable_interspersed_args)

    ## Define the command options.
    self.setSuperOptions()

    ## Parse the command options/arguments.
    cmdargs = cmdargs or []
    (self.options, self.args) = self.parser.parse_args(cmdargs)

    self.transferringIds = None
    self.dest = None
    self.validateLogpathOption()
    ## Validate first the SubCommand options
    SubCommand.validateOptions(self)
    ## then the config option for the submit command
    self.validateConfigOption()

    ## Get the VO group/role from the command options (if the command requires these
    ## options).
    proxyOptsSetPlace = {'set_in': {'group': "default", 'role': "default"}, 'for_set_use': ""}
    msgadd = []
    self.voGroup, self.voRole = "", "NULL"
    if self.cmdconf['requiresProxyVOOptions']:
        proxyOptsSetPlace['for_set_use'] = "cmdopts"
        if self.options.voGroup is not None:
            self.voGroup = self.options.voGroup
            proxyOptsSetPlace['set_in']['group'] = "cmdopts"
            msgadd.append("VO group '%s'" % (self.voGroup))
        if self.options.voRole is not None:
            self.voRole = self.options.voRole if self.options.voRole != "" else "NULL"
            proxyOptsSetPlace['set_in']['role'] = "cmdopts"
            msgadd.append("VO role '%s'" % (self.voRole))
        if msgadd:
            msg = "Using %s as specified in the crab command options." % (" and ".join(msgadd))
            self.logger.debug(msg)

    ## Create the object that will do the proxy operations. We don't really care
    ## what VO role and group and server URL we pass to the constructor, because
    ## these are not used until we do the proxy delegation to the myproxy server.
    ## And this happens in handleProxy(), which is called after we load the
    ## configuration file and retrieve the final values for those parameters.
    ## handleProxy() takes care of passing those parameters to self.proxy.
    self.proxy = CredentialInteractions('', '', self.voRole, self.voGroup, self.logger, '')

    ## If the user didn't use the --proxy command line option, and if there isn't a
    ## valid proxy already, we create a new one with the current VO role and group
    ## (as commented above, we don't really care what are the VO role and group so
    ## far).
    self.proxyCreated = False
    if not self.options.proxy and self.cmdconf['initializeProxy']:
        self.proxyCreated = self.proxy.createNewVomsProxySimple(timeLeftThreshold=720)

    ## If there is an input configuration file:
    if hasattr(self.options, 'config') and self.options.config is not None:
        proxyOptsSetPlace['for_set_use'] = "config"
        ## Load the configuration file and validate it.
        self.loadConfig(self.options.config, self.args)
        ## Create the CRAB project directory.
        self.requestarea, self.requestname, self.logfile = createWorkArea(self.logger, \
            getattr(self.configuration.General, 'workArea', None), \
            getattr(self.configuration.General, 'requestName', None))
        ## Get the VO group/role from the configuration file.
        msgadd = []
        if hasattr(self.configuration, 'User') and hasattr(self.configuration.User, 'voGroup'):
            self.voGroup = self.configuration.User.voGroup
            proxyOptsSetPlace['set_in']['group'] = "config"
            msgadd.append("VO group '%s'" % (self.voGroup))
        if hasattr(self.configuration, 'User') and hasattr(self.configuration.User, 'voRole'):
            self.voRole = self.configuration.User.voRole if self.configuration.User.voRole != "" else "NULL"
            proxyOptsSetPlace['set_in']['role'] = "config"
            msgadd.append("VO role '%s'" % (self.voRole))
        if msgadd:
            msg = "Using %s as specified in the CRAB configuration file." % (" and ".join(msgadd))
            self.logger.debug(msg)

    ## If the VO group/role was not given in the command options, take it from the request cache.
    if self.cmdconf['requiresDirOption']:
        self.setCachedProxy(proxyOptsSetPlace)

    ## If the server URL isn't already set, we check the args and then the config.
    if not hasattr(self, 'serverurl') and self.cmdconf['requiresREST']:
        self.instance, self.serverurl = self.serverInstance()
    elif not self.cmdconf['requiresREST']:
        self.instance, self.serverurl = None, None

    ## Update (or create) the CRAB cache file.
    self.updateCRABCacheFile()

    ## At this point there should be a valid proxy, because we have already checked that and
    ## eventually created a new one. If the proxy was not created by CRAB, we check that the
    ## VO role/group in the proxy are the same as specified by the user in the configuration
    ## file (or in the command line options). If it is not, we ask the user if he wants to
    ## overwrite the current proxy. If he doesn't want to overwrite it, we don't continue
    ## and ask him to provide the VO role/group as in the existing proxy.
    ## Finally, delegate the proxy to myproxy server.
    self.handleProxy(proxyOptsSetPlace)

    ## Validate the command options
    self.validateOptions()

    ## Logging user command and options used for debuging purpose.
    self.logger.debug('Command use: %s' % self.name)
    self.logger.debug('Options use: %s' % cmdargs)
    if self.cmdconf['requiresREST']:
        self.checkversion(getUrl(self.instance, resource='info'))
        self.uri = getUrl(self.instance)
    self.logger.debug("Instance is %s" % (self.instance))
    self.logger.debug("Server base url is %s" % (self.serverurl))
    if self.cmdconf['requiresREST']:
        self.logger.debug("Command url %s" % (self.uri))
def run(self, filecacheurl = None):
    """
    Override run() for JobType.

    Build the CMSSW configuration and the user sandbox, upload the sandbox
    to the cache, and assemble the task arguments (output files, primary
    dataset, lumi mask / run range).

    :param filecacheurl: URL of the cache where the sandbox is uploaded.
    :return: tuple (tarFilename, configArguments, isbchecksum).
    """
    configArguments = {'addoutputfiles' : [],
                       'adduserfiles' : [],
                       'tfileoutfiles' : [],
                       'edmoutfiles' : [],
                      }

    # Get SCRAM environment
    scram = ScramEnvironment(logger=self.logger)

    configArguments.update({'jobarch' : scram.scramArch,
                            'jobsw' : scram.cmsswVersion, })

    # Build tarball
    if self.workdir:
        tarUUID = PandaInterface.wrappedUuidGen()
        self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
        if len(tarUUID):
            tarFilename = os.path.join(self.workdir, tarUUID + 'default.tgz')
            cfgOutputName = os.path.join(self.workdir, 'CMSSW_cfg.py')
        else:
            raise EnvironmentException('Problem with uuidgen while preparing for Sandbox upload.')
    else:
        # No working directory: fall back to system temp files.
        _dummy, tarFilename = tempfile.mkstemp(suffix='.tgz')
        _dummy, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')

    if getattr(self.config.Data, 'inputDataset', None):
        configArguments['inputdata'] = self.config.Data.inputDataset
    # configArguments['ProcessingVersion'] = getattr(self.config.Data, 'processingVersion', None)

    # Create CMSSW config
    self.logger.debug("self.config: %s" % self.config)
    self.logger.debug("self.config.JobType.psetName: %s" % self.config.JobType.psetName)
    cmsswCfg = CMSSWConfig(config=self.config, logger=self.logger,
                           userConfig=self.config.JobType.psetName)

    ## Interogate CMSSW config and user config for output file names. For now no use for EDM files or TFiles here.
    edmfiles, tfiles = cmsswCfg.outputFiles()
    # Additional user output files, minus those already found as EDM/TFiles.
    addoutputFiles = [re.sub(r'^file:', '', file)
                      for file in getattr(self.config.JobType, 'outputFiles', [])
                      if re.sub(r'^file:', '', file) not in edmfiles + tfiles]
    self.logger.debug("The following EDM output files will be collected: %s" % edmfiles)
    self.logger.debug("The following TFile output files will be collected: %s" % tfiles)
    self.logger.debug("The following user output files will be collected: %s" % addoutputFiles)
    configArguments['edmoutfiles'] = edmfiles
    configArguments['tfileoutfiles'] = tfiles
    configArguments['addoutputfiles'].extend(addoutputFiles)

    # Write out CMSSW config
    cmsswCfg.writeFile(cfgOutputName)

    ## UserTarball calls ScramEnvironment which can raise EnvironmentException.
    ## Since ScramEnvironment is already called above and the exception is not
    ## handled, we are sure that if we reached this point it will not raise EnvironmentException.
    ## But otherwise we should take this into account.
    with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
        inputFiles = [re.sub(r'^file:', '', file)
                      for file in getattr(self.config.JobType, 'inputFiles', [])]
        tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
        configArguments['adduserfiles'] = [os.path.basename(f) for f in inputFiles]
        uploadResults = tb.upload(filecacheurl = filecacheurl)

    self.logger.debug("Result uploading input files: %s " % str(uploadResults))
    configArguments['cacheurl'] = filecacheurl
    configArguments['cachefilename'] = uploadResults[0]
    isbchecksum = uploadResults[1]

    # Upload list of user-defined input files to process as the primary input
    userFilesList = getattr(self.config.Data, 'userInputFiles', None)
    if userFilesList:
        self.logger.debug("Attaching list of user-specified primary input files.")
        # NOTE(review): map(string.strip, ...) is Python-2-only —
        # string.strip was removed in Python 3; confirm the target runtime.
        userFilesList = map(string.strip, userFilesList)
        userFilesList = [file for file in userFilesList if file]
        if len(userFilesList) != len(set(userFilesList)):
            msg = "%sWarning%s: CRAB configuration parameter Data.userInputFiles contains duplicated entries." % (colors.RED, colors.NORMAL)
            msg += " Duplicated entries will be removed."
            self.logger.warning(msg)
        configArguments['userfiles'] = set(userFilesList)

        ## Get the user-specified primary dataset name.
        primaryDataset = getattr(self.config.Data, 'primaryDataset', 'CRAB_UserFiles')
        # Normalizes "foo/bar" and "/foo/bar" to "/foo/bar"
        primaryDataset = "/" + os.path.join(*primaryDataset.split("/"))
        if not re.match("/%(primDS)s.*" % (lfnParts), primaryDataset):
            self.logger.warning("Invalid primary dataset name %s; publication may fail." % (primaryDataset))
        configArguments['inputdata'] = primaryDataset

    lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
    lumi_list = None
    if lumi_mask_name:
        self.logger.debug("Attaching lumi mask %s to the request" % lumi_mask_name)
        lumi_list = getLumiList(lumi_mask_name, logger = self.logger)
    run_ranges = getattr(self.config.Data, 'runRange', None)
    run_ranges_is_valid = run_ranges is not None and isinstance(run_ranges, str) \
        and re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
    if run_ranges_is_valid:
        run_list = getRunList(run_ranges)
        if lumi_list:
            # Restrict the lumi mask to the requested runs.
            lumi_list.selectRuns(run_list)
        else:
            if len(run_list) > 50000:
                msg = "Data.runRange includes %s runs." % str(len(run_list))
                msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                raise ConfigurationException(msg)
            lumi_list = LumiList(runs = run_list)
    if lumi_list:
        configArguments['runs'] = lumi_list.getRuns()
        ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
        lumi_mask = lumi_list.getCompactList()
        configArguments['lumis'] = [str(reduce(lambda x, y: x + y, lumi_mask[run]))[1:-1].replace(' ', '')
                                    for run in configArguments['runs']]

    configArguments['jobtype'] = 'Analysis'

    return tarFilename, configArguments, isbchecksum