def split(self, job):
    '''Create and return the list of subjobs derived from the given job.

    The strategy is selected by ``job.inputdata.input_mode``:

    * ``'dir'`` or ``'none'``: ``job.inputdata.number_of_subjobs`` subjobs
      are created.
    * ``'list'``: one subjob is created for every entry of
      ``job.inputdata.input_path``; the entry becomes the only element of
      that subjob's ``inputsandbox``.

    Raises ApplicationConfigurationError when the number of subjobs is not
    positive, when the input path list is empty, or when the input mode is
    not one of the values above.
    '''
    logger = Ganga.Utility.logging.getLogger()
    logger.debug('SBSubmission split called.')

    dataset = job.inputdata
    mode = dataset.input_mode

    if mode in ('dir', 'none'):
        # In these modes the user chooses how many subjobs to run.
        if dataset.number_of_subjobs <= 0:
            raise ApplicationConfigurationError(
                None, 'You must define the number of subjobs.')
        subjobs = [self.createSubjob(job)
                   for _ in xrange(dataset.number_of_subjobs)]
    elif mode == 'list':
        # Each element of input_path is a list file; it drives one subjob.
        if len(dataset.input_path) <= 0:
            raise ApplicationConfigurationError(
                None, 'You must define an input file list.')
        subjobs = []
        for list_file in dataset.input_path:
            subjob = self.createSubjob(job)
            subjob.inputsandbox = [list_file]
            subjobs.append(subjob)
    else:
        raise ApplicationConfigurationError(None,
                                            'input_mode not recognized.')

    logger.debug('%d subjobs created.' % len(subjobs))
    return subjobs
def getOptsFiles(self):
    """
    Validate the entries of ``self.options`` and return ``self.options``.

    ``LocalFile`` entries must exist on disk (``localDir`` joined with
    ``namePattern``); ``DiracFile`` entries are accepted without any check.

    Raises:
        ApplicationConfigurationError: if no options are set, if a local
            options file does not exist, or if an entry is neither a
            LocalFile nor a DiracFile.
    """
    if not self.options:
        raise ApplicationConfigurationError(
            None, "No Opts File has been specified, please provide one!")

    for candidate in self.options:
        if isinstance(candidate, LocalFile):
            # FIXME LocalFile should return the basename and folder in 2
            # attributes so we can piece it together, now it doesn't
            location = path.join(candidate.localDir, candidate.namePattern)
            if not path.exists(location):
                raise ApplicationConfigurationError(
                    None,
                    "Opts File: '%s' has been specified but does not exist please check and try again!"
                    % location)
            continue

        if isinstance(candidate, DiracFile):
            # Remote files are not checked here.
            continue

        logger.error("opts: %s" % self.options)
        raise ApplicationConfigurationError(
            None,
            "Opts file type %s not yet supported please contact Ganga devs if you require this support"
            % getName(candidate))

    return self.options
def master_prepare(self, app, appconfig):
    """Validate the script and ROOT version, then build the master config.

    The list of known ROOT versions is fetched through the Dirac API once
    and cached in the module-level ``rootVersions``.  If it cannot be
    obtained, the version check is skipped (an error is logged).

    Returns:
        StandardJobConfig: built with an empty executable and a copy of the
        parent job's input sandbox.

    Raises:
        ApplicationConfigurationError: if the script name is unset, the
            script does not exist, or ``app.version`` contains none of the
            known ROOT versions.
    """
    global rootVersions

    # The script must be named and present on disk.
    script_path = app.script.name
    if not script_path:
        raise ApplicationConfigurationError(None,
                                            'Root.script.name must be set.')
    if not os.path.exists(script_path):
        raise ApplicationConfigurationError(None, 'Script must exist!')

    # Populate the cached version list on first use.
    if not rootVersions:
        from Dirac import Dirac
        result = Dirac.execAPI('result = DiracCommands.getRootVersions()')
        if result_ok(result):
            rootVersions = result['Value']
        else:
            logger.error('Could not obtain available ROOT versions: %s'
                         % str(result))
            logger.error('ROOT version will not be validated.')

    if rootVersions:
        for known_version in rootVersions:
            if app.version.find(known_version) >= 0:
                break
        else:
            # No known version is a substring of the requested one.
            msg = 'Invalid ROOT version: %s. Valid versions: %s' \
                  % (app.version, str(list(rootVersions)))
            raise ApplicationConfigurationError(None, msg)

    sandbox_copy = app._getParent().inputsandbox[:]
    return StandardJobConfig('', sandbox_copy, [], [], None)
def configure(self, masterappconfig):
    """Validate the settings and write the wrapper bash script for the job.

    The generated script sources ``self.cmtsetup`` and runs ``self.exe``
    with the user arguments, one ``-o <file>`` pair per output file and the
    file names returned by ``job.inputdata.get_dataset_filenames()``.  It is
    written (executable) to the job input workspace and its path is stored
    in ``self._scriptname``.

    The command line is assembled on a copy of ``self.args``: appending the
    generated ``-o`` options to ``self.args`` itself would make a second
    call to ``configure()`` fail its own '"-o" given in args' check.

    Args:
        masterappconfig: Not used by this method.

    Returns:
        tuple: Always ``(None, None)``.

    Raises:
        ApplicationConfigurationError: if ``cmtsetup`` or ``outputfile`` is
            unset, ``-o`` appears in the user arguments, the job has no
            inputdata, or the dataset yields no file names.
    """
    from Ganga.GPIDev.Lib.File import FileBuffer

    self.args = convertIntToStringArgs(self.args)
    job = self.getJobObject()

    if self.cmtsetup is None:
        raise ApplicationConfigurationError(None,
                                            'No cmt setup script given.')

    # Output files must be given through the outputfile attribute.
    if '-o' in self.args:
        raise ApplicationConfigurationError(
            None,
            'Option "-o" given in args. You must use the outputfile variable instead, even if you have multiple output files.'
        )
    if self.outputfile is None:
        raise ApplicationConfigurationError(
            None, 'No output file given. Fill the outputfile variable.')

    # outputfile may be a single name or a list of names.
    if isinstance(self.outputfile, list):
        output_names = self.outputfile
    else:
        output_names = [self.outputfile]

    if job.inputdata is None:
        raise ApplicationConfigurationError(
            None, 'The inputdata variable is not defined.')
    fileList = job.inputdata.get_dataset_filenames()
    if len(fileList) < 1:
        raise ApplicationConfigurationError(None,
                                            'No input data file given.')

    # Build the command line without touching self.args.
    args = list(self.args)
    for output_name in output_names:
        args.append('-o')
        args.append(output_name)
    args.extend(fileList)
    argsStr = ' '.join(args)

    # Create the bash script and put it in input dir.
    script = '#!/bin/bash\n'
    script += 'source ' + self.cmtsetup + '\n'
    script += self.exe + ' ' + argsStr + '\n'

    if '.exe' in self.exe:
        scriptname = self.exe.replace('.exe', '.sh')
    else:
        scriptname = self.exe + '.sh'
    job.getInputWorkspace().writefile(FileBuffer(scriptname, script),
                                      executable=1)
    self._scriptname = job.inputdir + scriptname
    return (None, None)
def configure(self, masterappconfig):
    """Write the macro file and the wrapper bash script for RunAtmPitSim.

    The dataset must provide exactly one value; it is converted with
    ``int()`` and zero-padded to eight digits to form the job number used
    in every generated file name.  A ``.mac`` file and ``runND280.sh`` are
    written to the job input workspace, ``self._scriptname`` is set to the
    script path and ``job.name`` to the job number.

    Args:
        masterappconfig: Not used by this method.

    Returns:
        tuple: Always ``(None, None)``.

    Raises:
        ApplicationConfigurationError: if ``cmtsetup`` is empty, the job has
            no inputdata, or the dataset does not hold exactly one value.
    """
    args = convertIntToStringArgs(self.args)
    job = self.getJobObject()

    if self.cmtsetup == []:
        raise ApplicationConfigurationError(None,
                                            'No cmt setup script given.')

    # Report a missing dataset as a configuration error instead of letting
    # the attribute access below fail with an AttributeError.
    if job.inputdata is None:
        raise ApplicationConfigurationError(
            None, 'The inputdata variable is not defined.')

    infiles = job.inputdata.get_dataset_filenames()
    if len(infiles) != 1:
        raise ApplicationConfigurationError(None, 'Wrong Dataset values')
    jn = "%08d" % int(infiles[0])

    # Macro file consumed by RunAtmPitSim.exe.
    outConf = ''.join([
        "# Automatically generated config file\n\n",
        "/atmt2k/step/outputFileName corsika_atmpitsim_" + jn + ".root\n",
        "/atmt2k/pga/inputFileName " + self.confopts['fluxfile'] + "\n",
        "/atmt2k/pga/isOyamaFlux false\n",
        "/atmt2k/pga/inputRandSeed " + str(random.randint(1, 9999999))
        + "\n\n",
        "/run/verbose 0\n",
        "/event/verbose 0\n",
        "/tracking/verbose 0\n",
        "/run/beamOn " + self.confopts['nev'] + "\n",
    ])
    mac = "corsika_atmpitsim_" + jn + ".mac"
    job.getInputWorkspace().writefile(FileBuffer(mac, outConf),
                                      executable=0)

    argsStr = ' '.join(args)

    # Create the bash script and put it in input dir.
    script_lines = ['#!/bin/bash\n']
    for f in self.cmtsetup:
        script_lines.append('source ' + f + '\n')
    script_lines.extend([
        'cd ' + job.outputdir + '\n',
        'RunAtmPitSim.exe ' + argsStr + ' '
        + os.path.join(job.inputdir, mac) + '\n',
        'mac=$(dirname $(which RunAtmPitSim.exe))/../app/ntuple_2_nuance.C\n',
        'cp $mac .\n',
        'root -l -b <<EOF\n',
        '.L ntuple_2_nuance.C+\n',
        'totxt("corsika_atmpitsim_' + jn + '");\n',
        '.q\n',
        'EOF\n',
        'mv NoMuons.txt NoMuons_' + jn + '.txt\n',
        'mv Config.conf Config_' + jn + '.conf\n',
    ])
    script = ''.join(script_lines)
    job.getInputWorkspace().writefile(FileBuffer('runND280.sh', script),
                                      executable=1)
    self._scriptname = job.inputdir + 'runND280.sh'

    # Job name given
    job.name = jn
    return (None, None)
def configure(self, masterappconfig):
    """Validate the settings and write the wrapper bash script for the job.

    The dataset is asked to write its file list to ``<inputdir>FileList``
    via ``job.inputdata.set_dataset_into_list``.  The generated script
    sources ``self.cmtsetup`` and runs ``self.exe`` with the user
    arguments, ``-o <outputfile>`` and the path of that list file.  The
    script path is stored in ``self._scriptname``.

    The command line is assembled on a copy of ``self.args``: appending the
    generated ``-o`` option to ``self.args`` itself would make a second
    call to ``configure()`` fail its own '"-o" given in args' check.

    Args:
        masterappconfig: Not used by this method.

    Returns:
        tuple: Always ``(None, None)``.

    Raises:
        ApplicationConfigurationError: if ``cmtsetup`` or ``outputfile`` is
            unset, ``-o`` appears in the user arguments, the job has no
            inputdata, or the file list could not be prepared.
    """
    from Ganga.GPIDev.Lib.File import FileBuffer

    self.args = convertIntToStringArgs(self.args)
    job = self.getJobObject()

    if self.cmtsetup is None:
        raise ApplicationConfigurationError(None,
                                            'No cmt setup script given.')

    # The output file must be given through the outputfile attribute.
    if '-o' in self.args:
        raise ApplicationConfigurationError(
            None,
            'Option "-o" given in args. You must use the outputfile variable instead.'
        )
    if self.outputfile is None:
        raise ApplicationConfigurationError(
            None, 'No output file given. Fill the outputfile variable.')

    # Report a missing dataset as a configuration error instead of letting
    # the attribute access below fail with an AttributeError.
    if job.inputdata is None:
        raise ApplicationConfigurationError(
            None, 'The inputdata variable is not defined.')

    # Let the dataset write the list of input files into the input dir.
    fileList = job.inputdir + 'FileList'
    if not job.inputdata.set_dataset_into_list(fileList):
        raise ApplicationConfigurationError(
            None, 'Problem with the preparation of the list of input files')

    # Build the command line without touching self.args.
    args = list(self.args)
    args.extend(['-o', self.outputfile, fileList])
    argsStr = ' '.join(args)

    # Create the bash script here and put it in input dir.
    script = '#!/bin/bash\n'
    script += 'source ' + self.cmtsetup + '\n'
    script += self.exe + ' ' + argsStr + '\n'

    if '.exe' in self.exe:
        scriptname = self.exe.replace('.exe', '.sh')
    else:
        scriptname = self.exe + '.sh'
    job.getInputWorkspace().writefile(FileBuffer(scriptname, script),
                                      executable=1)
    self._scriptname = job.inputdir + scriptname
    return (None, None)
def _check_inputs(self):
    """Check that every entry of ``self.optsfile`` is an existing file.

    An empty ``optsfile`` only produces a warning.  Otherwise each entry
    (plain strings are wrapped in a ``File``) has its ``name`` replaced by
    ``fullpath(name)`` and is tested with ``os.path.isfile``.

    Raises:
        ApplicationConfigurationError: listing every entry that is not an
            existing file.
    """
    if len(self.optsfile) == 0:
        # Warn user that no optsfiles given
        logger.warning("The 'optsfile' is not set. I hope this is OK!")
        return

    from Ganga.GPIDev.Lib.File.File import File

    missing = []
    for entry in self.optsfile:
        candidate = File(entry) if type(entry) is str else entry
        candidate.name = fullpath(candidate.name)
        if not os.path.isfile(candidate.name):
            missing.append(candidate)

    if missing:
        quoted_names = ", ".join("'%s'" % item.name for item in missing)
        msg = ("The 'optsfile' attribute contains non-existent file/s: ["
               + quoted_names + ']')
        raise ApplicationConfigurationError(None, msg)
def _custom_package(self):
    """Default ``self.user_workarea`` to the first entry of ``$CMTPATH``.

    Nothing happens when ``user_workarea`` is already set or when
    ``use_custom_package`` is false.

    Raises:
        ApplicationConfigurationError: if ``CMTPATH`` is not present in the
            environment.
    """
    if self.user_workarea:
        return
    if not self.use_custom_package:
        return

    if 'CMTPATH' not in os.environ:
        raise ApplicationConfigurationError(
            None, 'Can not guess the user workarea')

    # CMTPATH is colon separated; only its first entry is used.
    cmt_entries = os.environ['CMTPATH'].split(':')
    self.user_workarea = cmt_entries[0]
def generateDiracInput(app):
    """
    Construct a DIRAC input which does not need to be unique to each job but is required to have a unique checksum.
    This builds a gzipped tarball of the prepared files, uploads it to DIRAC and then stores the result of the upload
    in app.uploadedInput

    Args:
        app (GaudiExec): This expects a GaudiExec app to be passed so that the prepared files can be collected from it

    Raises:
        ApplicationConfigurationError: if the prepared area contains sub-folders, which are not supported yet
    """
    input_files, input_folders = collectPreparedFiles(app)

    job = app.getJobObject()

    if input_folders:
        raise ApplicationConfigurationError(
            None,
            'Prepared folders not supported yet, please fix this in future')

    prep_dir = app.getSharedPath()
    # NOTE(review): the file list above is collected before this call, so
    # anything addTimestampFile creates in prep_dir is presumably not part
    # of the archive built below -- confirm this is intended.
    addTimestampFile(prep_dir)
    prep_file = prep_dir + '.tgz'
    tmp_dir = tempfile.gettempdir()
    compressed_file = os.path.join(tmp_dir,
                                   '__' + os.path.basename(prep_file))

    with tarfile.open(compressed_file, "w:gz") as tar_file:
        for name in input_files:
            # FIXME Add support for subfiles here once it's working across multiple IGangaFile objects in a consistent way
            # Not hacking this in for now just in-case we end up with a mess as a result
            # Every file is stored flat, under its base name.
            tar_file.add(name, arcname=os.path.basename(name))

    new_df = uploadLocalFile(job, os.path.basename(compressed_file),
                             tmp_dir)

    app.uploadedInput = new_df
def prepareCommand(app):
    """
    Build the command line which is to be run on the worker node

    Args:
        app (GaudiExec): This expects only the GaudiExec app

    Returns:
        str: the environment script followed by either ``./run python <wrapper>`` or
        ``./run gaudirun.py <opts> <data file>`` plus the optional extra options file and extra arguments

    Raises:
        ApplicationConfigurationError: if an opts file is neither a LocalFile nor a DiracFile
    """
    opts_names = []
    for opts_file in app.getOptsFiles():
        if not isinstance(opts_file, (LocalFile, DiracFile)):
            raise ApplicationConfigurationError(None, "The filetype: %s is not yet supported for use as an opts file.\nPlease contact the Ganga devs is you wish this implemented." %
                                                getName(opts_file))
        # Ideally this would NOT need the basename, however LocalFile is special in this regard.
        # TODO Fix this after fixing LocalFile
        opts_names.append(os.path.basename(opts_file.namePattern))

    sourceEnv = app.getEnvScript()

    if not app.useGaudiRun:
        return sourceEnv + './run python %s' % app.getWrapperScriptName()

    joined_opts = ' '.join(opts_names)
    pieces = [sourceEnv + "./run gaudirun.py %s %s" % (joined_opts, GaudiExecDiracRTHandler.data_file)]
    if app.extraOpts:
        pieces.append(' ' + app.getExtraOptsFileName())
    if app.extraArgs:
        pieces.append(" " + " ".join(app.extraArgs))

    return ''.join(pieces)
def master_configure(self):
    """Validate the application and check that all job options parse.

    The version and inputs are validated first, then a
    ``PythonOptionsParser`` is built for the main options files and, when
    present, for the reconstruction and analysis options files.

    Raises:
        ApplicationConfigurationError: if the options cannot be parsed.  The
            parser's error text is written to ``gaudirun.stdout`` in the
            job debug workspace and the raised message points to it.
    """
    self._validate_version()
    job = self.getJobObject()
    self._master_configure()
    self._check_inputs()

    optsfiles = [fileitem.name for fileitem in self.optsfile]
    recoptsfiles = [fileitem.name for fileitem in self.recoptsfile]
    anaoptsfiles = [fileitem.name for fileitem in self.anaoptsfile]

    try:
        # The parsers are built for validation; this block does not use
        # the resulting objects.
        PythonOptionsParser(optsfiles, self.extraopts, self.shell)
        if recoptsfiles:
            PythonOptionsParser(recoptsfiles, self.extraopts, self.shell)
        if anaoptsfiles:
            PythonOptionsParser(anaoptsfiles, self.extraopts, self.shell)
    except ApplicationConfigurationError as e:
        debug_dir = job.getDebugWorkspace().getPath()
        stdout_path = debug_dir + '/gaudirun.stdout'
        # 'with' guarantees the file is closed even if the write fails.
        with open(stdout_path, 'w') as f:
            f.write(getattr(e, 'message', str(e)))
        msg = 'Unable to parse job options! Please check options ' \
              'files and extraopts. The output from gaudirun.py can be ' \
              'found in %s. You can also view this from within ganga '\
              'by doing job.peek(\'../debug/gaudirun.stdout\').' % stdout_path
        raise ApplicationConfigurationError(None, msg)
def check(self):
    '''Checks done during submit phase. Protected elements are written.

    Looks up the dataset named by ``self.dataset_id``, validates its status
    and session and the requested event counts against the dataset
    parameters, then builds the input path from the dataset LFNs and sets
    ``self.input_mode`` to ``'list'``.  When ``self.events_total`` is 0 it
    is set to the dataset's ``evt_tot``.

    Raises ApplicationConfigurationError when a check fails.
    '''
    if self.dataset_id == '':
        raise ApplicationConfigurationError(
            None, 'You must define an input dataset')
    if self.events_per_subjobs == 0:
        raise ApplicationConfigurationError(
            None, 'You must define events_per_subjobs')

    manager = SBDatasetManager.SBDatasetManager()
    datasets = manager.getDataset(dataset_id=self.dataset_id)

    # only one dataset
    if len(datasets) == 0:
        msg = 'Input dataset %s not found' % self.dataset_id
        raise ApplicationConfigurationError(None, msg)
    assert len(datasets) == 1, 'Dataset consistency error'
    dataset = datasets[0]

    # status
    if dataset['status'] not in ['open', 'closed']:
        msg = 'Input dataset %s status is not open or closed' % self.dataset_id
        raise ApplicationConfigurationError(None, msg)

    # session
    if dataset['session'] not in ['fastsim', 'fullsim']:
        msg = 'Input dataset %s session is not fastsim or fullsim' % self.dataset_id
        raise ApplicationConfigurationError(None, msg)

    parameters = dataset['parameters']

    # Convert once: the stored value is passed through int() wherever it is
    # compared, so formatting the raw value with %d could raise TypeError.
    evt_tot = int(parameters['evt_tot'])

    if self.events_total == 0:
        self.events_total = evt_tot

    if self.events_total > evt_tot:
        msg = 'Input dataset %s total events is %d' % (self.dataset_id,
                                                       evt_tot)
        raise ApplicationConfigurationError(None, msg)

    if self.events_per_subjobs < int(parameters['evt_file']):
        msg = 'events_per_subjobs must be >= %s' % parameters['evt_file']
        raise ApplicationConfigurationError(None, msg)

    if self.events_per_subjobs >= self.events_total:
        msg = 'events_per_subjobs cannot be >= events_total'
        raise ApplicationConfigurationError(None, msg)

    lfns = self.__getLFNs(parameters['evt_file'])
    self.__createInputPath(lfns, parameters['evt_file'])
    self.input_mode = 'list'
def check(self):
    '''Checks done during submit phase. Protected elements are written.

    Validates the session, the number of subjobs and the software version
    (against the versions stored in the database for the session).  For a
    FastSim session with background frames enabled, the most recently
    validated background frame is configured as directory input.

    Raises ApplicationConfigurationError when a check fails or when no
    valid background frame is available.
    '''
    # Fill in session and software version when neither has been set.
    if self.soft_version == '' and self.session == '':
        self.__sbcurrent()

    # Check that the session is FastSim or FullSim.
    allowed_session = ['FastSim', 'FullSim']
    if self.session not in allowed_session:
        raise ApplicationConfigurationError(
            None, 'session must be %s' % allowed_session)

    if self.number_of_subjobs < 1 or self.number_of_subjobs > 250:
        raise ApplicationConfigurationError(
            None, 'number_of_subjobs must be between 1 and 250')

    sql = '''SELECT DISTINCT soft_version
             FROM session_site_soft
             WHERE session_name = %s
             ORDER BY soft_version'''
    rows = db.read(sql, (self.session, ))
    # convert from list of dict to list of strings
    supported_soft_version = [row['soft_version'] for row in rows]

    if self.soft_version not in supported_soft_version:
        raise ApplicationConfigurationError(
            None, 'supported soft_version are: %s' % supported_soft_version)

    if self.session == 'FastSim' and self.background_frame:
        results = db.read('''SELECT prod_series, lfn_dir
                             FROM background_frame
                             WHERE valid = true
                             ORDER BY validation_timestamp DESC
                             LIMIT 1''')
        # An empty result would otherwise surface as a bare IndexError.
        if len(results) == 0:
            raise ApplicationConfigurationError(
                None, 'No valid background frame found')

        latest = results[0]
        self.input_path = [latest['lfn_dir']]
        self.input_mode = 'dir'
        self.background_frame_prod_series = latest['prod_series']
        logger.info('Last approved (%s) background frame has been setup '
                    'for job input' % self.background_frame_prod_series)
def validate_argument(x, exe=None):
    """Validate a single executable or argument value.

    Strings are only inspected when ``exe`` is true: the value must be
    non-empty, contain no whitespace, not end in a directory separator,
    not be a relative path while ``self.is_prepared`` is None, and -- when
    it contains a directory part -- point to an existing file.
    Integers are accepted as they are.  Any other object must provide
    ``exists()`` (and ``name`` for the error message).

    Raises:
        ApplicationConfigurationError: when a check fails or the value is
            of an unsupported type.
    """
    if not isinstance(x, str):
        try:
            # int arguments are allowed -> later converted to strings
            if isinstance(x, int):
                return
            if not x.exists():
                raise ApplicationConfigurationError(None, '%s: file not found' % x.name)
        except AttributeError as err:
            raise ApplicationConfigurationError(err, '%s (%s): unsupported type, must be a string or File' % (str(x), str(type(x))))
        return

    # Plain string arguments are not checked any further.
    if not exe:
        return

    if not x:
        raise ApplicationConfigurationError(None, 'exe not specified')

    if len(x.split()) > 1:
        raise ApplicationConfigurationError(None, 'exe "%s" contains white spaces' % x)

    directory, filename = os.path.split(x)
    if not filename:
        raise ApplicationConfigurationError(None, 'exe "%s" is a directory' % x)
    if directory and not os.path.isabs(directory) and self.is_prepared is None:
        raise ApplicationConfigurationError(None, 'exe "%s" is a relative path' % x)

    has_directory_part = not os.path.basename(x) == x
    if has_directory_part and not os.path.isfile(x):
        raise ApplicationConfigurationError(None, '%s: file not found' % x)
def _check_inputs(self):
    """Check that ``self.module`` names an existing file.

    A plain string is wrapped in a ``File`` first.  The module name is then
    replaced by ``fullpath(name)`` and tested with ``os.path.isfile``.

    Raises:
        ApplicationConfigurationError: if the module name is None or empty,
            or if it does not point to an existing file.
    """
    if isType(self.module, str):
        self.module = File(self.module)

    # Always check for None OR empty
    if self.module.name in (None, ""):
        raise ApplicationConfigurationError(
            None, "Application Module not requested")

    # Always check we've been given a FILE!
    self.module.name = fullpath(self.module.name)
    if os.path.isfile(self.module.name):
        return
    msg = 'Module file %s not found.' % self.module.name
    raise ApplicationConfigurationError(None, msg)
def prepare(self, force=False):
    """
    Create the set of prepared files which the RTHandler will later use.

    A new ShareDir is created, the build target and every opts file are placed in it
    (LocalFile entries are copied, DiracFile entries are fetched with get()).
    If anything fails the application is unprepared again and the error is re-raised.

    Args:
        force (bool): Forces a prepare to be run

    Returns:
        int: always 1

    Raises:
        ApplicationPrepareError: if the application is already prepared and force is not set
        ApplicationConfigurationError: if an opts file is of an unsupported type
    """
    already_prepared = self.is_prepared is not None
    if already_prepared and not force:
        raise ApplicationPrepareError(
            '%s application has already been prepared. Use prepare(force=True) to prepare again.'
            % getName(self))

    # Use the same criteria as the configure() method for checking file existence & sanity;
    # this bails out of prepare if there's something odd with the job config.
    self.configure(self)
    logger.info('Preparing %s application.' % getName(self))
    self.is_prepared = ShareDir()
    logger.info('Created shared directory: %s' % (self.is_prepared.name))

    build_target = self.buildGangaTarget()

    try:
        # copy any 'preparable' objects into the shared directory
        self.copyPreparables()
        # add the newly created shared directory into the metadata system
        # if the app is associated with a persisted object
        self.checkPreparedHasParent(self)

        self.copyIntoPrepDir(build_target)

        for opts in self.getOptsFiles():
            if isinstance(opts, LocalFile):
                local_source = path.join(opts.localDir,
                                         path.basename(opts.namePattern))
                self.copyIntoPrepDir(local_source)
                continue

            if isinstance(opts, DiracFile):
                # NB safe to put it here as should have expressly setup a path for this job by now.
                # We cannot _not_ place this here based upon the backend.
                # Always have to put it here regardless of if we're on DIRAC or Local so prepared job can be copied.
                opts.get(localPath=self.getSharedPath())
                continue

            raise ApplicationConfigurationError(
                None,
                "Opts file type %s not yet supported please contact Ganga devs if you require this support"
                % getName(opts))

        self.post_prepare()

    except Exception as err:
        # Roll back the partially prepared state before propagating.
        logger.debug("Err: %s" % str(err))
        self.unprepare()
        raise

    self.cleanGangaTargetArea(build_target)

    return 1
def _check_inputs(self):
    """Check the user's entries for the Ostap schema.

    At least one of ``scripts``, ``commands`` or ``arguments`` must be set.
    A ``scripts`` value given as a plain string is wrapped in a one-element
    list of ``File``; every script then has its ``name`` replaced by
    ``fullpath(name)``.

    Raises:
        ApplicationConfigurationError: if scripts, commands and arguments
            are all empty.
    """
    if not (self.scripts or self.commands or self.arguments):
        raise ApplicationConfigurationError(
            None, "Application scripts are not defined")

    if isinstance(self.scripts, str):
        self.scripts = [File(self.scripts)]

    for script in self.scripts:
        script.name = fullpath(script.name)
def loaddriver():
    """Build a Driver from the Robot configuration options.

    Every option whose key starts with ``Driver_Action_`` names a class to
    load; the rest of the key is the action name.  Each entry of
    ``Driver_Run`` must be either a loaded action name or something that
    ``int()`` accepts.

    Example of relevant configuration options:
    [Robot]
    Driver_Run = ['submit', 30, 'extract', 'report']
    Driver_Repeat = False
    Driver_Action_submit = GangaRobot.Lib.Core.CoreSubmitter.CoreSubmitter
    Driver_Action_extract = GangaRobot.Lib.Core.CoreExtractor.CoreExtractor
    Driver_Action_report = GangaRobot.Lib.Core.CoreReporter.CoreReporter

    Raises:
        ApplicationConfigurationError: if an action class cannot be loaded
            or a run entry is neither a known action nor an integer.
    """
    KEY_RUN = 'Driver_Run'
    KEY_REPEAT = 'Driver_Repeat'
    KEY_ACTION_PREFIX = 'Driver_Action_'

    config = Utility.getconfig()
    run = config[KEY_RUN]
    repeat = config[KEY_REPEAT]

    # load action classes
    actions = {}
    prefix_length = len(KEY_ACTION_PREFIX)
    for key in config:
        if not key.startswith(KEY_ACTION_PREFIX):
            continue
        fqcn = config[key]
        try:
            actions[key[prefix_length:]] = _loadclass(fqcn)
        except Exception as e:
            raise ApplicationConfigurationError(
                e, "Cannot load class '%s'." % fqcn)

    # check actions exist for run
    for step in run:
        if step in actions:
            continue
        try:
            int(step)
        except ValueError as e:
            raise ApplicationConfigurationError(
                e, "Unknown action '%s'." % step)

    return Driver(run=run, actions=actions, repeat=repeat)
def getFullOutputDir(self, outputDir):
    """Resolve ``outputDir`` to a full output directory path.

    An absolute path is returned unchanged when it starts with one of the
    prefixes configured under ``IHEPLustreDir/AvailablePrefix``.  A relative
    path is joined onto the user's directory, which is read from
    ``IHEPLustreDir/UserDefaultDir/<username>`` and defaults to
    ``<DefaultPrefix>/<username>``.

    Args:
        outputDir (str): absolute or relative output directory.

    Returns:
        str: the resolved directory.

    Raises:
        ApplicationConfigurationError: if an absolute path matches no
            available prefix, or if no username can be read from the proxy
            information.
    """
    if outputDir.startswith('/'):
        availablePrefix = gConfig.getValue(
            '/Resources/Applications/IHEPLustreDir/AvailablePrefix', [])
        if any(outputDir.startswith(ap) for ap in availablePrefix):
            return outputDir
        raise ApplicationConfigurationError(
            None,
            'Lustre directory not available for DIRAC jobs: %s' % outputDir)

    # A result without a 'Value' entry is reported as a configuration error
    # rather than a bare KeyError.
    # NOTE(review): assumes getProxyInfo() returns a dict -- confirm.
    proxy_info = getProxyInfo()
    username = (proxy_info.get('Value') or {}).get('username')
    if not username:
        raise ApplicationConfigurationError(None, 'Cannot find username')

    commonPrefix = gConfig.getValue(
        '/Resources/Applications/IHEPLustreDir/DefaultPrefix',
        '/scratchfs/bes')
    userLustreDir = gConfig.getValue(
        '/Resources/Applications/IHEPLustreDir/UserDefaultDir/%s' % username,
        '%s/%s' % (commonPrefix, username))
    return os.path.join(userLustreDir, outputDir)
def collectPreparedFiles(app):
    """
    Collect the files from the Application in the prepared state

    Every file and folder found below the shared directory is listed. LocalFile entries of the job's inputfiles are
    copied into the shared directory and added to the file list; DiracFile entries are left alone.

    Args:
        app (GaudiExec): This expects only the GaudiExec app

    Returns:
        tuple: (input_files, input_folders), two lists of paths

    Raises:
        ApplicationConfigurationError: if the app has not been prepared or an input file is of an unsupported type
    """
    if not isinstance(app.is_prepared, ShareDir):
        raise ApplicationConfigurationError(None, 'Failed to prepare Application Correctly')

    shared_dir = app.getSharedPath()
    input_files, input_folders = [], []
    for root, dirs, files in os.walk(shared_dir, topdown=True):
        input_files.extend(os.path.join(root, name) for name in files)
        input_folders.extend(os.path.join(root, name) for name in dirs)

    for file_ in app.getJobObject().inputfiles:
        if isinstance(file_, LocalFile):
            # The file is copied under its base name, so the recorded path
            # must use the base name too (namePattern may hold a directory).
            base_name = os.path.basename(file_.namePattern)
            shutil.copy(os.path.join(file_.localDir, base_name), shared_dir)
            copied_path = os.path.join(shared_dir, base_name)
            # The walk above already lists the file if it was copied earlier.
            if copied_path not in input_files:
                input_files.append(copied_path)
        elif not isinstance(file_, DiracFile):
            raise ApplicationConfigurationError(None, "File type: %s Not _yet_ supported in GaudiExec" % type(file_))

    return input_files, input_folders
def readInputData(self, optsfiles, extraopts=False):
    """Parse a list of options files, optionally together with extraopts.

    NOTE(review): the original docstring promised a BesDataSet return
    value, but this block contains no return statement -- confirm.

    The optional argument extraopts decides whether the extraopts string
    of the application is passed to the parser or not.

    Usage examples:
    # Create an BesDataset object with the data found in the optionsfile
    l=DaVinci(version='v22r0p2').readInputData(
        ["~/cmtuser/DaVinci_v22r0p2/Tutorial/Analysis/options/Bs2JpsiPhi2008.py"])

    # Get the data from an options file and assign it to the jobs inputdata field
    j.inputdata = j.application.readInputData(
        ["~/cmtuser/DaVinci_v22r0p2/Tutorial/Analysis/options/Bs2JpsiPhi2008.py"])

    # Assuming you have data in your extraopts, you can use the extraopts.
    # In this case your extraopts need to be fully parseable by gaudirun.py
    # So you must make sure that you have the proper import statements.
    # e.g. from Gaudi.Configuration import *
    # If you mix optionsfiles and extraopts, as usual extraopts may
    # overwrite your options
    #
    # Use this to create a new job with data from extraopts of an old job
    j=Job(inputdata=jobs[-1].application.readInputData([],True))

    Args:
        optsfiles: a list of options files, or a single options file.
        extraopts (bool): pass ``self.extraopts`` to the parser when true.

    Raises:
        ApplicationConfigurationError: if the options cannot be parsed.
    """

    def dummyfile():
        """Create a one-line temporary .py file and return its path."""
        temp_fd, temp_filename = tempfile.mkstemp(text=True, suffix='.py')
        try:
            os.write(temp_fd, b"#Dummy file to keep the Optionsparser happy")
        finally:
            # Close the descriptor even if the write fails.
            os.close(temp_fd)
        return temp_filename

    # Work on a private list so the caller's argument is never modified.
    if isinstance(optsfiles, list):
        optsfiles = list(optsfiles)
    else:
        optsfiles = [optsfiles]

    # use a dummy file to keep the parser happy
    # NOTE(review): the dummy file is not removed anywhere in this block.
    if len(optsfiles) == 0:
        optsfiles.append(dummyfile())

    self._getshell()
    self._check_inputs()

    if extraopts:
        extraopts = self.extraopts
    else:
        extraopts = ""

    try:
        PythonOptionsParser(optsfiles, extraopts, self.shell)
    except Exception:
        msg = 'Unable to parse the job options. Please check options ' \
              'files and extraopts.'
        raise ApplicationConfigurationError(None, msg)
def _check_inputs(self):
    """Checks the validity of some of user's entries for Gaudi schema

    When no options file is set, the package default
    ``<APPNAME>ROOT/options/<appname>.py`` is appended to ``self.optsfile``.

    Returns:
        ``['optsfile']`` when ``self.optsfile`` was empty on entry,
        otherwise ``None``.

    Raises:
        ApplicationConfigurationError: if ``self.package`` is None.
    """
    self._check_gaudi_inputs(self.optsfile, self.appname)
    if self.package is None:
        msg = "The 'package' attribute must be set for application. "
        raise ApplicationConfigurationError(None, msg)

    inputs = None
    if len(self.optsfile) == 0:
        logger.warning("The 'optsfile' is not set. I hope this is OK!")
        packagedir = self.shell.env[self.appname.upper() + 'ROOT']
        opts = os.path.expandvars(
            os.path.join(packagedir, 'options', self.appname + '.py'))
        # NOTE(review): opts is a joined path and so always truthy; an
        # existence check may have been intended here -- confirm.
        if opts:
            self.optsfile.append(opts)
        else:
            # The format string needs a placeholder; without one the '%'
            # operator raises TypeError instead of logging.
            logger.error('Cannot find the default opts file for %s'
                         % (self.appname + os.sep + self.version))
        inputs = ['optsfile']

    return inputs
def getDefaultRunSite(self):
    '''Append every compatible site to ``self.run_site``.

    Sites are read from the database for the current session and software
    version; when ``self.background_frame`` is set, they are further
    restricted to sites holding the selected background frame production
    series.  The original description states that this method is called
    only if the backend is LCG.

    Raises ApplicationConfigurationError when no site matches.
    '''
    query = '''SELECT site FROM session_site_soft WHERE session_name = %s AND soft_version = %s'''
    values = [self.session, self.soft_version]

    if self.background_frame:
        query = query + ''' AND site IN ( SELECT site FROM background_frame_site WHERE prod_series = %s)'''
        values.append(self.background_frame_prod_series)

    rows = db.read(query, values)
    if len(rows) == 0:
        raise ApplicationConfigurationError(
            None, 'No site found with the specified requirements')

    for row in rows:
        self.run_site.append(row['site'])
def _validate_version(self):
    """Check that ``self.version`` has the BOSS version format.

    Accepted forms are three dot-separated numbers, optionally followed by
    a ``.p<digits>`` patch suffix, e.g. "6.6.3" or "6.6.4.p01".

    Raises:
        ApplicationConfigurationError: if the version does not match.
    """
    # Raw string: '\d' and '\.' are invalid escapes in a normal literal.
    if not re.match(r'^\d+\.\d+\.\d+(\.p\d+)?$', self.version):
        msg = 'The BOSS version format is not correct: %s. It should be like "6.6.3" or "6.6.4.p01"' % self.version
        raise ApplicationConfigurationError(None, msg)
def _get_opts_dict_and_pkl_string(self):
    '''Parse the options using gaudirun.py and create a dictionary of the
    configuration and pickle the options. The app handler will make a copy
    of the .pkl file for each job.

    gaudirun.py is first run with ``-o``.  If it reports that ``-o`` is not
    supported, it is re-run with ``-v -p`` and its stdout is used as the
    options text; otherwise it is re-run with ``-p`` and the options text is
    read from the temporary ``-o`` output file.

    Returns:
        tuple: ``(options, opts_pkl_string)`` -- the evaluated options and
        the content read from the temporary pickle file.

    Raises:
        ValueError: if the gaudirun.py output mentions 'Gaudi.py'.
        ApplicationConfigurationError: if gaudirun.py fails, prints nothing,
            or its options text cannot be evaluated.
        IOError: if the temporary pickle file cannot be read.
    '''
    tmp_pkl = tempfile.NamedTemporaryFile(suffix='.pkl')
    tmp_py = tempfile.NamedTemporaryFile(suffix='.py')
    py_opts = tempfile.NamedTemporaryFile(suffix='.py')

    # try/finally so the temporary files are closed on every exit path,
    # including the exceptions raised below.
    try:
        py_opts.write(self._join_opts_files())
        py_opts.flush()

        gaudirun = 'gaudirun.py -n -v -o %s %s' \
            % (tmp_py.name, py_opts.name)
        opts_str = ''
        options = {}

        rc, stdout, m = shellEnv_cmd(gaudirun, self.env)

        if stdout.find('Gaudi.py') >= 0:
            msg = 'The version of gaudirun.py required for your application is not supported.'
            raise ValueError(None, msg)

        elif stdout.find('no such option: -o') >= 0:
            gaudirun = 'gaudirun.py -n -v -p %s %s' % (tmp_pkl.name,
                                                       py_opts.name)
            rc, stdout, m = shellEnv_cmd(gaudirun, self.env)
            # NOTE(review): the return code of this run is discarded, as in
            # the original code -- confirm this is deliberate.
            rc = 0
            if stdout:
                opts_str = stdout

        else:
            gaudirun = 'gaudirun.py -n -p %s %s' % (tmp_pkl.name,
                                                    py_opts.name)
            rc, stdout, m = shellEnv_cmd(gaudirun, self.env)
            if rc == 0 and stdout:
                opts_str = tmp_py.read()

        # A failed run or an empty output leaves nothing to evaluate.
        if rc != 0 or not stdout:
            logger.debug('Failed to run: %s', gaudirun)
            raise ApplicationConfigurationError(
                None, stdout + '###SPLIT###' + m)

        try:
            # NOTE(review): eval() executes the text produced by
            # gaudirun.py; this is only safe while that output is trusted.
            options = eval(opts_str)
        except Exception as err:
            from traceback import format_exc
            logger.error('Cannot eval() the options file. Exception: %s',
                         err)
            logger.error('%s', format_exc())
            raise ApplicationConfigurationError(
                None, stdout + '###SPLIT###' + m)

        try:
            opts_pkl_string = tmp_pkl.read()
        except IOError:
            logger.error('Cannot read() the temporary pickle file: %s',
                         tmp_pkl.name)
            raise

        return (options, opts_pkl_string)
    finally:
        tmp_pkl.close()
        py_opts.close()
        tmp_py.close()
def configure(self,masterappconfig):
    """Rewrite the user config file for this job and write the run script.

    Lines of ``self.configfile`` starting with ``inputfile_list =``,
    ``inputfile =`` or ``midas_file =`` are replaced with the file names
    from ``job.inputdata.get_dataset_filenames()``; every other line is
    copied unchanged.  The result is written as ``nd280Config.cfg`` to the
    job input workspace, together with ``runND280.sh`` which sources every
    ``cmtsetup`` script and runs ``runND280`` with the user arguments plus
    ``-c <config>``.  ``self._scriptname`` is set to the script path.

    Args:
        masterappconfig: Not used by this method.

    Returns:
        tuple: Always ``(None, None)``.

    Raises:
        ApplicationConfigurationError: if ``cmtsetup`` is empty, ``-c`` is
            given in the arguments, the config file is unset, missing or
            not a file, or the input data does not fit what the config
            file asks for.
    """
    args = convertIntToStringArgs(self.args)

    job = self.getJobObject()

    if self.cmtsetup == []:
        raise ApplicationConfigurationError(None,'No cmt setup script given.')

    if '-c' in args:
        raise ApplicationConfigurationError(None,'Option "-c" given in args. You must use the configfile variable instead.')

    # setup the config file for this job
    if self.configfile == None:
        raise ApplicationConfigurationError(None,'No config file given. Use args list or configfile field.')

    # check if given config file exists
    if not os.path.exists(self.configfile):
        raise ApplicationConfigurationError(None,'The given config file "'+self.configfile+'" was not found.')

    if not os.path.isfile(self.configfile):
        raise ApplicationConfigurationError(None,'The given config file "'+self.configfile+'" is not a file.')

    # Compiled once instead of on every line of the config file.
    inputfile_list_re = re.compile(r"^inputfile_list\s*=")
    inputfile_re = re.compile(r"^inputfile\s*=")
    midas_file_re = re.compile(r"^midas_file\s*=")

    # Right here, take the input config file and change it as needed
    # If found inputfile, just put the first file in the inputdata
    # If this is inputfile_list, then it is in cherry pick so we can put all the files from inputdata
    out_lines = []
    # 'with' closes the config file; the original never closed it.
    with open(self.configfile) as inConf:
        for line in inConf:
            inputfile_listfnd = inputfile_list_re.match(line)
            inputfilefnd = inputfile_re.match(line)
            midas_filefnd = midas_file_re.match(line)
            if inputfile_listfnd or inputfilefnd or midas_filefnd:
                if job.inputdata == None:
                    raise ApplicationConfigurationError(None,'The given config file requires an input file but the inputdata of the job is not defined.')
                infiles = job.inputdata.get_dataset_filenames()
                if len(infiles) < 1:
                    raise ApplicationConfigurationError(None,'The given config file contains "inputfile" but not input file was given')
                if inputfile_listfnd:
                    line = 'inputfile_list = ' + ' '.join(infiles) + '\n'
                elif inputfilefnd:
                    if len(infiles) > 1:
                        raise ApplicationConfigurationError(None,'The given config file contains "inputfile" but more than one input file was given')
                    line = 'inputfile = ' + infiles[0] + '\n'
                elif midas_filefnd:
                    if len(infiles) > 1:
                        raise ApplicationConfigurationError(None,'The given config file contains "midas_file" but more than one file was given')
                    line = 'midas_file = ' + infiles[0] + '\n'
            out_lines.append(line)
    outConf = ''.join(out_lines)

    job.getInputWorkspace().writefile(FileBuffer('nd280Config.cfg',outConf),executable=0)

    args.append('-c')
    args.append(job.inputdir+'nd280Config.cfg')

    argsStr = ' '.join(args)
    # Create the bash script and put it in input dir.
    script = '#!/bin/bash\n'
    for f in self.cmtsetup:
        script += 'source '+f+'\n'
    script += 'runND280 '+argsStr+'\n'
    job.getInputWorkspace().writefile(FileBuffer('runND280.sh',script),executable=1)

    self._scriptname = job.inputdir+'runND280.sh'
    return (None,None)
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """
    Prepare the RTHandler in order to submit to the Dirac backend

    Args:
        app (GaudiExec): This application is only expected to handle GaudiExec Applications here
        appsubconfig (unknown): Output passed from the application configuration call
        appmasterconfig (unknown): Output passed from the application master_configure call
        jobmasterconfig (tuple): Output from the master job prepare step

    Returns:
        StandardJobConfig: wraps the generated DIRAC API script; its own
        input and output boxes are left empty because the sandbox is written
        into the script through INPUT_SANDBOX.

    Raises:
        ApplicationConfigurationError: if ``job.inputfiles`` contains a file
            that is neither a DiracFile nor a LocalFile.
    """

    # NB this needs to be removed safely
    # Get the inputdata and input/output sandbox in a sorted way
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)

    # We know we don't need this one
    inputsandbox = []

    job = app.getJobObject()

    # We can support inputfiles and opts_file here. Locally should be submitted once, remotely can be referenced.

    all_opts_files = app.getOptsFiles()

    for opts_file in all_opts_files:
        if isinstance(opts_file, DiracFile):
            inputsandbox += ['LFN:'+opts_file.lfn]

    # Sort out inputfiles we support
    for file_ in job.inputfiles:
        if isinstance(file_, DiracFile):
            inputsandbox += ['LFN:'+file_.lfn]
        # elif, not a second if: otherwise a DiracFile added above would
        # fall into the final else and be rejected as unsupported.
        elif isinstance(file_, LocalFile):
            # Only files that the master job does not already carry are
            # copied into the shared path and added to the sandbox here.
            if job.master is not None and file_ not in job.master.inputfiles:
                shutil.copy(os.path.join(file_.localDir, file_.namePattern), app.getSharedPath())
                inputsandbox += [os.path.join(app.getSharedPath(), file_.namePattern)]
        else:
            logger.error("Filetype: %s nor currently supported, please contact Ganga Devs if you require support for this with the DIRAC backend" % getName(file_))
            raise ApplicationConfigurationError(None, "Unsupported filetype: %s with DIRAC backend" % getName(file_))

    # Subjobs reuse the uploads recorded on the master job's application.
    master_job = job.master or job

    app.uploadedInput = master_job.application.uploadedInput
    app.jobScriptArchive = master_job.application.jobScriptArchive

    logger.debug("uploadedInput: %s" % app.uploadedInput)

    rep_data = app.uploadedInput.getReplicas()

    logger.debug("Replica info: %s" % rep_data)

    inputsandbox += ['LFN:'+app.uploadedInput.lfn]
    inputsandbox += ['LFN:'+app.jobScriptArchive.lfn]

    logger.debug("Input Sand: %s" % inputsandbox)

    logger.debug("input_data: %s" % input_data)

    outputfiles = [this_file for this_file in job.outputfiles if isinstance(this_file, DiracFile)]

    scriptToRun = getScriptName(app)
    # Already added to sandbox uploaded as LFN

    # This code deals with the outputfiles as outputsandbox and outputdata for us
    lhcbdirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    dirac_script = script_generator(lhcbdiracAPI_script_template(),
                                    DIRAC_IMPORT='from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
                                    DIRAC_JOB_IMPORT='from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
                                    DIRAC_OBJECT='DiracLHCb()',
                                    JOB_OBJECT='LHCbJob()',
                                    NAME=mangle_job_name(app),
                                    EXE=os.path.join('jobScript', scriptToRun),
                                    EXE_ARG_STR='',
                                    EXE_LOG_FILE='Ganga_GaudiExec.log',
                                    ENVIRONMENT=None,  # app.env,
                                    INPUTDATA=input_data,
                                    PARAMETRIC_INPUTDATA=parametricinput_data,
                                    OUTPUT_SANDBOX=API_nullifier(outputsandbox),
                                    OUTPUTFILESSCRIPT=lhcbdirac_outputfiles,
                                    OUTPUT_PATH="",  # job.fqid,
                                    OUTPUT_SE=[],
                                    PLATFORM=app.platform,
                                    SETTINGS=diracAPI_script_settings(app),
                                    DIRAC_OPTS=job.backend.diracOpts,
                                    REPLICATE='True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
                                    # leave the sandbox for altering later as needs
                                    # to be done in backend.submit to combine master.
                                    # Note only using 2 #s as auto-remove 3
                                    INPUT_SANDBOX=repr([f for f in inputsandbox]),
                                    )

    # NB
    # inputsandbox here isn't used by the DIRAC backend as we explicitly define the INPUT_SANDBOX here!

    # Return the output needed for the backend to submit this job
    return StandardJobConfig(dirac_script, inputbox=[], outputbox=[])
def configure(self, masterappconfig):
    """Write the ``TRExPlusOAAnalysis.sh`` wrapper script for this job.

    The script sources ``self.cmtsetup``, runs ``RunTREx.exe`` on the job's
    input files (skipped when ``self.oaana_only`` is set) and then runs
    ``RunOAAnalysis.exe``.  oaAnalysis reads the TREx output, or the
    original input files when ``self.oaana_only`` is set.  Output names are
    derived from the first input file's base name by replacing
    ``self.filenamesubstr`` with ``trex`` / ``anal``; with no substring
    configured both default to ``recoOutput.root``.  The script path is
    stored in ``self._scriptname``.

    Args:
        masterappconfig: not used by this method.

    Returns:
        tuple: always ``(None, None)``.

    Raises:
        ApplicationConfigurationError: if no cmt setup script is given, if
            ``-o`` appears in either argument list, or if the job has no
            inputdata or no input files.
    """
    if self.cmtsetup is None:
        raise ApplicationConfigurationError(None,
                                            'No cmt setup script given.')

    # ---------------- TREx ----------------
    trex_args = convertIntToStringArgs(self.trex_args)
    job = self.getJobObject()

    # The output name is chosen here, so the user may not pass '-o'.
    if '-o' in trex_args:
        raise ApplicationConfigurationError(
            None,
            'Option "-o" given in trex_args. The module will define the output filename.'
        )

    if job.inputdata is None:
        raise ApplicationConfigurationError(
            None, 'The inputdata variable is not defined.')
    input_files = job.inputdata.get_dataset_filenames()
    if len(input_files) < 1:
        raise ApplicationConfigurationError(None,
                                            'No input data file given.')
    trex_args.extend(input_files)

    # Base name of the first input file drives the output names.
    first_name = input_files[0].split('/')[-1]
    no_substr = self.filenamesubstr is None

    trex_outputfile = 'recoOutput.root'
    if not no_substr:
        trex_outputfile = first_name.replace(self.filenamesubstr, "trex")
    trex_args.extend(['-o', trex_outputfile])

    # ---------------- oaAnalysis ----------------
    oaana_args = convertIntToStringArgs(self.oaana_args)

    if '-o' in oaana_args:
        raise ApplicationConfigurationError(
            None,
            'Option "-o" given in oaana_args. You must use the oaana_outputfile variable instead.'
        )

    oaana_args.append('-o')
    if no_substr:
        oaana_outputfile = 'recoOutput.root'
    else:
        oaana_outputfile = first_name.replace(self.filenamesubstr, "anal")
        # If the substring was not found both names are identical; keep
        # them apart so oaAnalysis does not write onto the TREx output.
        if oaana_outputfile == trex_outputfile:
            oaana_outputfile = oaana_outputfile.replace(
                ".root", "_anal.root")
    oaana_args.append(oaana_outputfile)

    # oaAnalysis input: the original files in analysis-only mode,
    # otherwise the TREx output produced just before.
    if self.oaana_only:
        oaana_args.extend(input_files)
    else:
        oaana_args.append(trex_outputfile)

    # Assemble the bash script and place it in the input dir.
    pieces = ['#!/bin/bash\n', 'source ' + self.cmtsetup + '\n']
    if not self.oaana_only:
        pieces.append('RunTREx.exe ' + ' '.join(trex_args) + '\n')
    pieces.append('RunOAAnalysis.exe ' + ' '.join(oaana_args) + '\n')
    script = ''.join(pieces)

    from Ganga.GPIDev.Lib.File import FileBuffer

    scriptname = 'TRExPlusOAAnalysis.sh'
    job.getInputWorkspace().writefile(FileBuffer(scriptname, script),
                                      executable=1)

    self._scriptname = job.inputdir + scriptname

    return (None, None)
def configure(self,masterappconfig):
    """Generate the cosmic-MC runND280 config, log config and wrapper script.

    Exactly one input file is required.  Its path is stored in
    ``self.confopts`` (updated in place) under ``kinfile`` and ``inputfile``.
    The first 8-digit group in its base name becomes the job name and,
    converted to an integer and back to a string, the ``run_number`` option.
    ``nd280log.config`` is written to the output workspace; ``nd280.cfg`` and
    ``runND280.sh`` are written to the input workspace.  The script path is
    stored in ``self._scriptname``.

    Args:
        masterappconfig: not used by this method.

    Returns:
        tuple: always ``(None, None)``.

    Raises:
        ApplicationConfigurationError: if no cmt setup script is given, if
            ``-c`` appears in the args, if the job's inputdata is missing or
            does not hold exactly one file, or if no 8-digit run number is
            found in the input file name.
    """
    args = convertIntToStringArgs(self.args)

    job = self.getJobObject()

    if self.cmtsetup == []:
        raise ApplicationConfigurationError(None,'No cmt setup script given.')

    # '-c' is appended further down, so the user must not supply it.
    if '-c' in args:
        raise ApplicationConfigurationError(None,'Option "-c" given in args. You must use the configfile variable instead.')

    # Same object as self.confopts: the updates below are visible on self.
    confopts = self.confopts

    # use input file from a "dataset"
    if job.inputdata is None:
        raise ApplicationConfigurationError(None,'The given config file requires an input file but the inputdata of the job is not defined.')
    infiles = job.inputdata.get_dataset_filenames()
    n_files = len(infiles)
    if n_files < 1:
        raise ApplicationConfigurationError(None,'The given config file contains "inputfile" but not input file was given')
    if n_files > 1:
        raise ApplicationConfigurationError(None,'The given config file contains "inputfile" but more than one input file was given')
    the_file = infiles[0]
    confopts.update({'kinfile':the_file,'inputfile':the_file})

    # extract "run number" from an input filename
    run_match = re.search(r"(\d{8})",os.path.basename(the_file))
    if not run_match:
        raise ApplicationConfigurationError(None,'Can not extract run number')
    jobid = run_match.group(1)
    job.name = jobid
    # int() drops leading zeros from the 8-digit run string.
    confopts.update({'run_number':str(int(jobid))})

    # create TND280Log config file (in job output dir)
    logConf = ''.join([
        "log.default.level = LogLevel\n",
        "error.default.level = SevereLevel\n",
    ])
    job.getOutputWorkspace().writefile(FileBuffer('nd280log.config',logConf),executable=0)

    # create config file
    cfg = ND280Configs.ND280Config('cosmicmc',confopts)
    outConf = cfg.CreateConfig()
    job.getInputWorkspace().writefile(FileBuffer('nd280.cfg',outConf),executable=0)

    # create a script for a backend
    args.extend(['-c', job.inputdir+'nd280.cfg'])
    argsStr = ' '.join(args)

    # Create the bash script and put it in input dir.
    script_parts = ['#!/bin/bash\n']
    for setup in self.cmtsetup:
        script_parts.append('source '+setup+'\n')
    script_parts.append('cd '+job.outputdir+'\n')
    script_parts.append('runND280 -t . '+argsStr+'\n')
    script = ''.join(script_parts)
    job.getInputWorkspace().writefile(FileBuffer('runND280.sh',script),executable=1)

    self._scriptname = job.inputdir+'runND280.sh'

    return (None,None)
def _validate_input(self):
    """Check the optional ``streamId`` entry of ``self.metadata``.

    When the key is present its value must be ``stream`` followed by one or
    more digits that are not all zero (``stream001`` is accepted,
    ``stream000`` is rejected).  A missing key is accepted.

    Raises:
        ApplicationConfigurationError: if ``streamId`` is present but does
            not match the expected format.
    """
    # 'in' replaces dict.has_key(), which no longer exists in Python 3.
    if 'streamId' in self.metadata and not re.match(r'^stream(?!0+$)\d+$', self.metadata['streamId']):
        msg = 'The streamId format is not correct: %s. It should be like "stream001" but can not be "stream000"' % self.metadata['streamId']
        raise ApplicationConfigurationError(None,msg)