Example #1
def get_master_input_sandbox(job, extra):
    sandbox = job.inputsandbox[:]
    sandbox += extra.master_input_files[:]
    buffers = extra.master_input_buffers
    sandbox += [FileBuffer(n, s) for (n, s) in buffers.items()]
    logger.debug("Master input sandbox: %s", str(sandbox))
    return sandbox
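
A note on the pattern above: FileBuffer pairs a target filename with in-memory contents, so nothing touches disk until the sandbox is packed. A minimal stand-in sketch of that shape (FileBufferSketch is hypothetical and only mirrors the FileBuffer(name, contents) constructor from Ganga.GPIDev.Lib.File):

# Hypothetical stand-in; not the real Ganga class.
class FileBufferSketch(object):
    def __init__(self, name, contents, executable=0):
        self.name = name
        self.contents = contents
        self.executable = executable

    def __repr__(self):
        return "FileBuffer(%r, %d bytes)" % (self.name, len(self.contents))

master_input_buffers = {"data.py": "print('x')", "FileList": "a.root\nb.root"}
sandbox = [FileBufferSketch(n, s) for (n, s) in master_input_buffers.items()]
print(sandbox)  # two in-memory sandbox entries, no files written yet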
Example #2
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):

        job = app.getJobObject()
        version = appsubconfig["version"]
        outDir = appsubconfig["outDir"]
        docDir = appsubconfig["docDir"]
        elementList = docDir.split(os.sep)
        docList = appsubconfig["docList"]
        softwareDir = appsubconfig["softwareDir"]

        lineList = []
        inbox = []
        outbox = []

        headList, headBox = self.head( job = job, version = version, \
           softwareDir = softwareDir )
        lineList.extend(headList)
        outbox.extend(headBox)

        bodyList, bodyBox = self.body( job = job, docDir = docDir, \
           docList = docList )
        lineList.extend(bodyList)
        inbox.extend(bodyBox)

        tailList, tailBox = self.tail(job=job)
        lineList.extend(tailList)
        outbox.extend(tailBox)

        jobScript = "\n".join(lineList)
        jobWrapper = FileBuffer("PDF.sh", jobScript, executable=1)

        outbox.extend(job.outputsandbox)

        return StandardJobConfig\
           ( exe = jobWrapper, inputbox = inbox, outputbox = outbox )
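
Examples #2, #11 and #12 all assemble their wrapper the same way: collect head/body/tail line lists, join once, and wrap the result in an executable FileBuffer. The assembly step in isolation (the three lists are invented stand-ins for self.head/self.body/self.tail):

headList = ["#!/bin/bash", "set -e"]
bodyList = ["./process doc1.pdf", "./process doc2.pdf"]
tailList = ["echo done"]

lineList = []
lineList.extend(headList)   # setup emitted by self.head()
lineList.extend(bodyList)   # per-document work from self.body()
lineList.extend(tailList)   # cleanup from self.tail()
jobScript = "\n".join(lineList)
print(jobScript)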
Example #3
    def configure(self, masterappconfig):

        self.args = convertIntToStringArgs(self.args)

        job = self.getJobObject()

        if self.cmtsetup == None:
            raise ApplicationConfigurationError(None,
                                                'No cmt setup script given.')

        # Need to handle the possibility of multiple output files !
        # setup the output file
        for arg in self.args:
            if arg == '-o':
                raise ApplicationConfigurationError(
                    None,
                    'Option "-o" given in args. You must use the outputfile variable instead, even if you have multiple output files.'
                )

        if self.outputfile == None:
            raise ApplicationConfigurationError(
                None, 'No output file given. Fill the outputfile variable.')
        else:
            if type(self.outputfile) == type([]):
                for OutFi in self.outputfile:
                    self.args.append('-o')
                    self.args.append(OutFi)
            else:
                self.args.append('-o')
                self.args.append(self.outputfile)

        # So get the list of filenames get_dataset_filenames() and create a file containing the list of files and put it in the sandbox
        if job.inputdata == None:
            raise ApplicationConfigurationError(
                None, 'The inputdata variable is not defined.')
        fileList = job.inputdata.get_dataset_filenames()
        if len(fileList) < 1:
            raise ApplicationConfigurationError(None,
                                                'No input data file given.')
        self.args.extend(fileList)

        argsStr = ' '.join(self.args)
        # Create the bash script and put it in input dir.
        script = '#!/bin/bash\n'
        script += 'source ' + self.cmtsetup + '\n'
        script += self.exe + ' ' + argsStr + '\n'

        from Ganga.GPIDev.Lib.File import FileBuffer

        if self.exe.find('.exe') > -1:
            scriptname = self.exe.replace('.exe', '.sh')
        else:
            scriptname = self.exe + '.sh'
        job.getInputWorkspace().writefile(FileBuffer(scriptname, script),
                                          executable=1)

        self._scriptname = job.inputdir + scriptname

        return (None, None)
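
The outputfile handling above collapses to one small helper: emit a '-o <file>' pair per output, whether outputfile is a single name or a list. A self-contained sketch (the helper name is invented):

def append_output_args(args, outputfile):
    # Normalise to a list, then emit one '-o <file>' pair per output.
    outputs = outputfile if isinstance(outputfile, list) else [outputfile]
    for out in outputs:
        args.append('-o')
        args.append(out)
    return args

assert append_output_args([], ['a.root', 'b.root']) == ['-o', 'a.root', '-o', 'b.root']
assert append_output_args([], 'single.root') == ['-o', 'single.root']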
Example #4
    def preparejob(self, jobconfig, master_input_sandbox):

        job = self.getJobObject()
        # print str(job.backend_output_postprocess)
        mon = job.getMonitoringService()
        import Ganga.Core.Sandbox as Sandbox
        subjob_input_sandbox = job.createPackedInputSandbox(jobconfig.getSandboxFiles() + Sandbox.getGangaModulesAsSandboxFiles(Sandbox.getDefaultModules()))

        appscriptpath = [jobconfig.getExeString()] + jobconfig.getArgStrings()
        if self.nice:
            appscriptpath = ['nice', '-n %d' % self.nice] + appscriptpath
        if self.nice < 0:
            logger.warning('increasing process priority is often not allowed, your job may fail due to this')

        sharedoutputpath = job.getOutputWorkspace().getPath()
        ## FIXME DON'T just use the blind list here, request the list of files to be in the output from a method.
        outputpatterns = jobconfig.outputbox
        environment = dict() if jobconfig.env is None else jobconfig.env

        import tempfile
        workdir = tempfile.mkdtemp(dir=config['location'])

        import inspect
        script_location = os.path.join(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))),
                                                        'LocalHostExec.py')

        from Ganga.GPIDev.Lib.File import FileUtils
        script = FileUtils.loadScript(script_location, '')

        script = script.replace('###INLINEMODULES###', inspect.getsource(Sandbox.WNSandbox))

        from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputSandbox, getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles, getWNCodeForInputdataListCreation
        from Ganga.Utility.Config import getConfig
        jobidRepr = repr(job.getFQID('.'))


        script = script.replace('###OUTPUTSANDBOXPOSTPROCESSING###', getWNCodeForOutputSandbox(job, ['stdout', 'stderr', '__syslog__'], jobidRepr))
        script = script.replace('###OUTPUTUPLOADSPOSTPROCESSING###', getWNCodeForOutputPostprocessing(job, ''))
        script = script.replace('###DOWNLOADINPUTFILES###', getWNCodeForDownloadingInputFiles(job, ''))
        script = script.replace('###CREATEINPUTDATALIST###', getWNCodeForInputdataListCreation(job, ''))

        script = script.replace('###APPLICATION_NAME###', repr(getName(job.application)))
        script = script.replace('###INPUT_SANDBOX###', repr(subjob_input_sandbox + master_input_sandbox))
        script = script.replace('###SHAREDOUTPUTPATH###', repr(sharedoutputpath))
        script = script.replace('###APPSCRIPTPATH###', repr(appscriptpath))
        script = script.replace('###OUTPUTPATTERNS###', str(outputpatterns))
        script = script.replace('###JOBID###', jobidRepr)
        script = script.replace('###ENVIRONMENT###', repr(environment))
        script = script.replace('###WORKDIR###', repr(workdir))
        script = script.replace('###INPUT_DIR###', repr(job.getStringInputDir()))

        self.workdir = workdir

        script = script.replace('###GANGADIR###', repr(getConfig('System')['GANGA_PYTHONPATH']))

        wrkspace = job.getInputWorkspace()
        scriptPath = wrkspace.writefile(FileBuffer('__jobscript__', script), executable=1)

        return scriptPath
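
The '###TOKEN###' markers above are resolved by plain string replacement, one token at a time. The substitution pass in isolation (template text and values invented for illustration):

template = "input_sandbox = ###INPUT_SANDBOX###\nworkdir = ###WORKDIR###\n"
replacements = {
    '###INPUT_SANDBOX###': repr(['_input_sandbox_0.tgz']),
    '###WORKDIR###': repr('/tmp/wd'),
}
script = template
for token, value in replacements.items():
    script = script.replace(token, value)
print(script)  # tokens replaced by repr()'d values, ready to execute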
Example #5
def downloadWrapper(app):
    from os.path import join, split
    from Ganga.GPIDev.Lib.File import FileBuffer
    import string

    from Ganga.GPIDev.Lib.File import getSharedPath

    rootsys = join('.', 'root')
    rootenv = {'ROOTSYS': rootsys}

    script = app.script
    if script == File():
        if not app.usepython:
            script = File(defaultScript())
        else:
            script = File(defaultPyRootScript())
    else:
        script = File(os.path.join(getSharedPath(), app.is_prepared.name, os.path.basename(app.script.name)))

    commandline = ''
    scriptPath = join('.', script.subdir, split(script.name)[1])
    if not app.usepython:
        # Arguments to the ROOT script need to be a comma-separated list
        # enclosed in (). Strings should be enclosed in escaped double quotes.
        arglist = []
        for arg in app.args:
            if isinstance(arg, str):
                arglist.append('\\\'' + arg + '\\\'')
            else:
                arglist.append(arg)
        rootarg = '\(\"' + string.join([str(s) for s in arglist], ',') + '\"\)'

        # use root
        commandline = 'root.exe -b -q ' + scriptPath + rootarg + ''
    else:
        # use python
        pyarg = string.join([str(s) for s in app.args], ' ')
        commandline = '\'%(PYTHONCMD)s ' + scriptPath + ' ' + pyarg + ' -b \''

    logger.debug("Command line: %s: ", commandline)

    # Write a wrapper script that installs ROOT and runs script
    script_location = os.path.join(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))),
                                                   'wrapperScriptTemplate.py')
    from Ganga.GPIDev.Lib.File import FileUtils
    wrapperscript = FileUtils.loadScript(script_location, '')

    wrapperscript = wrapperscript.replace('###COMMANDLINE###', commandline)
    wrapperscript = wrapperscript.replace('###ROOTVERSION###', app.version)
    wrapperscript = wrapperscript.replace('###SCRIPTPATH###', scriptPath)
    wrapperscript = wrapperscript.replace('###USEPYTHON###', str(app.usepython))

    logger.debug('Script to run on worker node\n' + wrapperscript)
    scriptName = "rootwrapper_generated_%s.py" % randomString()
    runScript = FileBuffer(scriptName, wrapperscript, executable=1)

    inputsandbox = app._getParent().inputsandbox + [script]
    return runScript, inputsandbox, rootenv
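
The argument quoting above escapes quotes and parentheses so the shell delivers them to root.exe intact. The same quoting in isolation (argument values invented):

args = ['input.root', 100]
arglist = []
for arg in args:
    if isinstance(arg, str):
        arglist.append("\\'" + arg + "\\'")  # escaped quotes around strings
    else:
        arglist.append(arg)
rootarg = '\\(\\"' + ','.join(str(s) for s in arglist) + '\\"\\)'
print('root.exe -b -q myscript.C' + rootarg)
# -> root.exe -b -q myscript.C\(\"\'input.root\',100\"\)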
Example #6
    def configure(self, masterappconfig):

        self.args = convertIntToStringArgs(self.args)

        job = self.getJobObject()

        if self.cmtsetup == None:
            raise ApplicationConfigurationError(None,
                                                'No cmt setup script given.')

        # setup the output file
        for arg in self.args:
            if arg == '-o':
                raise ApplicationConfigurationError(
                    None,
                    'Option "-o" given in args. You must use the outputfile variable instead.'
                )

        if self.outputfile == None:
            raise ApplicationConfigurationError(
                None, 'No output file given. Fill the outputfile variable.')
        else:
            self.args.append('-o')
            self.args.append(self.outputfile)

        # So get the list of filenames get_dataset_filenames() and create a file containing the list of files and put it in the sandbox
        fileList = job.inputdir + 'FileList'
        if not job.inputdata.set_dataset_into_list(fileList):
            raise ApplicationConfigurationError(
                None,
                'Problem with the preparation of the list of input files')
        self.args.append(fileList)

        argsStr = ' '.join(self.args)
        # ANT: Create the bash script here and put it in input dir.
        script = '#!/bin/bash\n'
        script += 'source ' + self.cmtsetup + '\n'
        script += self.exe + ' ' + argsStr + '\n'

        from Ganga.GPIDev.Lib.File import FileBuffer

        if self.exe.find('.exe') > -1:
            scriptname = self.exe.replace('.exe', '.sh')
        else:
            scriptname = self.exe + '.sh'
        job.getInputWorkspace().writefile(FileBuffer(scriptname, script),
                                          executable=1)

        self._scriptname = job.inputdir + scriptname

        return (None, None)
Example #7
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):

        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig,
                                                      appmasterconfig,
                                                      jobmasterconfig)

        run_script = self.__create_run_script(app, appsubconfig,
                                              appmasterconfig, jobmasterconfig,
                                              inputsandbox, outputsandbox)
        return StandardJobConfig(FileBuffer('gaudi-script.py',
                                            run_script,
                                            executable=1),
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Example #8
   def createNewJob(self):
      """Create any jobs required for this unit"""      
      j = GPI.Job()
      j._impl.backend = self._getParent().backend.clone()
      j._impl.application = self._getParent().application.clone()
      j.inputdata = self.inputdata.clone()

      trf = self._getParent()
      task = trf._getParent()

      # copy across the outputfiles
      for f in trf.outputfiles:
         j.outputfiles += [f.clone()]

      j.inputsandbox = trf.inputsandbox

      if type(self.eventswanted) == type(''):
        subLines = self.eventswanted
      else:
        subLines = '\n'.join(self.eventswanted)
      # Base for the naming of each subjob's CSV file
      incsvfile = j._impl.application.csvfile
      tmpname = os.path.basename(incsvfile)
      if len(tmpname.split('.')) > 1:
        patterncsv = '.'.join(tmpname.split('.')[0:-1])+"_sub%d."+ tmpname.split('.')[-1]
      else:
        patterncsv = tmpname+"_sub%d"

      from Ganga.GPIDev.Lib.File import FileBuffer
      thiscsv = patterncsv % self.subpartid

      # Create the CSV file for this Unit
      j._impl.getInputWorkspace().writefile(FileBuffer(thiscsv,subLines),executable=0)
      j._impl.application.csvfile = j._impl.getInputWorkspace().getPath()+thiscsv
      j.inputsandbox.append(j._impl.getInputWorkspace().getPath()+thiscsv)

      # Base for the naming of each subjob's output file
      tmpname = os.path.basename(j._impl.application.outputfile)
      if len(tmpname.split('.')) > 1:
        patternout = '.'.join(tmpname.split('.')[0:-1])+"_sub%d."+ tmpname.split('.')[-1]
      else:
        patternout = tmpname+"_sub%d"
      j._impl.application.outputfile = patternout % self.subpartid

      # Sort out the splitter
      if trf.splitter:
         j.splitter = trf.splitter.clone()
         
      return j
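
The _sub%d renaming used here and again in Example #10, extracted into a stand-alone helper (the function name is mine):

import os

def subjob_pattern(path):
    # 'events.csv' -> 'events_sub%d.csv'; extensionless names just get a suffix.
    tmpname = os.path.basename(path)
    parts = tmpname.split('.')
    if len(parts) > 1:
        return '.'.join(parts[:-1]) + "_sub%d." + parts[-1]
    return tmpname + "_sub%d"

assert subjob_pattern('/data/events.csv') % 3 == 'events_sub3.csv'
assert subjob_pattern('outputfile') % 0 == 'outputfile_sub0'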
Example #9
    def wrapper(self, regexp, version, timeout, kernel):
        """Write a wrapper Python script that executes the notebooks"""
        wrapperscript = FileUtils.loadScript(self.templatelocation(), '')

        wrapperscript = wrapperscript.replace('###NBFILES###', str(regexp))
        wrapperscript = wrapperscript.replace('###VERSION###', str(version))
        wrapperscript = wrapperscript.replace('###TIMEOUT###', str(timeout))
        wrapperscript = wrapperscript.replace('###KERNEL###', str(kernel))
        wrapperscript = wrapperscript.replace('###UUID###', str(uuid.uuid4()))

        logger.debug('Script to run on worker node\n' + wrapperscript)
        scriptName = "notebook_wrapper_generated.py"
        runScript = FileBuffer(scriptName, wrapperscript, executable=1)

        return runScript
Example #10
    def split(self, job):
        import os

        subjobs = []

        subsets = splitCSVFile(job.application.csvfile, self.nbevents)

        logger.info('Creating %d subjobs ...', len(subsets))

        # Base for the naming of each subjob's CSV file
        tmpname = os.path.basename(job.application.csvfile)
        if len(tmpname.split('.')) > 1:
            patterncsv = '.'.join(
                tmpname.split('.')[0:-1]) + "_sub%d." + tmpname.split('.')[-1]
        else:
            patterncsv = tmpname + "_sub%d"

        # Base for the naming of each subjob's output file
        tmpname = os.path.basename(job.application.outputfile)
        if len(tmpname.split('.')) > 1:
            patternout = '.'.join(
                tmpname.split('.')[0:-1]) + "_sub%d." + tmpname.split('.')[-1]
        else:
            patternout = tmpname + "_sub%d"

        for s, sub in enumerate(subsets):
            j = addProxy(self.createSubjob(job))

            j.inputdata = job.inputdata

            subLines = '\n'.join(sub)

            from Ganga.GPIDev.Lib.File import FileBuffer
            thiscsv = patterncsv % s
            # Save in the main job's inputdir now, then the file will be moved to
            # the inputdir of each subjobs.
            job.getInputWorkspace().writefile(FileBuffer(thiscsv, subLines),
                                              executable=0)
            j.application.csvfile = os.path.join(job.inputdir, thiscsv)
            j.application.outputfile = patternout % s

            # Prepare the output filenames which must be unique

            subjobs.append(stripProxy(j))

        return subjobs
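
splitCSVFile() is not shown in this snippet; a plausible stand-in, under the assumption that it chunks the non-comment lines of the CSV into groups of nbevents lines (assumed behaviour, not the real implementation):

def split_csv_lines(lines, nbevents):
    # Assumed behaviour only: drop comments, chunk the rest.
    rows = [l for l in lines if l.strip() and not l.startswith('#')]
    return [rows[i:i + nbevents] for i in range(0, len(rows), nbevents)]

subsets = split_csv_lines(['# run,subrun', '1,0', '1,1', '2,0'], 2)
assert subsets == [['1,0', '1,1'], ['2,0']]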
Example #11
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):

        job = app.getJobObject()
        version = appsubconfig["version"]
        libList = appsubconfig["libList"]
        classifierDir = appsubconfig["classifierDir"]
        outDir = appsubconfig["outDir"]
        imageDir = appsubconfig["imageDir"]
        elementList = imageDir.split(os.sep)
        imageList = appsubconfig["imageList"]
        tagFile = appsubconfig["tagFile"]

        lineList = []
        inbox = []
        outbox = []

        headList, headBox = self.head( job = job, version = version, \
           libList = libList, classifierDir = classifierDir )
        lineList.extend(headList)
        outbox.extend(headBox)

        bodyList, bodyBox = self.body( job = job, imageDir = imageDir, \
           imageList = imageList, tagFile = tagFile )
        lineList.extend(bodyList)
        inbox.extend(bodyBox)

        tailList, tailBox = self.tail(job=job)
        lineList.extend(tailList)
        outbox.extend(tailBox)

        jobScript = "\n".join(lineList)
        jobWrapper = FileBuffer("Classify.sh", jobScript, executable=1)

        outbox.extend(job.outputsandbox)

        return StandardJobConfig\
           ( exe = jobWrapper, inputbox = inbox, outputbox = outbox )
Example #12
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):

        job = app.getJobObject()
        exePath = appsubconfig["exePath"]
        outDir = appsubconfig["outDir"]
        imageDir = appsubconfig["imageDir"]
        elementList = imageDir.split(os.sep)
        imageSubdir = \
           os.sep.join( elementList[ elementList.index( "images" ) : ] )
        urlRoot = \
           os.path.join( "http://hovercraft.hep.phy.cam.ac.uk", imageSubdir )
        imageList = appsubconfig["imageList"]

        lineList = []
        outbox = []

        headList, headBox = self.head(job=job, exePath=exePath)
        lineList.extend(headList)
        outbox.extend(headBox)

        bodyList, bodyBox = self.body( job = job, imageDir = imageDir, \
           urlRoot = urlRoot, imageList = imageList )
        lineList.extend(bodyList)
        outbox.extend(bodyBox)

        tailList, tailBox = self.tail(job=job)
        lineList.extend(tailList)
        outbox.extend(tailBox)

        jobScript = "\n".join(lineList)
        jobWrapper = FileBuffer("VansegLocal.sh", jobScript, executable=1)

        outbox.extend(job.outputsandbox)

        return StandardJobConfig\
           ( exe = jobWrapper, outputbox = outbox )
Example #13
    def configure(self, masterappconfig):
        if self.cmtsetup == None:
            raise ApplicationConfigurationError(None,
                                                'No cmt setup script given.')

        # __________ TREx first ____________
        trex_args = convertIntToStringArgs(self.trex_args)

        job = self.getJobObject()

        # Need to handle the possibility of multiple output files !
        # setup the output file
        for arg in trex_args:
            if arg == '-o':
                raise ApplicationConfigurationError(
                    None,
                    'Option "-o" given in trex_args. The module will define the output filename.'
                )

        # So get the list of filenames get_dataset_filenames() and create a file containing the list of files and put it in the sandbox
        if job.inputdata == None:
            raise ApplicationConfigurationError(
                None, 'The inputdata variable is not defined.')
        fileList = job.inputdata.get_dataset_filenames()
        if len(fileList) < 1:
            raise ApplicationConfigurationError(None,
                                                'No input data file given.')
        trex_args.extend(fileList)

        firstFile = fileList[0].split('/')[-1]
        # Define the output
        trex_args.append('-o')
        if self.filenamesubstr == None:
            trex_outputfile = 'recoOutput.root'
        else:
            trex_outputfile = firstFile.replace(self.filenamesubstr, "trex")

        trex_args.append(trex_outputfile)

        # __________ Now oaAnalysis ____________
        oaana_args = convertIntToStringArgs(self.oaana_args)

        job = self.getJobObject()

        # Need to handle the possibility of multiple output files !
        # setup the output file
        for arg in oaana_args:
            if arg == '-o':
                raise ApplicationConfigurationError(
                    None,
                    'Option "-o" given in oaana_args. You must use the oaana_outputfile variable instead.'
                )

        oaana_args.append('-o')
        if self.filenamesubstr == None:
            oaana_outputfile = 'recoOutput.root'
        else:
            oaana_outputfile = firstFile.replace(self.filenamesubstr, "anal")
            # protection against failed substitution
            if oaana_outputfile == trex_outputfile:
                oaana_outputfile = oaana_outputfile.replace(
                    ".root", "_anal.root")
        oaana_args.append(oaana_outputfile)

        # Use the reco output as an input for the VFT processing.
        if self.oaana_only:
            oaana_args.extend(fileList)
        else:
            oaana_args.append(trex_outputfile)

        trex_argsStr = ' '.join(trex_args)
        oaana_argsStr = ' '.join(oaana_args)
        # Create the bash script and put it in input dir.
        script = '#!/bin/bash\n'
        script += 'source ' + self.cmtsetup + '\n'
        if not self.oaana_only:
            script += 'RunTREx.exe ' + trex_argsStr + '\n'
        script += 'RunOAAnalysis.exe ' + oaana_argsStr + '\n'

        from Ganga.GPIDev.Lib.File import FileBuffer

        scriptname = 'TRExPlusOAAnalysis.sh'
        job.getInputWorkspace().writefile(FileBuffer(scriptname, script),
                                          executable=1)

        self._scriptname = job.inputdir + scriptname

        return (None, None)
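
The failed-substitution guard above in isolation, showing why the oaAnalysis output cannot clobber the TREx output (filenames invented):

firstFile = 'run000123_reco.root'
filenamesubstr = 'reco'

trex_outputfile = firstFile.replace(filenamesubstr, 'trex')
oaana_outputfile = firstFile.replace(filenamesubstr, 'anal')
if oaana_outputfile == trex_outputfile:  # substitution had no effect
    oaana_outputfile = oaana_outputfile.replace('.root', '_anal.root')

assert trex_outputfile == 'run000123_trex.root'
assert oaana_outputfile == 'run000123_anal.root'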
Example #14
    def preparejob(self, jobconfig, master_input_sandbox):
        """Method for preparing job script"""

        job = self.getJobObject()

        from Ganga.GPIDev.Lib.File import File
        from Ganga.Core.Sandbox.WNSandbox import PYTHON_DIR
        import Ganga.Utility.files
        import inspect

        fileutils = File(inspect.getsourcefile(Ganga.Utility.files),
                         subdir=PYTHON_DIR)
        inputfiles = jobconfig.getSandboxFiles() + [fileutils]
        inbox = job.createPackedInputSandbox(inputfiles)

        inbox.extend(master_input_sandbox)
        inpDir = job.getInputWorkspace(create=True).getPath()
        outDir = job.getOutputWorkspace(create=True).getPath()
        workdir = tempfile.mkdtemp()
        self.workdir = workdir
        exeString = jobconfig.getExeString()
        argList = jobconfig.getArgStrings()
        argString = " ".join(map(lambda x: " %s " % x, argList))

        outputSandboxPatterns = jobconfig.outputbox
        patternsToZip = []
        wnCodeForPostprocessing = ''
        wnCodeToDownloadInputFiles = ''

        if (len(job.outputfiles) > 0):

            from Ganga.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatternsForInteractive, getWNCodeForOutputPostprocessing
            (outputSandboxPatterns,
             patternsToZip) = getOutputSandboxPatternsForInteractive(job)

            wnCodeForPostprocessing = 'def printError(message):pass\ndef printInfo(message):pass' + \
                getWNCodeForOutputPostprocessing(job, '')

        all_inputfiles = [this_file for this_file in job.inputfiles]
        if job.master is not None:
            all_inputfiles.extend(
                [this_file for this_file in job.master.inputfiles])

        wnCodeToDownloadInputFiles = ''

        if (len(all_inputfiles) > 0):

            from Ganga.GPIDev.Lib.File.OutputFileManager import outputFilePostProcessingOnWN

            for inputFile in all_inputfiles:

                inputfileClassName = getName(inputFile)

                logger.debug("name: %s" % inputfileClassName)
                logger.debug(
                    "result: %s" %
                    str(outputFilePostProcessingOnWN(job, inputfileClassName)))

                if outputFilePostProcessingOnWN(job, inputfileClassName):
                    inputFile.processWildcardMatches()
                    if inputFile.subfiles:
                        # Wildcard expanded: fetch each matched subfile.
                        getFromFile = False
                        for subfile in inputFile.subfiles:
                            wnCodeToDownloadInputFiles += subfile.getWNScriptDownloadCommand('')
                    else:
                        getFromFile = True

                    if getFromFile:
                        wnCodeToDownloadInputFiles += inputFile.getWNScriptDownloadCommand(
                            '')

        wnCodeToDownloadInputData = ''

        if job.inputdata and (len(job.inputdata) > 0):

            from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForDownloadingInputFiles

            wnCodeToDownloadInputData = getWNCodeForDownloadingInputFiles(
                job, '')

        import inspect

        replace_dict = {
            '###CONSTRUCT_TIME###': (time.strftime("%c")),
            '###WNSANDBOX_SOURCE###':
            inspect.getsource(Sandbox.WNSandbox),
            '###GANGA_PYTHONPATH###':
            getConfig("System")["GANGA_PYTHONPATH"],
            '###OUTPUTDIR###':
            outDir,
            '###WORKDIR###':
            workdir,
            '###IN_BOX###':
            inbox,
            '###WN_INPUTFILES###':
            wnCodeToDownloadInputFiles,
            '###WN_INPUTDATA###':
            wnCodeToDownloadInputData,
            '###JOBCONFIG_ENV###':
            jobconfig.env if jobconfig.env is not None else dict(),
            '###EXE_STRING###':
            exeString,
            '###ARG_STRING###':
            argString,
            '###WN_POSTPROCESSING###':
            wnCodeForPostprocessing,
            '###PATTERNS_TO_ZIP###':
            patternsToZip,
            '###OUTPUT_SANDBOX_PATTERNS###':
            outputSandboxPatterns
        }

        script_location = os.path.join(
            os.path.dirname(
                os.path.abspath(inspect.getfile(inspect.currentframe()))),
            'InteractiveScriptTemplate.py.template')

        from Ganga.GPIDev.Lib.File import FileUtils
        commandString = FileUtils.loadScript(script_location, '')

        for k, v in replace_dict.iteritems():
            commandString = commandString.replace(str(k), str(v))

        return job.getInputWorkspace().writefile(FileBuffer(
            "__jobscript__", commandString),
                                                 executable=1)
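
dict.iteritems() pins this snippet to Python 2; under Python 3 the same substitution pass uses dict.items() (template and values invented):

commandString = "cd ###WORKDIR### && ###EXE_STRING### ###ARG_STRING###"
replace_dict = {'###WORKDIR###': '/tmp/wd',
                '###EXE_STRING###': 'echo',
                '###ARG_STRING###': 'hello'}
for k, v in replace_dict.items():  # .items() replaces Python 2's .iteritems()
    commandString = commandString.replace(str(k), str(v))
print(commandString)  # cd /tmp/wd && echo hello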
Example #15
File: Batch.py Project: pseyfert/ganga
    def preparejob(self, jobconfig, master_input_sandbox):

        job = self.getJobObject()
        mon = job.getMonitoringService()
        import Ganga.Core.Sandbox as Sandbox
        from Ganga.GPIDev.Lib.File import File
        from Ganga.Core.Sandbox.WNSandbox import PYTHON_DIR
        import inspect

        fileutils = File( inspect.getsourcefile(Ganga.Utility.files), subdir=PYTHON_DIR )
        subjob_input_sandbox = job.createPackedInputSandbox(jobconfig.getSandboxFiles() + [ fileutils ] )

        appscriptpath = [jobconfig.getExeString()] + jobconfig.getArgStrings()
        sharedoutputpath = job.getOutputWorkspace().getPath()
        ## FIXME Check this isn't a GangaList
        outputpatterns = jobconfig.outputbox
        environment = jobconfig.env if not jobconfig.env is None else {}


        import inspect
        script_location = os.path.join(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))),
                                                       'BatchScriptTemplate.py')

        from Ganga.GPIDev.Lib.File import FileUtils
        text = FileUtils.loadScript(script_location, '')

        import Ganga.Core.Sandbox as Sandbox
        import Ganga.Utility as Utility
        from Ganga.Utility.Config import getConfig
        from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputSandbox, getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles
        jobidRepr = repr(self.getJobObject().getFQID('.'))

        replace_dict = {

        '###OUTPUTSANDBOXPOSTPROCESSING###' : getWNCodeForOutputSandbox(job, ['__syslog__'], jobidRepr),

        '###OUTPUTUPLOADSPOSTPROCESSING###' : getWNCodeForOutputPostprocessing(job, ''),

        '###DOWNLOADINPUTFILES###' : getWNCodeForDownloadingInputFiles(job, ''),

        '###INLINEMODULES###' : inspect.getsource(Sandbox.WNSandbox),
        '###INLINEHOSTNAMEFUNCTION###' : inspect.getsource(Utility.util.hostname),
        '###APPSCRIPTPATH###' : repr(appscriptpath),
        #'###SHAREDINPUTPATH###' : repr(sharedinputpath)),

        '###INPUT_SANDBOX###' : repr(subjob_input_sandbox + master_input_sandbox),
        '###SHAREDOUTPUTPATH###' : repr(sharedoutputpath),

        '###OUTPUTPATTERNS###' : repr(outputpatterns),
        '###JOBID###' : jobidRepr,
        '###ENVIRONMENT###' : repr(environment),
        '###PREEXECUTE###' : self.config['preexecute'],
        '###POSTEXECUTE###' : self.config['postexecute'],
        '###JOBIDNAME###' : self.config['jobid_name'],
        '###QUEUENAME###' : self.config['queue_name'],
        '###HEARTBEATFREQUENCE###' : self.config['heartbeat_frequency'],
        '###INPUT_DIR###' : repr(job.getStringInputDir()),

        '###GANGADIR###' : repr(getConfig('System')['GANGA_PYTHONPATH'])
        }

        for k, v in replace_dict.iteritems():
            text = text.replace(str(k), str(v))

        logger.debug('subjob input sandbox %s ', subjob_input_sandbox)
        logger.debug('master input sandbox %s ', master_input_sandbox)

        from Ganga.GPIDev.Lib.File import FileBuffer

        return job.getInputWorkspace().writefile(FileBuffer('__jobscript__', text), executable=1)
Example #16
    def configure(self, masterappconfig):

        exefile = 'skimFromCSV.exe'
        exe = 'skimFromCSV.exe'
        # exe = '/'.join([os.getenv("RECONUTILSROOT"),os.getenv("CMTCONFIG"),exefile])
        # if not isfile(exe):
        #   raise ApplicationConfigurationError(None,'Cannot find executable '+exe)

        job = self.getJobObject()

        if self.cmtsetup == None:
            raise ApplicationConfigurationError('No cmt setup script given.')
        if not isfile(self.cmtsetup):
            raise ApplicationConfigurationError(
                'Cannot find cmt setup script ' + self.cmtsetup)

        # Copy CSV file to inputdir. Done in splitter for subjobs.
        if not isfile(self.csvfile):
            raise ApplicationConfigurationError('Cannot find CSV file ' +
                                                self.csvfile)
        from shutil import copy
        tmpcsv = os.path.join(job.inputdir, os.path.basename(self.csvfile))
        if not os.path.exists(tmpcsv):
            copy(self.csvfile, job.inputdir)
        self.csvfile = tmpcsv

        args = []

        args.append('-O')
        args.append('file=' + self.csvfile)

        if self.outputfile == None:
            raise ApplicationConfigurationError(
                'No output file given. Fill the outputfile variable.')

        args.append('-o')
        args.append(self.outputfile)

        # Read the CSV file
        csvfile = open(self.csvfile, 'rb')
        run_subrun = []
        for line in csvfile:
            if line[0] == '#':
                continue
            row = line.split(",")
            if len(row) < 3:
                print "Ignoring badly-formatted line:", ",".join(row)
                continue

            r_sr = "%(run)08d-%(subrun)04d" % {
                "run": int(row[0]),
                "subrun": int(row[1])
            }
            if r_sr not in run_subrun:
                run_subrun.append(r_sr)

        # So get the list of filenames get_dataset_filenames() and create a file containing the list of files and put it in the sandbox
        if job.inputdata == None:
            raise ApplicationConfigurationError(
                'The inputdata variable is not defined.')
        rawFileList = job.inputdata.get_dataset_filenames()
        if len(rawFileList) < 1:
            raise ApplicationConfigurationError('No input data file given.')

        fileList = []
        for r_sr in run_subrun:
            for rfile in rawFileList:
                if rfile.find(r_sr) > -1:
                    fileList.append(rfile)
                    continue
        if not len(fileList):
            raise ApplicationConfigurationError(
                'No file matching the run_subrun in the CSV file %s.' %
                self.csvfile)
        args.extend(fileList)

        argsStr = ' '.join(args)
        # Create the bash script and put it in input dir.
        script = '#!/bin/bash\n'
        script += 'source ' + self.cmtsetup + '\n'
        script += '${RECONUTILSROOT}/${CMTCONFIG}/' + exe + ' ' + argsStr + '\n'
        # Little trick to be able to control the final destination
        # of the subjob's CSV file with SandboxFile or MassStorageFile
        if job.master is not None:
            script += 'cp %s .' % self.csvfile

        from Ganga.GPIDev.Lib.File import FileBuffer

        if exefile.find('.exe') > -1:
            scriptname = exefile.replace('.exe', '.sh')
        else:
            scriptname = exefile + '.sh'
        job.getInputWorkspace().writefile(FileBuffer(scriptname, script),
                                          executable=1)

        self._scriptname = job.inputdir + scriptname

        return (None, None)
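
The run/subrun key format in isolation: run zero-padded to 8 digits, subrun to 4, joined by a dash (row contents invented):

row = '123,4,/path/to/file.root'.split(",")
r_sr = "%(run)08d-%(subrun)04d" % {"run": int(row[0]), "subrun": int(row[1])}
assert r_sr == '00000123-0004'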
Example #17
    def configure(self, masterappconfig):
        if self.cmtsetup == None:
            raise ApplicationConfigurationError(None,
                                                'No cmt setup script given.')

        # __________ Reco first ____________
        reco_args = convertIntToStringArgs(self.reco_args)

        job = self.getJobObject()

        # Need to handle the possibility of multiple output files !
        # setup the output file
        for arg in reco_args:
            if arg == '-o':
                raise ApplicationConfigurationError(
                    None,
                    'Option "-o" given in reco_args. You must use the filenamesubstr and reconewstr variables instead to define an output.'
                )

        # So get the list of filenames get_dataset_filenames() and create a file containing the list of files and put it in the sandbox
        if job.inputdata == None:
            raise ApplicationConfigurationError(
                None, 'The inputdata variable is not defined.')
        fileList = job.inputdata.get_dataset_filenames()
        if len(fileList) < 1:
            raise ApplicationConfigurationError(None,
                                                'No input data file given.')

        firstFile = fileList[0].split('/')[-1]
        # Define the output
        reco_args.append('-o')
        if self.filenamesubstr == None:
            reco_outputfile = 'recoOutput.root'
        else:
            reco_outputfile = firstFile.replace(self.filenamesubstr,
                                                self.reconewstr)

        reco_args.append(reco_outputfile)

        # Just to define the output before the potentially long list of input files
        reco_args.extend(fileList)

        # __________ Now VFT ____________
        vft_args = convertIntToStringArgs(self.vft_args)

        job = self.getJobObject()

        # Need to handle the possibility of multiple output files !
        # setup the output file
        for arg in vft_args:
            if arg == '-o':
                raise ApplicationConfigurationError(
                    None,
                    'Option "-o" given in vft_args. You must use the filenamesubstr and reconewstr variables instead to define an output.'
                )

        # Define the output
        vft_args.append('-o')
        if self.filenamesubstr == None:
            vft_outputfile = 'vftOutput.root'
        else:
            vft_outputfile = firstFile.replace(self.filenamesubstr,
                                               self.vftnewstr)
        vft_args.append(vft_outputfile)

        # Use the reco output as an input for the VFT processing
        # or use the input file list if running in VFT only mode.
        if self.vft_only:
            vft_args.extend(fileList)
        else:
            vft_args.append(reco_outputfile)

        reco_argsStr = ' '.join(reco_args)
        vft_argsStr = ' '.join(vft_args)
        # Create the bash script and put it in input dir.
        script = '#!/bin/bash\n'
        script += 'source ' + self.cmtsetup + '\n'
        if not self.vft_only:
            script += self.reco_exe + ' ' + reco_argsStr + '\n'
        script += self.vft_exe + ' ' + vft_argsStr + '\n'

        from Ganga.GPIDev.Lib.File import FileBuffer

        scriptname = 'RecoPlusVFT.sh'
        job.getInputWorkspace().writefile(FileBuffer(scriptname, script),
                                          executable=1)

        self._scriptname = job.inputdir + scriptname

        return (None, None)
Example #18
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):

        logger.debug("Prepare")

        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig,
                                                      appmasterconfig,
                                                      jobmasterconfig)

        job = app.getJobObject()

        logger.debug("Loading pickle files")

        #outputfiles=set([file.namePattern for file in job.outputfiles]).difference(set(getOutputSandboxPatterns(job)))
        # Can't wait to get rid of this when people no longer specify
        # inputdata in options file
        #######################################################################
        # splitters ensure that subjobs pick up inputdata from job over that in
        # optsfiles but need to take care of unsplit jobs
        if not job.master:
            share_path = os.path.join(get_share_path(app), 'inputdata',
                                      'options_data.pkl')

            if not job.inputdata:
                if os.path.exists(share_path):
                    f = open(share_path, 'r+b')
                    job.inputdata = pickle.load(f)
                    f.close()

        #######################################################################
        # Can't wait to get rid of this when people no longer specify
        # outputsandbox or outputdata in options file
        #######################################################################
        share_path = os.path.join(get_share_path(app), 'output',
                                  'options_parser.pkl')

        logger.debug("Adding info from pickle files")

        if os.path.exists(share_path):
            f = open(share_path, 'r+b')
            parser = pickle.load(f)
            f.close()

            outbox, outdata = parser.get_output(job)

            from Ganga.GPIDev.Lib.File import FileUtils
            from Ganga.GPIDev.Base.Filters import allComponentFilters

            fileTransform = allComponentFilters['gangafiles']
            job.non_copyable_outputfiles.extend([
                fileTransform(this_file, None) for this_file in outdata
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ])
            job.non_copyable_outputfiles.extend([
                fileTransform(this_file, None) for this_file in outbox
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ])

            outputsandbox.extend(
                [f.namePattern for f in job.non_copyable_outputfiles])

            outputsandbox.extend([f.namePattern for f in job.outputfiles])
            outputsandbox = unique(outputsandbox)
        #######################################################################

        logger.debug("Doing XML Catalog stuff")

        data = job.inputdata
        data_str = ''
        if data:
            logger.debug("Returning options String")
            data_str = data.optionsString()
            if data.hasLFNs():
                logger.debug("Returning Catalogue")
                inputsandbox.append(
                    FileBuffer('catalog.xml', data.getCatalog()))
                cat_opts = '\nfrom Gaudi.Configuration import FileCatalog\nFileCatalog().Catalogs = ["xmlcatalog_file:catalog.xml"]\n'
                data_str += cat_opts

        logger.debug("Doing splitter_data stuff")
        if hasattr(job, '_splitter_data'):
            data_str += job._splitter_data
        inputsandbox.append(FileBuffer('data.py', data_str))

        logger.debug("Doing GaudiPython stuff")

        cmd = 'python ./gaudipython-wrapper.py'
        opts = ''
        if is_gaudi_child(job.application):
            opts = 'options.pkl'
            cmd = 'gaudirun.py ' + \
                ' '.join(job.application.args) + ' %s data.py' % opts

        logger.debug("Setting up script")

        script = script_generator(
            create_runscript(job.application.newStyleApp),
            remove_unreplaced=False,
            OPTS=opts,
            PROJECT_OPTS=job.application.setupProjectOptions,
            APP_NAME=job.application.appname,
            APP_VERSION=job.application.version,
            APP_PACKAGE=job.application.package,
            PLATFORM=job.application.platform,
            CMDLINE=cmd,
            XMLSUMMARYPARSING=getXMLSummaryScript())  # ,
        # OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ''))

        logger.debug("Returning StandardJobConfig")

        return StandardJobConfig(FileBuffer('gaudi-script.py',
                                            script,
                                            executable=1),
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
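
The pickle hand-off this prepare() relies on, reduced to a self-contained round trip (path and payload invented; in Ganga the pickle files are written when the application is prepared):

import os
import pickle
import tempfile

share_path = os.path.join(tempfile.mkdtemp(), 'options_data.pkl')
with open(share_path, 'wb') as f:          # stand-in for the prepare step
    pickle.dump(['a.dst', 'b.dst'], f)

if os.path.exists(share_path):             # the load pattern used above
    with open(share_path, 'rb') as f:
        inputdata = pickle.load(f)
print(inputdata)  # ['a.dst', 'b.dst']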
Example #19
def get_input_sandbox(extra):
    sandbox = []
    sandbox += extra.input_files[:]
    sandbox += [FileBuffer(n, s) for (n, s) in extra.input_buffers.items()]
    logger.debug("Input sandbox: %s", str(sandbox))
    return sandbox
Example #20
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig,
                                                      appmasterconfig,
                                                      jobmasterconfig)
        input_data, parametricinput_data = dirac_inputdata(app)
        #        outputdata,   outputdata_path      = dirac_ouputdata(app)

        job = stripProxy(app).getJobObject()
        outputfiles = [
            this_file for this_file in job.outputfiles
            if isType(this_file, DiracFile)
        ]

        commandline = []
        commandline.append(app.exe)
        if isType(app.exe, File):
            #logger.info("app: %s" % str(app.exe.name))
            #fileName = os.path.join(get_share_path(app), os.path.basename(app.exe.name))
            #logger.info("EXE: %s" % str(fileName))
            #inputsandbox.append(File(name=fileName))
            inputsandbox.append(app.exe)
            commandline[0] = os.path.join('.', os.path.basename(app.exe.name))
        commandline.extend([str(arg) for arg in app.args])
        logger.debug('Command line: %s: ', commandline)

        #exe_script_path = os.path.join(job.getInputWorkspace().getPath(), "exe-script.py")
        exe_script_name = 'exe-script.py'

        logger.info("Setting Command to be: '%s'" % repr(commandline))

        inputsandbox.append(
            FileBuffer(
                name=exe_script_name,
                contents=script_generator(
                    exe_script_template(),
                    #remove_unreplaced = False,
                    # ,
                    COMMAND=repr(commandline),
                    OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(
                        job, '    ')),
                executable=True))

        contents = script_generator(
            exe_script_template(),
            COMMAND=repr(commandline),
            OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(
                job, '    '))

        #logger.info("Script is: %s" % str(contents))

        from os.path import abspath, expanduser

        for this_file in job.inputfiles:
            if isinstance(this_file, LocalFile):
                for name in this_file.getFilenameList():
                    inputsandbox.append(File(abspath(expanduser(name))))
            elif isinstance(this_file, DiracFile):
                name = this_file.lfn
                if isinstance(input_data, list):
                    input_data.append(name)
                else:
                    input_data = [name]

        dirac_outputfiles = dirac_outputfile_jdl(outputfiles,
                                                 config['RequireDefaultSE'])

        # NOTE special case for replicas: replicate string must be empty for no
        # replication
        dirac_script = script_generator(
            diracAPI_script_template(),
            DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
            DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
            DIRAC_OBJECT='Dirac()',
            JOB_OBJECT='Job()',
            NAME=mangle_job_name(app),
            # os.path.basename(exe_script_path),
            EXE=exe_script_name,
            # ' '.join([str(arg) for arg in app.args]),
            EXE_ARG_STR='',
            EXE_LOG_FILE='Ganga_Executable.log',
            ENVIRONMENT=None,  # app.env,
            INPUTDATA=input_data,
            PARAMETRIC_INPUTDATA=parametricinput_data,
            OUTPUT_SANDBOX=API_nullifier(outputsandbox),
            OUTPUTFILESSCRIPT=dirac_outputfiles,
            OUTPUT_PATH="",  # job.fqid,
            SETTINGS=diracAPI_script_settings(app),
            DIRAC_OPTS=job.backend.diracOpts,
            REPLICATE='True' if config['ReplicateOutputData'] else '',
            # leave the sandbox for altering later as needs
            # to be done in backend.submit to combine master.
            # Note only using 2 #s as auto-remove 3
            INPUT_SANDBOX='##INPUT_SANDBOX##')

        #logger.info("dirac_script: %s" % dirac_script)

        #logger.info("inbox: %s" % str(unique(inputsandbox)))
        #logger.info("outbox: %s" % str(unique(outputsandbox)))

        return StandardJobConfig(dirac_script,
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Example #21
File: ARC.py Project: henryiii/ganga
    def preparejob(self, jobconfig, master_job_sandbox):
        '''Prepare the JDL'''

        script = self.__jobWrapperTemplate__()

        job = self.getJobObject()
        inpw = job.getInputWorkspace()

        wrapperlog = '__jobscript__.log'

        import Ganga.Core.Sandbox as Sandbox

        # FIXME: check what happens if 'stdout','stderr' are specified here
        script = script.replace('###OUTPUTSANDBOX###',
                                repr(jobconfig.outputbox))

        script = script.replace('###APPLICATION_NAME###',
                                getName(job.application))
        script = script.replace('###APPLICATIONEXEC###',
                                repr(jobconfig.getExeString()))
        script = script.replace('###APPLICATIONARGS###',
                                repr(jobconfig.getArguments()))

        from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles

        script = script.replace('###OUTPUTUPLOADSPOSTPROCESSING###',
                                getWNCodeForOutputPostprocessing(job, '    '))

        script = script.replace('###DOWNLOADINPUTFILES###',
                                getWNCodeForDownloadingInputFiles(job, '    '))

        if jobconfig.env:
            script = script.replace('###APPLICATIONENVS###',
                                    repr(jobconfig.env))
        else:
            script = script.replace('###APPLICATIONENVS###', repr({}))

        script = script.replace('###WRAPPERLOG###', repr(wrapperlog))
        import inspect
        script = script.replace('###INLINEMODULES###',
                                inspect.getsource(Sandbox.WNSandbox))

        mon = job.getMonitoringService()

        self.monInfo = None

        # set the monitoring file by default to the stdout
        if isinstance(self.monInfo, dict):
            self.monInfo['remotefile'] = 'stdout'

        # try to print out the monitoring service information in debug mode
        try:
            logger.debug('job info of monitoring service: %s' %
                         str(self.monInfo))
        except:
            pass

#       prepare input/output sandboxes
        import Ganga.Utility.files
        from Ganga.GPIDev.Lib.File import File
        from Ganga.Core.Sandbox.WNSandbox import PYTHON_DIR
        import inspect

        fileutils = File(inspect.getsourcefile(Ganga.Utility.files),
                         subdir=PYTHON_DIR)
        packed_files = jobconfig.getSandboxFiles() + [fileutils]
        sandbox_files = job.createPackedInputSandbox(packed_files)

        # sandbox of child jobs should include master's sandbox
        sandbox_files.extend(master_job_sandbox)

        # check the input file size and pre-upload larger inputs to the iocache
        lfc_host = ''

        input_sandbox_uris = []
        input_sandbox_names = []

        ick = True

        max_prestaged_fsize = 0
        for f in sandbox_files:

            idx = self.__check_and_prestage_inputfile__(f)

            if not idx:
                logger.error('input sandbox preparation failed: %s' % f)
                ick = False
                break
            else:

                if idx['lfc_host']:
                    lfc_host = idx['lfc_host']

                if idx['remote']:
                    abspath = os.path.abspath(f)
                    fsize = os.path.getsize(abspath)

                    if fsize > max_prestaged_fsize:
                        max_prestaged_fsize = fsize

                    input_sandbox_uris.append(
                        idx['remote'][os.path.basename(f)])

                    input_sandbox_names.append(os.path.basename(
                        urlparse(f)[2]))

                if idx['local']:
                    input_sandbox_uris += idx['local']
                    input_sandbox_names.append(os.path.basename(f))

        if not ick:
            logger.error('stop job submission')
            return None

        # determine the lcg-cp timeout according to the max_prestaged_fsize
        # - using the assumption of 1 MB/sec.
        transfer_timeout = config['SandboxTransferTimeout']
        predict_timeout = int(math.ceil(max_prestaged_fsize / 1000000.0))

        if predict_timeout > transfer_timeout:
            transfer_timeout = predict_timeout

        if transfer_timeout < 60:
            transfer_timeout = 60

        script = script.replace('###TRANSFERTIMEOUT###',
                                '%d' % transfer_timeout)

        # update the job wrapper with the inputsandbox list
        script = script.replace(
            '###INPUTSANDBOX###',
            repr({
                'remote': {},
                'local': input_sandbox_names
            }))

        # write out the job wrapper and put job wrapper into job's inputsandbox
        scriptPath = inpw.writefile(FileBuffer(
            '__jobscript_%s__' % job.getFQID('.'), script),
                                    executable=1)
        input_sandbox = input_sandbox_uris + [scriptPath]

        for isb in input_sandbox:
            logger.debug('ISB URI: %s' % isb)

        # compose output sandbox to include by default the following files:
        # - gzipped stdout (transferred only when the JobLogHandler is WMS)
        # - gzipped stderr (transferred only when the JobLogHandler is WMS)
        # - __jobscript__.log (job wrapper's log)
        output_sandbox = [wrapperlog]

        from Ganga.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatterns
        for outputSandboxPattern in getOutputSandboxPatterns(job):
            output_sandbox.append(outputSandboxPattern)

        if config['JobLogHandler'] in ['WMS']:
            output_sandbox += ['stdout.gz', 'stderr.gz']

        if len(jobconfig.outputbox):
            output_sandbox += [Sandbox.OUTPUT_TARBALL_NAME]

        # compose ARC XRSL
        xrsl = {
            #'VirtualOrganisation' : config['VirtualOrganisation'],
            'executable': os.path.basename(scriptPath),
            'environment': {
                'GANGA_LCG_VO': config['VirtualOrganisation'],
                'GANGA_LOG_HANDLER': config['JobLogHandler'],
                'LFC_HOST': lfc_host
            },
            #'stdout'                : 'stdout',
            #'stderr'                : 'stderr',
            'inputFiles': input_sandbox,
            'outputFiles': output_sandbox,
            #'OutputSandboxBaseDestURI': 'gsiftp://localhost'
        }

        xrsl['environment'].update({'GANGA_LCG_CE': self.CE})
        #xrsl['Requirements'] = self.requirements.merge(jobconfig.requirements).convert()

        # if self.jobtype.upper() in ['NORMAL','MPICH']:
        #xrsl['JobType'] = self.jobtype.upper()
        # if self.jobtype.upper() == 'MPICH':
        #xrsl['Requirements'].append('(other.GlueCEInfoTotalCPUs >= NodeNumber)')
        # xrsl['Requirements'].append('Member("MPICH",other.GlueHostApplicationSoftwareRunTimeEnvironment)')
        #xrsl['NodeNumber'] = self.requirements.nodenumber
        # else:
        #    logger.warning('JobType "%s" not supported' % self.jobtype)
        #    return

        #       additional settings from the job
        if jobconfig.env:
            xrsl['environment'].update(jobconfig.env)

        xrslText = Grid.expandxrsl(xrsl)

        # append any additional requirements from the requirements object
        xrslText += '\n'.join(self.requirements.other)

        logger.debug('subjob XRSL: %s' % xrslText)
        return inpw.writefile(FileBuffer('__xrslfile__', xrslText))
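
The lcg-cp timeout heuristic above, reduced to one function (the function name is mine): assume roughly 1 MB/s and never go below the configured timeout or 60 seconds:

import math

def sandbox_transfer_timeout(max_prestaged_fsize, configured_timeout):
    # ~1 MB/s assumption; clamp below by the config value and by 60 s.
    predicted = int(math.ceil(max_prestaged_fsize / 1000000.0))
    return max(configured_timeout, predicted, 60)

assert sandbox_transfer_timeout(0, 30) == 60
assert sandbox_transfer_timeout(500 * 1000000, 300) == 500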
Example #22
    def configure(self, masterappconfig):

        self.ana_useropt = convertIntToStringArgs(self.ana_useropt)
        args = []
        args.append('$RECONUTILSROOT/macros/grtf_VFT/make_ana.py')

        job = self.getJobObject()

        if self.cmtsetup == None:
            raise ApplicationConfigurationError('No cmt setup script given.')

        if not self.tree == None:
            args.append('-t')
            args.append(self.tree)

        if not self.ana_custom == None:
            args.append('-c')
            args.append(self.ana_custom)

        if not self.ana_useropt == None:
            for UsrOpt in self.ana_useropt:
                args.append('-O')
                args.append(UsrOpt)

        if self.ana_output == None:
            raise ApplicationConfigurationError(
                'No output file given. Fill the ana_output variable.')
        else:
            args.append('-o')
            args.append(self.ana_output)

        # So get the list of filenames get_dataset_filenames() and create a file containing the list of files and put it in the sandbox
        if job.inputdata == None:
            raise ApplicationConfigurationError(
                'The inputdata variable is not defined.')
        fileList = job.inputdata.get_dataset_filenames()
        if len(fileList) < 1:
            raise ApplicationConfigurationError('No input data file given.')
        args.extend(fileList)

        if self.run_pdf:
            args.append('&&')
            args.append(
                '$ND280ANALYSISTOOLSROOT/macros/grtf/pdfgen/make_pdf.py')

            if 'ana_output' not in [
                    self.pdf_rdp, self.pdf_mcp, self.pdf_oldrdp,
                    self.pdf_oldmcp
            ]:
                raise ApplicationConfigurationError(
                    'None of the pdf inputs is set to use the make_ana.py output. Please set "pdf_rdp", "pdf_mcp", "pdf_oldrdp", or "pdf_oldmcp" to the value "ana_output"'
                )

            for key in ['pdf_rdp', 'pdf_mcp', 'pdf_oldrdp', 'pdf_oldmcp']:
                if getattr(self, key) == 'ana_output':
                    setattr(self, key, self.ana_output)

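            # Map each make_pdf.py command-line option to the application
            # attribute that supplies its value.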
            argDict = {
                '--custom': 'pdf_custom',
                '--title': 'pdf_title',
                '--rdp': 'pdf_rdp',
                '--mcp': 'pdf_mcp',
                '--oldrdp': 'pdf_oldrdp',
                '--oldmcp': 'pdf_oldmcp',
                '--rdptitle': 'pdf_rdptitle',
                '--mcptitle': 'pdf_mcptitle',
                '--oldrdptitle': 'pdf_oldrdptitle',
                '--oldmcptitle': 'pdf_oldmcptitle',
                '--out': 'pdf_output'
            }

            for key in argDict:
                if getattr(self, argDict[key]) is not None:
                    args.append(key + '=' + getattr(self, argDict[key]))

            for opt in self.pdf_options:
                for key in argDict:
                    if key in opt and getattr(self, argDict[key]) is not None:
                        raise ApplicationConfigurationError(
                            'The make_pdf.py command line argument %s was set through both the ganga application variable "%s" and pdf_options "%s". Use only one of them.'
                            % (key, argDict[key], opt))
                args.append(opt)

        # Create the bash script and put it in input dir.
        script = '#!/bin/bash\n'
        script += 'source ' + self.cmtsetup + '\n'
        script += ' '.join(args) + '\n'

        from Ganga.GPIDev.Lib.File import FileBuffer

        scriptname = 'make_ana.sh'
        job.getInputWorkspace().writefile(FileBuffer(scriptname, script),
                                          executable=1)

        self._scriptname = job.inputdir + scriptname

        return (None, None)
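
With hypothetical attribute values, the wrapper this configure() writes to make_ana.sh is just a short bash script built by string concatenation:

# Hypothetical values standing in for the Ganga application attributes.
cmtsetup = '/opt/nd280/setup.sh'
args = ['$RECONUTILSROOT/macros/grtf_VFT/make_ana.py',
        '-o', 'ana.root',                      # from ana_output
        '/data/run1.root', '/data/run2.root']  # from inputdata

script = '#!/bin/bash\n'
script += 'source ' + cmtsetup + '\n'
script += ' '.join(args) + '\n'
print(script)
# #!/bin/bash
# source /opt/nd280/setup.sh
# $RECONUTILSROOT/macros/grtf_VFT/make_ana.py -o ana.root /data/run1.root /data/run2.root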
Example #23
File: SAGA.py  Project: pseyfert/ganga
    def makesagajobdesc(self, job, jobconfig):

        ## We need a unique subdirectory per job to avoid input/output file clashes
        ## The easiest way to do this is with a UUID-style directory name
        wd_uuid = "ganga-job-"
        #if job.name == '':
        #    wd_uuid += "noname-"
        #else:
        #    wd_uuid +=  job.name + "-"

        import uuid
        wd_uuid += str(uuid.uuid4())

        job.backend.workdir_uuid = wd_uuid

        ## Now we need to create a wrapper script on the fly. The wrapper
        ## script will be transferred to the execution host and takes care
        ## of the archive unpacking as well as job monitoring / reporting.
        ws = SAGAWrapperScript()

        import inspect
        import Ganga.Core.Sandbox as Sandbox
        import Ganga.Utility as Utility
        ws.setInlineModules(inspect.getsource(Sandbox.WNSandbox))

        ws.setExecutable(jobconfig.getExeString())
        ws.setArguments(jobconfig.getArgStrings())
        ws.setOutputPatterns(jobconfig.outputbox)
        ws.setInputSandbox("_input_sandbox_" + str(job.id) + ".tgz")

        text = ws.getScript()

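        # Build an empty SAGA job description; its attributes are filled in
        # step by step below.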
        jd = saga.job.description()
        logger.debug("setting up new saga job with id: %s", job.id)

        # create jobscript in input sandbox
        from Ganga.GPIDev.Lib.File import FileBuffer
        jobscript = job.getInputWorkspace().writefile(FileBuffer(
            '__jobscript__', text),
                                                      executable=1)

        logger.debug("  * created new jobscript wrapper: %s", jobscript)

        # workdir
        if len(job.backend.workdir) != 0:
            jd.working_directory = job.backend.workdir
            logger.debug("  * backend.workdir -> saga.workdir: %s",
                         jd.working_directory)
        else:  #default to the remote filesystem path component
            jd.working_directory = saga.url(self.filesystem_url + "/" +
                                            self.workdir_uuid + "/").path
            logger.debug(
                "  * saga.workdir: %s (not given - extracted from 'filesystem_url')",
                jd.working_directory)

        # executable
        exe = jd.working_directory + '__jobscript__'
        jd.executable = exe  #jobconfig.getExeString()
        logger.debug("  * application.exe -> saga.executable: %s",
                     jd.executable)

        # arguments
        argList = jobconfig.getArgStrings()
        #for arg in job.application.args:
        #    argList.append( arg ) #"\\'%s\\'" % arg )
        if len(argList) != 0:
            jd.arguments = argList
            logger.debug("  * application.args -> saga.arguments: %s",
                         jd.arguments)

        # environment
        envList = []
        for k, v in job.application.env.items():
            envList.append(k + "=" + v)  #"\\'%s\\'" % arg )
        if len(envList) != 0:
            jd.environment = envList
            logger.debug("  * application.env -> saga.environment: %s",
                         jd.environment)

        # queue
        if len(job.backend.queue) != 0:
            jd.queue = job.backend.queue
            logger.debug("  * backend.queue -> saga.queue: %s", jd.queue)

        # allocation
        if len(job.backend.allocation) != 0:
            jd.job_project = [job.backend.allocation]
            logger.debug("  * backend.allocation -> saga.job_project: %s",
                         jd.job_project)

        # spmd_variation
        if len(job.backend.spmd_variation) != 0:
            jd.spmd_variation = job.backend.spmd_variation
            logger.debug(
                "  * backend.spmd_variation -> saga.spmd_variation: %s",
                jd.spmd_variation)

        # number_of_processes
        if len(job.backend.number_of_processes) != 0:
            jd.number_of_processes = job.backend.number_of_processes
            logger.debug(
                "  * backend.number_of_processes -> saga.number_of_processes: %s",
                jd.number_of_processes)

        ## We have to create special filenames for stdout/stderr redirection
        ## To avoid name clashes, we append a UUID to the filename.
        path_component = saga.url(self.filesystem_url + "/" +
                                  self.workdir_uuid + "/")

        try:
            d = saga.filesystem.directory(path_component,
                                          saga.filesystem.Create)
            logger.debug(
                "  * created output/working directory on the remote system: %s",
                path_component)

        except saga.exception as e:
            logger.error(
                'exception caught while creating output/working directory: %s',
                e.get_all_messages())
            self.getJobObject().updateStatus("failed")

        ## STDOUT
        self.saga_job_out = path_component.url + "/out.log"
        #jd.output =  saga.url(self.saga_job_out).path

        logger.debug("  * stdout should become available here: %s",
                     saga.url(self.saga_job_out).url)

        ## STDERR
        self.saga_job_err = path_component.url + "/err.log"
        #jd.error = saga.url(self.saga_job_err).path

        logger.debug("  * stderr should become available here: %s",
                     saga.url(self.saga_job_err).url)

        return jd
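
The description returned here is typically handed to a SAGA job service for submission. A minimal sketch follows; the service URL is a hypothetical placeholder and the factory names assume the classic SAGA Python bindings this backend is written against:

import saga

def submit_sketch(jd, service_url='gram://host.example.org/jobmanager-pbs'):
    # Create a job service for the target resource manager, instantiate
    # the job from the description, and run it.
    js = saga.job.service(saga.url(service_url))
    sj = js.create_job(jd)
    sj.run()
    return sj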
Example #24
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):

        logger.debug("Prepare")

        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig,
                                                      appmasterconfig,
                                                      jobmasterconfig)

        job = stripProxy(app).getJobObject()

        if job.inputdata:
            if not job.splitter:
                if len(job.inputdata) > 100:
                    raise BackendError(
                        "You're submitting a job to Dirac with no splitter and more than 100 files, please add a splitter and try again!"
                    )

        outputfiles = [
            this_file for this_file in job.outputfiles
            if isType(this_file, DiracFile)
        ]

        data_str = 'import os\n'
        data_str += 'execfile(\'data.py\')\n'

        if hasattr(job, '_splitter_data'):
            data_str += job._splitter_data
        inputsandbox.append(FileBuffer('data-wrapper.py', data_str))

        input_data = []

        # Can't wait to get rid of this when people no longer specify
        # inputdata in options file
        #######################################################################
        # splitters ensure that subjobs pick up inputdata from job over that in
        # optsfiles but need to take care of unsplit jobs
        if not job.master:
            share_path = os.path.join(get_share_path(app), 'inputdata',
                                      'options_data.pkl')

            if not job.inputdata:
                if os.path.exists(share_path):
                    f = open(share_path, 'r+b')
                    job.inputdata = pickle.load(f)
                    f.close()

        #######################################################################

        # Can't wait to get rid of this when people no longer specify
        # outputsandbox or outputdata in options file
        #######################################################################
        share_path = os.path.join(get_share_path(app), 'output',
                                  'options_parser.pkl')

        if os.path.exists(share_path):
            #        if not os.path.exists(share_path):
            # raise GangaException('could not find the parser')
            f = open(share_path, 'r+b')
            parser = pickle.load(f)
            f.close()

            outbox, outdata = parser.get_output(job)

            from Ganga.GPIDev.Lib.File import FileUtils
            from Ganga.GPIDev.Base.Filters import allComponentFilters

            fileTransform = allComponentFilters['gangafiles']
            outdata_files = [
                fileTransform(this_file, None) for this_file in outdata
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ]
            job.non_copyable_outputfiles.extend([
                output_file for output_file in outdata_files
                if not isType(output_file, DiracFile)
            ])
            outbox_files = [
                fileTransform(this_file, None) for this_file in outbox
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ]
            job.non_copyable_outputfiles.extend([
                outbox_file for outbox_file in outbox_files
                if not isType(outbox_file, DiracFile)
            ])

            outputsandbox = [
                f.namePattern for f in job.non_copyable_outputfiles
            ]

            outputsandbox.extend([
                f.namePattern for f in job.outputfiles
                if not isType(f, DiracFile)
            ])
            outputsandbox = unique(outputsandbox)  # + outbox[:]
        #######################################################################

        input_data_dirac, parametricinput_data = dirac_inputdata(
            job.application)

        if input_data_dirac is not None:
            for f in input_data_dirac:
                if isType(f, DiracFile):
                    input_data.append(f.lfn)
                elif isType(f, str):
                    input_data.append(f)
                else:
                    raise ApplicationConfigurationError(
                        "Don't know how to handle anything other than DiracFiles or LFN strings!"
                    )

        commandline = "python ./gaudipython-wrapper.py"
        if is_gaudi_child(app):
            commandline = 'gaudirun.py '
            commandline += ' '.join([str(arg) for arg in app.args])
            commandline += ' options.pkl data-wrapper.py'
        logger.debug('Command line: %s', commandline)

        gaudi_script_path = os.path.join(job.getInputWorkspace().getPath(),
                                         "gaudi-script.py")

        script_generator(
            gaudi_script_template(),
            #remove_unreplaced = False,
            outputfile_path=gaudi_script_path,
            PLATFORM=app.platform,
            COMMAND=commandline,
            XMLSUMMARYPARSING=getXMLSummaryScript()  # ,
            #OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, '    ')
        )

        #logger.debug( "input_data %s" % str( input_data ) )

        # We want to propagate the ancestor depth to DIRAC when we have
        # inputdata set
        if job.inputdata is not None and isType(job.inputdata, LHCbDataset):

            # As the RT Handler we already know we have a Dirac backend
            if type(job.backend.settings) is not dict:
                raise ApplicationConfigurationError(
                    'backend.settings should be a dict')

            if 'AncestorDepth' in job.backend.settings:
                ancestor_depth = job.backend.settings['AncestorDepth']
            else:
                ancestor_depth = job.inputdata.depth
        else:
            ancestor_depth = 0

        lhcbdirac_script_template = lhcbdiracAPI_script_template()

        lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

        # not necessary to use lhcbdiracAPI_script_template any more as doing our own uploads to Dirac
        # remove after Ganga6 release
        # NOTE special case for replicas: replicate string must be empty for no
        # replication
        dirac_script = script_generator(
            lhcbdirac_script_template,
            DIRAC_IMPORT=
            'from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
            DIRAC_JOB_IMPORT=
            'from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
            DIRAC_OBJECT='DiracLHCb()',
            JOB_OBJECT='LHCbJob()',
            NAME=mangle_job_name(app),
            APP_NAME=stripProxy(app).appname,
            APP_VERSION=app.version,
            APP_SCRIPT=gaudi_script_path,
            APP_LOG_FILE='Ganga_%s_%s.log' %
            (stripProxy(app).appname, app.version),
            INPUTDATA=input_data,
            PARAMETRIC_INPUTDATA=parametricinput_data,
            OUTPUT_SANDBOX=API_nullifier(outputsandbox),
            OUTPUTFILESSCRIPT=lhcb_dirac_outputfiles,
            # job.fqid,#outputdata_path,
            OUTPUT_PATH="",
            OUTPUT_SE=getConfig('DIRAC')['DiracOutputDataSE'],
            SETTINGS=diracAPI_script_settings(job.application),
            DIRAC_OPTS=job.backend.diracOpts,
            PLATFORM=app.platform,
            REPLICATE='True'
            if getConfig('DIRAC')['ReplicateOutputData'] else '',
            ANCESTOR_DEPTH=ancestor_depth,
            ## This is to be modified in the final 'submit' function in the backend
            ## The backend also handles the inputfiles DiracFiles as appropriate
            INPUT_SANDBOX='##INPUT_SANDBOX##')
        logger.debug("prepare: LHCbGaudiDiracRunTimeHandler")

        return StandardJobConfig(dirac_script,
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
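
The script_generator calls above work by substituting named placeholders in the template text. A minimal stand-in is sketched below, assuming ###NAME###-style markers (the '##INPUT_SANDBOX##' comment above suggests this marker style, but it remains an assumption; the real helper also writes the result to outputfile_path and can strip unreplaced markers):

def script_generator_sketch(template, **replacements):
    # Hypothetical stand-in: replace each ###KEY### marker with str(value).
    for key, value in replacements.items():
        template = template.replace('###%s###' % key, str(value))
    return template

print(script_generator_sketch('j = ###JOB_OBJECT###  # run on ###PLATFORM###',
                              JOB_OBJECT='LHCbJob()',
                              PLATFORM='x86_64-slc6-gcc48-opt'))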
Example #25
    def configure(self, masterappconfig):
        if self.cmtsetup is None:
            raise ApplicationConfigurationError('No cmt setup script given.')

        # __________ Reco first ____________
        reco_args = convertIntToStringArgs(self.reco_args)

        job = self.getJobObject()

        # Need to handle the possibility of multiple output files !
        # setup the output file
        for arg in reco_args:
            if arg == '-o':
                raise ApplicationConfigurationError(
                    'Option "-o" given in reco_args. You must use the filenamesubstr and reconewstr variables instead to define an output.'
                )

        # Get the list of input filenames from get_dataset_filenames() and
        # append them to the command line after the output definition.
        if job.inputdata is None:
            raise ApplicationConfigurationError(
                'The inputdata variable is not defined.')
        fileList = job.inputdata.get_dataset_filenames()
        if len(fileList) < 1:
            raise ApplicationConfigurationError('No input data file given.')

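        # Output names below are derived from the basename of the first
        # input file.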
        firstFile = fileList[0].split('/')[-1]
        # Define the output
        reco_args.append('-o')
        if self.filenamesubstr is None:
            reco_outputfile = 'recoOutput.root'
        else:
            reco_outputfile = firstFile.replace(self.filenamesubstr,
                                                self.reconewstr)

        reco_args.append(reco_outputfile)

        # Just to define the output before the potentially long list of input files
        reco_args.extend(fileList)

        # __________ Now oaAnalysis ____________
        anal_args = convertIntToStringArgs(self.anal_args)

        # Need to handle the possibility of multiple output files !
        # setup the output file
        for arg in anal_args:
            if arg == '-o':
                raise ApplicationConfigurationError(
                    'Option "-o" given in anal_args. You must use the filenamesubstr and reconewstr variables instead to define an output.'
                )

        # Define the output
        anal_args.append('-o')
        if self.filenamesubstr is None:
            anal_outputfile = 'analOutput.root'
        else:
            anal_outputfile = firstFile.replace(self.filenamesubstr,
                                                self.analnewstr)
        anal_args.append(anal_outputfile)

        # Now add the input file
        anal_args.append(reco_outputfile)

        reco_argsStr = ' '.join(reco_args)
        anal_argsStr = ' '.join(anal_args)
        # Create the bash script and put it in input dir.
        script = '#!/bin/bash\n'
        script += 'source ' + self.cmtsetup + '\n'
        script += 'RunOARecon.exe ' + reco_argsStr + '\n'
        script += 'RunOAAnalysis.exe ' + anal_argsStr + '\n'

        from Ganga.GPIDev.Lib.File import FileBuffer

        scriptname = 'oaReconPlusoaAnalysis.sh'
        job.getInputWorkspace().writefile(FileBuffer(scriptname, script),
                                          executable=1)

        self._scriptname = job.inputdir + scriptname

        return (None, None)
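
The filenamesubstr/reconewstr/analnewstr mechanism is a plain substring rewrite of the first input file's basename; note that str.replace substitutes every occurrence, not just the first. With hypothetical values:

firstFile = 'oa_nt_beam_00090210_reco.root'   # hypothetical input basename
filenamesubstr = 'reco'

reco_outputfile = firstFile.replace(filenamesubstr, 'newreco')
anal_outputfile = firstFile.replace(filenamesubstr, 'anal')
print(reco_outputfile)  # oa_nt_beam_00090210_newreco.root
print(anal_outputfile)  # oa_nt_beam_00090210_anal.root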
Example #26
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """
        This function prepares the application of the actual job being submitted, master or not
        Args:
            app (IApplication): This is the application actually being submitted belonging to the master or sub job being configured
            appsubconfig (tuple): This is used to prepare the inputsandbox according to the configuration for each subjob if it varies
            appmasterconfig (tuple): This is also used to prepare the inputsandbox but contains the config of the app for the master job
            jobmasterconfig (StandardJobConfig): This is the configuration of the master job which may or may not be the same job as owning the app
        """

        # Construct some common objects used in job submission here
        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
        input_data,   parametricinput_data = dirac_inputdata(app, hasOtherInputData=True)


        job = app.getJobObject()

        # Construct the im3shape-script which is used by this job. i.e. the script and full command line to be used in this job
        exe_script_name = 'im3shape-script.py'
        output_filename = os.path.basename(job.inputdata[0].lfn) + '.' + str(app.rank) + '.' + str(app.size)
        im3shape_args = ' '.join([ os.path.basename(job.inputdata[0].lfn), os.path.basename(app.ini_location.namePattern), # input.fz, config.ini
                                   app.catalog, output_filename, # catalog, output
                                   str(app.rank), str(app.size) ])

        full_cmd = app.exe_name + ' ' + im3shape_args

        outputfiles = [this_file for this_file in job.outputfiles if isinstance(this_file, DiracFile)]

        inputsandbox.append(FileBuffer( name=exe_script_name,
                                        contents=script_generator(Im3Shape_script_template(),
                                                                  ## ARGS for app from job.app
                                                                  RUN_DIR = app.run_dir,
                                                                  BLACKLIST = os.path.basename(app.blacklist.namePattern),
                                                                  COMMAND = full_cmd,
                                                                  ## Stuff for Ganga
                                                                  OUTPUTFILES = repr([this_file.namePattern for this_file in job.outputfiles]),
                                                                  OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, '    '),
                                                                 ),
                                        executable=True)
                            )

        # TODO once there is a common IApplication.getMeFilesForThisApp function, replace this list with a getter, as it shouldn't really be hard-coded
        app_file_list = [app.im3_location, app.ini_location, app.blacklist]

        app_file_list = [this_file for this_file in app_file_list if isinstance(this_file, DiracFile)]
        job.inputfiles.extend(app_file_list)

        # Slightly mis-using this here but it would be nice to have these files
        #job.inputfiles.extend(job.inputdata)

        # NOTE special case for replicas: replicate string must be empty for no
        # replication
        dirac_script = script_generator(diracAPI_script_template(),
                DIRAC_IMPORT = 'from DIRAC.Interfaces.API.Dirac import Dirac',
                DIRAC_JOB_IMPORT = 'from DIRAC.Interfaces.API.Job import Job',
                DIRAC_OBJECT = 'Dirac()',
                JOB_OBJECT = 'Job()',
                NAME = mangle_job_name(app),
                EXE = exe_script_name,
                EXE_ARG_STR = '',
                EXE_LOG_FILE = 'Ganga_Executable.log',
                ENVIRONMENT = None,
                INPUTDATA = input_data,
                PARAMETRIC_INPUTDATA = parametricinput_data,
                OUTPUT_SANDBOX = API_nullifier(outputsandbox),
                OUTPUTFILESSCRIPT = dirac_outputfile_jdl(outputfiles, False),
                OUTPUT_PATH = "",  # job.fqid,
                SETTINGS = diracAPI_script_settings(app),
                DIRAC_OPTS = job.backend.diracOpts,
                REPLICATE = 'True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
                # leave the sandbox for altering later as it needs
                # to be done in backend.submit to combine with the master.
                # Note: only two '#'s are used, as three would be auto-removed.
                INPUT_SANDBOX = '##INPUT_SANDBOX##'
                )


        return StandardJobConfig(dirac_script,
                inputbox=unique(inputsandbox),
                outputbox=unique(outputsandbox))
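
The unique() helper applied to the sandboxes is not shown in these examples; the sandbox lists only need order-preserving de-duplication, for which a minimal sketch (an assumption, not the actual GangaDirac implementation) would be:

def unique_sketch(seq):
    # Drop duplicates while preserving first-seen order.
    seen = set()
    return [x for x in seq if not (x in seen or seen.add(x))]

print(unique_sketch(['a.py', 'b.py', 'a.py']))  # ['a.py', 'b.py']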