Example #1
 def setUp(self):
     self.dd = {
         'application':
         ComponentItem(category='applications'),
         'backend':
         ComponentItem(category='backends'),
         'name':
         SimpleItem('', comparable=0),
         'workdir':
         SimpleItem(defvalue=None,
                    type='string',
                    transient=1,
                    protected=1,
                    comparable=0),
         'status':
         SimpleItem(defvalue='new', protected=1, comparable=0),
         'id':
         SimpleItem(defvalue=None,
                    typelist=[str],
                    protected=1,
                    comparable=0),
         'inputbox':
         FileItem(defvalue=[], sequence=1),
         'outputbox':
         FileItem(defvalue=[], sequence=1),
         'overriden_copyable':
         SimpleItem(defvalue=None, protected=1, copyable=1),
         'plain_copyable':
         SimpleItem(defvalue=None, copyable=0)
     }
     self.s = Schema(Version(1, 0), self.dd)
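
The snippet above assumes the schema building blocks are already imported. A minimal sketch of the surrounding test module, with an assumed import path and a hypothetical test-case name, might look like this:

import unittest

# assumed import path for the schema primitives used above
from Ganga.GPIDev.Schema import Schema, Version, SimpleItem, FileItem


class TestSchema(unittest.TestCase):  # hypothetical test-case name

    def setUp(self):
        # abbreviated form of the data dictionary shown above
        self.dd = {
            'status': SimpleItem(defvalue='new', protected=1, comparable=0),
            'inputbox': FileItem(defvalue=[], sequence=1),
        }
        self.s = Schema(Version(1, 0), self.dd)

    def test_datadict_keys(self):
        # the declared attributes should appear in the data dictionary
        self.assertIn('status', self.dd)
        self.assertIn('inputbox', self.dd)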
Example #2
class CustomMerger(IMerger):
    """User tool for writing custom merging tools with Python

    Allows a script to be supplied that performs the merge of some custom file type.
    The script must be a python file which defines the following function:

    def mergefiles(file_list, output_file):

        # perform the merge
        if not success:
            return False
        return True

    This module will be imported and used by the CustomMerger. The file_list is a
    list of paths to the files to be merged. output_file is a string path for
    the output of the merge. This file must exist by the end of the merge or the
    merge will fail. If the merge cannot proceed, the function should
    return False: any value other than True causes the merge to fail.

    This tool is intended for advanced Ganga usage only and should be used
    with care.

    """
    _category = 'postprocessor'
    _name = 'CustomMerger'
    _schema = IMerger._schema.inherit_copy()
    _schema.datadict['module'] = FileItem(
        defvalue=None, doc='Path to a python module to perform the merge.')

    def mergefiles(self, file_list, output_file):

        import copy
        import os
        if not os.path.exists(self.module.name):
            raise PostProcessException(
                "The module '%s' does not exist and so merging will fail."
                % self.module.name)
        result = False
        try:
            ns = {
                'file_list': copy.copy(file_list),
                'output_file': copy.copy(output_file)
            }
            # execute the user's module, then call its mergefiles() function
            exec(open(self.module.name).read(), ns)
            exec('_result = mergefiles(file_list, output_file)', ns)
            result = ns.get('_result', result)
        except Exception as e:
            raise PostProcessException(
                'There was a problem executing the custom merge: %s. Merge will fail.'
                % e)
        if result is not True:
            raise PostProcessException(
                'The custom merge did not return True, merge will fail.')
        return self.success
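
The module contract described in the docstring is easy to satisfy. Below is a minimal sketch of a user-supplied merge module (the file name and the concatenation strategy are hypothetical); the only hard requirement visible in the code above is that mergefiles() returns True on success:

# mymerger.py -- hypothetical module to assign to CustomMerger().module
def mergefiles(file_list, output_file):
    # naive merge: concatenate every input file into the output file,
    # which must exist by the time the function returns
    with open(output_file, 'w') as out:
        for path in file_list:
            with open(path) as src:
                out.write(src.read())
    return True  # anything other than True makes the merge fail

A job would then use it along the lines of cm = CustomMerger(); cm.module = 'mymerger.py' before adding cm to the job's postprocessors.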
Example #3
class CustomChecker(IChecker):
    """User tool for writing custom check with Python.
       Make a file, e.g customcheck.py,
       In that file, do something like:

       def check(j):
           if j has passed:
               return True
           else: 
               return False


       When the job is about to be completed, Ganga will call this function and fail the job if False is returned.

    """
    _category = 'postprocessor'
    _name = 'CustomChecker'
    _schema = IChecker._schema.inherit_copy()
    _schema.datadict['module'] = FileItem(
        defvalue=None, doc='Path to a python module to perform the check.')
    _exportmethods = ['check']

    def check(self, job):
        import os
        if not self.module:
            raise PostProcessException(
                "No module is specified and so the check will fail.")
        if (self.module.name is None) or not os.path.isfile(self.module.name):
            raise PostProcessException(
                "The module '%s' does not exist and so CustomChecker will do nothing!"
                % (self.module.name))

        result = None

        try:
            ns = {'job': job}
            # execute the user's module, then call its check() function
            exec(open(self.module.name).read(), ns)
            exec('_result = check(job)', ns)
            result = ns.get('_result', result)
        except Exception as e:
            raise PostProcessException(
                'There was a problem with executing the module: %s, CustomChecker will do nothing!'
                % e)
        if result is not True and result is not False:
            raise PostProcessException(
                'The custom check module did not return True or False, CustomChecker will do nothing!'
            )
        if result is not True:
            logger.info('The custom check module returned False for job(%s)',
                        job.fqid)
            return self.failure
        return self.success
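
A matching check module is equally small. The sketch below (hypothetical file name and pass criterion) returns the strict True/False that CustomChecker demands; j is the Ganga job object and outputdir its standard output directory:

# mycheck.py -- hypothetical module to assign to CustomChecker().module
import os


def check(j):
    # pass the job only if it produced a non-empty stdout file;
    # any criterion works as long as True or False is returned
    stdout_path = os.path.join(j.outputdir, 'stdout')
    return os.path.isfile(stdout_path) and os.path.getsize(stdout_path) > 0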
Example #4
class DSTMerger(AbstractMerger):
    """A merger object for  DST files

    The merger uses DaVinci to combine DST files that have
    been returned *locally* in a job's outputsandbox. As such
    it is mainly useful for microDST files.
    
    The usage is as with other merger objects. See the help for 
    TextMerger or RootMerger for more details.
    
    Example:
    
    dm = DSTMerger()
    dm.files = ['dv.dst']
    
    This object can be attached to a job object or 
    used to merge a list of jobs with its merge 
    method.
    
    It is possible to override the default opts file
    for performing the merge. A new opts file can
    be provided via the 'merge_opts' field. This should
    be done with care, as some opts are assumed when
    writing the files for output.
    
    """
    
    _category = 'mergers'
    _exportmethods = ['merge']
    _name = 'DSTMerger'
    _schema = AbstractMerger._schema.inherit_copy()
    docstr = 'Path to a options file to use when merging.'
    _schema.datadict['merge_opts'] = FileItem(defvalue=None, doc=docstr)
    docstr = 'The version of DaVinci to use when merging. (e.g. v19r14)'
    _schema.datadict['version'] = SimpleItem(defvalue='', doc=docstr)

    def __init__(self):
        super(DSTMerger,self).__init__(_DSTMergeTool())

    def merge(self, jobs, outputdir=None, ignorefailed=None, overwrite=None):
        self.merge_tool.merge_opts = self.merge_opts
        self.merge_tool.version = self.version
        
        logger.debug("zhangxm log: begin to register file!\n")        
       
        # do file registering 
        for sj in jobs:
            if sj.status=='completed':
               sj.application.register()
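
Putting the docstring together with the merge() override above, a hedged usage sketch (the job object j, the jobs slice, and the file paths are assumptions) could be:

dm = DSTMerger()
dm.files = ['dv.dst']
dm.version = 'v19r14'
dm.merge_opts = 'my_merge_opts.py'  # hypothetical custom opts file

# attach it to a job so the merge runs when the job completes ...
j.merger = dm
# ... or merge the output of a list of completed jobs into one directory
dm.merge(jobs, outputdir='/tmp/dst_merge')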
Example #5
class ITransform(GangaObject):
    _schema = Schema(Version(1, 0), {
        'status': SimpleItem(defvalue='new', protected=1, copyable=1, doc='Status - running, pause or completed', typelist=[str]),
        'name': SimpleItem(defvalue='Simple Transform', doc='Name of the transform (cosmetic)', typelist=[str]),
        'application': ComponentItem('applications', defvalue=None, optional=1, load_default=False, doc='Application of the Transform.'),
        'inputsandbox': FileItem(defvalue=[], sequence=1, doc="list of File objects shipped to the worker node "),
        'outputsandbox': SimpleItem(defvalue=[], typelist=[str], sequence=1, doc="list of filenames or patterns shipped from the worker node"),
        'backend': ComponentItem('backends', defvalue=None, optional=1, load_default=False, doc='Backend of the Transform.'),
        'splitter': ComponentItem('splitters', defvalue=None, optional=1, load_default=False, doc='Splitter used on each unit of the Transform.'),
        'postprocessors': ComponentItem('postprocessor', defvalue=None, doc='list of postprocessors to run after job has finished'),
        'merger': ComponentItem('mergers', defvalue=None, hidden=1, copyable=0, load_default=0, optional=1, doc='Merger to be done over all units when complete.'),
        'unit_merger': ComponentItem('mergers', defvalue=None, load_default=0, optional=1, doc='Merger to be copied and run on each unit separately.'),
        'copy_output': ComponentItem('datasets', defvalue=None, load_default=0, optional=1, doc='The dataset to copy all units output to, e.g. Grid dataset -> Local Dataset'),
        'unit_copy_output': ComponentItem('datasets', defvalue=None, load_default=0, optional=1, doc='The dataset to copy each individual unit output to, e.g. Grid dataset -> Local Dataset'),
        'run_limit': SimpleItem(defvalue=8, doc='Number of times a partition is tried to be processed.', protected=1, typelist=[int]),
        'minor_run_limit': SimpleItem(defvalue=3, doc='Number of times a unit can be resubmitted', protected=1, typelist=[int]),
        'major_run_limit': SimpleItem(defvalue=3, doc='Number of times a unit can be rebrokered', protected=1, typelist=[int]),
        'units': ComponentItem('units', defvalue=[], sequence=1, copyable=1, doc='list of units'),
        'inputdata': ComponentItem('datasets', defvalue=[], sequence=1, protected=1, optional=1, load_default=False, doc='Input datasets to run over'),
        'outputdata': ComponentItem('datasets', defvalue=None, optional=1, load_default=False, doc='Output dataset template'),
        'inputfiles': GangaFileItem(defvalue=[], sequence=1, doc="list of file objects that will act as input files for a job"),
        'outputfiles': GangaFileItem(defvalue=[], sequence=1, doc="list of OutputFile objects to be copied to all jobs"),
        'metadata': ComponentItem('metadata', defvalue=MetadataDict(), doc='the metadata', protected=1),
        'rebroker_on_job_fail': SimpleItem(defvalue=True, doc='Rebroker if too many minor resubs'),
        'abort_loop_on_submit': SimpleItem(defvalue=True, doc='Break out of the Task Loop after submissions'),
        'required_trfs': SimpleItem(defvalue=[], typelist=[int], sequence=1, doc="IDs of transforms that must complete before this unit will start. NOTE DOESN'T COPY OUTPUT DATA TO INPUT DATA. Use TaskChainInput Dataset for that."),
        'chain_delay': SimpleItem(defvalue=0, doc='Minutes delay between a required/chained unit completing and starting this one', protected=0, typelist=[int]),
        'submit_with_threads': SimpleItem(defvalue=False, doc='Use Ganga Threads for submission'),
        'max_active_threads': SimpleItem(defvalue=10, doc='Maximum number of Ganga Threads to use. Note that the number of simultaneous threads is controlled by the queue system (default is 5)'),
        'info': SimpleItem(defvalue=[], typelist=[str], protected=1, sequence=1, doc="Info showing status transitions and unit info"),
        'id': SimpleItem(defvalue=-1, protected=1, doc='ID of the Transform', typelist=[int]),
        #'force_single_unit' : SimpleItem(defvalue=False, doc='Force all input data into one Unit'),
    })

    _category = 'transforms'
    _name = 'ITransform'
    _exportmethods = ['addInputData', 'resetUnit', 'setRunLimit', 'getJobs', 'setMinorRunLimit',
                      'setMajorRunLimit', 'getID', 'overview', 'resetUnitsByStatus', 'removeUnusedJobs',
                      'showInfo', 'showUnitInfo', 'pause', 'n_all', 'n_status' ]
    _hidden = 0

    def showInfo(self):
        """Print out the info in a nice way"""
        print("\n".join( self.info ))

    def showUnitInfo(self, uid):
        """Print out the given unit info in a nice way"""
        self.units[uid].showInfo()

    def getJobs(self):
        """Return a list of the currently active job ids"""
        joblist = []
        for u in self.units:
            joblist += u.active_job_ids
        return joblist

    def setMinorRunLimit(self, newRL):
        """Set the number of times a job will be resubmitted before a major resubmit is attempted"""
        self.minor_run_limit = newRL

    def setMajorRunLimit(self, newRL):
        """Set the number of times a job will be rebrokered before the transform is paused"""
        self.major_run_limit = newRL

    def setRunLimit(self, newRL):
        """Set the total (minor+major) number of times a job should be resubmitted before the transform is paused"""
        self.run_limit = newRL

    def overview(self, status=''):
        """Show the status of the units in this transform"""
        for unit in self.units:
            # display colour given state
            o = ""
            o += ("%d:  " % self.units.index(unit)) + unit.name

            # is unit active?
            if unit.active:
                o += " " * (40 - len(o) + 3) + "*"
            else:
                o += " " * (40 - len(o) + 3) + "-"

            # sub job status
            o += "\t %i" % unit.n_status("submitted")
            o += "\t %i" % unit.n_status("running")
            o += "\t %i" % unit.n_status("completed")
            o += "\t %i" % unit.n_status("failed")
            o += "\t %i" % unit.minor_resub_count
            o += "\t %i" % unit.major_resub_count

            # change colour on state
            if unit.status == 'completed':
                o = markup(o, overview_colours["completed"])
            elif not unit.active:
                o = markup(o, overview_colours["bad"])
            elif unit.status == "recreating":
                o = markup(o, overview_colours["attempted"])
            elif len(unit.active_job_ids) == 0:
                o = markup(o, overview_colours["hold"])
            else:
                o = markup(o, overview_colours["running"])

            print(o)


# Special methods:
    def __init__(self):
        super(ITransform, self).__init__()
        self.initialize()

    def _auto__init__(self):
        self.status = 'new'

    def _readonly(self):
        """A transform is read-only if the status is not new."""
        if self.status == "new":
            return 0
        return 1

    def initialize(self):
        from Ganga.Lib.Localhost.Localhost import Localhost
        self.backend = Localhost()

    def check(self):
        """Check this transform has valid data, etc. and has the correct units"""

        # ignore anything but new transforms
        if self.status != "new":
            return

        # first, validate the transform
        if not self.validate():
            raise ApplicationConfigurationError(
                None, "Validate failed for Transform %s" % self.name)

        self.updateStatus("running")

    def startup(self):
        """This function is used to set the status after restarting Ganga"""
        pass

# Public methods
    def resetUnit(self, uid):
        """Reset the given unit"""
        addInfoString(self, "Resetting Unit %i" % uid)

        for u in self.units:
            if u.getID() == uid:
                u.reset()
                break

        # find any chained units and mark them for recreation
        for trf in self._getParent().transforms:
            for u2 in trf.units:
                for req in u2.req_units:
                    if req == "%d:%d" % (self.getID(), uid) or req == "%d:ALL" % (self.getID()):
                        trf.resetUnit(u2.getID())

        self.updateStatus("running")

    def getID(self):
        """Return the index of this trf in the parent task"""

        # if the id isn't already set, use the index from the parent Task
        if self.id < 0:
            task = self._getParent()
            if not task:
                raise ApplicationConfigurationError(
                    None, "This transform has not been associated with a task and so there is no ID available")
            self.id = task.transforms.index(self)
        
        return self.id

    def run(self, check=True):
        """Sets this transform to running status"""
        if self.status == "new" and check:
            self.check()
        if self.status != "completed":
            self.updateStatus("running")
            task = self._getParent()
            if task:
                task.updateStatus()
        else:
            logger.warning("Transform is already completed!")

    def update(self):
        """Called by the parent task to check for status updates, submit jobs, etc."""
        if self.status == "pause" or self.status == "new":
            return 0

        # check for complete required units
        task = self._getParent()
        for trf_id in self.required_trfs:
            if task.transforms[trf_id].status != "completed":
                return 0

        # set the start time if not already set
        if len(self.required_trfs) > 0 and self.units[0].start_time == 0:
            for unit in self.units:
                unit.start_time = time.time() + self.chain_delay * 60 - 1

        # report the info for this transform
        unit_status = { "new":0, "hold":0, "running":0, "completed":0, "bad":0, "recreating":0 }
        for unit in self.units:
            unit_status[unit.status] += 1
         
        info_str = "Unit overview: %i units, %i new, %i hold, %i running, %i completed, %i bad. to_sub %i" % (len(self.units), unit_status["new"], unit_status["hold"],
                                                                                                              unit_status["running"], unit_status["completed"],
                                                                                                              unit_status["bad"], self._getParent().n_tosub())
      
        addInfoString(self, info_str)
                
        # ask the unit splitter if we should create any more units given the
        # current data
        self.createUnits()

        # loop over units and update them ((re)submits will be called here)
        old_status = self.status
        unit_status_list = []

        # find submissions first
        unit_update_list = []
        for unit in self.units:

            if not unit.checkForSubmission() and not unit.checkForResubmission():
                unit_update_list.append(unit)
                continue

            if unit.update() and self.abort_loop_on_submit:
                logger.info("Unit %d of transform %d, Task %d has aborted the loop" % (
                    unit.getID(), self.getID(), task.id))
                return 1

            unit_status_list.append(unit.status)

        # now check for download
        for unit in unit_update_list:
            if unit.update() and self.abort_loop_on_submit:
                logger.info("Unit %d of transform %d, Task %d has aborted the loop" % (
                    unit.getID(), self.getID(), task.id))
                return 1

            unit_status_list.append(unit.status)

        from Ganga.GPIDev.Lib.Tasks.TaskChainInput import TaskChainInput
        # check for any TaskChainInput completions
        for ds in self.inputdata:
            if isType(ds, TaskChainInput) and ds.input_trf_id != -1:
                if task.transforms[ds.input_trf_id].status != "completed":
                    return 0

        # update status and check
        for state in ['running', 'hold', 'bad', 'completed']:
            if state in unit_status_list:
                if state == 'hold':
                    state = "running"
                if state != self.status:
                    self.updateStatus(state)
                break

    def createUnits(self):
        """Create new units if required given the inputdata"""

        from Ganga.GPIDev.Lib.Tasks.TaskChainInput import TaskChainInput
        # check for chaining
        for ds in self.inputdata:
            if isType(ds, TaskChainInput) and ds.input_trf_id != -1:

                # check for single unit
                if ds.single_unit:

                    # is there a unit already linked?
                    done = False
                    rec_unit = None
                    for out_unit in self.units:
                        if '%d:ALL' % (ds.input_trf_id) in out_unit.req_units:
                            done = True
                            # check if the unit is being recreated
                            if out_unit.status == "recreating":
                                rec_unit = out_unit
                            break

                    if not done or rec_unit:
                        new_unit = self.createChainUnit(
                            self._getParent().transforms[ds.input_trf_id].units, ds.use_copy_output)
                        if new_unit:
                            self.addChainUnitToTRF(
                                new_unit, ds, -1, prev_unit=rec_unit)

                else:

                    # loop over units in parent trf and create units as
                    # required
                    for in_unit in self._getParent().transforms[ds.input_trf_id].units:

                        # is there a unit already linked?
                        done = False
                        rec_unit = None
                        for out_unit in self.units:
                            if '%d:%d' % (ds.input_trf_id, in_unit.getID()) in out_unit.req_units:
                                done = True
                                # check if the unit is being recreated
                                if out_unit.status == "recreating":
                                    rec_unit = out_unit
                                break

                        if not done or rec_unit:
                            new_unit = self.createChainUnit(
                                [in_unit], ds.use_copy_output)
                            if new_unit:
                                self.addChainUnitToTRF(
                                    new_unit, ds, in_unit.getID(), prev_unit=rec_unit)

    def createChainUnit(self, parent_units, use_copy_output=True):
        """Create a chained unit given the parent outputdata"""
        return IUnit()

    def addChainUnitToTRF(self, unit, inDS, unit_id=-1, prev_unit=None):
        """Add a chained unit to this TRF. Override for more control"""
        if unit_id == -1:
            unit.req_units.append('%d:ALL' % (inDS.input_trf_id))
            unit.name = "Parent: TRF %d, All Units" % (inDS.input_trf_id)
        else:
            unit.req_units.append('%d:%d' % (inDS.input_trf_id, unit_id))
            unit.name = "Parent: TRF %d, Unit %d" % (
                inDS.input_trf_id, unit_id)

        self.addUnitToTRF(unit, prev_unit)

    def addInputData(self, inDS):
        """Add the given input dataset to the list"""
        self.inputdata.append(inDS)

    def pause(self):
        """Pause the task - the background thread will not submit new jobs from this task"""
        if self.status != "completed":
            self.updateStatus("pause")
            #self.status = "pause"
            task = self._getParent()
            if task:
                task.updateStatus()
        else:
            logger.debug("Transform is already completed!")

    def setRunlimit(self, newRL):
        """Set the number of times a job should be resubmitted before the transform is paused"""
        self.run_limit = newRL
        logger.debug("Runlimit set to %i", newRL)

# Methods that can/should be overridden by derived classes
    def validate(self):
        """Override this to validate that the transform is OK"""

        from Ganga.GPIDev.Lib.Tasks.TaskLocalCopy import TaskLocalCopy
        # make sure a path has been selected for any local downloads
        if self.unit_copy_output is not None and isType(self.unit_copy_output, TaskLocalCopy):
            if self.unit_copy_output.local_location == '':
                logger.error("No path selected for Local Output Copy")
                return False

        if self.copy_output is not None and isType(self.copy_output, TaskLocalCopy):
            if self.copy_output.local_location == '':
                logger.error("No path selected for Local Output Copy")
                return False

        # this is a generic trf so assume the application and splitter will do
        # all the work
        return True

    def addUnitToTRF(self, unit, prev_unit=None):
        """Add a unit to this Transform given the input and output data"""
        if not unit:
            raise ApplicationConfigurationError(None, "addUnitToTRF failed for Transform %d (%s): No unit specified" % (self.getID(), self.name))

        addInfoString(self, "Adding Unit to TRF...")
        unit.updateStatus("hold")
        unit.active = True
        if prev_unit:
            unit.prev_job_ids += prev_unit.prev_job_ids
            self.units[prev_unit.getID()] = unit
        else:
            self.units.append(unit)
            stripProxy(unit).id = len(self.units) - 1

# Information methods
    def fqn(self):
        task = self._getParent()
        if task:
            return "Task %i Transform %i" % (task.id, task.transforms.index(self))
        else:
            return "Unassigned Transform '%s'" % (self.name)

    def n_active(self):
        return sum([u.n_active() for u in self.units])

    def n_all(self):
        return sum([u.n_all() for u in self.units])

    def n_status(self, status):
        return sum([u.n_status(status) for u in self.units])

    def info(self):
        logger.info(markup("%s '%s'" % (getName(self), self.name), status_colours[self.status]))
        logger.info("* backend: %s" % getName(self.backend))
        logger.info("Application:")
        self.application.printTree()

    def updateStatus(self, status):
        """Update the transform status"""
        self.status = status

    def createUnitCopyOutputDS(self, unit_id):
        """Create the Copy Output dataset to use with this unit. Overload to handle more than the basics"""

        from Ganga.GPIDev.Lib.Tasks.TaskLocalCopy import TaskLocalCopy
        # bail out (with a warning) for anything the default implementation
        # can't handle; the code below only knows about TaskLocalCopy
        if not isType(self.unit_copy_output, TaskLocalCopy):
            logger.warning("Default implementation of createUnitCopyOutputDS can't handle datasets of type '%s'" % getName(self.unit_copy_output))
            return

        # create copies of the Copy Output DS and add Unit name to path
        self.units[unit_id].copy_output = self.unit_copy_output.clone()
        self.units[unit_id].copy_output.local_location = os.path.join(
            self.unit_copy_output.local_location, self.units[unit_id].name.replace(":", "_").replace(" ", "").replace(",", "_"))

    def __setattr__(self, attr, value):

        if attr == 'outputfiles':

            if value != []:
                if self.outputdata is not None:
                    logger.error(
                        'ITransform.outputdata is set, you can\'t set ITransform.outputfiles')
                    return
                elif self.outputsandbox != []:
                    logger.error(
                        'ITransform.outputsandbox is set, you can\'t set ITransform.outputfiles')
                    return

            # reduce duplicate values here, leave only duplicates for LCG,
            # where we can have replicas
            uniqueValuesDict = []
            uniqueValues = []

            for val in value:
                key = '%s%s' % (getName(val), val.namePattern)
                if key not in uniqueValuesDict:
                    uniqueValuesDict.append(key)
                    uniqueValues.append(val)
                elif getName(val) == 'LCGSEFile':
                    uniqueValues.append(val)

            super(ITransform, self).__setattr__(attr, uniqueValues)

        elif attr == 'inputfiles':

            if value != []:
                if self.inputsandbox != []:
                    logger.error(
                        'ITransform.inputsandbox is set, you can\'t set ITransform.inputfiles')
                    return

            super(ITransform, self).__setattr__(attr, value)

        elif attr == 'outputsandbox':

            if value != []:

                if getConfig('Output')['ForbidLegacyOutput']:
                    logger.error(
                        'Use of ITransform.outputsandbox is forbidden, please use ITransform.outputfiles')
                    return

                if self.outputfiles != []:
                    logger.error(
                        'ITransform.outputfiles is set, you can\'t set ITransform.outputsandbox')
                    return

            super(ITransform, self).__setattr__(attr, value)

        elif attr == 'inputsandbox':

            if value != []:

                if getConfig('Output')['ForbidLegacyInput']:
                    logger.error(
                        'Use of ITransform.inputsandbox is forbidden, please use ITransform.inputfiles')
                    return

                if self.inputfiles != []:
                    logger.error(
                        'ITransform.inputfiles is set, you can\'t set ITransform.inputsandbox')
                    return

            super(ITransform, self).__setattr__(attr, value)

        elif attr == 'outputdata':

            if value is not None:

                if getConfig('Output')['ForbidLegacyOutput']:
                    logger.error(
                        'Use of ITransform.outputdata is forbidden, please use ITransform.outputfiles')
                    return

                if self.outputfiles != []:
                    logger.error(
                        'ITransform.outputfiles is set, you can\'t set ITransform.outputdata')
                    return
            super(ITransform, self).__setattr__(attr, value)

        else:
            super(ITransform, self).__setattr__(attr, value)

    def resetUnitsByStatus(self, status='bad'):
        """Reset all units of a given status"""
        for unit in self.units:
            if unit.status == status:
                logger.info("Resetting Unit %d, Transform %d..." %
                            (unit.getID(), self.getID()))
                self.resetUnit(unit.getID())

    def checkUnitsAreCompleted(self, parent_units):
        """Check the given parent units are complete"""
        for parent in parent_units:
            if len(parent.active_job_ids) == 0 or parent.status != "completed":
                return False

        return True

    def getChainInclExclMasks(self, parent_units):
        """return the include/exclude masks from the TaskChainInput"""
        incl_pat_list = []
        excl_pat_list = []
        from Ganga.GPIDev.Lib.Tasks.TaskChainInput import TaskChainInput
        for parent in parent_units:
            for inds in self.inputdata:
                if isType(inds, TaskChainInput) and inds.input_trf_id == parent._getParent().getID():
                    incl_pat_list += inds.include_file_mask
                    excl_pat_list += inds.exclude_file_mask

        return incl_pat_list, excl_pat_list

    def getParentUnitJobs(self, parent_units, include_subjobs=True):
        """Return the list of parent jobs"""
        job_list = []
        for parent in parent_units:
            job = getJobByID(parent.active_job_ids[0])
            if job.subjobs:
                job_list += job.subjobs
            else:
                job_list += [job]

        return job_list

    def removeUnusedJobs(self):
        """Remove all jobs that aren't being used, e.g. failed jobs"""
        for unit in self.units:
            for jid in unit.prev_job_ids:
                try:
                    logger.warning("Removing job '%d'..." % jid)
                    job = getJobByID(jid)
                    job.remove()
                except Exception as err:
                    logger.debug("removeUnused: %s" % str(err))
                    logger.error("Problem removing job '%d'" % jid)
Example #6
File: Root.py Project: wireshark10/ganga
class Root(IPrepareApp):
    """
    Root application -- running ROOT

    To run a job in ROOT you need to specify the CINT script to be
    executed. Additional files required at run time (shared libraries,
    source files, other scripts, Ntuples) should be placed in the
    inputsandbox of the job. Arguments can be passed onto the script using
    the 'args' field of the application.

    Defining a Simple Job:

    As an example the script analysis.C in the directory ~/abc might
    contain:

    void analysis(const char* type, int events) {
      std::cout << type << "  " << events << std::endl;
    }

    To define an LCG job on the Ganga command line with this script,
    running in ROOT version 6.04.02 with the arguments 'MinBias'
    and 10, you would do the following:

    r = Root()
    r.version = '6.04.02'
    r.script = '~/abc/analysis.C'
    r.args = ['MinBias', 10]

    j = Job(application=r, backend=LCG())

    Using Shared Libraries:

    If you have private shared libraries that should be loaded you need to
    include them in the inputsandbox. Files you want back as a result of
    running your job should be placed in your outputsandbox. 

    The shared library mechanism is particularly useful in order to create 
    a thin wrapper around code that uses precompiled libraries, or
    that has not been designed to work in the CINT environment.

    **For more detailed instructions, see the following Wiki page:**

    https://twiki.cern.ch/twiki/bin/view/ArdaGrid/HowToRootJobsSharedObject

    A summary of this page is given below:

    Consider the following CINT script, runMain.C, that makes use of a
    ROOT-compatible shared library:

    void runMain(){
      //set up main, eg command line opts
      char* argv[] = {"runMain.C","--muons","100"};
      int argc = 3;

      //load the shared library
      gSystem->Load("libMain");

      //run the code
      Main m(argv,argc);
      int returnCode = m.run();
    }

    The class Main is as follows and has been compiled into a shared
    library, libMain.so. 

    Main.h:

    #ifndef MAIN_H
    #define MAIN_H
    #include "TObject.h"

    class Main : public TObject {

        public:
          Main(){}//needed by Root IO
          Main(char* argv[], int argc);
          int run();

          ClassDef(Main,1)//Needed for CINT
    };
    #endif

    Main.cpp:

    #include <iostream>
    using std::cout;
    using std::endl;
    #include "Main.h"

    ClassImp(Main)//needed for CINT
    Main::Main(char* argv[], int argc){
      //do some setup, command line opts etc
    }

    int Main::run(){
      cout << "Running Main..." << endl;
      return 0;
    }

    To run this on LCG, a Job could be created as follows:

    r = Root()
    r.version = '5.12.00' #version must be on LCG external site
    r.script = 'runMain.C'

    j = Job(application=r,backend=LCG())
    j.inputsandbox = ['libMain.so']

    It is a requirement that your script contains a function with the same
    name as the script itself and that the shared library file is built to
    be binary compatible with the Grid environment (e.g. same architecture 
    and version of gcc). As shown above, the wrapper class must be made CINT 
    compatible. This restriction does not, however, apply to classes used by 
    the wrapper class. When running remote (e.g. LCG) jobs, the architecture
    used is 'slc3_ia32_gcc323' if the Root version is 5.16 or earlier and
    'slc4_ia32_gcc34' otherwise. This reflects the availability of builds
    on the SPI server:

    http://service-spi.web.cern.ch/service-spi/external/distribution/


    For backends that use a local installation of ROOT the location should
    be set correctly in the [Root] section of the configuration.

    Using Python and Root:

    The Root project provides bindings for Python, the language supported by 
    the Ganga command line interface. These bindings are referred to as PyRoot.
    A job is run using PyRoot if the script has the '.py' extension or the 
    usepython flag is set to True.

    There are many example PyRoot scripts available in the Root tutorials. 
    A short example is given below:

    gengaus.py:

    if __name__ == '__main__':
        from ROOT import gRandom

        output = open('gaus.txt', 'w')
        try:
            for i in range(100):
                print(gRandom.Gaus(), file=output)
        finally:
            output.close()

    The above script could be run in Ganga as follows:

    r = Root()
    r.version = '5.14.00'
    r.script = '~/gengaus.py'
    r.usepython = True #set automatically for '.py' scripts

    j = Job(application=r,backend=Local())
    j.outputsandbox = ['gaus.txt']
    j.submit()

    When running locally, the python interpreter used for running PyRoot jobs
    will default to the one being used in the current Ganga session.
    The Root binaries selected must be binary compatible with this version.

    The pythonhome variable in the [Root] section of .gangarc controls which
    interpreter will be used for PyRoot jobs.

    When using PyRoot on a remote backend, e.g. LCG, the python version that
    is used will depend on that used to build the Root version requested.

    """
    _schema = Schema(
        Version(1, 1), {
            'script':
            FileItem(
                defvalue=None,
                preparable=1,
                doc=
                'A File object specifying the script to execute when Root starts',
                checkset='_checkset_script'),
            'args':
            SimpleItem(
                defvalue=[],
                typelist=[str, int],
                sequence=1,
                doc=
                "List of arguments for the script. Accepted types are numerics and strings"
            ),
            'version':
            SimpleItem(defvalue='6.04.02', doc="The version of Root to run"),
            'usepython':
            SimpleItem(
                defvalue=False,
                doc=
                "Execute 'script' using Python. The PyRoot libraries are added to the PYTHONPATH."
            ),
            'is_prepared':
            SimpleItem(
                defvalue=None,
                strict_sequence=0,
                visitable=1,
                copyable=1,
                typelist=[None, bool],
                protected=1,
                hidden=0,
                comparable=1,
                doc=
                'Location of shared resources. Presence of this attribute implies the application has been prepared.'
            ),
            'hash':
            SimpleItem(
                defvalue=None,
                typelist=[None, str],
                hidden=1,
                doc=
                'MD5 hash of the string representation of applications preparable attributes'
            )
        })
    _category = 'applications'
    _name = 'Root'
    _exportmethods = ['prepare', 'unprepare']

    def __init__(self):
        super(Root, self).__init__()

        from Ganga.GPIDev.Lib.File import getSharedPath

        self.shared_path = getSharedPath()
        if self.script is None or self.script == File():
            self.script = getDefaultScript()

    def configure(self, masterappconfig):
        return (None, None)

    def unprepare(self, force=False):
        """
        Revert a Root() application back to its unprepared state.
        """
        logger.debug('Running unprepare in Root app')
        if self.is_prepared is not None:
            self.decrementShareCounter(self.is_prepared)
            self.is_prepared = None
        self.hash = None

    def prepare(self, force=False):
        """
        A method to place the Root application into a prepared state.
        """
        if (self.is_prepared is not None) and (force is not True):
            raise ApplicationPrepareError(
                '%s application has already been prepared. Use prepare(force=True) to prepare again.'
                % getName(self))
        self.is_prepared = ShareDir()
        logger.info('Created shared directory: %s' % (self.is_prepared.name))

        try:
            copy_worked = self.copyPreparables()
            if copy_worked == 0:
                logger.error(
                    'Failed during prepare() phase. Unpreparing application.')
                self.unprepare()
                return 0
            else:
                # add the newly created shared directory into the metadata
                # system if the app is associated with a persisted object
                self.checkPreparedHasParent(self)
                self.post_prepare()
                return 1
        except:
            self.unprepare()
            raise

    def _checkset_script(self, value):
        """Callback that sets usepython to True if the script name has a *.py or *.PY extension."""
        from os.path import splitext
        (_, ext) = splitext(str(value.name))
        # use pyroot if this is a python script
        if ext.lower() == '.py':
            logger.debug('Setting usepython to True')
            self.usepython = True
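
The prepare()/unprepare() pair above implements Ganga's usual prepared-application lifecycle. A short hedged GPI sketch (the paths are placeholders):

r = Root()
r.version = '6.04.02'
r.script = '~/abc/analysis.C'  # a .py script would flip usepython to True
r.prepare()             # creates a ShareDir and copies the preparables
r.prepare(force=True)   # re-prepare, discarding the previous state
r.unprepare()           # back to the unprepared state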
Example #7
class Gaudi(GaudiBase):

    _name = 'Gaudi'
    __doc__ = GaudiDocString(_name)
    _category = 'applications'
    _exportmethods = GaudiBase._exportmethods[:]
    _exportmethods += ['prepare', 'unprepare']
    _hidden = 1
    _schema = GaudiBase._schema.inherit_copy()

    docstr = 'The gaudirun.py cli args that will be passed at run-time'
    _schema.datadict['args'] = SimpleItem(defvalue=['-T'],
                                          sequence=1,
                                          strict_sequence=0,
                                          typelist=['str', 'type(None)'],
                                          doc=docstr)
    docstr = 'The name of the optionsfile. Import statements in the file ' \
             'will be expanded at submission time and a full copy made'
    _schema.datadict['optsfile'] = FileItem(preparable=1,
                                            sequence=1,
                                            strict_sequence=0,
                                            defvalue=[],
                                            doc=docstr)
    docstr = 'A python configurable string that will be appended to the '  \
             'end of the options file. Can be multiline by using a '  \
             'notation like \nHistogramPersistencySvc().OutputFile = '  \
             '\"myPlots.root"\\nEventSelector().PrintFreq = 100\n or by '  \
             'using triple quotes around a multiline string.'
    _schema.datadict['extraopts'] = SimpleItem(preparable=1,
                                               defvalue=None,
                                               typelist=['str', 'type(None)'],
                                               doc=docstr)

    _schema.version.major += 0
    _schema.version.minor += 0

    def _auto__init__(self):
        """bootstrap Gaudi applications. If called via a subclass
        set up some basic structure like version platform..."""
        self._init()

    def _parse_options(self):
        raise NotImplementedError

    def prepare(self, force=False):

        from Ganga.GPIDev.Lib.GangaList.GangaList import GangaList
        from Ganga.GPIDev.Lib.File.File import File
        if isType(self.optsfile, (list, tuple, GangaList)):
            # coerce plain string entries into File objects; assign back
            # into the sequence, otherwise the conversion is lost
            for i, this_file in enumerate(self.optsfile):
                if type(this_file) is str:
                    self.optsfile[i] = File(this_file)

        elif type(self.optsfile) is str:
            self.optsfile = [File(self.optsfile)]

        try:
            super(Gaudi, self).prepare(force)
        except Exception as err:
            logger.debug("Super Prepare Error:\n%s" % str(err))
            raise err

        logger.debug("Prepare")

        _is_prepared = self.is_prepared

        #logger.info("_is_prepared: %s" % _is_prepared)

        share_dir = os.path.join(
            expandfilename(getConfig('Configuration')['gangadir']), 'shared',
            getConfig('Configuration')['user'], _is_prepared.name)

        # We will return a list of files 'send_to_share' which will be copied into the jobs
        # inputsandbox when prepare called from job object. NOTE that these files will not go
        # into an inputsandbox when prepare called on standalone app.
        # Things in the inputsandbox end up in the working dir at runtime.

        # The exception is just re-thrown here after setting is_prepared to
        # None; the setting could have been done in the actual functions, but
        # we didn't want the prepared state altered from the readInputData
        # pseudo-static member
        try:
            self._check_inputs()
        except Exception as err:
            logger.debug("_check_inputs Error:\n%s" % str(err))
            self.unprepare()
            raise err
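
The coercion at the top of prepare() means optsfile accepts plain strings as well as File objects. A hedged sketch of what that allows (DaVinci is one concrete Gaudi subclass; the option-file paths are hypothetical):

app = DaVinci()                   # concrete subclass of the hidden Gaudi base
app.optsfile = 'myopts.py'        # a single string becomes [File('myopts.py')]
app.optsfile = ['a.py', 'b.py']   # string entries in a list are converted in place
app.extraopts = 'EventSelector().PrintFreq = 100'
app.prepare()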
Example #8
class Transform(GangaObject):
    _schema = Schema(
        Version(1, 0), {
            'status':
            SimpleItem(defvalue='new',
                       protected=1,
                       copyable=0,
                       doc='Status - running, pause or completed',
                       typelist=["str"]),
            'name':
            SimpleItem(defvalue='Simple Transform',
                       doc='Name of the transform (cosmetic)',
                       typelist=["str"]),
            'application':
            ComponentItem(
                'applications',
                defvalue=None,
                optional=1,
                load_default=False,
                filter="checkTaskApplication",
                doc=
                'Application of the Transform. Must be a Task-Supporting application.'
            ),
            'inputsandbox':
            FileItem(defvalue=[],
                     typelist=['str', 'Ganga.GPIDev.Lib.File.File.File'],
                     sequence=1,
                     doc="list of File objects shipped to the worker node "),
            'outputsandbox':
            SimpleItem(
                defvalue=[],
                typelist=['str'],
                sequence=1,
                doc="list of filenames or patterns shipped from the worker node"
            ),
            'inputdata':
            ComponentItem('datasets',
                          defvalue=None,
                          optional=1,
                          load_default=False,
                          doc='Input dataset'),
            'outputdata':
            ComponentItem('datasets',
                          defvalue=None,
                          optional=1,
                          load_default=False,
                          doc='Output dataset'),
            'backend':
            ComponentItem('backends',
                          defvalue=None,
                          optional=1,
                          load_default=False,
                          doc='Backend of the Transform.'),
            'run_limit':
            SimpleItem(
                defvalue=4,
                doc='Number of times a partition is tried to be processed.',
                protected=1,
                typelist=["int"]),
            '_partition_status':
            SimpleItem(defvalue={},
                       hidden=1,
                       copyable=0,
                       doc='Map (only necessary) partitions to their status'),
            '_app_partition':
            SimpleItem(defvalue={},
                       hidden=1,
                       copyable=0,
                       doc='Map of applications to partitions'),
            '_app_status':
            SimpleItem(defvalue={},
                       hidden=1,
                       copyable=0,
                       doc='Map of applications to status'),
            '_next_app_id':
            SimpleItem(defvalue=0,
                       hidden=1,
                       copyable=0,
                       doc='Next ID used for the application',
                       typelist=["int"]),
        })

    _category = 'transforms'
    _name = 'Transform'
    _exportmethods = [
        'run',
        'pause',  # Operations
        'setPartitionStatus',
        'setRunlimit',
        'setFailed',  # Control Partitions
        'getPartitionStatus',
        'getJobs',
        'getPartitionJobs',
        # Info
        'overview',
        'info',
        'n_all',
        'n_status',
        'retryFailed'
    ]

    #   _app_status = {}
    _partition_apps = None

    # possible partition status values:
    # ignored, hold, ready, running, completed, attempted, failed, bad

    # Special methods:
    def __init__(self):
        super(Transform, self).__init__()
        self.initialize()

    def _readonly(self):
        """A transform is read-only if the status is not new."""
        if self.status == "new":
            return 0
        return 1

    def initialize(self):
        from Ganga import GPI
        self.backend = stripProxy(GPI.Local())

    def check(self):
        pass

    def startup(self):
        """This function is used to set the status after restarting Ganga"""
        # Make sure that no partitions are kept "running" from previous
        # sessions
        clist = self._partition_status.keys()
        for c in clist:
            self.updatePartitionStatus(c)
        # At this point the applications still need to notify the Transformation of their status
        # Search jobs for task-supporting applications
        id = "%i:%i" % (self._getParent().id,
                        self._getParent().transforms.index(self))
        for j in GPI.jobs:
            if "tasks_id" in stripProxy(j.application).getNodeData():
                # print "tasks_id of jobid ", j.fqid,
                # stripProxy(j.application).getNodeAttribute("tasks_id"), id
                if stripProxy(j.application).getNodeAttribute(
                        "tasks_id").endswith(id):
                    try:
                        if j.subjobs:
                            for sj in j.subjobs:
                                app = stripProxy(sj.application)
                                stripProxy(app.getTransform()).setAppStatus(
                                    app,
                                    app._getParent().status)
                        else:
                            app = stripProxy(j.application)
                            stripProxy(app.getTransform()).setAppStatus(
                                app,
                                app._getParent().status)
                    except AttributeError as e:
                        logger.error("%s", e)

    def getPartitionApps(self):
        if self._partition_apps is None:
            # Create the reverse map _partition_apps from _app_partition
            self._partition_apps = {}
            for (app, partition) in self._app_partition.items():
                if partition in self._partition_apps:
                    if not app in self._partition_apps[partition]:
                        self._partition_apps[partition].append(app)
                else:
                    self._partition_apps[partition] = [app]
        return self._partition_apps

    def fix(self):
        """This function fixes inconsistencies in application status"""
        # Create the reverse map _partition_apps from _app_partition
        self._app_status = {}
        # Make sure that no partitions are kept "running" from previous
        # sessions
        clist = self._partition_status.keys()
        for c in clist:
            self.updatePartitionStatus(c)
        # At this point the applications still need to notify the Transformation of their status
        # Search jobs for task-supporting applications

        id = "%i:%i" % (self._getParent().id,
                        self._getParent().transforms.index(self))
        for j in GPI.jobs:
            if "tasks_id" in stripProxy(j.application).getNodeData():
                if stripProxy(
                        j.application).getNodeAttribute("tasks_id") == id:
                    try:
                        if j.subjobs:
                            for sj in j.subjobs:
                                app = stripProxy(sj.application)
                                stripProxy(app.getTransform()).setAppStatus(
                                    app,
                                    app._getParent().status)
                        else:
                            app = stripProxy(j.application)
                            stripProxy(app.getTransform()).setAppStatus(
                                app,
                                app._getParent().status)
                    except AttributeError as e:
                        logger.error("%s", e)

# Public methods

    def run(self, check=True):
        """Sets this transform to running status"""
        if self.status == "new" and check:
            self.check()
        if self.status != "completed":
            self.updateStatus("running")
            #self.status = "running"
            # Check if this transform has completed in the meantime
            is_complete = True
            for s in self._partition_status.values():
                if s != "completed" and s != "bad":
                    is_complete = False
                    break
            if is_complete:
                self.updateStatus("completed")
                #self.status = "completed"
            task = self._getParent()
            if task:
                task.updateStatus()
        else:
            logger.warning("Transform is already completed!")

    def pause(self):
        """Pause the task - the background thread will not submit new jobs from this task"""
        if self.status != "completed":
            self.updateStatus("pause")
            #self.status = "pause"
            task = self._getParent()
            if task:
                task.updateStatus()
        else:
            logger.debug("Transform is already completed!")

    def setRunlimit(self, newRL):
        """Set the number of times a job should be resubmitted before the transform is paused"""
        self.run_limit = newRL
        cs = self._partition_status.items()
        for (c, s) in cs:
            if s in ["attempted", "failed"]:
                failures = self.getPartitionFailures(c)
                if failures >= newRL:
                    self._partition_status[c] = "failed"
                else:
                    self._partition_status[c] = "attempted"
        logger.debug("Runlimit set to %i", newRL)

    def setPartitionStatus(self, partition, status):
        """ Set the Status of the given partition to "ready", "hold", "bad" or "completed".
            The status is then updated to the status indicated by the applications"""
        self.setPartitionsStatus([partition], status)

    def getJobs(self):
        """ Get the job slice of all jobs for this transform """
        return self.getPartitionJobs(None)

    def getPartitionJobs(self, partition):
        """ Get the job slice that processed the given partition. Iterates over the job list. """
        task = self._getParent()
        id = task.transforms.index(self)
        if partition is None:
            sname = "tasks(%i).transforms[%i].getJobs()" % (task.id, id)
        else:
            sname = "tasks(%i).transforms[%i].getPartitionJobs(%s)" % (
                task.id, id, partition)
        jobslice = JobRegistrySlice(sname)

        def addjob(j):
            if partition is None or self._app_partition[
                    j.application.id] == partition:
                jobslice.objects[j.fqid] = stripProxy(j)

        for j in GPI.jobs:
            try:
                stid = j.application.tasks_id.split(":")
                if int(stid[-2]) == task.id and int(stid[-1]) == id:
                    if j.subjobs:
                        for sj in j.subjobs:
                            addjob(sj)
                    else:
                        addjob(j)
            except Exception as err:
                logger.debug("getPartitionJobs Exception:\n%s" % str(err))
                pass
        return JobRegistrySliceProxy(jobslice)

    def setFailed(self, partition):
        """ Tells Tasks that all Applications that have executed this partition have actually failed."""
        for aid in self._app_partition:
            if aid in self._app_status and self._app_status[aid] == "removed":
                continue
            # Save the status
            self._app_status[aid] = "failed"
            # Update the corresponding partition status
        self.setPartitionStatus(partition, "ready")

    def retryFailed(self):
        """Retry all failed partitions (forget about failed jobs)"""
        for aid in self._app_partition:
            if aid in self._app_status and self._app_status[aid] == "failed":
                self._app_status[aid] = "removed"
        clist = self._partition_status.keys()
        for c in clist:
            self.updatePartitionStatus(c)

# Internal methods

    def finalise(self):
        """Finalise the transform - no-op by default"""
        return

    def submitJobs(self, n):
        """Create Ganga Jobs for the next N partitions that are ready and submit them."""
        next = self.getNextPartitions(n)
        if len(next) == 0:
            return 0
        numjobs = 0
        for j in self.getJobsForPartitions(next):
            stripProxy(j.application).transition_update("submitting")
            try:
                j.submit()
            except JobError:
                logger.error(
                    "Error on job submission! The current transform will be paused until this problem is fixed."
                )
                logger.error(
                    "type tasks(%i).run() to continue after the problem has been fixed.",
                    self._getParent().id)
                self.pause()
            numjobs += 1
        return numjobs

    def checkTaskApplication(self, app):
        """Ensures the application is compatible with Tasks, wrapping it in a task application if necessary"""
        if app is None:
            return None
        if not "tasks_id" in stripProxy(app).getNodeData():
            return taskApp(app)
        return app

    def setAppStatus(self, app, new_status):
        """Reports status changes in application jobs
           possible status values: 
           normal   : (new, submitting,) submitted, running, completing, completed
           failures : killed, failed
           transient: incomplete (->new), unknown, removed"""

        # Check if we know the occurring application...
        if app.id == -1:
            return
        if not app.id in self._app_partition:
            logger.warning("%s was contacted by an unknown application %i.",
                           self.fqn(), app.id)
            return
        # Silently ignore message if the application is already removed or
        # completed
        if app.id in self._app_status and self._app_status[app.id] in [
                "removed", "completed", "failed"
        ]:
            return
        # Check the status
        if new_status == "completed" and not self.checkCompletedApp(app):
            logger.error("%s app %i failed despite being listed as completed!",
                         self.fqn(), app.id)
            new_status = "failed"
        # Save the status
        self._app_status[app.id] = new_status
        # Update the corresponding partition status
        self.updatePartitionStatus(self._app_partition[app.id])

    def setMasterJobStatus(self, job, new_status):
        """hook for a master job status update"""
        return

    def updatePartitionStatus(self, partition):
        """ Calculate the correct status of the given partition.
            "completed" and "bad" are never changed here;
            "hold" is only changed to "completed" here. """
        # print "updatePartitionStatus ", partition, " transform ", self.id
        # If the partition has status, and is not in a fixed state, check it!

        if partition in self._partition_status and (
                not self._partition_status[partition] in ["bad", "completed"]):
            # if we have no applications, we are in "ready" state
            if not partition in self.getPartitionApps():
                if self._partition_status[partition] != "hold":
                    self._partition_status[partition] = "ready"
            else:
                status = [
                    self._app_status[app]
                    for app in self.getPartitionApps()[partition]
                    if app in self._app_status
                    and not self._app_status[app] in ["removed", "killed"]
                ]
                # Check if we have completed this partition
                if "completed" in status:
                    self._partition_status[partition] = "completed"
                # Check if we are not on hold
                elif self._partition_status[partition] != "hold":
                    # Check if we are running
                    running = False
                    for stat in [
                            "completing", "running", "submitted", "submitting"
                    ]:
                        if stat in status:
                            self._partition_status[partition] = "running"
                            running = True
                            break
                    if not running:
                        # Check if we failed
                        #failures = len([stat for stat in status if stat in ["failed","new"]])
                        failures = self.getPartitionFailures(partition)

                        if failures >= self.run_limit:
                            self._partition_status[partition] = "failed"
                        elif failures > 0:
                            self._partition_status[partition] = "attempted"
                        else:
                            # Here we only have some "unknown" applications
                            # This could prove difficult when launching new applications. Care has to be taken
                            # to get the applications out of "unknown" states as quickly as possible, to avoid double submissions.
                            #logger.warning("Partition with only unknown applications encountered. This is probably not a problem.")
                            self._partition_status[partition] = "ready"
        # Notify the next transform (if any) of the change in input status
        self.notifyNextTransform(partition)

        # Update the Tasks status if necessary
        task = self._getParent()
        if partition in self._partition_status and self._partition_status[
                partition] in ["completed", "bad"
                               ] and self.status == "running":
            for s in self._partition_status.values():
                if s != "completed" and s != "bad":
                    return
            #self.status = "completed"
            self.updateStatus("completed")
            if task:
                task.updateStatus()
        elif self.status == "completed":
            for s in self._partition_status.values():
                if s != "completed" and s != "bad":
                    self.updateStatus("running")
                    #self.status = "running"
                    if task:
                        task.updateStatus()
                    return

    def notifyNextTransform(self, partition):
        """ Notify any dependent transforms of the input update """
        task = self._getParent()
        if task and (task.transforms.index(self) + 1 < len(task.transforms)):
            task.transforms[task.transforms.index(self) + 1].updateInputStatus(
                self, partition)

    def setPartitionsStatus(self, partitions, status):
        """ Set the Status of the partitions to "ready", "hold", "bad" or "completed".
            The status is then updated to the status indicated by the applications.
            "bad" and "completed" are never changed except to "ignored"; "hold" is only changed to "completed". """
        if status == "ignored":
            for c in partitions:
                self._partition_status.pop(c, None)
        elif status in ["ready", "hold", "bad", "completed"]:
            for c in partitions:
                self._partition_status[c] = status
        else:
            logger.error(
                "setPartitionsStatus called with invalid status string %s",
                status)
        for c in partitions:
            self.updatePartitionStatus(c)

    def setPartitionsLimit(self, limitpartition):
        """ Set all partitions from and including limitpartition to ignored """
        partitions = [c for c in self._partition_status if c >= limitpartition]
        self.setPartitionsStatus(partitions, "ignored")

    def getPartitionStatus(self, partition):
        """Return the status of the given partition, or "ignored" if it has none"""
        if partition in self._partition_status:
            return self._partition_status[partition]
        else:
            return "ignored"

    def getNextPartitions(self, n):
        """Returns the N next partitions to process"""
        partitionlist = sorted(c for c, v in self._partition_status.items()
                               if v in ["ready", "attempted"])
        return partitionlist[:n]

    def getNewAppID(self, partition):
        """ Returns a new application ID and associates this ID with the partition given. """
        id = self._next_app_id
        self._app_partition[id] = partition
        if partition in self.getPartitionApps():
            self.getPartitionApps()[partition].append(id)
        else:
            self.getPartitionApps()[partition] = [id]
        self._next_app_id += 1
        return id

    def createNewJob(self, partition):
        """ Returns a new job initialized with the transform's application, backend and name """
        task = self._getParent(
        )  # this works because createNewJob is only called by a task
        id = task.transforms.index(self)
        j = GPI.Job()
        stripProxy(j).backend = self.backend.clone()
        stripProxy(j).application = self.application.clone()
        stripProxy(j).application.tasks_id = "%i:%i" % (task.id, id)
        stripProxy(j).application.id = self.getNewAppID(partition)
        j.inputdata = self.inputdata
        j.outputdata = self.outputdata
        j.inputsandbox = self.inputsandbox
        j.outputsandbox = self.outputsandbox
        j.name = "T%i:%i C%i" % (task.id, id, partition)
        return j

# Methods that can/should be overridden by derived classes

    def checkCompletedApp(self, app):
        """Can be overridden to improve application completeness checking"""
        return True

    def updateInputStatus(self, ltf, partition):
        """Is called by the last transform (ltf) if the partition 'partition' changes status"""
        # by default no dependencies exist
        pass

    def getJobsForPartitions(self, partitions):
        """This is only an example; this method should be overridden by derived classes"""
        return [self.createNewJob(p) for p in partitions]

# Information methods

    def fqn(self):
        """Return a human-readable fully qualified name for this transform"""
        task = self._getParent()
        if task:
            return "Task %i Transform %i" % (task.id,
                                             task.transforms.index(self))
        else:
            return "Unassigned Transform '%s'" % (self.name)

    def n_all(self):
        """Return the total number of partitions"""
        return len(self._partition_status)

    def n_status(self, status):
        """Return the number of partitions with the given status"""
        return len(
            [cs for cs in self._partition_status.values() if cs == status])

    def overview(self):
        """ Get an ASCII-art overview of the task status. Can be overridden """
        task = self._getParent()
        if task is not None:
            id = str(task.transforms.index(self))
        else:
            id = "?"
        o = markup("#%s: %s '%s'\n" % (id, getName(self), self.name),
                   status_colours[self.status])
        i = 0
        partitions = sorted(self._partition_status.keys())
        for c in partitions:
            s = self._partition_status[c]
            if c in self.getPartitionApps():
                failures = self.getPartitionFailures(c)
                o += markup("%i:%i " % (c, failures), overview_colours[s])
            else:
                o += markup("%i " % c, overview_colours[s])
            i += 1
            if i % 20 == 0:
                o += "\n"
        logger.info(o)

    def info(self):
        logger.info(
            markup("%s '%s'" % (getName(self), self.name),
                   status_colours[self.status]))
        logger.info("* backend: %s" % getName(self.backend))
        logger.info("Application:")
        self.application.printTree()

    def getPartitionFailures(self, partition):
        """Return the number of failures for this partition"""
        return len([
            1 for app in self.getPartitionApps()[partition]
            if app in self._app_status
            and self._app_status[app] in ["new", "failed"]
        ])

    def updateStatus(self, status):
        """Update the transform status"""
        self.status = status
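
# A minimal usage sketch of the partition API above, assuming an interactive
# Ganga (GPI) session where a task containing this transform already exists;
# the task/transform indices and the variable names here are illustrative.
tf = tasks(0).transforms[0]
tf.setRunlimit(3)                    # mark partitions "failed" after 3 attempts
tf.setPartitionStatus(5, "bad")      # mask a partition known to be broken
tf.retryFailed()                     # forget failed apps, re-check partitions
tf.overview()                        # colour-coded per-partition status display
part2_jobs = tf.getPartitionJobs(2)  # job slice that processed partition 2
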
Example #9
File: IUnit.py  Project: slangrock/ganga
class IUnit(GangaObject):
    _schema = Schema(
        Version(1, 0), {
            'status':
            SimpleItem(defvalue='new',
                       protected=1,
                       copyable=0,
                       doc='Status - running, pause or completed',
                       typelist=["str"]),
            'name':
            SimpleItem(defvalue='Simple Unit',
                       doc='Name of the unit (cosmetic)',
                       typelist=["str"]),
            'application':
            ComponentItem('applications',
                          defvalue=None,
                          optional=1,
                          load_default=False,
                          doc='Application of the Transform.'),
            'inputdata':
            ComponentItem('datasets',
                          defvalue=None,
                          optional=1,
                          load_default=False,
                          doc='Input dataset'),
            'outputdata':
            ComponentItem('datasets',
                          defvalue=None,
                          optional=1,
                          load_default=False,
                          doc='Output dataset'),
            'active':
            SimpleItem(defvalue=False, hidden=1, doc='Is this unit active'),
            'active_job_ids':
            SimpleItem(defvalue=[],
                       typelist=['int'],
                       sequence=1,
                       hidden=1,
                       doc='Active job ids associated with this unit'),
            'prev_job_ids':
            SimpleItem(defvalue=[],
                       typelist=['int'],
                       sequence=1,
                       hidden=1,
                       doc='Previous job ids associated with this unit'),
            'minor_resub_count':
            SimpleItem(defvalue=0, hidden=1, doc='Number of minor resubmits'),
            'major_resub_count':
            SimpleItem(defvalue=0, hidden=1, doc='Number of major resubmits'),
            'req_units':
            SimpleItem(
                defvalue=[],
                typelist=['str'],
                sequence=1,
                hidden=1,
                doc=
                'List of units that must complete for this to start (format TRF_ID:UNIT_ID)'
            ),
            'start_time':
            SimpleItem(
                defvalue=0,
                hidden=1,
                doc='Start time for this unit. Allows a delay to be put in'),
            'copy_output':
            ComponentItem(
                'datasets',
                defvalue=None,
                load_default=0,
                optional=1,
                doc=
                'The dataset to copy the output of this unit to, e.g. Grid dataset -> Local Dataset'
            ),
            'merger':
            ComponentItem('mergers',
                          defvalue=None,
                          load_default=0,
                          optional=1,
                          doc='Merger to be run after this unit completes.'),
            'splitter':
            ComponentItem('splitters',
                          defvalue=None,
                          optional=1,
                          load_default=False,
                          doc='Splitter used on each unit of the Transform.'),
            'postprocessors':
            ComponentItem(
                'postprocessor',
                defvalue=None,
                doc='list of postprocessors to run after job has finished'),
            'inputsandbox':
            FileItem(defvalue=[],
                     typelist=['str', 'Ganga.GPIDev.Lib.File.File.File'],
                     sequence=1,
                     doc="list of File objects shipped to the worker node "),
            'inputfiles':
            GangaFileItem(
                defvalue=[],
                typelist=[
                    'str', 'Ganga.GPIDev.Adapters.IGangaFile.IGangaFile'
                ],
                sequence=1,
                doc=
                "list of file objects that will act as input files for a job"),
            'outputfiles':
            GangaFileItem(
                defvalue=[],
                typelist=[
                    'str', 'Ganga.GPIDev.Adapters.IGangaFile.IGangaFile'
                ],
                sequence=1,
                doc="list of OutputFile objects to be copied to all jobs"),
            'info':
            SimpleItem(defvalue=[],
                       typelist=['str'],
                       protected=1,
                       sequence=1,
                       doc="Info showing status transitions and unit info"),
            'id':
            SimpleItem(defvalue=-1,
                       protected=1,
                       doc='ID of the Unit',
                       typelist=["int"]),
        })

    _category = 'units'
    _name = 'IUnit'
    _exportmethods = []
    _hidden = 0

    # Special methods:
    def __init__(self):
        super(IUnit, self).__init__()
        self.updateStatus("new")

    def _readonly(self):
        """A unit is read-only if the status is not new."""
        if self.status == "new":
            return 0
        return 1

    def validate(self):
        """Validate that this unit is OK and set it to active"""
        self.active = True
        return True

    def getID(self):
        """Get the ID of this unit within the transform"""

        # if the id isn't already set, use the index from the parent Task
        if self.id < 0:
            trf = self._getParent()
            if not trf:
                raise ApplicationConfigurationError(
                    None,
                    "This unit has not been associated with a transform and so there is no ID available"
                )
            self.id = trf.units.index(self)

        return self.id

    def updateStatus(self, status):
        """Update status hook"""
        addInfoString(
            self, "Status change from '%s' to '%s'" % (self.status, status))
        self.status = status

    def createNewJob(self):
        """Create any jobs required for this unit"""
        pass

    def checkCompleted(self, job):
        """Check if this unit is complete"""
        if job.status == "completed":
            return True
        else:
            return False

    def checkForSubmission(self):
        """Check if this unit should submit a job"""

        # check the delay
        if time.time() < self.start_time:
            return False

        # check if we already have a job
        if len(self.active_job_ids) != 0:
            return False

        # if we're using threads, check the max number
        if self._getParent(
        ).submit_with_threads and GPI.queues.totalNumUserThreads(
        ) > self._getParent().max_active_threads:
            return False

        return True

    def checkForResubmission(self):
        """check if this unit should be resubmitted"""

        # check if we already have a job
        if len(self.active_job_ids) == 0:
            return False
        else:
            job = GPI.jobs(self.active_job_ids[0])
            if job.status in ["failed", "killed"]:
                return True

            return False

    def checkParentUnitsAreComplete(self):
        """Check to see if the parent units are complete"""
        req_ok = True
        task = self._getParent()._getParent()
        for req in self.req_units:
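            # each req is "TRF_ID:UNIT_ID", or contains "ALL" to require every
            # unit of the given transform to be complete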
            req_trf_id = int(req.split(":")[0])

            if req.find("ALL") == -1:
                req_unit_id = int(req.split(":")[1])
                if task.transforms[req_trf_id].units[
                        req_unit_id].status != "completed":
                    req_ok = False

            else:
                # need all units from this trf
                for u in task.transforms[req_trf_id].units:
                    if u.status != "completed":
                        req_ok = False

        return req_ok

    def checkMajorResubmit(self, job):
        """check if this job needs to be fully rebrokered or not"""
        pass

    def majorResubmit(self, job):
        """perform a major resubmit/rebroker"""
        self.prev_job_ids.append(job.id)
        self.active_job_ids.remove(job.id)

    def minorResubmit(self, job):
        """perform just a minor resubmit"""
        try:
            trf = self._getParent()
        except Exception as err:
            logger.debug("GetParent exception!\n%s" % str(err))
            trf = None
        if trf is not None and trf.submit_with_threads:
            addInfoString(self, "Attempting job re-submission with queues...")
            GPI.queues.add(job.resubmit)
        else:
            addInfoString(self, "Attempting job re-submission...")
            job.resubmit()

    def update(self):
        """Update the unit and (re)submit jobs as required"""
        #logger.warning("Entered Unit %d update function..." % self.getID())

        # if we're complete, then just return
        if self.status in ["completed", "recreating"] or not self.active:
            return 0

        # check if submission is needed
        task = self._getParent()._getParent()
        trf = self._getParent()
        maxsub = task.n_tosub()

        # check parent unit(s)
        req_ok = self.checkParentUnitsAreComplete()

        # set the start time if not already set
        if len(self.req_units) > 0 and req_ok and self.start_time == 0:
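            # chain_delay is given in minutes; convert to seconds from now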
            self.start_time = time.time() + trf.chain_delay * 60 - 1

        if req_ok and self.checkForSubmission() and maxsub > 0:

            # create job and submit
            addInfoString(self, "Creating Job...")
            j = self.createNewJob()
            if j.name == '':
                j.name = "T%i:%i U%i" % (task.id, trf.getID(), self.getID())

            try:
                if trf.submit_with_threads:
                    addInfoString(self,
                                  "Attempting job submission with queues...")
                    GPI.queues.add(j.submit)
                else:
                    addInfoString(self, "Attempting job submission...")
                    j.submit()

            except Exception as err:
                logger.debug("update Err: %s" % str(err))
                addInfoString(self, "Failed Job Submission")
                addInfoString(self, "Reason: %s" % (formatTraceback()))
                logger.error("Couldn't submit the job. Deactivating unit.")
                self.prev_job_ids.append(j.id)
                self.active = False
                trf._setDirty()  # ensure everything's saved
                return 1

            self.active_job_ids.append(j.id)
            self.updateStatus("running")
            trf._setDirty()  # ensure everything's saved

            if trf.submit_with_threads:
                return 0

            return 1

        # update any active jobs
        for jid in self.active_job_ids:

            # we have an active job so see if this job is OK and resubmit if
            # not
            try:
                job = GPI.jobs(jid)
            except Exception as err:
                logger.debug("Update2 Err: %s" % str(err))
                logger.warning(
                    "Cannot find job with id %d. Maybe reset this unit with: tasks(%d).transforms[%d].resetUnit(%d)"
                    % (jid, task.id, trf.getID(), self.getID()))
                continue

            if job.status == "completed":

                # check if actually completed
                if not self.checkCompleted(job):
                    return 0

                # check for DS copy
                if trf.unit_copy_output:
                    if not self.copy_output:
                        trf.createUnitCopyOutputDS(self.getID())

                    if not self.copyOutput():
                        return 0

                # check for merger
                if trf.unit_merger:
                    if not self.merger:
                        self.merger = trf.createUnitMerger(self.getID())

                    if not self.merge():
                        return 0

                # all good so mark unit as completed
                self.updateStatus("completed")

            elif job.status == "failed" or job.status == "killed":

                # check for too many resubs
                if self.minor_resub_count + self.major_resub_count > trf.run_limit - 1:
                    logger.error(
                        "Too many resubmits (%i). Deactivating unit." %
                        (self.minor_resub_count + self.major_resub_count))
                    addInfoString(
                        self, "Deactivating unit. Too many resubmits (%i)" %
                        (self.minor_resub_count + self.major_resub_count))
                    self.active = False
                    return 0

                rebroker = False

                if self.minor_resub_count > trf.minor_run_limit - 1:
                    if self._getParent().rebroker_on_job_fail:
                        rebroker = True
                    else:
                        logger.error(
                            "Too many minor resubmits (%i). Deactivating unit."
                            % self.minor_resub_count)
                        addInfoString(
                            self,
                            "Deactivating unit. Too many minor resubmits (%i)"
                            % self.minor_resub_count)
                        self.active = False
                        return 0

                if self.major_resub_count > trf.major_run_limit - 1:
                    logger.error(
                        "Too many major resubmits (%i). Deactivating unit." %
                        self.major_resub_count)
                    addInfoString(
                        self, "Deactivating unit. Too many resubmits (%i)" %
                        (self.minor_resub_count + self.major_resub_count))
                    self.active = False
                    return 0

                # check the type of resubmit
                if rebroker or self.checkMajorResubmit(job):

                    self.major_resub_count += 1
                    self.minor_resub_count = 0

                    try:
                        addInfoString(self, "Attempting major resubmit...")
                        self.majorResubmit(job)
                    except Exception as err:
                        logger.debug("Update Err3: %s" % str(err))
                        logger.error(
                            "Couldn't resubmit the job. Deactivating unit.")
                        addInfoString(self, "Failed Job resubmission")
                        addInfoString(self, "Reason: %s" % (formatTraceback()))
                        self.active = False

                    # break the loop now because we've probably changed the
                    # active jobs list
                    return 1
                else:
                    self.minor_resub_count += 1
                    try:
                        addInfoString(self, "Attempting minor resubmit...")
                        self.minorResubmit(job)
                    except Exception as err:
                        logger.debug("Update Err4: %s" % str(err))
                        logger.error(
                            "Couldn't resubmit the job. Deactivating unit.")
                        addInfoString(self, "Failed Job resubmission")
                        addInfoString(self, "Reason: %s" % (formatTraceback()))
                        self.active = False
                        return 1

    def reset(self):
        """Reset the unit completely"""
        addInfoString(self, "Resetting Unit...")
        self.minor_resub_count = 0
        self.major_resub_count = 0
        if len(self.active_job_ids) > 0:
            self.prev_job_ids += self.active_job_ids
        self.active_job_ids = []

        self.active = True

        # if has parents, set to recreate
        if len(self.req_units) > 0:
            self.updateStatus("recreating")
        else:
            self.updateStatus("running")

    # Info routines
    def n_active(self):
        """Return the number of active (submitted or running) jobs/subjobs in this unit"""

        if self.status == 'completed':
            return 0

        tot_active = 0
        active_states = ['submitted', 'running']

        for jid in self.active_job_ids:

            try:
                job = GPI.jobs(jid)
            except Exception as err:
                logger.debug("n_active Err: %s" % str(err))
                task = self._getParent()._getParent()
                trf = self._getParent()
                logger.warning(
                    "Cannot find job with id %d. Maybe reset this unit with: tasks(%d).transforms[%d].resetUnit(%d)"
                    % (jid, task.id, trf.getID(), self.getID()))
                continue

            j = stripProxy(job)

            # try to preserve lazy loading
            if hasattr(j, 'getNodeIndexCache') and j.getNodeIndexCache(
            ) and 'subjobs:status' in j.getNodeIndexCache():
                if len(j.getNodeIndexCache()['subjobs:status']) > 0:
                    for sj_stat in j.getNodeIndexCache()['subjobs:status']:
                        if sj_stat in active_states:
                            tot_active += 1
                else:
                    if j.getNodeIndexCache()['status'] in active_states:
                        tot_active += 1
            else:
                #logger.warning("WARNING: (active check) No index cache for job object %d" % jid)
                if j.status in active_states:
                    if j.subjobs:
                        for sj in j.subjobs:
                            if sj.status in active_states:
                                tot_active += 1
                    else:
                        tot_active += 1

        return tot_active

    def n_status(self, status):
        """Return the number of jobs/subjobs with the given status in this unit"""
        tot_active = 0
        for jid in self.active_job_ids:

            try:
                job = GPI.jobs(jid)
            except Exception as err:
                logger.debug("n_status Err: %s" % str(err))
                task = self._getParent()._getParent()
                trf = self._getParent()
                logger.warning(
                    "Cannot find job with id %d. Maybe reset this unit with: tasks(%d).transforms[%d].resetUnit(%d)"
                    % (jid, task.id, trf.getID(), self.getID()))
                continue

            j = stripProxy(job)

            # try to preserve lazy loading
            if hasattr(j, 'getNodeIndexCache') and j.getNodeIndexCache(
            ) and 'subjobs:status' in j.getNodeIndexCache():
                if len(j.getNodeIndexCache()['subjobs:status']) > 0:
                    for sj_stat in j.getNodeIndexCache()['subjobs:status']:
                        if sj_stat == status:
                            tot_active += 1
                else:
                    if j.getNodeIndexCache()['status'] == status:
                        tot_active += 1

            else:
                #logger.warning("WARNING: (status check) No index cache for job object %d" % jid)
                if j.subjobs:
                    for sj in j.subjobs:
                        if sj.status == status:
                            tot_active += 1
                else:
                    if j.status == status:
                        tot_active += 1

        return tot_active

    def n_all(self):
        """Return the total number of jobs/subjobs in this unit"""
        total = 0
        for jid in self.active_job_ids:

            try:
                job = GPI.jobs(jid)
            except Exception as err:
                logger.debug("n_all Err: %s" % str(err))
                task = self._getParent()._getParent()
                trf = self._getParent()
                logger.warning(
                    "Cannot find job with id %d. Maybe reset this unit with: tasks(%d).transforms[%d].resetUnit(%d)"
                    % (jid, task.id, trf.getID(), self.getID()))
                continue

            j = stripProxy(job)

            # try to preserve lazy loading
            if hasattr(j, 'getNodeIndexCache') and j.getNodeIndexCache(
            ) and 'subjobs:status' in j.getNodeIndexCache():
                if len(j.getNodeIndexCache()['subjobs:status']) != 0:
                    total += len(j.getNodeIndexCache()['subjobs:status'])
                else:
                    total += 1
            else:
                #logger.warning("WARNING: (status check) No index cache for job object %d" % jid)
                if j.subjobs:
                    total += len(j.subjobs)
                else:
                    total += 1

        return total

    def overview(self):
        """Print an overview of this unit"""
        o = "    Unit %d: %s        " % (self.getID(), self.name)

        for s in ["submitted", "running", "completed", "failed", "unknown"]:
            o += markup("%i   " % self.n_status(s), overview_colours[s])

        print(o)

    def copyOutput(self):
        """Copy any output to the given dataset"""
        logger.error(
            "No default implementation for Copy Output - contact plugin developers"
        )
        return False
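
# A minimal sketch of a concrete unit built on the IUnit base class above.
# Only the hooks that IUnit leaves empty are filled in; the class name and
# the job configuration are illustrative, not part of the real plugin set.
class SimpleUnit(IUnit):
    _category = 'units'
    _name = 'SimpleUnit'
    _schema = IUnit._schema.inherit_copy()

    def createNewJob(self):
        """Create a job from this unit's application and input data"""
        j = GPI.Job()
        if self.application:
            stripProxy(j).application = self.application.clone()
        if self.inputdata:
            j.inputdata = self.inputdata
        return j

    def checkMajorResubmit(self, job):
        """Never rebroker; always retry with a minor resubmit"""
        return False

    def copyOutput(self):
        """No output copying is needed for this sketch"""
        return True
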
Example #10
class GaudiPython(GaudiBase):
    """The GaudiPython Application handler

    The GaudiPython application handler is for running LHCb GaudiPython
    jobs. This means running scripts where you are in control of the events
    loop etc. If you are usually running jobs using the gaudirun script
    this is *not* the application handler you should use. Instead use the
    DaVinci, Gauss, ... handlers.

    For its configuration it needs to know what application and version to
    use for setting up the environment. More detailed configuration options
    are described in the schema below.

    An example of submitting a GaudiPython job to Dirac could be:

    app = GaudiPython(project='DaVinci', version='v19r14')

    # Give absolute path to the python file to be executed. 
    # If several files are given the subsequent ones will go into the
    # sandbox but it is the user's responsibility to include them
    app.script = ['/afs/...../myscript.py']

    # Define dataset
    ds = LHCbDataset(['LFN:spam','LFN:eggs'])

    # Construct and submit job object
    j=Job(application=app,backend=Dirac(),inputdata=ds)
    j.submit()

"""
    _name = 'GaudiPython'
    _category = 'applications'
    _exportmethods = GaudiBase._exportmethods[:]
    _exportmethods += ['prepare', 'unprepare']

    _schema = GaudiBase._schema.inherit_copy()
    docstr = 'The package the application belongs to (e.g. "Sim", "Phys")'
    _schema.datadict['package'] = SimpleItem(defvalue=None,
                                             typelist=['str', 'type(None)'],
                                             doc=docstr)
    docstr = 'The package where your top level requirements file is read '  \
             'from. Can be written either as a path '  \
             '\"Tutorial/Analysis/v6r0\" or in traditional notation '  \
             '\"Analysis v6r0 Tutorial\"'
    _schema.datadict['masterpackage'] = SimpleItem(
        defvalue=None, typelist=['str', 'type(None)'], doc=docstr)
    docstr = 'Extra options to be passed onto the SetupProject command '\
             'used for configuring the environment. As an example '\
             'setting it to \'--dev\' will give access to the DEV area. '\
             'For full documentation of the available options see '\
             'https://twiki.cern.ch/twiki/bin/view/LHCb/SetupProject'
    _schema.datadict['setupProjectOptions'] = SimpleItem(
        defvalue='', typelist=['str', 'type(None)'], doc=docstr)
    docstr = 'The name of the script to execute. A copy will be made ' + \
             'at submission time'
    _schema.datadict['script'] = FileItem(preparable=1,
                                          sequence=1,
                                          strict_sequence=0,
                                          defvalue=[],
                                          doc=docstr)
    docstr = "List of arguments for the script"
    _schema.datadict['args'] = SimpleItem(defvalue=[],
                                          typelist=['str'],
                                          sequence=1,
                                          doc=docstr)
    docstr = 'The name of the Gaudi application (e.g. "DaVinci", "Gauss"...)'
    _schema.datadict['project'] = SimpleItem(preparable=1,
                                             defvalue=None,
                                             typelist=['str', 'type(None)'],
                                             doc=docstr)
    _schema.version.major += 2
    _schema.version.minor += 0

    def _get_default_version(self, gaudi_app):
        return guess_version(self, gaudi_app)

    def _attribute_filter__set__(self, n, v):
        if n == 'project':
            self.appname = v
        return v

    def _auto__init__(self):
        if (not self.appname) and (not self.project):
            self.project = 'DaVinci'  # default
        if (not self.appname):
            self.appname = self.project
        self._init()

    def _getshell(self):
        import EnvironFunctions
        return EnvironFunctions._getshell(self)

    def prepare(self, force=False):
        super(GaudiPython, self).prepare(force)
        self._check_inputs()

        share_dir = os.path.join(
            expandfilename(getConfig('Configuration')['gangadir']), 'shared',
            getConfig('Configuration')['user'], self.is_prepared.name)

        fillPackedSandbox(
            self.script,
            os.path.join(share_dir, 'inputsandbox',
                         '_input_sandbox_%s.tar' % self.is_prepared.name))
        gzipFile(
            os.path.join(share_dir, 'inputsandbox',
                         '_input_sandbox_%s.tar' % self.is_prepared.name),
            os.path.join(share_dir, 'inputsandbox',
                         '_input_sandbox_%s.tgz' % self.is_prepared.name),
            True)
        # add the newly created shared directory into the metadata system if
        # the app is associated with a persisted object
        self.checkPreparedHasParent(self)
        self.post_prepare()

    def master_configure(self):
        return (None, StandardJobConfig())

    def configure(self, master_appconfig):
        # self._configure()
        name = join('.', self.script[0].subdir, split(self.script[0].name)[-1])
        script = "from Gaudi.Configuration import *\n"
        if self.args:
            script += 'import sys\nsys.argv += %s\n' % str(self.args)
        script += "importOptions('data.py')\n"
        script += "execfile(\'%s\')\n" % name

        # add summary.xml
        outputsandbox_temp = XMLPostProcessor._XMLJobFiles()
        outputsandbox_temp += unique(self.getJobObject().outputsandbox)
        outputsandbox = unique(outputsandbox_temp)

        input_files = []
        input_files += [FileBuffer('gaudipython-wrapper.py', script)]
        logger.debug("Returning Job Configuration")
        return (None,
                StandardJobConfig(inputbox=input_files,
                                  outputbox=outputsandbox))

    def _check_inputs(self):
        """Checks the validity of user's entries for GaudiPython schema"""
        if len(self.script) == 0:
            logger.warning("No script defined. Will use a default "
                           'script which is probably not what you want.')
            self.script = [
                File(
                    os.path.join(
                        os.path.dirname(inspect.getsourcefile(GaudiPython)),
                        'options/GaudiPythonExample.py'))
            ]
        else:
            for f in self.script:
                f.name = fullpath(f.name)

        return

    def postprocess(self):
        XMLPostProcessor.postprocess(self, logger)
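
# For illustration: the wrapper that configure() above ships in the input
# sandbox can be reproduced outside Ganga by mirroring its string building.
# The script path and arguments below are hypothetical stand-ins for
# self.script[0] and self.args.
name = './myscript.py'
args = ['-n', '10']
script = "from Gaudi.Configuration import *\n"
if args:
    script += 'import sys\nsys.argv += %s\n' % str(args)
script += "importOptions('data.py')\n"
script += "execfile('%s')\n" % name
print(script)  # the text written to 'gaudipython-wrapper.py'
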
Example #11
class _DSTMergeTool(IMergeTool):
    
    _category = 'merge_tools'
    _hidden = 1
    _name = '_DSTMergeTool'
    _schema = IMergeTool._schema.inherit_copy()
    docstr = 'Path to a options file to use when merging.'
    _schema.datadict['merge_opts'] = FileItem(defvalue=None,doc=docstr)
    docstr = 'The version of DaVinci to use when merging. (e.g. v19r14)'
    _schema.datadict['version'] = SimpleItem(defvalue='',doc=docstr)
    
    def selectOptionsFile(self, version_string):
        """Tries to find the correct version of the options file to use based on the version."""
        
        dir = os.path.dirname(inspect.getsourcefile(_DSTMergeTool))
        options_dir = os.path.join(dir,'options')
            
        #search for the version of the merge opts which most closely matches 'version'
        import glob
        files = glob.glob(options_dir + os.path.sep + 'DSTMerger*.opts')

        #try to find the best options file to use
        opts_files = {}
        for f in files:
            file_name = os.path.basename(f)
            v = None
            #remove the .opts part
            if file_name.endswith('.opts'):
                file_name = file_name[0:-5]
            #remove the DSTMerger bit
            if file_name.startswith('DSTMerger-'):
                file_name = file_name[10:]
            if file_name:
                v = CMTVersion(file_name)
            else:
                v = CMTVersion()
            opts_files[v] = f
        
        #the result to return
        opts_file = None
        
        #iterate over the versions in order
        keys = sorted(opts_files.keys())
        saved = keys[-1]  #default is the latest one
        if version_string:
            version = CMTVersion(version_string)
            for k in keys:
                if version < k:
                    break
                else:
                    saved = k
        opts_file = opts_files[saved]
        return opts_file

    def mergefiles(self, file_list, output_file):
        
        #if no opts file is specified, then use version from installation
        if self.merge_opts is None or not self.merge_opts.name:
            self.merge_opts = File(self.selectOptionsFile(self.version))       
                            
        if not os.path.exists(self.merge_opts.name):
            msg = "The options file '%s' needed for merging does not exist." 
            raise MergerError(msg % self.merge_opts.name)
        logger.info("Using the options file '%s'.", self.merge_opts.name)
        
        #this is the bit specifing the files
        output_opts = """
// the output file from the merge
InputCopyStream.Output = "DATAFILE='PFN:%s' TYP='POOL_ROOTTREE' OPT='REC'";

//the files to merge
EventSelector.Input = {""" % output_file
        
        file_sep = ','
        file_len = len(file_list)
        
        for i in xrange(file_len):
            file_name = file_list[i]
            if i == (file_len - 1):
                file_sep = ''  #i.e. the last entry does not have a comma
            output_opts += """
"DATAFILE='PFN:%s' TYP='POOL_ROOTTREE' OPT='READ'"%s""" % (file_name, file_sep)
        output_opts += """
};"""
        
        #write this out to a file
        opts_file_name = tempfile.mktemp('.opts')
        opts_file = file(opts_file_name, 'w')
        try:
            opts_file.write(output_opts)
        finally:
            opts_file.close()
            
        if not os.path.exists(opts_file_name):
            msg = "Failed to write temporary options file '%s' during merge"
            raise MergerError(msg % opts_file_name)
        
        #now run gaudirun via a script
        shell_script = """#!/bin/sh
        
SP=`which SetupProject.sh`
if [ -n "$SP" ]; then
  . SetupProject.sh  --ignore-missing DaVinci %s
else
  echo "Could not find the SetupProject.sh script. Your job will probably fail"
fi
gaudirun.py %s %s
exit $?
""" % (self.version, self.merge_opts.name, opts_file_name)

        script_file_name = tempfile.mktemp('.sh')
        script_file = file(script_file_name, 'w')
        try:
            script_file.write(shell_script)
        finally:
            script_file.close()
        
        return_code = subprocess.call(['/bin/sh',script_file_name])
        if return_code != 0:
            msg = 'The DSTMerger returned %i when calling gaudirun'
            logger.warning(msg % return_code)
            
        #finally clean up
        os.unlink(script_file_name)
        os.unlink(opts_file_name)
        
        if not os.path.exists(output_file):
            msg = "The output file '%s' was not created"
            raise MergerError(msg % output_file)
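
# A standalone sketch of the version-selection rule in selectOptionsFile
# above: walk the sorted versions and keep the newest one that does not
# exceed the requested version, defaulting to the latest overall. Plain
# tuples stand in for CMTVersion objects here.
def pick_options_file(opts_files, requested=None):
    keys = sorted(opts_files.keys())
    saved = keys[-1]  #default is the latest one
    if requested is not None:
        for k in keys:
            if requested < k:
                break
            saved = k
    return opts_files[saved]

available = {(19, 12): 'DSTMerger-v19r12.opts', (20, 0): 'DSTMerger-v20r0.opts'}
print(pick_options_file(available, (19, 14)))  # -> DSTMerger-v19r12.opts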