Example #1
 def test_equal(self):
     v1 = Version(1, 0)
     v2 = Version(1, 0)
     self.assertEqual(v1, v2)
     self.assertTrue(v1.isCompatible(v2))
Example #2
class LHCbDataset(GangaDataset):

    '''Class for handling LHCb data sets (i.e. inputdata for LHCb jobs).

    Example Usage:
    ds = LHCbDataset(["lfn:/some/lfn.file","pfn:/some/pfn.file"])
    ds[0] # DiracFile("/some/lfn.file") - see DiracFile docs for usage
    ds[1] # PhysicalFile("/some/pfn.file") - see PhysicalFile docs for usage
    len(ds) # 2 (number of files)
    ds.getReplicas() # returns replicas for *all* files in the data set
    ds.replicate("CERN-USER") # replicate *all* LFNs to "CERN-USER" SE
    ds.getCatalog() # returns XML catalog slice
    ds.optionsString() # returns Gaudi-style options
    [...etc...]
    '''
    schema = {}
    docstr = 'List of PhysicalFile and DiracFile objects'
    schema['files'] = GangaFileItem(defvalue=[], typelist=['str', 'Ganga.GPIDev.Adapters.IGangaFile.IGangaFile'], sequence=1, doc=docstr)
    docstr = 'Ancestor depth to be queried from the Bookkeeping'
    schema['depth'] = SimpleItem(defvalue=0, doc=docstr)
    docstr = 'Use contents of file rather than generating catalog.'
    schema['XMLCatalogueSlice'] = GangaFileItem(defvalue=None, doc=docstr)
    docstr = 'Specify the dataset persistency technology'
    schema['persistency'] = SimpleItem(
        defvalue=None, typelist=['str', 'type(None)'], doc=docstr)
    schema['treat_as_inputfiles'] = SimpleItem(defvalue=False, doc="Treat the inputdata as inputfiles, i.e. copy the inputdata to the WN")

    _schema = Schema(Version(3, 0), schema)
    _category = 'datasets'
    _name = "LHCbDataset"
    _exportmethods = ['getReplicas', '__len__', '__getitem__', 'replicate',
                      'hasLFNs', 'append', 'extend', 'getCatalog', 'optionsString',
                      'getLFNs', 'getFileNames', 'getFullFileNames',
                      'difference', 'isSubset', 'isSuperset', 'intersection',
                      'symmetricDifference', 'union', 'bkMetadata',
                      'isEmpty', 'hasPFNs', 'getPFNs']  # ,'pop']

    def __init__(self, files=None, persistency=None, depth=0, fromRef=False):
        super(LHCbDataset, self).__init__()
        if files is None:
            files = []
        self.files = GangaList()
        process_files = True
        if fromRef:
            self.files._list.extend(files)
            process_files = False
        elif isinstance(files, GangaList):
            def isFileTest(_file):
                return isinstance(_file, IGangaFile)
            areFiles = all([isFileTest(f) for f in files._list])
            if areFiles:
                self.files._list.extend(files._list)
                process_files = False
        elif isinstance(files, LHCbDataset):
            self.files._list.extend(files.files._list)
            process_files = False

        if process_files:
            if isType(files, LHCbDataset):
                for this_file in files:
                    self.files.append(deepcopy(this_file))
            elif isType(files, IGangaFile):
                self.files.append(deepcopy(files))
            elif isType(files, (list, tuple, GangaList)):
                new_list = []
                for this_file in files:
                    if type(this_file) is str:
                        new_file = string_datafile_shortcut_lhcb(this_file, None)
                    elif isType(this_file, IGangaFile):
                        new_file = stripProxy(this_file)
                    else:
                        new_file = strToDataFile(this_file)
                    new_list.append(new_file)
                self.files.extend(new_list)
            elif type(files) is str:
                self.files.append(string_datafile_shortcut_lhcb(files, None), False)
            else:
                raise GangaException("Unknown object passed to LHCbDataset constructor!")

        self.files._setParent(self)

        logger.debug("Processed inputs, assigning files")

        # Feel free to turn this on again for debugging but it's potentially quite expensive
        #logger.debug( "Creating dataset with:\n%s" % self.files )
        
        logger.debug("Assigned files")

        self.persistency = persistency
        self.depth = depth
        logger.debug("Dataset Created")

    def __getitem__(self, i):
        '''Provides scripting access (e.g. ds[2] returns the 3rd file).'''
        if isinstance(i, slice):
            ds = LHCbDataset(files=self.files[i])
            ds.depth = self.depth
            #ds.XMLCatalogueSlice = self.XMLCatalogueSlice
            return ds
        else:
            return self.files[i]

    def getReplicas(self):
        'Returns the replicas for all files in the dataset.'
        lfns = self.getLFNs()
        cmd = 'getReplicas(%s)' % str(lfns)
        result = get_result(cmd, 'LFC query error', 'Could not get replicas.')
        return result['Value']['Successful']

    def hasLFNs(self):
        'Returns True if the dataset has LFNs and False otherwise.'
        for f in self.files:
            if isDiracFile(f):
                return True
        return False

    def hasPFNs(self):
        'Returns True if the dataset has PFNs and False otherwise.'
        for f in self.files:
            if not isDiracFile(f):
                return True
        return False

    def replicate(self, destSE=''):
        '''Replicate all LFNs to destSE.  For a list of valid SE\'s, type
        ds.replicate().'''

        if not destSE:
            from GangaDirac.Lib.Files.DiracFile import DiracFile
            DiracFile().replicate('')
            return
        if not self.hasLFNs():
            raise GangaException('Cannot replicate dataset w/ no LFNs.')

        retry_files = []

        for f in self.files:
            if not isDiracFile(f):
                continue
            try:
                result = f.replicate( destSE=destSE )
            except Exception as err:
                msg = 'Replication error for file %s (will retry in a bit).' % f.lfn
                logger.warning(msg)
                logger.warning("Error: %s" % str(err))
                retry_files.append(f)

        for f in retry_files:
            try:
                result = f.replicate( destSE=destSE )
            except Exception as err:
                msg = '2nd replication attempt failed for file %s. (will not retry)' % f.lfn
                logger.warning(msg)
                logger.warning(str(err))

    def extend(self, files, unique=False):
        '''Extend the dataset. If unique, then only add files which are not
        already in the dataset.'''
        from Ganga.GPIDev.Base import ReadOnlyObjectError

        if self._parent is not None and self._parent._readonly():
            raise ReadOnlyObjectError('object Job#%s  is read-only and attribute "%s/inputdata" cannot be modified now' % (self._parent.id, getName(self)))

        _external_files = []

        if type(files) is str or isType(files, IGangaFile):
            _external_files = [files]
        elif type(files) in [list, tuple]:
            _external_files = files
        elif isType(files, LHCbDataset):
            _external_files = files.files
        else:
            if not hasattr(files, "__getitem__") or not hasattr(files, '__iter__'):
                _external_files = [files]

        # just in case they extend w/ self
        _to_remove = []
        for this_file in _external_files:
            if hasattr(this_file, 'subfiles'):
                if len(this_file.subfiles) > 0:
                    _external_files = makeGangaListByRef(this_file.subfiles)
                    _to_remove.append(this_file)
            if type(this_file) is str:
                _external_files.append(string_datafile_shortcut_lhcb(this_file, None))
                _to_remove.append(this_file)

        for _this_file in _to_remove:
            _external_files.pop(_external_files.index(_this_file))

        for this_f in _external_files:
            _file = getDataFile(this_f)
            if _file is None:
                _file = this_f
            myName = _file.namePattern
            from GangaDirac.Lib.Files.DiracFile import DiracFile
            if isType(_file, DiracFile):
                myName = _file.lfn
            if unique and myName in self.getFileNames():
                continue
            self.files.append(stripProxy(_file))

    def removeFile(self, input_file):
        try:
            self.files.remove(input_file)
        except Exception:
            raise GangaException('Dataset has no file named %s' % input_file.namePattern)

    def getLFNs(self):
        'Returns a list of all LFNs (by name) stored in the dataset.'
        lfns = []
        if not self:
            return lfns
        for f in self.files:
            if isDiracFile(f):
                subfiles = f.getSubFiles()
                if len(subfiles) == 0:
                    lfns.append(f.lfn)
                else:
                    for this_file in subfiles:
                        lfns.append(this_file.lfn)

        #logger.debug( "Returning LFNS:\n%s" % str(lfns) )
        logger.debug("Returning #%s LFNS" % str(len(lfns)))
        return lfns

    def getPFNs(self):
        'Returns a list of all PFNs (by name) stored in the dataset.'
        pfns = []
        if not self:
            return pfns
        for f in self.files:
            if isPFN(f):
                pfns.append(f.namePattern)
        return pfns

    def getFullFileNames(self):
        'Returns all file names w/ PFN or LFN prepended.'
        names = []
        from GangaDirac.Lib.Files.DiracFile import DiracFile
        for f in self.files:
            if isType(f, DiracFile):
                names.append('LFN:%s' % f.lfn)
            else:
                try:
                    names.append('PFN:%s' % f.namePattern)
                except:
                    logger.warning("Cannot determine filename for: %s " % f)
                    raise GangaException("Cannot Get File Name")
        return names

    def getCatalog(self, site=''):
        '''Generates an XML catalog from the dataset (returns the XML string).
        Note: site defaults to config.LHCb.LocalSite
        Note: If the XMLCatalogueSlice attribute is set, then it returns
              what is written there.'''
        if hasattr(self.XMLCatalogueSlice, 'name'):
            if self.XMLCatalogueSlice.name:
                f = open(self.XMLCatalogueSlice.name)
                xml_catalog = f.read()
                f.close()
                return xml_catalog
        if not site:
            site = getConfig('LHCb')['LocalSite']
        lfns = self.getLFNs()
        depth = self.depth
        tmp_xml = tempfile.NamedTemporaryFile(suffix='.xml')
        cmd = 'getLHCbInputDataCatalog(%s,%d,"%s","%s")' \
              % (str(lfns), depth, site, tmp_xml.name)
        result = get_result(cmd, 'LFN->PFN error', 'XML catalog error.')
        xml_catalog = tmp_xml.read()
        tmp_xml.close()
        return xml_catalog

    def optionsString(self, file=None):
        '''Returns the Gaudi-style options string for the dataset (if a filename
        is given, the file is created and output is written there).'''
        if not self or len(self) == 0:
            return ''
        snew = ''
        if self.persistency == 'ROOT':
            snew = '\n#new method\nfrom GaudiConf import IOExtension\nIOExtension(\"%s\").inputFiles([' % self.persistency
        elif self.persistency == 'POOL':
            snew = '\ntry:\n    #new method\n    from GaudiConf import IOExtension\n    IOExtension(\"%s\").inputFiles([' % self.persistency
        elif self.persistency is None:
            snew = '\ntry:\n    #new method\n    from GaudiConf import IOExtension\n    IOExtension().inputFiles(['
        else:
            logger.warning(
                "Unknown LHCbDataset persistency technology... reverting to None")
            snew = '\ntry:\n    #new method\n    from GaudiConf import IOExtension\n    IOExtension().inputFiles(['

        sold = '\nexcept ImportError:\n    #Use previous method\n    from Gaudi.Configuration import EventSelector\n    EventSelector().Input=['
        sdatasetsnew = ''
        sdatasetsold = ''

        dtype_str_default = getConfig('LHCb')['datatype_string_default']
        dtype_str_patterns = getConfig('LHCb')['datatype_string_patterns']
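        # dtype_str_patterns is expected to map a datatype string to a list of
        # fnmatch file-name patterns; the first matching pattern decides dtype_str.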
        for f in self.files:
            dtype_str = dtype_str_default
            for this_str in dtype_str_patterns:
                matched = False
                for pat in dtype_str_patterns[this_str]:
                    if fnmatch.fnmatch(f.namePattern, pat):
                        dtype_str = this_str
                        matched = True
                        break
                if matched:
                    break
            sdatasetsnew += '\n        '
            sdatasetsold += '\n        '
            if isDiracFile(f):
                sdatasetsnew += """ \"LFN:%s\",""" % f.lfn
                sdatasetsold += """ \"DATAFILE='LFN:%s' %s\",""" % (f.lfn, dtype_str)
            else:
                sdatasetsnew += """ \"PFN:%s\",""" % f.namePattern
                sdatasetsold += """ \"DATAFILE='PFN:%s' %s\",""" % (f.namePattern, dtype_str)
        if sdatasetsold.endswith(","):
            if self.persistency == 'ROOT':
                sdatasetsnew = sdatasetsnew[:-1] + """\n], clear=True)"""
            else:
                sdatasetsnew = sdatasetsnew[:-1] + """\n    ], clear=True)"""
            sdatasetsold = sdatasetsold[:-1]
            sdatasetsold += """\n    ]"""
        if file:
            f = open(file, 'w')
            if self.persistency == 'ROOT':
                f.write(snew)
                f.write(sdatasetsnew)
            else:
                f.write(snew)
                f.write(sdatasetsnew)
                f.write(sold)
                f.write(sdatasetsold)
            f.close()
        else:
            if self.persistency == 'ROOT':
                return snew + sdatasetsnew
            else:
                return snew + sdatasetsnew + sold + sdatasetsold

    def _checkOtherFiles(self, other):
        if isType(other, GangaList) or isinstance(other, (list, tuple)):
            other_files = LHCbDataset(other).getFullFileNames()
        elif isType(other, LHCbDataset):
            other_files = other.getFullFileNames()
        else:
            raise GangaException("Unknown type for difference")
        return other_files

    def difference(self, other):
        '''Returns a new data set w/ files in this that are not in other.'''
        other_files = self._checkOtherFiles(other)
        files = set(self.getFullFileNames()).difference(other_files)
        data = LHCbDataset()
        data.extend(list(files))
        data.depth = self.depth
        return data

    def isSubset(self, other):
        '''Is every file in this data set in other?'''
        other_files = self._checkOtherFiles(other)
        return set(self.getFileNames()).issubset(other_files)

    def isSuperset(self, other):
        '''Is every file in other in this data set?'''
        other_files = self._checkOtherFiles(other)
        return set(self.getFileNames()).issuperset(other_files)

    def symmetricDifference(self, other):
        '''Returns a new data set w/ files in either this or other but not
        both.'''
        other_files = self._checkOtherFiles(other)
        files = set(self.getFullFileNames()).symmetric_difference(other_files)
        data = LHCbDataset()
        data.extend(list(files))
        data.depth = self.depth
        return data

    def intersection(self, other):
        '''Returns a new data set w/ files common to this and other.'''
        other_files = self._checkOtherFiles(other)
        files = set(self.getFullFileNames()).intersection(other_files)
        data = LHCbDataset()
        data.extend(list(files))
        data.depth = self.depth
        return data

    def union(self, other):
        '''Returns a new data set w/ files from this and other.'''
        other_files = self._checkOtherFiles(other)
        files = set(self.getFullFileNames()).union(other_files)
        data = LHCbDataset()
        data.extend(list(files))
        data.depth = self.depth
        return data

    def bkMetadata(self):
        'Returns the bookkeeping metadata for all LFNs. '
        logger.info("Using BKQuery(bkpath).getDatasetMetadata() with bkpath=the bookkeeping path, will yeild more metadata such as 'TCK' info...")
        cmd = 'bkMetaData(%s)' % self.getLFNs()
        b = get_result(cmd, 'Error getting metadata', 'Could not get bookkeeping metadata.')
        return b
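
A minimal usage sketch of the class above, as it would be driven from a Ganga GPI session. It assumes a configured GangaLHCb installation where LHCbDataset and Job are exported to the GPI; the LFNs below are placeholders, not real files.

# Sketch only: placeholder LFNs, assumes a running Ganga/GangaLHCb session
ds = LHCbDataset(['lfn:/lhcb/user/s/someone/file1.dst',
                  'lfn:/lhcb/user/s/someone/file2.dst'])

len(ds)                 # number of files in the dataset
ds.getLFNs()            # list of the LFN strings
ds.optionsString()      # Gaudi-style options snippet for these files

# Set-like helpers return new LHCbDataset objects
other = LHCbDataset(['lfn:/lhcb/user/s/someone/file2.dst'])
ds.difference(other).getLFNs()   # files only in ds
ds.isSuperset(other)             # True if every file in other is also in ds

# Attach as inputdata to a job
j = Job()
j.inputdata = ds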
Example #3
 def test_different(self):
     v1 = Version(1, 0)
     v2 = Version(1, 2)
     self.assertNotEqual(v1, v2)
     self.assertTrue(v2.isCompatible(v1))
     self.assertFalse(v1.isCompatible(v2))
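
The Version tests in Examples #1 and #3 only pin down equality and the isCompatible behaviour. Below is a minimal sketch of a (major, minor) Version class consistent with those tests, assuming compatibility means "same major version and a minor version at least as new as the other's"; the real Ganga schema Version may differ in detail.

class Version(object):
    """Minimal sketch of a (major, minor) schema version consistent with the tests above."""

    def __init__(self, major, minor):
        self.major = major
        self.minor = minor

    def __eq__(self, other):
        return (self.major, self.minor) == (other.major, other.minor)

    def __ne__(self, other):
        return not self == other

    def isCompatible(self, other):
        # A newer minor version can read what an older one wrote, but not vice versa;
        # major versions must match exactly (assumed semantics, see tests above).
        return self.major == other.major and self.minor >= other.minor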
Example #4
class IGangaFile(GangaObject):

    """IGangaFile represents base class for output files, such as MassStorageFile, LCGSEFile, DiracFile, LocalFile, etc 
    """
    _schema = Schema(Version(1, 1), {'namePattern': SimpleItem(
        defvalue="", doc='pattern of the file name')})
    _category = 'gangafiles'
    _name = 'IGangaFile'
    _hidden = 1
    __slots__ = list()

    def __init__(self):
        super(IGangaFile, self).__init__()

    def setLocation(self):
        """
        Sets the location of output files that were uploaded from the WN
        """
        raise NotImplementedError

    def location(self):
        """
        Return list with the locations of the post processed files (if they were configured to upload the output somewhere)
        """
        raise NotImplementedError

    def get(self):
        """
        Retrieves locally all files that were previously uploaded.
        The order of priority for where a file is placed is:
            1) The localDir as defined in the schema (an exception is thrown if this doesn't exist).
            2) The outputdir of the parent job if localDir is not defined.
            3) An exception is raised if neither is defined correctly.
        """
        if self.localDir:
            if not os.path.isdir(self.localDir):
                msg = "Folder '%s' doesn't exist. Please construct this before 'get'-ing a file." % self.localDir
                raise GangaFileError(msg)
            to_location = self.localDir
        else:
            try:
                to_location = self.getJobObject().outputdir
            except AssertionError:
                msg = "%s: Failed to get file object. Please set the `localDir` parameter and try again. e.g. file.localDir=os.getcwd();file.get()" % getName(self)
                logger.debug("localDir value: %s" % self.localDir)
                logger.debug("parent: %s" % self._getParent())
                raise GangaFileError(msg)

        # FIXME CANNOT perform a remote globbing here in a nice way so have to just perform a copy when dealing with wildcards
        if not os.path.isfile(os.path.join(to_location, self.namePattern)):
            returnable = self.copyTo(to_location)
            if not self.localDir:
                self.localDir = to_location
            return returnable
        else:
            logger.debug("File: %s already exists, not performing copy" % (os.path.join(to_location, self.namePattern), ))
            return True


    def getSubFiles(self, process_wildcards=False):
        """
        Returns the sub files if wildcards are used
        """
        # should we process wildcards? Used for inputfiles
        if process_wildcards:
            self.processWildcardMatches()

        # if we have subfiles, return that
        if hasattr(self, 'subfiles'):
            return self.subfiles

        return []

    def getFilenameList(self):
        """
        Returns the filenames of all associated files through a common interface
        """
        raise NotImplementedError

    def getWNScriptDownloadCommand(self, indent):
        """
        Gets the command used to download already uploaded file
        """
        raise NotImplementedError

    def put(self):
        """
        Postprocesses (upload) output file to the desired destination from the client
        """
        raise NotImplementedError

    def copyTo(self, targetPath):
        """
        Copy the file to local storage using the appropriate file-transfer mechanism.
        This will raise an exception if targetPath isn't set to something sensible.
        Args:
            targetPath (str): Target path the file is to be copied to
        """
        if not (isinstance(targetPath, str) and targetPath):
            raise GangaFileError("Cannot perform a copyTo with no given targetPath!")
        if regex.search(self.namePattern) is None\
            and os.path.isfile(os.path.join(self.localDir, self.namePattern)):

            if not os.path.isfile(os.path.join(targetPath, self.namePattern)):
                shutil.copy(os.path.join(self.localDir, self.namePattern), os.path.join(targetPath, self.namePattern))
            else:
                logger.debug("Already found file: %s" % os.path.join(targetPath, self.namePattern))
                
            return True

        # Again, cannot perform a remote glob here so have to ignore wildcards
        else:
            return self.internalCopyTo(targetPath)

    def internalCopyTo(self, targetPath):
        """
        Internal method for implementing the actual copy mechanism for each IGangaFile
        Args:
             targetPath (str): Target path the file is to be copied to
        """
        raise NotImplementedError

    def getWNInjectedScript(self, outputFiles, indent, patternsToZip, postProcessLocationsFP):
        """
        Returns the script that has to be injected into the jobscript for postprocessing on the WN
        """
        raise NotImplementedError

    def processWildcardMatches(self):
        """
        If namePattern contains a wildcard, populate the subfiles property
        """
        raise NotImplementedError

    def _auto_remove(self):
        """
        Calls remove() when the job is removed, as long as the config option allows it
        """
        self.remove()

    def _readonly(self):
        return False

    def _list_get__match__(self, to_match):
        if isinstance(to_match, str):
            return fnmatch(self.namePattern, to_match)
        # Note: type(DiracFile) = ObjectMetaclass
        # type(ObjectMetaclass) = type
        # hence checking against a class type not an instance
        if isinstance(type(to_match), type):
            return issubclass(self.__class__, to_match)
        return to_match == self

    def execSyscmdSubprocess(self, cmd):

        import subprocess

        exitcode = -999
        mystdout = ''
        mystderr = ''

        try:
            child = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            (mystdout, mystderr) = child.communicate()
            exitcode = child.returncode
        finally:
            pass

        return (exitcode, mystdout, mystderr)

    def remove(self):
        """
        Objects should implement something to overload this!
        """
        raise NotImplementedError

    def accessURL(self):
        """
        Return the URL including the protocol used to access a file on a certain storage element
        """
        raise NotImplementedError

    def hasMatchedFiles(self):
        """
        Return whether this file has valid matched files. The default implementation checks for
        subfiles and locations
        """

        # check for subfiles
        if (hasattr(self, 'subfiles') and len(self.subfiles) > 0):
            # we have subfiles so we must have actual files associated
            return True

        # check for locations
        if (hasattr(self, 'locations') and len(self.locations) > 0):
            return True

        return False

    def containsWildcards(self):
        """
        Return whether the namePattern contains wildcard characters
        """
        if regex.search(self.namePattern) is not None:
            return True

        return False

    def cleanUpClient(self):
        """
        This method cleans up the client space after a file has been put, once the job has completed
        """

        # For all other file types (not LocalFile) the file in the outputdir is temporary, waiting for Ganga to pass it to the storage solution
        job = self.getJobObject()

        for f in glob.glob(os.path.join(job.outputdir, self.namePattern)):
            try:
                os.remove(f)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    logger.error('failed to remove temporary/intermediary file: %s' % f)
                    logger.debug("Err: %s" % err)
                    raise err

    def expandString(self, inputStr, fileName=''):
        """
        This method deals with the automatic string replacement in the string notation for IGangaFile objects
        Args:
            inputStr(str): This is the input string which is being evaluated/expanded
            fileName(str): This is an optional filename used to replace {fname}
        Returns:
            str:    This new string is the result of fully expanding the inputStr object
        """

        outputStr = inputStr

        if self._getParent() is not None:
            jobfqid = self.getJobObject().fqid
                                
            jobid = jobfqid
            subjobid = ''

            split = jobfqid.split('.')

            if len(split) > 1:
                jobid = split[0]
                subjobid = split[1]
          
            outputStr = outputStr.replace('{jid}', jobid)                                                                    
            outputStr = outputStr.replace('{sjid}', subjobid)

        if fileName:
            outputStr = outputStr.replace('{fname}', fileName)
        else:
            outputStr = outputStr.replace('{fname}', os.path.basename(self.namePattern))

        return outputStr
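
The expandString helper above substitutes the {jid}, {sjid} and {fname} tokens from the owning job and the file's namePattern. A quick sketch of what that substitution would produce for a file attached to subjob 4 of job 123 with namePattern 'hist.root' (all values here are illustrative):

# f is some concrete IGangaFile attached to subjob '123.4' with namePattern 'hist.root'
# (illustrative values only, not taken from a real session)
f.expandString('/store/{jid}/{sjid}/{fname}')
# -> '/store/123/4/hist.root'

f.expandString('/store/{jid}/{sjid}/{fname}', fileName='custom.root')
# -> '/store/123/4/custom.root'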
Example #5
class DiracBase(IBackend):

    """The backend that submits jobs to the Grid via DIRAC.

    The backend for jobs to be submitted to the Grid. Jobs are
    submitted through the DIRAC WMS system and then in turn submitted to the
    Grid. A few examples of usage are given below

    # Create Dirac backend object
    b = Dirac()

    # Create and submit job.
    j = Job(application=app,backend=b)
    j.submit()

    # Run a Root job on the Grid if in LHCb VO

    # Create a Root application object. See Root help text for instructions
    # on how to configure this.
    app = Root()

    # Create and submit job to Dirac using default options
    j = Job(application=app,backend=Dirac())
    j.submit()

    # Using the 'settings' attribute
    j.backend.settings['BannedSites'] = ['LCG.CERN.ch']
    j.resubmit()

    # settings can be set at any time but are only 'respected' during
    # submit and resubmit.

    """

    dirac_monitoring_is_active = True

    _schema = Schema(Version(3, 2), {
        'id': SimpleItem(defvalue=None, protected=1, copyable=0,
                         typelist=['int', 'type(None)'],
                         doc='The id number assigned to the job by the DIRAC WMS. If seeking help'
                         ' on jobs with the Dirac backend, please always report this id '
                         'number in addition to a full description of your problem. The id '
                         'can also be used to further inspect the job at '
                         'https://lhcbweb.pic.es/DIRAC/info/general/diracOverview'),
        'status': SimpleItem(defvalue=None, protected=1, copyable=0,
                             typelist=['str', 'type(None)'],
                             doc='The detailed status as reported by the DIRAC WMS'),
        'actualCE': SimpleItem(defvalue=None, protected=1, copyable=0,
                               typelist=['str', 'type(None)'],
                               doc='The location where the job ran'),
        'normCPUTime': SimpleItem(defvalue=None, protected=1, copyable=0,
                                  typelist=['str', 'type(None)'],
                                  doc='The normalized CPU time reported by the DIRAC WMS'),
        'statusInfo': SimpleItem(defvalue='', protected=1, copyable=0,
                                 typelist=['str', 'type(None)'],
                                 doc='Minor status information from Dirac'),
        'extraInfo': SimpleItem(defvalue='', protected=1, copyable=0,
                                typelist=['str', 'type(None)'],
                                doc='Application status information from Dirac'),
        'diracOpts': SimpleItem(defvalue='',
                                doc='DIRAC API commands to add to the job definition script. Only edit '
                                'if you *really* know what you are doing'),
        'settings': SimpleItem(defvalue={'CPUTime': 2 * 86400},
                               doc='Settings for DIRAC job (e.g. CPUTime, BannedSites, etc.)')
    })
    _exportmethods = ['getOutputData', 'getOutputSandbox', 'removeOutputData',
                      'getOutputDataLFNs', 'peek', 'reset', 'debug']
    _packed_input_sandbox = True
    _category = "backends"
    _name = 'DiracBase'
    _hidden = True

    def _setup_subjob_dataset(self, dataset):
        """
        This method is used for constructing datasets on a per subjob basis when submitting parametric jobs
        Args:
            Dataset (Dataset): This is a GangaDataset object, todo check this isn't a list
        """
        return None

    def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
        """
        This is the old bulk submit method which is used to construct the subjobs for a parametric job
        Args:
            dirac_ids (list): This is a list of the Dirac ids which have been created
            dirac_script (str): Name of the dirac script which contains the job jdl
        """
        f = open(dirac_script, 'r')
        parametric_datasets = get_parametric_datasets(f.read().split('\n'))
        f.close()
        if len(parametric_datasets) != len(dirac_ids):
            raise BackendError('Dirac', 'Mismatch between the number of datasets defined in the dirac API script and those returned by DIRAC')

        from Ganga.GPIDev.Lib.Job.Job import Job
        master_job = self.getJobObject()
        master_job.subjobs = []
        for i in range(len(dirac_ids)):
            j = Job()
            j.copyFrom(master_job)
            j.splitter = None
            j.backend.id = dirac_ids[i]
            j.id = i
            j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
            j.status = 'submitted'
            j.time.timenow('submitted')
            master_job.subjobs.append(j)
        return True

    def _common_submit(self, dirac_script):
        '''Submit the job via the Dirac server.
        Args:
            dirac_script (str): filename of the JDL which is to be submitted to DIRAC
        '''
        j = self.getJobObject()
        self.id = None
        self.actualCE = None
        self.status = None
        self.extraInfo = None
        self.statusInfo = ''
        j.been_queued = False
        dirac_cmd = """execfile(\'%s\')""" % dirac_script
        result = execute(dirac_cmd)
        # Could use the below code instead to submit on a thread
        # If submitting many then user may terminate ganga before
        # all jobs submitted
#        def submit_checker(result, job, script):
#            err_msg = 'Error submitting job to Dirac: %s' % str(result)
#            if not result_ok(result) or 'Value' not in result:
#                logger.error(err_msg)
#                raise BackendError('Dirac',err_msg)
#
#            idlist = result['Value']
#            if type(idlist) is list:
#                return job._setup_bulk_subjobs(idlist, script)
#            job.id = idlist
#        server.execute_nonblocking(dirac_cmd, callback_func=submit_checker, args=(self, dirac_script))
#        return True

        err_msg = 'Error submitting job to Dirac: %s' % str(result)
        if not result_ok(result) or 'Value' not in result:
            logger.error(err_msg)
            logger.error("\n\n===\n%s\n===\n" % dirac_script)
            logger.error("\n\n====\n")
            with open(dirac_script, 'r') as file_in:
                logger.error("%s" % file_in.read())
            logger.error("\n====\n")
            raise BackendError('Dirac', err_msg)

        idlist = result['Value']
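        # 'Value' is a single DIRAC job id for a normal submission, or a list of
        # ids when the submitted script described a parametric (bulk) job.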
        if type(idlist) is list:
            return self._setup_bulk_subjobs(idlist, dirac_script)

        self.id = idlist
        return type(self.id) == int

    def _addition_sandbox_content(self, subjobconfig):
        '''Any additional files that should be sent to Dirac.
        Args:
            subjobconfig (unknown): This is the config for this subjob (I think)'''
        return []

    def submit(self, subjobconfig, master_input_sandbox):
        """Submit a DIRAC job
        Args:
            subjobconfig (unknown):
            master_input_sandbox (list): file names which are in the input sandbox of the master job (if any)
        """
        j = self.getJobObject()

        sboxname = j.createPackedInputSandbox(subjobconfig.getSandboxFiles())

        input_sandbox = master_input_sandbox[:]
        input_sandbox += sboxname

        input_sandbox += self._addition_sandbox_content(subjobconfig)

        ## Add LFN to the inputfiles section of the file
        input_sandbox_userFiles = []
        for this_file in j.inputfiles:
            if isType(this_file, DiracFile):
                input_sandbox_userFiles.append('LFN:'+str(this_file.lfn))
        if j.master:
            for this_file in j.master.inputfiles:
                if isType(this_file, DiracFile):
                    input_sandbox_userFiles.append('LFN:'+str(this_file.lfn))

        for this_file in input_sandbox_userFiles:
            input_sandbox.append(this_file)

        logger.debug("dirac_script: %s" % str(subjobconfig.getExeString()))
        logger.debug("sandbox_cont:\n%s" % str(input_sandbox))

        dirac_script = subjobconfig.getExeString().replace('##INPUT_SANDBOX##', str(input_sandbox))

        dirac_script_filename = os.path.join(j.getInputWorkspace().getPath(), 'dirac-script.py')
        f = open(dirac_script_filename, 'w')
        f.write(dirac_script)
        f.close()
        return self._common_submit(dirac_script_filename)

    def master_auto_resubmit(self, rjobs):
        '''Duplicate of the IBackend.master_resubmit but hooked into auto resubmission
        such that the monitoring server is used rather than the user server
        Args:
            rjobs (list): This is a list of jobs which are to be auto-resubmitted'''
        from Ganga.Core import IncompleteJobSubmissionError, GangaException
        from Ganga.Utility.logging import log_user_exception
        incomplete = 0

        def handleError(x):
            if incomplete:
                raise x
            else:
                return 0
        try:
            for sj in rjobs:
                fqid = sj.getFQID('.')
                logger.info("resubmitting job %s to %s backend", fqid, getName(sj.backend))
                try:
                    b = sj.backend
                    sj.updateStatus('submitting')
                    result = b._resubmit()
                    if result:
                        sj.updateStatus('submitted')
                        # sj._commit() # PENDING: TEMPORARY DISABLED
                        incomplete = 1
                    else:
                        return handleError(IncompleteJobSubmissionError(fqid, 'resubmission failed'))
                except Exception as x:
                    log_user_exception(logger, debug=isType(x, GangaException))
                    return handleError(IncompleteJobSubmissionError(fqid, str(x)))
        finally:
            master = self.getJobObject().master
            if master:
                master.updateMasterJobStatus()
        return 1

    def resubmit(self):
        """Resubmit a DIRAC job"""
        return self._resubmit()

    def _resubmit(self):
        """Resubmit a DIRAC job"""
        j = self.getJobObject()
        parametric = False
        script_path = os.path.join(j.getInputWorkspace().getPath(), 'dirac-script.py')
        # Check old script
        if j.master is None and not os.path.exists(script_path):
            raise BackendError('Dirac', 'No "dirac-script.py" found in j.inputdir')

        if j.master is not None and not os.path.exists(script_path):
            script_path = os.path.join(
                j.master.getInputWorkspace().getPath(), 'dirac-script.py')
            if not os.path.exists(script_path):
                raise BackendError('Dirac', 'No "dirac-script.py" found in j.inputdir or j.master.inputdir')
            parametric = True

        # Read old script
        f = open(script_path, 'r')
        script = f.read()
        f.close()

        # Create new script - ##note instead of using get_parametric_dataset
        # could just use j.inputdata.
        if parametric is True:
            parametric_datasets = get_parametric_datasets(script.split('\n'))
            if j.master:
                if len(parametric_datasets) != len(j.master.subjobs):
                    raise BackendError('Dirac', 'number of parametric datasets defined in API script doesn\'t match number of master.subjobs')
            if j.inputdata and len(j.inputdata) > 0:
                _input_files = [f for f in j.inputdata if not isType(f, DiracFile)]
            else:
                _input_files = []
            if set(parametric_datasets[j.id]).symmetric_difference(set([f.namePattern for f in _input_files])):
                raise BackendError(
                    'Dirac', 'Mismatch between dirac-script and job attributes.')
            script = script.replace('.setParametricInputData(%s)' % str(parametric_datasets),
                                    '.setInputData(%s)' % str(parametric_datasets[j.id]))
            script = script.replace('%n', str(j.id))  # name

        start_user_settings = '# <-- user settings\n'
        new_script = script[
            :script.find(start_user_settings) + len(start_user_settings)]

        job_ident = get_job_ident(script.split('\n'))
        for key, value in self.settings.iteritems():
            if str(key).startswith('set'):
                _key = key[3:]
            else:
                _key = key
            if type(value) is str:
                template = '%s.set%s("%s")\n'
            else:
                template = '%s.set%s(%s)\n'
            new_script += template % (job_ident, str(_key), str(value))
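            # e.g. a (hypothetical) settings entry {'CPUTime': 172800} appends
            # "<job_ident>.setCPUTime(172800)\n" just after the user-settings marker.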
        new_script += script[script.find('# user settings -->'):]

        # Save new script
        new_script_filename = os.path.join(j.getInputWorkspace().getPath(), 'dirac-script.py')
        f = open(new_script_filename, 'w')
        f.write(new_script)
        f.flush()
        f.close()
        return self._common_submit(new_script_filename)

    def reset(self, doSubjobs=False):
        """Resets the state of a job back to 'submitted' so that the
        monitoring will run on it again.
        Args:
            doSubjobs (bool): Should we reset the subjobs associated with this job or not"""
        j = self.getJobObject()

        disallowed = ['submitting', 'killed']
        if j.status in disallowed:
            logger.warning("Can not reset a job in status '%s'." % j.status)
        else:
            j.getOutputWorkspace().remove(preserve_top=True)
            self.extraInfo = None
            self.statusInfo = ''
            self.status = None
            self.actualCE = None
            j.been_queued = False
            j.updateStatus('submitted')
            if j.subjobs and not doSubjobs:
                logger.info('This job has subjobs, if you would like the backends '
                            'of all the subjobs that are in status=\'completing\' or '
                            'status=\'failed\' also reset then recall reset with the '
                            'arg \'True\' i.e. job(3).backend.reset(True)')
            elif j.subjobs and doSubjobs:
                logger.info('resetting the backends of \'completing\' and \'failed\' subjobs.')
                for sj in j.subjobs:
                    if sj.status == 'completing' or sj.status == 'failed':
                        sj.backend.reset()
            if j.master:
                j.master.updateMasterJobStatus()

    def kill(self):
        """ Kill a Dirac jobs"""
        if not self.id:
            return None
        dirac_cmd = 'kill(%d)' % self.id
        result = execute(dirac_cmd)
        if not result_ok(result):
            raise BackendError('Dirac', 'Could not kill job: %s' % str(result))
        return result['OK']

    def peek(self, filename=None, command=None):
        """Peek at the output of a job (Note: filename/command are ignored).
        Args:
            filename (str): Ignored but is filename of a file in the sandbox
            command (str): Ignored but is a command which could be executed"""
        dirac_cmd = 'peek(%d)' % self.id
        result = execute(dirac_cmd)
        if result_ok(result):
            logger.info(result['Value'])
        else:
            logger.error("No peeking available for Dirac job '%i'.", self.id)

    def getOutputSandbox(self, outputDir=None):
        """Get the outputsandbox for the job object controlling this backend
        Args:
            outputDir (str): This string represents the output dir where the sandbox is to be placed
        """
        j = self.getJobObject()
        if outputDir is None:
            outputDir = j.getOutputWorkspace().getPath()
        dirac_cmd = "getOutputSandbox(%d,'%s')"  % (self.id, outputDir)
        result = execute(dirac_cmd)
        if not result_ok(result):
            msg = 'Problem retrieving output: %s' % str(result)
            logger.warning(msg)
            return False

        return True

    def removeOutputData(self):
        """
        Remove all the LFNs associated with this job.
        """
        # Note when the API can accept a list for removeFile I will change
        # this.
        j = self.getJobObject()
        if j.subjobs:
            for sj in j.subjobs:
                outputfiles_foreach(sj, DiracFile, lambda x: x.remove())
        else:
            outputfiles_foreach(j, DiracFile, lambda x: x.remove())

    def getOutputData(self, outputDir=None, names=None, force=False):
        """Retrieve data stored on SE to dir (default=job output workspace).
        If names=None, then all outputdata is downloaded otherwise names should
        be a list of files to download. If force=True then data will be redownloaded
        even if the file already exists.

        Note that if called on a master job then all subjobs' output will be downloaded.
        If dir is None then the subjobs output goes into their individual
        outputworkspaces as expected. If however one specifies a dir then this is
        treated as a top dir and a subdir for each job will be created below it. This
        will avoid overwriting files with the same name from each subjob.
        Args:
            outputDir (str): This string represents the output dir where the sandbox is to be placed
            names (list): list of names which match namePatterns in the outputfiles
            force (bool): Force the download of data, potentially overwriting existing files
        """
        j = self.getJobObject()
        if outputDir is not None and not os.path.isdir(outputDir):
            raise GangaException("Designated outupt path '%s' must exist and be a directory" % outputDir)

        def download(dirac_file, job, is_subjob=False):
            dirac_file.localDir = job.getOutputWorkspace().getPath()
            if outputDir is not None:
                output_dir = outputDir
                if is_subjob:
                    output_dir = os.path.join(outputDir, job.fqid)
                    if not os.path.isdir(output_dir):
                        os.mkdir(output_dir)
                dirac_file.localDir = output_dir
            if os.path.exists(os.path.join(dirac_file.localDir, os.path.basename(dirac_file.lfn))) and not force:
                return
            try:
                if isType(dirac_file, DiracFile):
                    dirac_file.get(localPath=dirac_file.localDir)
                else:
                    dirac_file.get()
                return dirac_file.lfn
            # should really make the get method throw if it doesn't succeed. TODO
            except GangaException as e:
                logger.warning(e)

        succeeded = []
        if j.subjobs:
            for sj in j.subjobs:
                succeeded.extend([download(f, sj, True) for f in outputfiles_iterator(sj, DiracFile) if f.lfn != '' and (names is None or f.namePattern in names)])
        else:
            succeeded.extend([download(f, j, False) for f in outputfiles_iterator(j, DiracFile) if f.lfn != '' and (names is None or f.namePattern in names)])

        return filter(lambda x: x is not None, succeeded)

    def getOutputDataLFNs(self):
        """Retrieve the list of LFNs assigned to outputdata"""
        j = self.getJobObject()
        lfns = []

        if j.subjobs:
            for sj in j.subjobs:
                lfns.extend([f.lfn for f in outputfiles_iterator(sj, DiracFile) if f.lfn != ''])
        else:
            lfns.extend([f.lfn for f in outputfiles_iterator(j, DiracFile) if f.lfn != ''])
        return lfns

    def debug(self):
        '''Obtains some (possibly) useful DIRAC debug info. '''
        # check services
        cmd = 'getServicePorts()'
        result = execute(cmd)
        if type(result) == str:
            try:
                result = eval(result)
            except Exception as err:
                logger.debug("Exception, err: %s" % str(err))
                pass
        if not result_ok(result):
            logger.warning('Could not obtain services: %s' % str(result))
            return
        services = result.get('Value', {})
        for category in services:
            system, service = category.split('/')
            cmd = "ping('%s','%s')" % (system, service)
            result = execute(cmd)
            if type(result) == str:
                try:
                    result = eval(result)
                except Exception as err:
                    logger.debug("Exception: %s" % str(err))
                    pass
            msg = 'OK.'
            if not result_ok(result):
                msg = '%s' % result['Message']
            logger.info('%s: %s' % (category, msg))
        # get pilot info for this job
        if type(self.id) != int:
            return
        j = self.getJobObject()
        cwd = os.getcwd()
        debug_dir = j.getDebugWorkspace().getPath()
        cmd = "getJobPilotOutput(%d,'%s')" % \
              (self.id, debug_dir)
        result = execute(cmd)
        if result_ok(result):
            logger.info('Pilot Info: %s/pilot_%d/std.out.' %
                        (debug_dir, self.id))
        else:
            logger.error(result.get('Message', ''))

    @staticmethod
    def _bulk_updateStateTime(jobStateDict, bulk_time_lookup={} ):
        """ This performs the same as the _getStateTime method but loops over a list of job ids within the DIRAC namespace (much faster)
        Args:
            jobStateDict (dict): This is a dict of {job.backend.id : job_status, } elements
            bulk_time_lookup (dict): Dict of result of multiple calls to getBulkStateTime, performed in advance
        """
        for this_state, these_jobs in jobStateDict.iteritems():
            if bulk_time_lookup == {} or this_state not in bulk_time_lookup:
                bulk_result = execute("getBulkStateTime(%s,\'%s\')" %
                                        (repr([j.backend.id for j in these_jobs]), this_state))
            else:
                bulk_result = bulk_time_lookup[this_state]
            for this_job in jobStateDict[this_state]:
                backend_id = this_job.backend.id
                if backend_id in bulk_result and bulk_result[backend_id]:
                    DiracBase._getStateTime(this_job, this_state, {this_state : bulk_result[backend_id]})
                else:
                    DiracBase._getStateTime(this_job, this_state)

    @staticmethod
    def _getStateTime(job, status, getStateTimeResult={}):
        """Returns the timestamps for 'running' or 'completed' by extracting
        their equivalent timestamps from the loggingInfo.
        Args:
            job (Job): This is the job object we want to update
            status (str): This is the Ganga status we're updating (running, completed... etc)
            getStateTimeResult (dict): This is the optional result of executing the appropriate getStateTime
                                        against this job.backend.id; if not provided the command is called internally
        """
        # Now private to stop server cross-talk from the user thread, since updateStatus calls
        # this method whether it is itself called by the user thread or the monitoring thread.
        # Now don't use hook but define our own private version
        # used in monitoring loop... messy but works.
        if job.status != status:
            b_list = ['running', 'completing', 'completed', 'failed']
            backend_final = ['failed', 'completed']
            # backend stamps
            if not job.subjobs and status in b_list:
                for childstatus in b_list:
                    if job.backend.id:
                        logger.debug("Accessing getStateTime() in diracAPI")
                        if childstatus in backend_final:
                            if childstatus in getStateTimeResult:
                                be_statetime = getStateTimeResult[childstatus]
                            else:
                                be_statetime = execute("getStateTime(%d,\'%s\')" % (job.backend.id, childstatus))
                            job.time.timestamps["backend_final"] = be_statetime
                            logger.debug("Wrote 'backend_final' to timestamps.")
                            break
                        else:
                            time_str = "backend_" + childstatus
                            if time_str not in job.time.timestamps:
                                if childstatus in getStateTimeResult:
                                    be_statetime = getStateTimeResult[childstatus]
                                else:
                                    be_statetime = execute("getStateTime(%d,\'%s\')" % (job.backend.id, childstatus))
                                job.time.timestamps["backend_" + childstatus] = be_statetime
                            logger.debug("Wrote 'backend_%s' to timestamps.", childstatus)
                    if childstatus == status:
                        break
            logger.debug("_getStateTime(job with id: %d, '%s') called.", job.id, job.status)
        else:
            logger.debug("Status changed from '%s' to '%s'. No new timestamp was written", job.status, status)

    def timedetails(self):
        """Prints contents of the loggingInfo from the Dirac API."""
        if not self.id:
            return None
        logger.debug("Accessing timedetails() in diracAPI")
        dirac_cmd = 'timedetails(%d)' % self.id
        return execute(dirac_cmd)

    @staticmethod
    def job_finalisation_cleanup(job, updated_dirac_status):
        """
        Method for reverting a job back to a clean state upon a failure in the job progression
        Args:
            job (Job): This is the job whose status is to be changed
            updated_dirac_status (str): Ganga status which is to be used somewhere
        """
        #   Revert job back to running state if we exit uncleanly
        if job.status == "completing":
            job.updateStatus('running')
            if job.master:
                job.master.updateMasterJobStatus()
        # FIXME should I add something here to cleanup on sandboxes pulled from
        # malformed job output?

    @staticmethod
    def _internal_job_finalisation(job, updated_dirac_status):
        """
        This method performs the main job finalisation
        Args:
            job (Job): This is the job we want to finalise
            updated_dirac_status (str): String representing the Ganga finalisation state of the job failed/completed
        """

        if updated_dirac_status == 'completed':
            start = time.time()
            # firstly update job to completing
            DiracBase._getStateTime(job, 'completing')
            if job.status in ['removed', 'killed']:
                return
            elif (job.master and job.master.status in ['removed', 'killed']):
                return  # user changed it under us

            job.updateStatus('completing')
            if job.master:
                job.master.updateMasterJobStatus()

            output_path = job.getOutputWorkspace().getPath()

            logger.info('Contacting DIRAC for job: %s' % job.fqid)
            # Contact dirac which knows about the job
            job.backend.normCPUTime, getSandboxResult, file_info_dict, completeTimeResult = execute("finished_job(%d, '%s')" % (job.backend.id, output_path))

            now = time.time()
            logger.info('%0.2fs taken to download output from DIRAC for Job %s' % ((now - start), job.fqid))

            #logger.info('Job ' + job.fqid + ' OutputDataInfo: ' + str(file_info_dict))
            #logger.info('Job ' + job.fqid + ' OutputSandbox: ' + str(getSandboxResult))
            #logger.info('Job ' + job.fqid + ' normCPUTime: ' + str(job.backend.normCPUTime))

            # Set DiracFile metadata
            wildcards = [f.namePattern for f in job.outputfiles.get(DiracFile) if regex.search(f.namePattern) is not None]

            lfn_store = os.path.join(output_path, getConfig('Output')['PostProcessLocationsFileName'])

            # Make the file on disk with a nullop...
            if not os.path.isfile(lfn_store):
                with open(lfn_store, 'w'):
                    pass

            if job.outputfiles.get(DiracFile):

                # Now we can iterate over the contents of the file without touching it
                with open(lfn_store, 'ab') as postprocesslocationsfile:
                    if not hasattr(file_info_dict, 'keys'):
                        logger.error("Error understanding OutputDataInfo: %s" % str(file_info_dict))
                        from Ganga.Core.exceptions import GangaException
                        raise GangaException("Error understanding OutputDataInfo: %s" % str(file_info_dict))

                    ## Caution: it is not clear atm whether this 'Value' is an LHCbism or a bug
                    list_of_files = file_info_dict.get('Value', file_info_dict.keys())

                    for file_name in list_of_files:
                        file_name = os.path.basename(file_name)
                        info = file_info_dict.get(file_name)
                        #logger.debug("file_name: %s,\tinfo: %s" % (str(file_name), str(info)))

                        if not hasattr(info, 'get'):
                            logger.error("Error getting OutputDataInfo for: %s" % str(job.getFQID('.')))
                            logger.error("Please check the Dirac Job still exists or attempt a job.backend.reset() to try again!")
                            logger.error("Err: %s" % str(info))
                            logger.error("file_info_dict: %s" % str(file_info_dict))
                            from Ganga.Core.exceptions import GangaException
                            raise GangaException("Error getting OutputDataInfo")

                        valid_wildcards = [wc for wc in wildcards if fnmatch.fnmatch(file_name, wc)]
                        if not valid_wildcards:
                            valid_wildcards.append('')

                        for wc in valid_wildcards:
                            #logger.debug("wildcard: %s" % str(wc))

                            DiracFileData = 'DiracFile:::%s&&%s->%s:::%s:::%s\n' % (wc,
                                                                                    file_name,
                                                                                    info.get('LFN', 'Error Getting LFN!'),
                                                                                    str(info.get('LOCATIONS', ['NotAvailable'])),
                                                                                    info.get('GUID', 'NotAvailable')
                                                                                    )
                            #logger.debug("DiracFileData: %s" % str(DiracFileData))
                            postprocesslocationsfile.write(DiracFileData)
                            postprocesslocationsfile.flush()

                logger.debug("Written: %s" % open(lfn_store, 'r').readlines())

            # check outputsandbox downloaded correctly
            if not result_ok(getSandboxResult):
                logger.warning('Problem retrieving outputsandbox: %s' % str(getSandboxResult))
                DiracBase._getStateTime(job, 'failed')
                if job.status in ['removed', 'killed']:
                    return
                elif (job.master and job.master.status in ['removed', 'killed']):
                    return  # user changed it under us
                job.updateStatus('failed')
                if job.master:
                    job.master.updateMasterJobStatus()
                raise BackendError('Problem retrieving outputsandbox: %s' % str(getSandboxResult))

            # finally update job to completed
            DiracBase._getStateTime(job, 'completed', completeTimeResult)
            if job.status in ['removed', 'killed']:
                return
            elif (job.master and job.master.status in ['removed', 'killed']):
                return  # user changed it under us
            job.updateStatus('completed')
            if job.master:
                job.master.updateMasterJobStatus()
            now = time.time()
            logger.debug('Job ' + job.fqid + ' Time for complete update : ' + str(now - start))

        elif updated_dirac_status == 'failed':
            # firstly update status to failed
            DiracBase._getStateTime(job, 'failed')
            if job.status in ['removed', 'killed']:
                return
            if (job.master and job.master.status in ['removed', 'killed']):
                return  # user changed it under us
            job.updateStatus('failed')
            if job.master:
                job.master.updateMasterJobStatus()

            # if requested try downloading outputsandbox anyway
            if configDirac['failed_sandbox_download']:
                execute("getOutputSandbox(%d,'%s')" % (job.backend.id, job.getOutputWorkspace().getPath()))
        else:
            logger.error("Job #%s Unexpected dirac status '%s' encountered" % (job.getFQID('.'), updated_dirac_status))

    @staticmethod
    def job_finalisation(job, updated_dirac_status):
        """
        Attempt to finalise the job given and auto-retry 5 times on error
        Args:
            job (Job): Job object to finalise
            updated_dirac_status (str): The Ganga status to update the job to, i.e. failed/completed
        """
        count = 1
        limit = 5
        sleep_length = 2.5

        while count != limit:

            try:
                count += 1
                # Check status is sane before we start
                if job.status != "running" and (not job.status in ['completed', 'killed', 'removed']):
                    job.updateStatus('submitted')
                    job.updateStatus('running')
                if job.status in ['completed', 'killed', 'removed']:
                    break
                # make sure proxy is valid
                if DiracBase.checkDiracProxy():
                    # perform finalisation
                    DiracBase._internal_job_finalisation(job, updated_dirac_status)
                else:
                    # exit gracefully
                    logger.warning("Cannot process job: %s. DIRAC monitoring has been disabled. To activate your grid proxy type: \'gridProxy.renew()\'" % job.fqid)
                break
            except Exception as err:

                logger.warning("An error occured finalising job: %s" % job.getFQID('.'))
                logger.warning("Attemting again (%s of %s) after %s-sec delay" % (str(count), str(limit), str(sleep_length)))
                if count == limit:
                    logger.error("Unable to finalise job after %s retries due to error:\n%s" % (job.getFQID('.'), str(err)))
                    job.force_status('failed')
                    raise

            time.sleep(sleep_length)

        job.been_queued = False

    @staticmethod
    def requeue_dirac_finished_jobs(requeue_jobs, finalised_statuses):
        """
        Method used to requeue jobs which are in a finalised state of some form, finished/failed/etc
        Args:
            requeue_jobs (list): This is a list of the jobs which are to be requeued to be finalised
            finalised_statuses (dict): Dict of the Dirac statuses vs the Ganga statuses after running
        """

        from Ganga.Core import monitoring_component

        # requeue existing completed job
        for j in requeue_jobs:
            if j.been_queued:
                continue

            if monitoring_component:
                if monitoring_component.should_stop():
                    break
            if not configDirac['serializeBackend']:
                getQueues()._monitoring_threadpool.add_function(DiracBase.job_finalisation,
                                                           args=(j, finalised_statuses[j.backend.status]),
                                                           priority=5, name="Job %s Finalizing" % j.fqid)
                j.been_queued = True
            else:
                DiracBase.job_finalisation(j, finalised_statuses[j.backend.status])


    @staticmethod
    def monitor_dirac_running_jobs(monitor_jobs, finalised_statuses):
        """
        Method to update the status of jobs which are in a submitted/running state in both Ganga and DIRAC
        Args:
            monitor_jobs (list): Jobs which are to be monitored for their status change
            finalised_statuses (dict): Dict of the Dirac statuses vs the Ganga statuses after running
        """

        # now that can submit in non_blocking mode, can see jobs in submitting
        # that have yet to be assigned an id so ignore them
        # NOT SURE THIS IS VALID NOW BULK SUBMISSION IS GONE
        # EVEN THOUGH COULD ADD queues.add(j.submit) WILL KEEP AN EYE ON IT
        # dirac_job_ids    = [ j.backend.id for j in monitor_jobs if j.backend.id is not None ]
        # Correction this did become a problem for a crashed session during
        # submit, see #104454
        dead_jobs = (j for j in monitor_jobs if j.backend.id is None)
        for d in dead_jobs:
            d.updateStatus('failed')
            if d.master is not None:
                d.master.updateMasterJobStatus()

        ganga_job_status = [j.status for j in monitor_jobs if j.backend.id is not None]
        dirac_job_ids = [j.backend.id for j in monitor_jobs if j.backend.id is not None]

        logger.debug("GangaStatus: %s" % str(ganga_job_status))
        logger.debug("diracJobIDs: %s" % str(dirac_job_ids))

        if not dirac_job_ids:
            ## Nothing to do here stop bugging DIRAC about it!
            ## Everything else beyond here in the function depends on some ids present here, no ids means we can stop.
            return

        statusmapping = configDirac['statusmapping']

        result, bulk_state_result = execute('monitorJobs(%s, %s)' % (repr(dirac_job_ids), repr(statusmapping)))

        if not DiracBase.checkDiracProxy():
            return

        #result = results[0]
        #bulk_state_result = results[1]

        if len(result) != len(ganga_job_status):
            logger.warning('Dirac monitoring failed for %s, result = %s' % (str(dirac_job_ids), str(result)))
            logger.warning("Results: %s" % str(results))
            return

        from Ganga.Core import monitoring_component

        requeue_job_list = []
        jobStateDict = {}

        jobs_to_update = {}
        master_jobs_to_update = []

        thread_handled_states = ['completed', 'failed']
        for job, state, old_state in zip(monitor_jobs, result, ganga_job_status):
            if monitoring_component:
                if monitoring_component.should_stop():
                    break

            if job.been_queued:
                continue

            job.backend.statusInfo = state[0]
            job.backend.status = state[1]
            job.backend.actualCE = state[2]
            updated_dirac_status = state[3]
            try:
                job.backend.extraInfo = state[4]
            except Exception as err:
                logger.debug("gexception: %s" % str(err))
                pass
            logger.debug('Job status vector  : ' + job.fqid + ' : ' + repr(state))

            if updated_dirac_status not in jobStateDict:
                jobStateDict[updated_dirac_status] = []
            jobStateDict[updated_dirac_status].append(job)

            if job.backend.status in finalised_statuses:
                if job.status != 'running':
                    if job.status in ['removed', 'killed']:
                        requeue_job_list.append(job)
                    elif (job.master and job.master.status in ['removed', 'killed']):
                        continue  # user changed it under us
                    else:
                        if 'running' not in jobs_to_update:
                            jobs_to_update['running'] = []
                        jobs_to_update['running'].append(job)
                        if job.master:
                            if job.master not in master_jobs_to_update:
                                master_jobs_to_update.append(job.master)
                        requeue_job_list.append(job)

            else:
                if job.status in ['removed', 'killed']:
                    continue
                if (job.master and job.master.status in ['removed', 'killed']):
                    continue  # user changed it under us
                if job.status != updated_dirac_status:
                    if updated_dirac_status not in jobs_to_update:
                        jobs_to_update[updated_dirac_status] = []
                    jobs_to_update[updated_dirac_status].append(job)
                    if job.master:
                        if job.master not in master_jobs_to_update:
                            master_jobs_to_update.append(job.master)

        DiracBase._bulk_updateStateTime(jobStateDict, bulk_state_result)

        for status in jobs_to_update:
            for job in jobs_to_update[status]:
                job.updateStatus(status, update_master=False)

        for j in master_jobs_to_update:
            j.updateMasterJobStatus()

        DiracBase.requeue_dirac_finished_jobs(requeue_job_list, finalised_statuses)

    @staticmethod
    def checkDiracProxy():
        # make sure proxy is valid
        if not _proxyValid(shouldRenew = False, shouldRaise = False):
            if DiracBase.dirac_monitoring_is_active is True:
                logger.warning('DIRAC monitoring inactive (no valid proxy found).')
                logger.warning('Type: \'gridProxy.renew()\' to (re-)activate')
            DiracBase.dirac_monitoring_is_active = False
        else:
            DiracBase.dirac_monitoring_is_active = True
        return DiracBase.dirac_monitoring_is_active

    @staticmethod
    def updateMonitoringInformation(jobs_):
        """Check the status of jobs and retrieve output sandboxesi
        Args:
            jobs_ (list): List of the appropriate jobs to monitored
        """
        # Only those jobs in 'submitted','running' are passed in here for checking
        # if however they have already completed in Dirac they may have been put on queue
        # for processing from last time. These should be put back on queue without
        # querying dirac again. Their signature is status = running and job.backend.status
        # already set to Done or Failed etc.

        jobs = [stripProxy(j) for j in jobs_]

        # make sure proxy is valid
        if not DiracBase.checkDiracProxy():
            return

        # remove from consideration any jobs already in the queue. Checking this non-persisted attribute
        # is better than querying the queue, as we can't tell if a job has just been taken off the queue and is being processed.
        # Also, by not being persistent, this attribute automatically allows queued jobs from the last session to be considered
        # for requeueing
        interesting_jobs = [j for j in jobs if not j.been_queued]
        # status that correspond to a ganga 'completed' or 'failed' (see DiracCommands.status(id))
        # if backend status is these then the job should be on the queue
        finalised_statuses = configDirac['finalised_statuses']

        monitor_jobs = [j for j in interesting_jobs if j.backend.status not in finalised_statuses]
        requeue_jobs = [j for j in interesting_jobs if j.backend.status in finalised_statuses]

        #logger.debug('Interesting jobs: ' + repr([j.fqid for j in interesting_jobs]))
        #logger.debug('Monitor jobs    : ' + repr([j.fqid for j in monitor_jobs]))
        #logger.debug('Requeue jobs    : ' + repr([j.fqid for j in requeue_jobs]))

        DiracBase.requeue_dirac_finished_jobs(requeue_jobs, finalised_statuses)
        DiracBase.monitor_dirac_running_jobs(monitor_jobs, finalised_statuses)
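
The monitoring entry point above first drops any job already queued for finalisation and then splits the remainder on whether the DIRAC backend status is one of the configured finalised statuses. The sketch below is not part of the Ganga API; it only mirrors that partitioning step and assumes each job exposes the been_queued and backend.status attributes used in the listing.

def partition_jobs(jobs, finalised_statuses):
    """Split jobs into (requeue, monitor) lists, mirroring updateMonitoringInformation.

    Jobs whose backend status is already final only need local finalisation;
    the rest still have to be polled in DIRAC.
    """
    interesting = [j for j in jobs if not j.been_queued]
    requeue = [j for j in interesting if j.backend.status in finalised_statuses]
    monitor = [j for j in interesting if j.backend.status not in finalised_statuses]
    return requeue, monitor
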
Example No. 6
class GridftpSandboxCache(GridSandboxCache):
    '''
    Helper class for uploading/downloading/deleting sandbox files using lcg-cp/lcg-del commands with the gsiftp protocol.

    @author: Hurng-Chun Lee
    @contact: [email protected]
    '''

    gridftp_sandbox_cache_schema_datadict.update({
        'baseURI':
        SimpleItem(defvalue='',
                   copyable=1,
                   doc='the base URI for storing cached files')
    })

    _schema = Schema(Version(1, 0), gridftp_sandbox_cache_schema_datadict)
    _category = 'GridSandboxCache'
    _name = 'GridftpSandboxCache'

    logger = getLogger()

    def __init__(self):
        super(GridftpSandboxCache, self).__init__()
        self.protocol = 'gsiftp'

    def impl_upload(self, cred_req, files=[], opts=''):
        """
        Uploads multiple files to a remote gridftp server.
        """

        shell = getShell(cred_req)

        # making the directory on remote storage at destURI
        dirname = self.__get_unique_fname__()

        # creating subdirectory

        dir_ok = False

        destURI = '%s/%s' % (self.baseURI, dirname)

        uri_info = urisplit(destURI)

        cmd = 'uberftp %s "cd %s"' % (uri_info[1], uri_info[2])

        rc, output, m = self.__cmd_retry_loop__(shell, cmd, 1)

        if rc != 0:

            for l in output.split('\n'):
                l = l.strip()
                if re.match(r'^550.*', l):
                    # the directory is not found (error code 550), try to create
                    # the lowest level one
                    cmd = 'uberftp %s "mkdir %s"' % (uri_info[1], uri_info[2])

                    rc, output, m = self.__cmd_retry_loop__(shell, cmd, 1)

                    if rc != 0:
                        self.logger.error(output)
                    else:
                        dir_ok = True

                    break
        else:
            self.logger.debug('parent directory already available: %s' %
                              destURI)
            dir_ok = True

        if not dir_ok:
            self.logger.error('parent directory not available: %s' % destURI)
            return []

        # the algorithm of uploading one file
        class MyAlgorithm(Algorithm):
            def __init__(self, cacheObj):
                Algorithm.__init__(self)
                self.cacheObj = cacheObj

            def process(self, file):
                # decide number of parallel streams to be used
                fsize = os.path.getsize(urlparse(file)[2])
                fname = os.path.basename(urlparse(file)[2])
                fpath = os.path.abspath(urlparse(file)[2])

                md5sum = get_md5sum(fpath, ignoreGzipTimestamp=True)
                nbstream = int((fsize * 1.0) / (10.0 * 1024 * 1024 * 1024))

                if nbstream < 1:
                    nbstream = 1  # min stream
                if nbstream > 8:
                    nbstream = 8  # max stream

                myDestURI = '%s/%s' % (destURI, fname)

                # uploading the file
                cmd = 'uberftp'
                if nbstream > 1:
                    cmd += ' -c %d' % nbstream

                cmd += ' file:%s %s' % (fpath, myDestURI)

                rc, output, m = self.cacheObj.__cmd_retry_loop__(
                    shell, cmd, self.cacheObj.max_try)

                if rc != 0:
                    self.cacheObj.logger.error(output)
                    return False
                else:
                    fidx = GridftpFileIndex()
                    fidx.id = myDestURI
                    fidx.name = fname
                    fidx.md5sum = md5sum
                    fidx.attributes['fpath'] = fpath

                    self.__appendResult__(file, fidx)
                    return True

        myAlg = MyAlgorithm(cacheObj=self)
        myData = Data(collection=files)

        runner = MTRunner(name='sandboxcache_gridftp',
                          algorithm=myAlg,
                          data=myData)
        runner.start()
        runner.join(-1)

        return runner.getResults().values()

    def impl_download(self, cred_req, files=[], dest_dir=None, opts=''):
        """
        Downloads multiple files from a gridftp server to
        a local directory.
        """
        if not dest_dir:
            dest_dir = os.getcwd()
        self.logger.debug('download file to: %s', dest_dir)

        shell = getShell(cred_req)

        # the algorithm of downloading one file to a local directory
        class MyAlgorithm(Algorithm):
            def __init__(self, cacheObj):
                Algorithm.__init__(self)
                self.cacheObj = cacheObj

            def process(self, file):

                srcURI = file.id
                fname = os.path.basename(urisplit(srcURI)[2])
                destURI = 'file:%s/%s' % (dest_dir, fname)

                #cmd  = 'uberftp %s %s' % (srcURI, destURI)
                cmd = 'globus-url-copy %s %s' % (srcURI, destURI)

                rc, output, m = self.cacheObj.__cmd_retry_loop__(
                    shell, cmd, self.cacheObj.max_try)

                if rc != 0:
                    self.cacheObj.logger.error(output)
                    return False
                else:
                    self.__appendResult__(file.id, file)
                    return True

        myAlg = MyAlgorithm(cacheObj=self)
        myData = Data(collection=files)

        runner = MTRunner(name='sandboxcache_gridftp',
                          algorithm=myAlg,
                          data=myData)
        runner.start()
        runner.join(-1)

        return runner.getResults().values()

    def impl_delete(self, cred_req, files=[], opts=''):
        """
        Deletes multiple files from a remote gridftp server
        """

        shell = getShell(cred_req)

        # the algorithm of deleting one file from the remote server
        class MyAlgorithm(Algorithm):
            def __init__(self, cacheObj):
                Algorithm.__init__(self)
                self.cacheObj = cacheObj

            def process(self, file):

                destURI = file.id

                uri_info = urisplit(destURI)

                cmd = 'uberftp %s "rm %s"' % (uri_info[1], uri_info[2])

                rc, output, m = self.cacheObj.__cmd_retry_loop__(
                    shell, cmd, self.cacheObj.max_try)

                if rc != 0:
                    self.cacheObj.logger.error(output)
                    return False
                else:
                    self.__appendResult__(file.id, file)
                    return True

        myAlg = MyAlgorithm(cacheObj=self)
        myData = Data(collection=files)

        runner = MTRunner(name='sandboxcache_lcgdel',
                          algorithm=myAlg,
                          data=myData)
        runner.start()
        runner.join(-1)

        # update the local index file
        del_files = runner.getResults().values()
        all_files = self.get_cached_files()

        left_files = []
        for f in all_files:
            if f not in del_files:
                left_files.append(f)

        self.impl_bookkeepUploadedFiles(left_files, append=False)

        return del_files
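
The per-file upload algorithm above chooses the number of parallel uberftp streams from the file size: roughly one stream per 10 GiB, clamped between 1 and 8 (the LCG variant in the next example applies the same rule to lcg-cr). The helper below is only an illustration of that rule, not Ganga code.

def n_streams(fsize_bytes):
    # one stream per ~10 GiB, clamped to the 1..8 range used in impl_upload above
    nbstream = int(fsize_bytes / (10.0 * 1024 * 1024 * 1024))
    return max(1, min(nbstream, 8))

# e.g. n_streams(5 * 1024**3) == 1 and n_streams(25 * 1024**3) == 2
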
Example No. 7
class LCGSandboxCache(GridSandboxCache):
    '''
    Helper class for uploading/downloading/deleting sandbox files using lcg-cr/lcg-cp/lcg-del commands.

    @author: Hurng-Chun Lee 
    @contact: [email protected]
    '''

    lcg_sandbox_cache_schema_datadict.update({
        'se':
        SimpleItem(defvalue='', copyable=1, doc='the LCG SE hostname'),
        'se_type':
        SimpleItem(defvalue='srmv2', copyable=1, doc='the LCG SE type'),
        'se_rpath':
        SimpleItem(defvalue='generated',
                   copyable=1,
                   doc='the relative path to the VO directory on the SE'),
        'lfc_host':
        SimpleItem(defvalue='', copyable=1, doc='the LCG LFC hostname'),
        'srm_token':
        SimpleItem(
            defvalue='',
            copyable=1,
            doc=
            'the SRM space token, meaningful only when se_type is set to srmv2'
        )
    })

    _schema = Schema(Version(1, 0), lcg_sandbox_cache_schema_datadict)
    _category = 'GridSandboxCache'
    _name = 'LCGSandboxCache'

    logger = getLogger()

    def __init__(self):
        super(LCGSandboxCache, self).__init__()
        self.protocol = 'lcg'

    def __setattr__(self, attr, value):
        if attr == 'se_type' and value not in ['', 'srmv1', 'srmv2', 'se']:
            raise AttributeError('invalid se_type: %s' % value)
        super(LCGSandboxCache, self).__setattr__(attr, value)

    def impl_upload(self, files=[], opts=''):
        """
        Uploads multiple files to a remote grid storage.
        """

        shell = getShell(self.middleware)

        if self.lfc_host:
            shell.env['LFC_HOST'] = self.lfc_host

        self.logger.debug('upload file with LFC_HOST: %s',
                          shell.env['LFC_HOST'])

        # the algorithm of uploading one file
        class MyAlgorithm(Algorithm):
            def __init__(self, cacheObj):
                Algorithm.__init__(self)
                self.cacheObj = cacheObj
                self.dirname = self.cacheObj.__get_unique_fname__()

            def process(self, file):
                # decide number of parallel streams to be used
                fsize = os.path.getsize(urlparse(file)[2])
                fname = os.path.basename(urlparse(file)[2])
                fpath = os.path.abspath(urlparse(file)[2])

                md5sum = get_md5sum(fpath, ignoreGzipTimestamp=True)
                nbstream = int((fsize * 1.0) / (10.0 * 1024 * 1024 * 1024))

                if nbstream < 1:
                    nbstream = 1  # min stream
                if nbstream > 8:
                    nbstream = 8  # max stream

                cmd = 'lcg-cr -t 180 --vo %s -n %d' % (self.cacheObj.vo,
                                                       nbstream)
                if self.cacheObj.se is not None:
                    cmd = cmd + ' -d %s' % self.cacheObj.se
                if self.cacheObj.se_type == 'srmv2' and self.cacheObj.srm_token:
                    cmd = cmd + ' -D srmv2 -s %s' % self.cacheObj.srm_token

                # specify the physical location
                cmd = cmd + \
                    ' -P %s/ganga.%s/%s' % (self.cacheObj.se_rpath,
                                            self.dirname, fname)

                # specify the logical filename
                # NOTE: here we assume the root dir for VO is /grid/<voname>
                lfc_dir = '/grid/%s/ganga.%s' % (self.cacheObj.vo,
                                                 self.dirname)
                if not self.cacheObj.__lfc_mkdir__(shell, lfc_dir):
                    self.cacheObj.logger.warning(
                        'cannot create LFC directory: %s' % lfc_dir)
                    return None

                cmd = cmd + ' -l %s/%s %s' % (lfc_dir, fname, file)
                rc, output, m = self.cacheObj.__cmd_retry_loop__(
                    shell, cmd, self.cacheObj.max_try)

                if rc != 0:
                    return False
                else:
                    match = re.search(r'(guid:\S+)', output)
                    if match:
                        guid = match.group(1)

                        fidx = LCGFileIndex()
                        fidx.id = guid
                        fidx.name = fname
                        fidx.md5sum = md5sum
                        fidx.lfc_host = self.cacheObj.lfc_host
                        fidx.local_fpath = fpath

                        self.__appendResult__(file, fidx)
                        return True
                    else:
                        return False

        myAlg = MyAlgorithm(cacheObj=self)
        myData = Data(collection=files)

        runner = MTRunner(name='sandboxcache_lcgcr',
                          algorithm=myAlg,
                          data=myData)
        runner.start()
        runner.join(-1)

        return runner.getResults().values()

    def impl_download(self, files=[], dest_dir=None, opts=''):
        """
        Downloads multiple files from remote grid storages to 
        a local directory.
        """
        if not dest_dir:
            dest_dir = os.getcwd()
        self.logger.debug('download file to: %s', dest_dir)

        # the algorithm of downloading one file to a local directory
        class MyAlgorithm(Algorithm):
            def __init__(self, cacheObj):
                Algorithm.__init__(self)
                self.cacheObj = cacheObj
                self.shell = getShell(self.cacheObj.middleware)

            def process(self, file):

                guid = file.id
                lfn = file.attributes['local_fpath']
                lfc_host = file.attributes['lfc_host']
                fname = os.path.basename(urlparse(lfn)[2])

                self.shell.env['LFC_HOST'] = lfc_host
                self.cacheObj.logger.debug('download file with LFC_HOST: %s',
                                           self.shell.env['LFC_HOST'])

                cmd = 'lcg-cp -t %d --vo %s ' % (self.cacheObj.timeout,
                                                 self.cacheObj.vo)
                if self.cacheObj.se_type:
                    cmd += '-T %s ' % self.cacheObj.se_type
                cmd += '%s file://%s/%s' % (guid, dest_dir, fname)

                self.cacheObj.logger.debug('download file: %s', cmd)

                rc, output, m = self.cacheObj.__cmd_retry_loop__(
                    self.shell, cmd, self.cacheObj.max_try)

                if rc != 0:
                    return False
                else:
                    self.__appendResult__(file.id, file)
                    return True

        myAlg = MyAlgorithm(cacheObj=self)
        myData = Data(collection=files)

        runner = MTRunner(name='sandboxcache_lcgcp',
                          algorithm=myAlg,
                          data=myData)
        runner.start()
        runner.join(-1)

        return runner.getResults().values()

    def impl_delete(self, files=[], opts=''):
        """
        Deletes multiple files from remote grid storages. 
        """

        # the algorithm of deleting one file from remote grid storage
        class MyAlgorithm(Algorithm):
            def __init__(self, cacheObj):
                Algorithm.__init__(self)
                self.cacheObj = cacheObj
                self.shell = getShell(self.cacheObj.middleware)

            def process(self, file):

                guid = file.id

                lfc_host = file.attributes['lfc_host']

                self.shell.env['LFC_HOST'] = lfc_host

                self.cacheObj.logger.debug('delete file with LFC_HOST: %s' %
                                           self.shell.env['LFC_HOST'])

                cmd = 'lcg-del -a -t 60 --vo %s %s' % (self.cacheObj.vo, guid)

                rc, output, m = self.cacheObj.__cmd_retry_loop__(
                    self.shell, cmd, self.cacheObj.max_try)

                if rc != 0:
                    return False
                else:
                    self.__appendResult__(file.id, file)
                    return True

        myAlg = MyAlgorithm(cacheObj=self)
        myData = Data(collection=files)

        runner = MTRunner(name='sandboxcache_lcgdel',
                          algorithm=myAlg,
                          data=myData)
        runner.start()
        runner.join(-1)

        # update the local index file
        del_files = runner.getResults().values()
        all_files = self.get_cached_files()

        left_files = []
        for f in all_files:
            if f not in del_files:
                left_files.append(f)

        self.impl_bookkeepUploadedFiles(left_files, append=False)

        return del_files

    # For GUID protocol
    def __lfc_mkdir__(self, shell, path, mode='775'):
        '''Creates a directory in LFC'''

        cmd = 'lfc-mkdir -p -m %s %s' % (mode, path)

        (rc, output, m) = self.__cmd_retry_loop__(shell, cmd, 1)

        if rc != 0:
            return False
        else:
            return True
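
The lcg-cr command in impl_upload above is assembled piece by piece from the cache configuration. The fragment below simply retraces that assembly with invented values (the VO, SE host, space token, directory and file names are all hypothetical) so the shape of the final command is easier to see.

vo, nbstream = 'lhcb', 2                        # hypothetical VO and stream count
se, se_type, srm_token = 'srm.example.org', 'srmv2', 'MY_TOKEN'
se_rpath, dirname, fname = 'generated', 'ganga-cache-0001', 'input.tar.gz'
fpath = '/tmp/input.tar.gz'
lfc_dir = '/grid/%s/ganga.%s' % (vo, dirname)

cmd = 'lcg-cr -t 180 --vo %s -n %d' % (vo, nbstream)
if se is not None:
    cmd += ' -d %s' % se
if se_type == 'srmv2' and srm_token:
    cmd += ' -D srmv2 -s %s' % srm_token
cmd += ' -P %s/ganga.%s/%s' % (se_rpath, dirname, fname)   # physical location on the SE
cmd += ' -l %s/%s %s' % (lfc_dir, fname, fpath)            # logical filename and local source
print(cmd)
# lcg-cr -t 180 --vo lhcb -n 2 -d srm.example.org -D srmv2 -s MY_TOKEN -P generated/ganga.ganga-cache-0001/input.tar.gz -l /grid/lhcb/ganga.ganga-cache-0001/input.tar.gz /tmp/input.tar.gz
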
Example No. 8
class JobTime(GangaObject):
    """Job timestamp access.
       In development

       Changes in the status of a Job are timestamped - a datetime object
       is stored in the dictionary named 'timestamps', in Coordinated
       Universal Time (UTC). More information on datetime objects can be
       found at:

       http://docs.python.org/library/datetime.html

       Datetime objects can be subtracted to produce a 'timedelta' object.
       More information about these can be found at the above address.
       '+', '*', and '/' are not supported by datetime objects.

       Datetime objects can be formatted into strings using the
       .strftime(format_string) method and the strftime codes,
       e.g. %Y -> year as integer
            %a -> abbreviated weekday name
            %M -> minutes as integer

       The full list can be found at:
       http://docs.python.org/library/datetime.html#strftime-behavior

       Standard status types with built in access methods are:
       -'new'
       -'submitted'
       -'running'
       -'completed'
       -'killed'
       -'failed'

       These return a string with default format %Y/%m/%d @ %H:%M:%S. A
       custom format can be specified in the argument.

       Any information stored within the timestamps dictionary can also be
       extracted in the same way as it would be for a standard,
       non-application-specific Python dictionary.

       For a table display of the Job's timestamps use .time.display(). For
       timestamp details from the backend use .time.details()


    """

    timestamps = {}
    sj_statlist = []

    _schema = Schema(
        Version(0, 0), {
            'timestamps':
            SimpleItem(defvalue={},
                       doc="Dictionary containing timestamps for job",
                       summary_print='_timestamps_summary_print')
        })

    _category = 'jobtime'
    _name = 'JobTime'
    _exportmethods = [
        'display', 'new', 'submitting', 'submitted', 'backend_running',
        'backend_final', 'backend_completing', 'completing', 'final',
        'running', 'runtime', 'waittime', 'submissiontime', 'details',
        'printdetails'
    ]

    def __init__(self):
        super(JobTime, self).__init__()
        self.timestamps = {}
        # this makes sure the contents of the list don't get copied when the
        # Job does.
        self.sj_statlist = []

    def __deepcopy__(self, memo):
        obj = super(JobTime, self).__deepcopy__(memo)
        # Lets not re-initialize the object as we lose history from previous submissions
        # obj.newjob()
        return obj

    def newjob(self):
        """Timestamps job upon creation.
        """
        t = datetime.datetime.utcnow()
        self.timestamps['new'] = t
        # this makes sure the contents of the list don't get copied when the
        # Job does.
        self.sj_statlist = []

    def timenow(self, status):
        """Updates timestamps as job status changes.
        """
        j = self.getJobObject()
        t_now = datetime.datetime.utcnow()
        b_list = ['running', 'completing', 'completed', 'failed']
        final = ['killed', 'failed', 'completed']
        backend_final = ['failed', 'completed']
        ganga_master = ['new', 'submitting', 'killed']

        id = j.id
        if id is None:
            id = str("unknown")
        logger.debug("Job %s called timenow('%s')", str(id), status)

        # standard method:
        if not j.subjobs:
            # backend stamps
            if status in b_list:
                for childstatus in b_list:
                    be_statetime = stripProxy(
                        j.backend).getStateTime(childstatus)
                    if be_statetime is not None:
                        if childstatus in backend_final:
                            self.timestamps["backend_final"] = be_statetime
                            logger.debug(
                                "Wrote 'backend_final' to timestamps.")
                        else:
                            self.timestamps["backend_" +
                                            childstatus] = be_statetime
                            logger.debug("Wrote 'backend_%s' to timestamps.",
                                         childstatus)
                    if childstatus == status:
                        break
            # ganga stamps
            if status in final:
                self.timestamps["final"] = t_now
                logger.debug("Wrote 'final' to timestamps.")
            else:
                self.timestamps[status] = t_now
                logger.debug("Wrote '%s' to timestamps.", status)

        # subjobs method:
        if j.master:  # identifies subjobs
            logger.debug(
                "j.time.timenow() caught subjob %d.%d in the '%s' status",
                j.master.id, j.id, status)

            for written_status in j.time.timestamps.keys():
                if written_status not in j.master.time.sj_statlist:
                    j.master.time.sj_statlist.append(written_status)
                    logger.debug("written_status: '%s' written to sj_statlist",
                                 written_status)

        # master job method
        if j.subjobs:  # identifies master job
            logger.debug(
                "j.time.timenow() caught master job %d in the '%s' status",
                j.id, status)
            if status in ganga_master:  # don't use subjob stamp for these
                self.timestamps[status] = t_now
                logger.debug(
                    "status: '%s' in ganga_master written to master timestamps.",
                    status)
            else:
                for state in self.sj_statlist:
                    if state not in ganga_master:
                        j.time.timestamps[state] = self.sjStatList_return(
                            state)
                        logger.debug(
                            "state: '%s' from sj_statlist to written to master timestamps.",
                            state)
                    else:
                        pass

    def sjStatList_return(self, status):
        list = []
        final = ['backend_final', 'final']
        j = self.getJobObject()
        for sjs in j.subjobs:
            try:
                if isinstance(sjs.time.timestamps[status], datetime.datetime):
                    list.append(sjs.time.timestamps[status])
                else:
                    logger.debug(
                        'Attempt to add a non datetime object in the timestamp, job=%d, subjob=%d',
                        j.id, sjs.id)
            except KeyError:
                logger.debug(
                    "Status '%s' not found in timestamps of job %d.%d.",
                    status, sjs.master.id, sjs.id)
        list.sort()
        try:
            if status in final:
                return list[-1]
            return list[0]
        except IndexError:
            # change this to a more appropriate debug.
            logger.debug(
                "IndexError: ID: %d, Status: '%s', length of list: %d", j.id,
                status, len(list))

    def display(self, format="%Y/%m/%d %H:%M:%S"):
        return self._display(format)

    # Justin 10.9.09: I think 'ljust' might be just as good if not better than
    # 'rjust' here:
    def _display(self, format="%Y/%m/%d %H:%M:%S", interactive=False):
        """Displays existing timestamps in a table.

           Format can be specified by typing a string of the appropriate strftime() behaviour codes as the argument.
           e.g. '%H:%M:%S' ==> 13:55:01

           For a full list of codes see
           http://docs.python.org/library/datetime.html?#strftime-behavior
        """
        retstr = ''
        T = datetime.datetime.now()
        tstring = T.strftime(format)
        length = len(tstring)
        times = [0 for k in self.timestamps.keys()]
        for i in range(0, len(self.timestamps.keys())):
            try:
                times[i] = self.timestamps[self.timestamps.keys()[i]].strftime(
                    format).rjust(length) + ' - ' + self.timestamps.keys()[i]
            except AttributeError:
                times[i] = str(self.timestamps[self.timestamps.keys(
                )[i]]).rjust(length) + ' - ' + self.timestamps.keys()[i]

        # try to make chronological - can fail when timestamps are the same to
        # nearest sec -> becomes alphabetical...
        times.sort()
        retstr = retstr + '\n' + \
            'Time (UTC)'.rjust(length) + '   Status' + '\n'
        for i in range(0, 21):
            retstr = retstr + '- '
        retstr = retstr + '\n'
        for i in range(0, len(times)):
            retstr = retstr + times[i] + '\n'
        return retstr

    def _timestamps_summary_print(self,
                                  value,
                                  verbosity_level,
                                  interactive=False):
        """Used to display timestamps when JobTime object is displayed.
        """
        return self._display(interactive=interactive)

    # This didn't work:
    #
    # def __str__(self):
    #    """ string cast """
    #    return self._display()

    def details(self, subjob=None):
        """Obtains all timestamps available from the job's specific backend.

           Subjob argument: None  = default
                             'all' = gets details for ALL SUBJOBS. You have been warned.
                             int   = gets details for subjob number 'int'

           No argument is required for a job with no subjobs.    
        """
        j = self.getJobObject()
        idstr = ''
        detdict = {}

        # If job is SUBJOB do the normal procedure. Not sure this clause is
        # necessary as subjobs will be caught normally
        if j.master:
            logger.debug("j.time.details(): subjob %d.%d caught.", j.master.id,
                         j.id)
            detdict = j.backend.timedetails()
            return detdict

        # If job is MASTER iterate over subjobs and do normal method. This
        # isn't going to be ideal for a large number of subjobs
        if j.subjobs:
            logger.debug("j.time.details(): master job %d caught.", j.id)
            idstr = str(j.id)

            # User wants 'all'
            if subjob == 'all':
                keyin = None

                # NOTE: The interactive loop below was more an exercise for learning how 'keyin' is used than a useful addition.
                # ask whether user really wants to print timedetails for all
                # their jobs:
                while keyin is None:
                    keyin = raw_input(
                        "Are you sure you want details for ALL %d subjobs(y/n)?"
                        % len(j.subjobs))
                    # if yes carry on at for loop
                    if keyin == 'y':
                        pass
                    # if no return None. Doesn't execute rest of method
                    elif keyin == 'n':
                        return None
                    # if something else - asks again
                    else:
                        logger.info("y/n please!")
                        keyin = None

                for jobs in j.subjobs:
                    subidstr = idstr + '.' + str(jobs.id)
                    # String needs more info if it is going to stay in.
                    logger.debug("Subjob: %d, Backend ID: %d", jobs.id,
                                 jobs.backend.id)
                    detdict[subidstr] = jobs.backend.timedetails()
                return detdict

            # no argument specified
            elif subjob is None:
                logger.debug(
                    "j.time.details(): no subjobs specified for this master job."
                )
                return None

            # Subjob id or string passed
            else:
                # string = error
                if not isinstance(subjob, int):
                    raise TypeError("Subjob id requires type 'int'")
                # subjob id supplied
                for sj in j.subjobs:
                    if sj.id == subjob:
                        logger.debug("Subjob: %d, Backend ID: %d", sj.id,
                                     sj.backend.id)
                        detdict = sj.backend.timedetails()
                        return detdict
                    else:
                        pass
                if subjob >= len(j.subjobs):
                    logger.warning(
                        "Index '%s' is out of range. Corresponding subjob does not exist.",
                        str(subjob))
                    return None

            logger.debug(
                "subjob arguement '%s' has failed to be caught and dealt with.",
                subjob)
            return None

        detdict = j.backend.timedetails()  # called if no subjobs
        return detdict

    def printdetails(self, subjob=None):
        """Prints backend details to screen by calling details() and printing the returned dictionary.
        """
        j = self.getJobObject()
        if subjob == 'all':
            # the warning and action taken below are pretty annoying, but I was
            # unsure how to deal with the request to print the details for all
            # n subjobs, which seems unlikely to be made.
            logger.warning(
                "It might be unwise to print all subjobs details. Use details() and extract relevant info from dictionary."
            )
            return None
        pd = self.details(subjob)
        for key in pd.keys():
            logger.info("%s\t%s", key, pd[key])

    def runtime(self):
        """Method which returns the 'runtime' of the specified job.

           The runtime is calculated as the duration between the job entering the 'running' state and the job entering the 'completed' state.
        """
        end_list = ['killed', 'completed', 'failed']
        end_stamps = {}

        # if master job, sum:
        j = self.getJobObject()
        if j.subjobs:
            masterrun = datetime.timedelta(0, 0, 0)
            for jobs in j.subjobs:
                masterrun = masterrun + jobs.time.runtime()
            return masterrun
        # all other jobs:
        return self.duration('backend_running', 'backend_final')

    def waittime(self):
        """Method which returns the waiting time of the specified job.

           The waiting time is calculated as the duration between the job entering the 'submitted' state and entering the 'running' state.
        """
        # master job:
        j = self.getJobObject()
        if j.subjobs:
            start_list = []
            end_list = []
            for jobs in j.subjobs:
                start_list.append(jobs.time.timestamps['submitted'])
                end_list.append(jobs.time.timestamps['backend_running'])
            start_list.sort()
            end_list.sort()
            start = start_list[0]
            end = end_list[len(end_list) - 1]
            masterwait = end - start
            return masterwait
        # all other jobs:
        return self.duration('submitted', 'backend_running')

    def submissiontime(self):
        """Method which returns submission time of specified job.

           Calculation: sub_time =  submitted - submitting.
        """
        j = self.getJobObject()
        if j.subjobs:
            start_list = []
            end_list = []
            for jobs in j.subjobs:
                end_list.append(jobs.time.timestamps['submitted'])
            end_list.sort()
            start = j.time.timestamps['submitting']
            end = end_list[len(end_list) - 1]
            mastersub = end - start
            return mastersub
        return self.duration('submitting', 'submitted')

    def duration(self, start, end):
        """Returns duration between two specified timestamps as timedelta object.
        """
        if start in self.timestamps.keys():
            if end in self.timestamps.keys():
                s, e = self.timestamps[start], self.timestamps[end]
                s_micro, e_micro = datetime.timedelta(
                    0, 0,
                    s.microsecond), datetime.timedelta(0, 0, e.microsecond)
                e, s = e - e_micro, s - s_micro
                td = e - s

                # method for rounding removed because timestamps aren't always recorded with microsecond precision, and stamping accuracy isn't high enough to justify doing so
                #                ds = td.days
                #               secs = td.seconds
                #                micros = td.microseconds
                #               if micros >= 500000:
                #                  secs +=1

                dur = td  # datetime.timedelta(days=ds, seconds=secs)
                return dur
            else:
                logger.warning("Could not calculate duration: '%s' not found.",
                               end)
        else:
            logger.warning("Could not calculate duration: '%s' not found.",
                           start)
        return None

    def statetime(self, status, format=None):
        """General method for obtaining the specified timestamp in specified format.
        """
        if status not in self.timestamps:
            logger.debug("Timestamp '%s' not available.", status)
            return None
        if format is not None:
            return self.timestamps[status].strftime(format)
        return self.timestamps[status]

    def new(self, format=None):
        """Method for obtaining 'new' timestamp.
        """
        return self.statetime('new', format)

    def submitting(self, format=None):
        """Method for obtaining 'submitting' timestamp.
        """
        return self.statetime('submitting', format)

    def submitted(self, format=None):
        """Method for obtaining 'submitted' timestamp.
        """
        return self.statetime('submitted', format)

    def backend_running(self, format=None):
        """Method for obtaining 'backend_running' timestamp.
        """
        return self.statetime('backend_running', format)

    def backend_final(self, format=None):
        """Method for obtaining 'backend_final' timestamp.
        """
        return self.statetime('backend_final', format)

    def backend_completing(self, format=None):
        """Method for obtaining 'backend_completing' timestamp.
        """
        return self.statetime('backend_completing', format)

    def completing(self, format=None):
        """Method for obtaining 'completing' timestamp.
        """
        return self.statetime('completing', format)

    def final(self, format=None):
        """Method for obtaining 'final' timestamp.
        """
        return self.statetime('final', format)

    def running(self, format=None):
        """Method for obtaining 'running' timestamp.
        """
        return self.statetime('running', format)
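
JobTime.duration() above subtracts two stored datetime objects after first discarding their microsecond components. The standalone snippet below (the timestamps are invented) reproduces just that arithmetic with the standard library.

import datetime

timestamps = {
    'backend_running': datetime.datetime(2024, 1, 1, 10, 0, 0, 250000),
    'backend_final':   datetime.datetime(2024, 1, 1, 11, 30, 5, 750000),
}

s, e = timestamps['backend_running'], timestamps['backend_final']
s -= datetime.timedelta(0, 0, s.microsecond)   # drop microseconds, as duration() does
e -= datetime.timedelta(0, 0, e.microsecond)
print(e - s)   # 1:30:05
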
Example No. 9
class LogicalFile(DiracFile):
    #  Logical File schema
    # Observing the 'old' 1.0 schema whilst preserving backwards compatibility
    # with the fact that we're translating the object into a DiracFile in this
    # case
    _schema = Schema(
        Version(1, 0), {
            'name':
            SimpleItem(
                defvalue="",
                doc='the LFN filename a LogicalFile is constructed with'),
            'namePattern':
            SimpleItem(
                defvalue="", doc='pattern of the file name', transient=1),
            'localDir':
            SimpleItem(
                defvalue=None,
                copyable=1,
                typelist=['str', 'type(None)'],
                doc=
                'local dir where the file is stored, used from get and put methods',
                transient=1),
            'remoteDir':
            SimpleItem(
                defvalue="",
                doc=
                'remote directory where the LFN is to be placed in the dirac base directory by the put method.',
                transient=1),
            'locations':
            SimpleItem(
                defvalue=[],
                copyable=1,
                typelist=['str'],
                sequence=1,
                doc="list of SE locations where the outputfiles are uploaded",
                transient=1),
            'compressed':
            SimpleItem(
                defvalue=False,
                typelist=['bool'],
                protected=0,
                doc=
                'whether the output file should be compressed before sending somewhere',
                transient=1),
            'lfn':
            SimpleItem(
                defvalue='',
                copyable=1,
                typelist=['str'],
                doc=
                'return the logical file name/set the logical file name to use if not '
                'using wildcards in namePattern',
                transient=1),
            'guid':
            SimpleItem(
                defvalue='',
                copyable=1,
                typelist=['str'],
                doc=
                'return the GUID/set the GUID to use if not using wildcards in the namePattern.',
                transient=1),
            'subfiles':
            ComponentItem(category='gangafiles',
                          defvalue=[],
                          hidden=1,
                          sequence=1,
                          copyable=0,
                          typelist=['GangaDirac.Lib.Files.DiracFile'],
                          doc="collected files from the wildcard namePattern",
                          transient=1),
            'failureReason':
            SimpleItem(defvalue="",
                       protected=1,
                       copyable=0,
                       doc='reason for the upload failure',
                       transient=1)
        })
    _name = "LogicalFile"

    # TODO:  Add warning to User NOT to create these objects themselves and that they should
    #       only be used for backwards compatibility to load old jobs

    def __init__(self, name=""):

        super(LogicalFile, self).__init__(lfn=name)

        self.name = name

        logger.warning(
            "!!! LogicalFile has been deprecated, this is now just a wrapper to the DiracFile object"
        )
        logger.warning(
            "!!! Please update your scripts before LogicalFile is removed")

        self._setLFNnamePattern(lfn=self.name, namePattern='')

    def __setattr__(self, name, value):

        if name == "name":
            # self.name = value
            self.lfn = value
            import os.path
            self.namePattern = os.path.basename(value)
            self.remoteDir = os.path.dirname(value)
        super(LogicalFile, self).__setattr__(name, value)

    def _attribute_filter__set__(self, attrib_name, value):
        if attrib_name == "name":
            self._setLFNnamePattern(lfn=value, namePattern='')
        return super(LogicalFile,
                     self)._attribute_filter__set__(attrib_name, value)
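
Setting name on a LogicalFile, as __setattr__ above shows, stores the value as the LFN and splits it into a name pattern and a remote directory with os.path. A short illustration (the LFN below is made up):

import os.path

lfn = '/lhcb/user/s/someone/2024_01/ntuple.root'   # hypothetical LFN
namePattern = os.path.basename(lfn)                # 'ntuple.root'
remoteDir = os.path.dirname(lfn)                   # '/lhcb/user/s/someone/2024_01'
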
Example No. 10
class ITransform(GangaObject):
    _schema = Schema(Version(1, 0), {
        'status': SimpleItem(defvalue='new', protected=1, copyable=0, doc='Status - running, pause or completed', typelist=["str"]),
        'name': SimpleItem(defvalue='Simple Transform', doc='Name of the transform (cosmetic)', typelist=["str"]),
        'application': ComponentItem('applications', defvalue=None, optional=1, load_default=False, doc='Application of the Transform.'),
        'inputsandbox': FileItem(defvalue=[], typelist=['str', 'Ganga.GPIDev.Lib.File.File.File'], sequence=1, doc="list of File objects shipped to the worker node "),
        'outputsandbox': SimpleItem(defvalue=[], typelist=['str'], sequence=1, doc="list of filenames or patterns shipped from the worker node"),
        'backend': ComponentItem('backends', defvalue=None, optional=1, load_default=False, doc='Backend of the Transform.'),
        'splitter': ComponentItem('splitters', defvalue=None, optional=1, load_default=False, doc='Splitter used on each unit of the Transform.'),
        'postprocessors': ComponentItem('postprocessor', defvalue=None, doc='list of postprocessors to run after job has finished'),
        'merger': ComponentItem('mergers', defvalue=None, hidden=1, copyable=0, load_default=0, optional=1, doc='Merger to be done over all units when complete.'),
        'unit_merger': ComponentItem('mergers', defvalue=None, load_default=0, optional=1, doc='Merger to be copied and run on each unit separately.'),
        'copy_output': ComponentItem('datasets', defvalue=None, load_default=0, optional=1, doc='The dataset to copy all units output to, e.g. Grid dataset -> Local Dataset'),
        'unit_copy_output': ComponentItem('datasets', defvalue=None, load_default=0, optional=1, doc='The dataset to copy each individual unit output to, e.g. Grid dataset -> Local Dataset'),
        'run_limit': SimpleItem(defvalue=8, doc='Number of times processing of a partition is attempted.', protected=1, typelist=["int"]),
        'minor_run_limit': SimpleItem(defvalue=3, doc='Number of times a unit can be resubmitted', protected=1, typelist=["int"]),
        'major_run_limit': SimpleItem(defvalue=3, doc='Number of times a unit can be rebrokered', protected=1, typelist=["int"]),
        'units': ComponentItem('units', defvalue=[], sequence=1, copyable=1, doc='list of units'),
        'inputdata': ComponentItem('datasets', defvalue=[], sequence=1, protected=1, optional=1, load_default=False, doc='Input datasets to run over'),
        'outputdata': ComponentItem('datasets', defvalue=None, optional=1, load_default=False, doc='Output dataset template'),
        'inputfiles': GangaFileItem(defvalue=[], typelist=['str', 'Ganga.GPIDev.Adapters.IGangaFile.IGangaFile'], sequence=1, doc="list of file objects that will act as input files for a job"),
        'outputfiles' : GangaFileItem(defvalue=[], typelist=['str', 'Ganga.GPIDev.Adapters.IGangaFile.IGangaFile'], sequence=1, doc="list of \
OutputFile objects to be copied to all jobs"),
        'metadata': ComponentItem('metadata', defvalue=MetadataDict(), doc='the metadata', protected=1),
        'rebroker_on_job_fail': SimpleItem(defvalue=True, doc='Rebroker if too many minor resubs'),
        'abort_loop_on_submit': SimpleItem(defvalue=True, doc='Break out of the Task Loop after submissions'),
        'required_trfs': SimpleItem(defvalue=[], typelist=['int'], sequence=1, doc="IDs of transforms that must complete before this unit will start. NOTE DOESN'T COPY OUTPUT DATA TO INPUT DATA. Use TaskChainInput Dataset for that."),
        'chain_delay': SimpleItem(defvalue=0, doc='Minutes delay between a required/chained unit completing and starting this one', protected=0, typelist=["int"]),
        'submit_with_threads': SimpleItem(defvalue=False, doc='Use Ganga Threads for submission'),
        'max_active_threads': SimpleItem(defvalue=10, doc='Maximum number of Ganga Threads to use. Note that the number of simultaneous threads is controlled by the queue system (default is 5)'),
        'info' : SimpleItem(defvalue=[],typelist=['str'],protected=1,sequence=1,doc="Info showing status transitions and unit info"),
        'id': SimpleItem(defvalue=-1, protected=1, doc='ID of the Transform', typelist=["int"]),
        #'force_single_unit' : SimpleItem(defvalue=False, doc='Force all input data into one Unit'),
    })

    _category = 'transforms'
    _name = 'ITransform'
    _exportmethods = ['addInputData', 'resetUnit', 'setRunLimit', 'getJobs', 'setMinorRunLimit',
                      'setMajorRunLimit', 'getID', 'overview', 'resetUnitsByStatus', 'removeUnusedJobs',
                      'showInfo', 'showUnitInfo', 'pause', 'n_all', 'n_status' ]
    _hidden = 0

    def showInfo(self):
        """Print out the info in a nice way"""
        print("\n".join( self.info ))

    def showUnitInfo(self, uid):
        """Print out the given unit info in a nice way"""
        self.units[uid].showInfo()

    def getJobs(self):
        """Return a list of the currently active job ids"""
        joblist = []
        for u in self.units:
            joblist += u.active_job_ids
        return joblist

    def setMinorRunLimit(self, newRL):
        """Set the number of times a job will be resubmitted before a major resubmit is attempted"""
        self.minor_run_limit = newRL

    def setMajorRunLimit(self, newRL):
        """Set the number of times a job will be rebrokered before the transform is paused"""
        self.major_run_limit = newRL

    def setRunLimit(self, newRL):
        """Set the total (minor+major) number of times a job should be resubmitted before the transform is paused"""
        self.run_limit = newRL

    def overview(self, status=''):
        """Show the status of the units in this transform"""
        for unit in self.units:
            # display colour given state
            o = ""
            o += ("%d:  " % self.units.index(unit)) + unit.name

            # is unit active?
            if unit.active:
                o += " " * (40 - len(o) + 3) + "*"
            else:
                o += " " * (40 - len(o) + 3) + "-"

            # sub job status
            o += "\t %i" % unit.n_status("submitted")
            o += "\t %i" % unit.n_status("running")
            o += "\t %i" % unit.n_status("completed")
            o += "\t %i" % unit.n_status("failed")
            o += "\t %i" % unit.minor_resub_count
            o += "\t %i" % unit.major_resub_count

            # change colour on state
            if unit.status == 'completed':
                o = markup(o, overview_colours["completed"])
            elif not unit.active:
                o = markup(o, overview_colours["bad"])
            elif unit.status == "recreating":
                o = markup(o, overview_colours["attempted"])
            elif len(unit.active_job_ids) == 0:
                o = markup(o, overview_colours["hold"])
            else:
                o = markup(o, overview_colours["running"])

            print(o)


# Special methods:
    def __init__(self):
        super(ITransform, self).__init__()
        self.initialize()

    def _readonly(self):
        """A transform is read-only if the status is not new."""
        if self.status == "new":
            return 0
        return 1

    def initialize(self):
        from Ganga import GPI
        self.backend = stripProxy(GPI.Local())
        self.updateStatus("new")

    def check(self):
        """Check this transform has valid data, etc. and has the correct units"""

        # ignore anything but new transforms
        if self.status != "new":
            return

        # first, validate the transform
        if not self.validate():
            raise ApplicationConfigurationError(
                None, "Validate failed for Transform %s" % self.name)

        self.updateStatus("running")

    def startup(self):
        """This function is used to set the status after restarting Ganga"""
        pass

# Public methods
    def resetUnit(self, uid):
        """Reset the given unit"""
        addInfoString( self, "Reseting Unit %i" % ( uid ) )

        for u in self.units:
            if u.getID() == uid:
                u.reset()
                break

        # find any chained units and mark for recreation
        for trf in self._getParent().transforms:
            for u2 in trf.units:
                for req in u2.req_units:
                    if req == "%d:%d" % (self.getID(), u.getID()) or req == "%d:ALL" % (self.getID()):
                        trf.resetUnit(u2.getID())

        self.updateStatus("running")

    def getID(self):
        """Return the index of this trf in the parent task"""

        # if the id isn't already set, use the index from the parent Task
        if self.id < 0:
            task = self._getParent()
            if not task:
                raise ApplicationConfigurationError(
                    None, "This transform has not been associated with a task and so there is no ID available")
            self.id = task.transforms.index(self)
        
        return self.id

    def run(self, check=True):
        """Sets this transform to running status"""
        if self.status == "new" and check:
            self.check()
        if self.status != "completed":
            self.updateStatus("running")
            task = self._getParent()
            if task:
                task.updateStatus()
        else:
            logger.warning("Transform is already completed!")

    def update(self):
        """Called by the parent task to check for status updates, submit jobs, etc."""
        #logger.warning("Entered Transform %d update function..." % self.getID())

        if self.status == "pause" or self.status == "new":
            return 0

        # check for complete required units
        task = self._getParent()
        for trf_id in self.required_trfs:
            if task.transforms[trf_id].status != "completed":
                return 0

        # set the start time if not already set
        if len(self.required_trfs) > 0 and self.units[0].start_time == 0:
            for unit in self.units:
                unit.start_time = time.time() + self.chain_delay * 60 - 1

        # report the info for this transform
        unit_status = { "new":0, "hold":0, "running":0, "completed":0, "bad":0, "recreating":0 }
        for unit in self.units:
            unit_status[unit.status] += 1
         
        info_str = "Unit overview: %i units, %i new, %i hold, %i running, %i completed, %i bad. to_sub %i" % (len(self.units), unit_status["new"], unit_status["hold"],
                                                                                                              unit_status["running"], unit_status["completed"],
                                                                                                              unit_status["bad"], self._getParent().n_tosub())
      
        addInfoString(self, info_str)
                
        # ask the unit splitter if we should create any more units given the
        # current data
        self.createUnits()

        # loop over units and update them ((re)submits will be called here)
        old_status = self.status
        unit_status_list = []

        # find submissions first
        unit_update_list = []
        for unit in self.units:

            if not unit.checkForSubmission() and not unit.checkForResubmission():
                unit_update_list.append(unit)
                continue

            if unit.update() and self.abort_loop_on_submit:
                logger.info("Unit %d of transform %d, Task %d has aborted the loop" % (
                    unit.getID(), self.getID(), task.id))
                return 1

            unit_status_list.append(unit.status)

        # now check for download
        for unit in unit_update_list:
            if unit.update() and self.abort_loop_on_submit:
                logger.info("Unit %d of transform %d, Task %d has aborted the loop" % (
                    unit.getID(), self.getID(), task.id))
                return 1

            unit_status_list.append(unit.status)

        from Ganga.GPIDev.Lib.Tasks.TaskChainInput import TaskChainInput
        # check for any TaskChainInput completions
        for ds in self.inputdata:
            if isType(ds, TaskChainInput) and ds.input_trf_id != -1:
                if task.transforms[ds.input_trf_id].status != "completed":
                    return 0

        # update status and check
        old_status = self.status
        for state in ['running', 'hold', 'bad', 'completed']:
            if state in unit_status_list:
                if state == 'hold':
                    state = "running"
                if state != self.status:
                    self.updateStatus(state)
                break

    def createUnits(self):
        """Create new units if required given the inputdata"""

        from Ganga.GPIDev.Lib.Tasks.TaskChainInput import TaskChainInput
        # check for chaining
        for ds in self.inputdata:
            if isType(ds, TaskChainInput) and ds.input_trf_id != -1:

                # check for single unit
                if ds.single_unit:

                    # is there a unit already linked?
                    done = False
                    rec_unit = None
                    for out_unit in self.units:
                        if '%d:ALL' % (ds.input_trf_id) in out_unit.req_units:
                            done = True
                            # check if the unit is being recreated
                            if out_unit.status == "recreating":
                                rec_unit = out_unit
                            break

                    if not done or rec_unit:
                        new_unit = self.createChainUnit(
                            self._getParent().transforms[ds.input_trf_id].units, ds.use_copy_output)
                        if new_unit:
                            self.addChainUnitToTRF(
                                new_unit, ds, -1, prev_unit=rec_unit)

                else:

                    # loop over units in parent trf and create units as
                    # required
                    for in_unit in self._getParent().transforms[ds.input_trf_id].units:

                        # is there a unit already linked?
                        done = False
                        rec_unit = None
                        for out_unit in self.units:
                            if '%d:%d' % (ds.input_trf_id, in_unit.getID()) in out_unit.req_units:
                                done = True
                                # check if the unit is being recreated
                                if out_unit.status == "recreating":
                                    rec_unit = out_unit
                                break

                        if not done or rec_unit:
                            new_unit = self.createChainUnit(
                                [in_unit], ds.use_copy_output)
                            if new_unit:
                                self.addChainUnitToTRF(
                                    new_unit, ds, in_unit.getID(), prev_unit=rec_unit)

    def createChainUnit(self, parent_units, use_copy_output=True):
        """Create a chained unit given the parent outputdata"""
        return IUnit()

    def addChainUnitToTRF(self, unit, inDS, unit_id=-1, prev_unit=None):
        """Add a chained unit to this TRF. Override for more control"""
        if unit_id == -1:
            unit.req_units.append('%d:ALL' % (inDS.input_trf_id))
            unit.name = "Parent: TRF %d, All Units" % (inDS.input_trf_id)
        else:
            unit.req_units.append('%d:%d' % (inDS.input_trf_id, unit_id))
            unit.name = "Parent: TRF %d, Unit %d" % (
                inDS.input_trf_id, unit_id)

        self.addUnitToTRF(unit, prev_unit)

    def addInputData(self, inDS):
        """Add the given input dataset to the list"""
        self.inputdata.append(inDS)

    def pause(self):
        """Pause the task - the background thread will not submit new jobs from this task"""
        if self.status != "completed":
            self.updateStatus("pause")
            #self.status = "pause"
            task = self._getParent()
            if task:
                task.updateStatus()
        else:
            logger.debug("Transform is already completed!")

    def setRunlimit(self, newRL):
        """Set the number of times a job should be resubmitted before the transform is paused"""
        self.run_limit = newRL
        logger.debug("Runlimit set to %i", newRL)

# Methods that can/should be overridden by derived classes
    def validate(self):
        """Override this to validate that the transform is OK"""

        from Ganga.GPIDev.Lib.Tasks.TaskLocalCopy import TaskLocalCopy
        # make sure a path has been selected for any local downloads
        if self.unit_copy_output is not None and isType(self.unit_copy_output, TaskLocalCopy):
            if self.unit_copy_output.local_location == '':
                logger.error("No path selected for Local Output Copy")
                return False

        if self.copy_output is not None and isType(self.copy_output, TaskLocalCopy):
            if self.copy_output.local_location == '':
                logger.error("No path selected for Local Output Copy")
                return False

        # this is a generic trf so assume the application and splitter will do
        # all the work
        return True

    def addUnitToTRF(self, unit, prev_unit=None):
        """Add a unit to this Transform given the input and output data"""
        if not unit:
            raise ApplicationConfigurationError(None, "addUnitTOTRF failed for Transform %d (%s): No unit specified" % (self.getID(), self.name))

        addInfoString( self, "Adding Unit to TRF...")
        unit.updateStatus("hold")
        unit.active = True
        if prev_unit:
            unit.prev_job_ids += prev_unit.prev_job_ids
            self.units[prev_unit.getID()] = unit
        else:
            self.units.append(unit)
            stripProxy(unit).id = len(self.units) - 1

# Information methods
    def fqn(self):
        task = self._getParent()
        if task:
            return "Task %i Transform %i" % (task.id, task.transforms.index(self))
        else:
            return "Unassigned Transform '%s'" % (self.name)

    def n_active(self):
        return sum([u.n_active() for u in self.units])

    def n_all(self):
        return sum([u.n_all() for u in self.units])

    def n_status(self, status):
        return sum([u.n_status(status) for u in self.units])

    def info(self):
        logger.info(markup("%s '%s'" % (getName(self), self.name), status_colours[self.status]))
        logger.info("* backend: %s" % getName(self.backend))
        logger.info("Application:")
        self.application.printTree()

    def updateStatus(self, status):
        """Update the transform status"""
        self.status = status

    def createUnitCopyOutputDS(self, unit_id):
        """Create a the Copy Output dataset to use with this unit. Overload to handle more than the basics"""

        from Ganga.GPIDev.Lib.Tasks.TaskLocalCopy import TaskLocalCopy
        if not isType(self.unit_copy_output, TaskLocalCopy):
            logger.warning("Default implementation of createUnitCopyOutputDS can't handle datasets of type '%s'" % getName(self.unit_copy_output))
            return

        # create copies of the Copy Output DS and add Unit name to path
        self.units[unit_id].copy_output = self.unit_copy_output.clone()
        self.units[unit_id].copy_output.local_location = os.path.join(
            self.unit_copy_output.local_location, self.units[unit_id].name.replace(":", "_").replace(" ", "").replace(",", "_"))

    def __setattr__(self, attr, value):

        if attr == 'outputfiles':

            if value != []:
                if self.outputdata is not None:
                    logger.error(
                        'ITransform.outputdata is set, you can\'t set ITransform.outputfiles')
                    return
                elif self.outputsandbox != []:
                    logger.error(
                        'ITransform.outputsandbox is set, you can\'t set ITransform.outputfiles')
                    return

            # reduce duplicate values here, leave only duplicates for LCG,
            # where we can have replicas
            uniqueValuesDict = []
            uniqueValues = []

            for val in value:
                key = '%s%s' % (getName(val), val.namePattern)
                if key not in uniqueValuesDict:
                    uniqueValuesDict.append(key)
                    uniqueValues.append(val)
                elif getName(val) == 'LCGSEFile':
                    uniqueValues.append(val)

            super(ITransform, self).__setattr__(attr, uniqueValues)

        elif attr == 'inputfiles':

            if value != []:
                if self.inputsandbox != []:
                    logger.error(
                        'ITransform.inputsandbox is set, you can\'t set ITransform.inputfiles')
                    return

            super(ITransform, self).__setattr__(attr, value)

        elif attr == 'outputsandbox':

            if value != []:

                if getConfig('Output')['ForbidLegacyOutput']:
                    logger.error(
                        'Use of ITransform.outputsandbox is forbidden, please use ITransform.outputfiles')
                    return

                if self.outputfiles != []:
                    logger.error(
                        'ITransform.outputfiles is set, you can\'t set ITransform.outputsandbox')
                    return

            super(ITransform, self).__setattr__(attr, value)

        elif attr == 'inputsandbox':

            if value != []:

                if getConfig('Output')['ForbidLegacyInput']:
                    logger.error(
                        'Use of ITransform.inputsandbox is forbidden, please use ITransform.inputfiles')
                    return

                if self.inputfiles != []:
                    logger.error(
                        'ITransform.inputfiles is set, you can\'t set ITransform.inputsandbox')
                    return

            super(ITransform, self).__setattr__(attr, value)

        elif attr == 'outputdata':

            if value is not None:

                if getConfig('Output')['ForbidLegacyOutput']:
                    logger.error(
                        'Use of ITransform.outputdata is forbidden, please use ITransform.outputfiles')
                    return

                if self.outputfiles != []:
                    logger.error(
                        'ITransform.outputfiles is set, you can\'t set ITransform.outputdata')
                    return
            super(ITransform, self).__setattr__(attr, value)

        else:
            super(ITransform, self).__setattr__(attr, value)

    def resetUnitsByStatus(self, status='bad'):
        """Reset all units of a given status"""
        for unit in self.units:
            if unit.status == status:
                logger.info("Resetting Unit %d, Transform %d..." %
                            (unit.getID(), self.getID()))
                self.resetUnit(unit.getID())

    def checkUnitsAreCompleted(self, parent_units):
        """Check the given parent units are complete"""
        for parent in parent_units:
            if len(parent.active_job_ids) == 0 or parent.status != "completed":
                return False

        return True

    def getChainInclExclMasks(self, parent_units):
        """return the include/exclude masks from the TaskChainInput"""
        incl_pat_list = []
        excl_pat_list = []
        from Ganga.GPIDev.Lib.Tasks.TaskChainInput import TaskChainInput
        for parent in parent_units:
            for inds in self.inputdata:
                if isType(inds, TaskChainInput) and inds.input_trf_id == parent._getParent().getID():
                    incl_pat_list += inds.include_file_mask
                    excl_pat_list += inds.exclude_file_mask

        return incl_pat_list, excl_pat_list

    def getParentUnitJobs(self, parent_units, include_subjobs=True):
        """Return the list of parent jobs"""
        job_list = []
        for parent in parent_units:
            job = GPI.jobs(parent.active_job_ids[0])
            if job.subjobs:
                job_list += job.subjobs
            else:
                job_list += [job]

        return job_list

    def removeUnusedJobs(self):
        """Remove all jobs that aren't being used, e.g. failed jobs"""
        for unit in self.units:
            for jid in unit.prev_job_ids:
                try:
                    logger.warning("Removing job '%d'..." % jid)
                    job = GPI.jobs(jid)
                    job.remove()
                except Exception as err:
                    logger.debug("removeUnused: %s" % str(err))
                    logger.error("Problem removing job '%d'" % jid)
Exemplo n.º 11
0
class ISplitter(GangaObject):
    """
    """
    _schema = Schema(Version(0, 0), {})
    _category = 'splitters'
    _hidden = 1

    def createSubjob(self, job, additional_skip_args=None):
        """ Create a new subjob by copying the master job and setting all fields correctly.
        """
        from Ganga.GPIDev.Lib.Job.Job import Job
        if additional_skip_args is None:
            additional_skip_args = []

        j = Job()
        skipping_args = [
            'splitter', 'inputsandbox', 'inputfiles', 'inputdata', 'subjobs'
        ]
        for arg in additional_skip_args:
            skipping_args.append(arg)
        j.copyFrom(job, skipping_args)
        j.splitter = None
        j.inputsandbox = []
        j.inputfiles = []
        j.inputdata = None
        return j

    def split(self, job):
        """ Return a list of subjobs generated from a master job.  The
        original  master  job should  not  be  modified.  This  method
        should be implemented in the derived classes.

        Splitter  changes certain  parts of  the subjobs  i.e. mutates
        certain properties (otherwise all  subjobs would be the same).
        Only  these  properties  may  be mutated  which  are  declared
        'splitable'  in  the   schema.  This  restriction  applies  to
        application  objects to  avoid inconsistencies  if application
        handler is not able to deal with modified arguments.

        In the current implementation the type of the backend cannot
        be changed either.

        """

        raise NotImplementedError

    def validatedSplit(self, job):
        """ Perform splitting using the split() method and validate the mutability
        invariants. If the invariants are broken (or exception occurs in the
        split() method) then SplittingError exception is raised. This method is
        called directly by the framework and should not be modified in the derived
        classes. """

        # try:
        subjobs = self.split(stripProxy(job))
        # except Exception,x:
        #raise SplittingError(x)
        #raise x
        # if not len(subjobs):
        #raise SplittingError('splitter did not create any subjobs')

        cnt = 0
        for s in subjobs:
            if not isType(s.backend, type(stripProxy(job.backend))):
                raise SplittingError(
                    'masterjob backend %s is not the same as the subjob (probable subjob id=%d) backend %s'
                    % (job.backend._name, cnt, getName(s.backend)))
            cnt += 1

        return subjobs
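
Because split() is abstract, a minimal sketch of a concrete splitter is shown below. It assumes the usual Ganga schema helpers (Schema, Version, SimpleItem) are importable alongside ISplitter; the class name CopySplitter and its 'copies' attribute are purely illustrative, not part of Ganga.

class CopySplitter(ISplitter):
    """Illustrative splitter: produce N identical subjobs from the master job."""
    _schema = Schema(Version(1, 0), {
        'copies': SimpleItem(defvalue=2, doc='Number of identical subjobs to create.')
    })
    _category = 'splitters'
    _name = 'CopySplitter'

    def split(self, job):
        # createSubjob() copies the master job and blanks the split-related fields,
        # leaving the master itself unmodified as required by the contract above.
        return [self.createSubjob(job) for _ in range(self.copies)]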
Exemplo n.º 12
0
class File(GangaObject):
    """Represent the files, both local and remote and provide an interface to transparently get access to them.

    Typically in the context of job submission, the files are copied to the directory where the application
    runs on the worker node. The 'subdir' attribute influences the destination directory. The 'subdir' feature
    is not universally supported, however, and needs a review.

    """
    _schema = Schema(
        Version(1, 1), {
            'name':
            SimpleItem(defvalue="", doc='path to the file source'),
            'subdir':
            SimpleItem(defvalue=os.curdir,
                       doc='destination subdirectory (a relative path)'),
            'executable':
            SimpleItem(
                defvalue=False,
                hidden=True,
                transient=True,
                doc=
                'specify if executable bit should be set when the file is created (internal framework use)'
            )
        })
    _category = 'files'
    _name = "File"
    _exportmethods = ["getPathInSandbox", "exists", "create", "isExecutable"]

    # added a subdirectory to the File object. The default is os.curdir, that is "." in Unix.
    # The subdir is a relative path and will be appended to the pathname when writing out files.
    # Therefore changing subdir to anything starting with "/" will still end up relative
    # to the pathname when the file is copied.
    #
    # There is no protection against using the parent directory. So ".." is legal and will make
    # the file end up in the parent directory. - AM
    def __init__(self, name=None, subdir=os.curdir):
        super(File, self).__init__()

        if name is not None:
            assert (isinstance(name, str))
            self.name = name

        if subdir is not None:
            self.subdir = subdir

    def __construct__(self, args):
        if len(args) == 1 and isinstance(args[0], str):
            v = args[0]
            import os.path
            expanded = expandfilename(v)
            # if it is not already an absolute filename
            if not urlprefix.match(expanded):
                if os.path.exists(os.path.abspath(expanded)):
                    self.name = os.path.abspath(expanded)
                else:
                    self.name = v
            else:  # bugfix #20545
                self.name = expanded
        else:
            super(File, self).__construct__(args)

    def _attribute_filter__set__(self, attribName, attribValue):
        if attribName == 'name':
            return expandfilename(attribValue)
        return attribValue

    def getPathInSandbox(self):
        """return a relative location of a file in a sandbox: subdir/name"""
        from Ganga.Utility.files import real_basename
        return os.path.join(self.subdir, real_basename(self.name))

    def exists(self):
        """check if the file exists (as specified by 'name')"""
        import os.path
        return os.path.isfile(expandfilename(self.name))

    def create(self, outname):
        """create a file in  a local filesystem as 'outname', maintain
        the original permissions """
        import shutil

        shutil.copy(expandfilename(self.name), outname)
        if self.executable:
            chmod_executable(outname)

    def __repr__(self):
        """Get   the  representation   of  the   file.  Since   the  a
        SimpleStreamer uses  __repr__ for persistency  it is important
        to return  a valid python expression  which fully reconstructs
        the object.  """

        return "File(name='%s',subdir='%s')" % (self.name, self.subdir)

    def isExecutable(self):
        """  return true  if  a file  is  create()'ed with  executable
        permissions,  i.e. the  permissions of  the  existing 'source'
        file are checked"""
        return self.executable or is_executable(expandfilename(self.name))
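
A short illustrative sketch of the File interface follows; the paths are hypothetical.

f = File('/tmp/run_analysis.sh', subdir='scripts')
f.exists()                        # True only if /tmp/run_analysis.sh is a regular file
f.getPathInSandbox()              # 'scripts/run_analysis.sh', i.e. subdir/name
f.create('/tmp/sandbox_copy.sh')  # copy the file, setting the executable bit if the hidden flag is set
f.isExecutable()                  # True if the source file carries executable permissions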
Exemplo n.º 13
0
class ShareDir(GangaObject):
    """Represents the directory used to store resources that are shared amongst multiple Ganga objects.

    Currently this is only used in the context of the prepare() method for certain applications, such as
    the Executable() application. A single ("prepared") application can be associated with multiple jobs.

    """
    _schema = Schema(
        Version(1, 0), {
            'name':
            SimpleItem(defvalue='', doc='path to the file source'),
            'subdir':
            SimpleItem(defvalue=os.curdir,
                       doc='destination subdirectory (a relative path)')
        })

    _category = 'shareddirs'
    _exportmethods = ['add', 'ls']
    _name = "ShareDir"
    _data = None

    #    def _readonly(self):
    #        return True

    def __init__(self, name=None, subdir=os.curdir):
        super(ShareDir, self).__init__()
        self._setRegistry(None)

        if name is not None:
            self.name = name
        else:
            # continue generating directory names until we create a unique one
            # (which will likely be on the first attempt).
            while True:
                name = 'conf-{0}'.format(uuid.uuid4())
                if not os.path.isdir(os.path.join(getSharedPath(), name)):
                    os.makedirs(os.path.join(getSharedPath(), name))

                if not os.path.isdir(os.path.join(getSharedPath(), name)):
                    logger.error("ERROR creating path: %s" %
                                 os.path.join(getSharedPath(), name))
                    raise GangaException("ShareDir ERROR")
                else:
                    break
            self.name = str(name)

            # incrementing then decrementing the shareref counter has the effect of putting the newly
            # created ShareDir into the shareref table. This is desirable if a ShareDir is created in isolation,
            # filled with files, then assigned to an application.
            #a=Job(); s=ShareDir(); a.application.is_prepared=s
        #shareref = GPIProxyObjectFactory(getRegistry("prep").getShareRef())
        # shareref.increase(self.name)
        # shareref.decrease(self.name)

    def __deepcopy__(self, memo):
        return super(ShareDir, self).__deepcopy__(memo)

    def add(self, input):
        from Ganga.Core.GangaRepository import getRegistry
        if not isType(input, list):
            input = [input]
        for item in input:
            if isType(item, str):
                if os.path.isfile(expandfilename(item)):
                    logger.info('Copying file %s to shared directory %s' %
                                (item, self.name))
                    shutil.copy2(expandfilename(item),
                                 os.path.join(getSharedPath(), self.name))
                    shareref = GPIProxyObjectFactory(
                        getRegistry("prep").getShareRef())
                    shareref.increase(self.name)
                    shareref.decrease(self.name)
                else:
                    logger.error('File %s not found' % expandfilename(item))
            elif isType(item, File) and item.name != '' and os.path.isfile(
                    expandfilename(item.name)):
                logger.info('Copying file object %s to shared directory %s' %
                            (item.name, self.name))
                shutil.copy2(expandfilename(item.name),
                             os.path.join(getSharedPath(), self.name))
                shareref = GPIProxyObjectFactory(
                    getRegistry("prep").getShareRef())
                shareref.increase(self.name)
                shareref.decrease(self.name)
            else:
                logger.error('File %s not found' % expandfilename(item.name))

    def ls(self):
        """
        Print the contents of the ShareDir
        """
        full_shareddir_path = os.path.join(getSharedPath(), self.name)
        try:
            os.path.isdir(full_shareddir_path)
            cmd = "find '%s'" % (full_shareddir_path)
            files = os.popen(cmd).read().strip().split('\n')
            padding = '|  '
            for file in files:
                level = file.count(os.sep)
                level = level - 6
                pieces = file.split(os.sep)
                symbol = {0: '', 1: '/'}[os.path.isdir(file)]
                logger.info(padding * level + pieces[-1] + symbol)
        except IOError:
            logger.warn('ShareDir %s not found on storage' %
                        full_shareddir_path)

    def exists(self):
        """check if the file exists (as specified by 'name')"""
        import os.path
        return os.path.isdir(expandfilename(self.name))

    def create(self, outname):
        """create a file in  a local filesystem as 'outname', maintain
        the original permissions """
        import shutil

        shutil.copy(expandfilename(self.name), outname)
        if self.executable:
            chmod_executable(outname)

    def __repr__(self):
        """Get   the  representation   of  the   file.  Since   the  a
        SimpleStreamer uses  __repr__ for persistency  it is important
        to return  a valid python expression  which fully reconstructs
        the object.  """

        return "ShareDir(name='%s',subdir='%s')" % (self.name, self.subdir)

    def isExecutable(self):
        """  return true  if  a file  is  create()'ed with  executable
        permissions,  i.e. the  permissions of  the  existing 'source'
        file are checked"""
        return self.executable or is_executable(expandfilename(self.name))
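
A short illustrative sketch of ShareDir usage follows, assuming an interactive Ganga session (mirroring the commented example in __init__); the local file path is hypothetical.

s = ShareDir()                    # creates a fresh 'conf-<uuid>' directory under the shared path
s.add('/tmp/selection_cuts.txt')  # copy the file in and update the 'prep' shareref table
s.ls()                            # list the contents of the shared directory
j = Job()
j.application.is_prepared = s     # attach the prepared directory to an application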
Exemplo n.º 14
0
class IMerger(IPostProcessor):
    """
    Contains the interface for all mergers; all mergers should inherit from this object.
    """

    # set outputdir for auto merge policy flag
    # the default behaviour (True) is that outputdir is set by the runAutoMerge() function in the Merger.py module
    # however, if this flag is set to False then merge() will be called for auto merge with sum_outputdir set to None,
    # and it is then up to the subclass to decide where the output goes in the case of auto merge
    set_outputdir_for_automerge = True

    _category = 'postprocessor'
    _exportmethods = ['merge']
    _name = 'IMerger'
    _hidden = 1
    _schema = Schema(
        Version(1, 0), {
            'files':
            SimpleItem(defvalue=[],
                       typelist=[str],
                       sequence=1,
                       doc='A list of files to merge.'),
            'ignorefailed':
            SimpleItem(
                defvalue=False,
                doc=
                'Jobs that are in the failed or killed states will be excluded from the merge when this flag is set to True.'
            ),
            'overwrite':
            SimpleItem(
                defvalue=False,
                doc=
                'The default behaviour for this Merger object. Will overwrite output files.'
            ),
        })
    order = 1

    def execute(self, job, newstatus):
        """
        Execute
        """
        if (len(job.subjobs) != 0):
            try:
                return self.merge(job.subjobs, job.outputdir)
            except PostProcessException as e:
                logger.error("%s" % e)
                return self.failure
        else:
            return True

    def merge(self, jobs, outputdir=None, ignorefailed=None, overwrite=None):

        if ignorefailed is None:
            ignorefailed = self.ignorefailed

        if overwrite is None:
            overwrite = self.overwrite

        from Ganga.GPIDev.Lib.Job import Job

        if not outputdir:
            outputdir = getDefaultMergeDir()
        else:
            if isType(outputdir, Job):
                # use info from job
                outputdir = outputdir.outputdir
            else:
                outputdir = os.path.expanduser(outputdir)

        files = {}

        if isType(jobs, Job):
            if outputdir is None:
                outputdir = jobs.outputdir
            return self.merge(jobs.subjobs,
                              outputdir=outputdir,
                              ignorefailed=ignorefailed,
                              overwrite=overwrite)

        if not len(jobs):
            logger.warning(
                'The jobslice given was empty. The merge will not continue.')
            return self.success

        for j in jobs:
            # first check that the job is ok
            if j.status != 'completed':
                # check if we can keep going
                if j.status == 'failed' or j.status == 'killed':
                    if ignorefailed:
                        logger.warning(
                            'Job %s has status %s and is being ignored.',
                            j.fqid, j.status)
                        continue
                    else:
                        raise PostProcessException(
                            'Job %s has status %s and so the merge can not continue. '
                            'This can be overridden with the ignorefailed flag.'
                            % (j.fqid, j.status))
                else:
                    raise PostProcessException(
                        "Job %s is in an unsupported status %s and so the merge can not continue. '\
                    'Supported statuses are 'completed', 'failed' or 'killed' (if the ignorefailed flag is set)."
                        % (j.fqid, j.status))

            if len(j.subjobs):
                sub_result = self.merge(j.subjobs,
                                        outputdir=j.outputdir,
                                        ignorefailed=ignorefailed,
                                        overwrite=overwrite)
                if (sub_result == self.failure) and not ignorefailed:
                    raise PostProcessException(
                        'The merge of Job %s failed and so the merge can not continue. '
                        'This can be overridden with the ignorefailed flag.' %
                        j.fqid)

            import glob
            for f in self.files:

                for matchedFile in glob.glob(os.path.join(j.outputdir, f)):
                    relMatchedFile = ''
                    try:
                        relMatchedFile = os.path.relpath(
                            matchedFile, j.outputdir)
                    except Exception as err:
                        logger.debug("Err: %s" % err)
                        Ganga.Utility.logging.log_unknown_exception()
                        relMatchedFile = relpath(matchedFile, j.outputdir)
                    if relMatchedFile in files:
                        files[relMatchedFile].append(matchedFile)
                    else:
                        files[relMatchedFile] = [matchedFile]

                if not len(glob.glob(os.path.join(j.outputdir, f))):
                    if ignorefailed:
                        logger.warning(
                            'The file pattern %s in Job %s was not found. The file will be ignored.',
                            f, j.fqid)
                        continue
                    else:
                        raise PostProcessException(
                            'The file pattern %s in Job %s was not found and so the merge can not continue. '
                            'This can be overridden with the ignorefailed flag.'
                            % (f, j.fqid))
                # files[f].extend(matchedFiles)

        for k in files.keys():
            # make sure we are not going to over write anything
            outputfile = os.path.join(outputdir, k)
            if os.path.exists(outputfile) and not overwrite:
                raise PostProcessException(
                    'The merge process can not continue as it will result in overwriting. '
                    'Either move the file %s or set the overwrite flag to True.'
                    % outputfile)

            # make the directory if it does not exist
            if not os.path.exists(outputdir):
                os.makedirs(outputdir)

            # recreate structure from output sandbox
            outputfile_dirname = os.path.dirname(outputfile)
            if outputfile_dirname != outputdir:
                if not os.path.exists(outputfile_dirname):
                    os.mkdir(outputfile_dirname)

            # check that we are merging some files
            if not files[k]:
                logger.warning(
                    'Attempting to merge with no files. Request will be ignored.'
                )
                continue

            # check outputfile != inputfile
            for f in files[k]:
                if f == outputfile:
                    raise PostProcessException(
                        'Output file %s equals input file %s. The merge will fail.'
                        % (outputfile, f))
            # merge the lists of files with a merge tool into outputfile
            msg = None
            try:
                self.mergefiles(files[k], outputfile)

                # create a log file of the merge
                # we only get to here if the merge_tool ran ok
                log_file = '%s.merge_summary' % outputfile
                with open(log_file, 'w') as log:
                    log.write('# -- List of files merged -- #\n')
                    for f in files[k]:
                        log.write('%s\n' % f)
                    log.write('# -- End of list -- #\n')

            except PostProcessException as e:
                msg = str(e)

                # store the error msg
                log_file = '%s.merge_summary' % outputfile
                with open(log_file, 'w') as log:
                    log.write('# -- Error in Merge -- #\n')
                    log.write('\t%s\n' % msg)
                raise e

        return self.success
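
merge() above delegates the actual combination of files to a mergefiles() method that concrete mergers must supply. The sketch below is illustrative only: the class name, the plain-text concatenation strategy and the reuse of the parent schema via inherit_copy() are assumptions, not part of the interface shown above.

class TextConcatMerger(IMerger):
    """Illustrative merger: concatenate matching text files from all subjobs."""
    _category = 'postprocessor'
    _name = 'TextConcatMerger'
    _schema = IMerger._schema.inherit_copy()  # assumed to keep 'files', 'ignorefailed', 'overwrite'

    def mergefiles(self, file_list, output_file):
        # merge() passes the files matched in the subjob output directories and
        # expects the combined result to be written to 'output_file'.
        try:
            with open(output_file, 'w') as out:
                for path in file_list:
                    with open(path) as src:
                        out.write(src.read())
        except (IOError, OSError) as err:
            raise PostProcessException('Concatenation failed: %s' % err)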