Example #1
def setUp(self):
    self.dd = {
        'application':
        ComponentItem(category='applications'),
        'backend':
        ComponentItem(category='backends'),
        'name':
        SimpleItem('', comparable=0),
        'workdir':
        SimpleItem(defvalue=None,
                   type='string',
                   transient=1,
                   protected=1,
                   comparable=0),
        'status':
        SimpleItem(defvalue='new', protected=1, comparable=0),
        'id':
        SimpleItem(defvalue=None,
                   typelist=[str],
                   protected=1,
                   comparable=0),
        'inputbox':
        FileItem(defvalue=[], sequence=1),
        'outputbox':
        FileItem(defvalue=[], sequence=1),
        'overriden_copyable':
        SimpleItem(defvalue=None, protected=1, copyable=1),
        'plain_copyable':
        SimpleItem(defvalue=None, copyable=0)
    }
    self.s = Schema(Version(1, 0), self.dd)
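
The setUp() above shows the basic call pattern: a plain dictionary mapping attribute names to item descriptors is paired with a Version to form a Schema. A minimal sketch of the same pattern, assuming the usual Ganga schema imports (the exact import path is an assumption):

# Minimal sketch of the Schema construction used in setUp() above.
# Import path assumed; in Ganga these classes live under Ganga.GPIDev.Schema.
from Ganga.GPIDev.Schema import Schema, Version, SimpleItem, ComponentItem

dd = {
    'application': ComponentItem(category='applications'),
    'status': SimpleItem(defvalue='new', protected=1, comparable=0),
}
schema = Schema(Version(1, 0), dd)  # same (Version, datadict) signature as in the test
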
Example #2
class SampleGangaObject(GangaObject):
    _schema = Schema(
        Version(1, 0),
        {
            'a': SimpleItem(42, typelist=[int]),
            # 'b' is skipped on purpose
            'c': ComponentItem('gangafiles'),
        })
    _category = 'TestGangaObject'
    _name = 'TestGangaObject'

    _exportmethods = ['example', 'check_not_proxy']

    def example(self):
        return 'example_string'

    def check_not_proxy(self, obj):
        assert not Ganga.GPIDev.Base.Proxy.isProxy(
            obj), 'incoming argument should be proxy-stripped'
        ret = SampleGangaObject()
        assert not Ganga.GPIDev.Base.Proxy.isProxy(
            ret), 'new object should not be proxy-wrapped'
        return ret

    def not_proxied(self):
        return 'example_string'
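
A hedged sketch of the proxy convention these assertions rely on: GPI users hold proxy wrappers, while GangaObject methods see proxy-stripped objects. isProxy comes from the same module used above, and addProxy appears in Examples #8 and #9 below; running this needs a configured Ganga session.

# Hedged sketch (assumes a configured Ganga session so the import resolves).
from Ganga.GPIDev.Base.Proxy import addProxy, isProxy

raw = SampleGangaObject()   # plain GangaObject, like the return value of check_not_proxy()
proxied = addProxy(raw)     # the wrapper a GPI user would normally hold
assert isProxy(proxied)
assert not isProxy(raw)
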
Example #3
class TestGangaObject(GangaObject):
    """Test Ganga Object. Is used to construct test jobs"""
    _schema = Schema(
        Version(1, 0), {
            'id':
            SimpleItem('0', doc='ID Needed for tests'),
            'name':
            SimpleItem(
                '',
                doc=
                'optional label which may be any combination of ASCII characters',
                typelist=['str']),
            'subjobs':
            ComponentItem('internal',
                          defvalue=[],
                          sequence=1,
                          protected=1,
                          load_default=0,
                          copyable=0,
                          optional=1,
                          doc='test subjobs'),
        })
    _name = "TestGangaObject"
    _category = "internal"

    def __init__(self, name='TestObjectName', sj=0):
        super(TestGangaObject, self).__init__()
        self.name = name
        for i in range(sj):
            self.subjobs.append(TestGangaObject(name + "." + str(i)))
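
The constructor's loop gives each subjob a dotted name derived from the parent's name. A standalone, runnable sketch of just that naming rule (no Ganga imports needed):

# Standalone sketch of the subjob-naming loop in TestGangaObject.__init__.
def subjob_names(name, sj):
    return [name + "." + str(i) for i in range(sj)]

print(subjob_names('TestObjectName', 3))
# ['TestObjectName.0', 'TestObjectName.1', 'TestObjectName.2']
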
Example #4
class ThreadedTestGangaObject(GangaObject):
    _schema = Schema(Version(1, 0), {
        'a': SimpleItem(42, typelist=[int]),
        'b': ComponentItem('TestGangaObject', defvalue='SimpleGangaObject'),
    })
    _category = 'TestGangaObject'
    _hidden = True
    _enable_plugin = True
Example #5
class SandboxFile(LocalFile):
    _schema = Schema(
        Version(1, 1), {
            'namePattern':
            SimpleItem(defvalue="", doc='pattern of the file name'),
            'localDir':
            SimpleItem(
                defvalue="",
                doc=
                'local dir where the file is stored, used from get and put methods'
            ),
            'subfiles':
            ComponentItem(category='gangafiles',
                          defvalue=[],
                          hidden=1,
                          typelist=['Ganga.GPIDev.Lib.File.SandboxFile'],
                          sequence=1,
                          copyable=0,
                          doc="collected files from the wildcard namePattern"),
            'compressed':
            SimpleItem(
                defvalue=False,
                typelist=['bool'],
                protected=0,
                doc=
                'whether the output file should be compressed before sending somewhere'
            )
        })
    _category = 'gangafiles'
    _name = "SandboxFile"

    def __init__(self, namePattern='', localDir='', **kwds):
        """ name is the name of the output file that is going to be processed
           in some way defined by the derived class
        """
        logger.warning(
            "SandboxFile is now deprecated please change your configuration to use LocalFile instead!"
        )
        super(SandboxFile, self).__init__(namePattern, localDir, **kwds)
Example #6
class ExeSplitter(ISplitter):

    """ Split executable applications (OBSOLETE).

    This splitter allows the creation of subjobs where each subjob has a different Executable application.
    This splitter is OBSOLETE; use GenericSplitter or ArgSplitter instead.
    """
    _name = "ExeSplitter"
    _schema = Schema(Version(1, 0), {
        'apps': ComponentItem('applications', defvalue=[], sequence=1, doc='a list of Executable app objects')
    })

    def split(self, job):
        subjobs = []
        for a in self.apps:
            # for each subjob make a full copy of the master job
            j = self.createSubjob(job)
            j.application = a
            if not a.exe:
                j.application.exe = job.application.exe
            subjobs.append(j)
        return subjobs
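
The only non-trivial logic in split() is the fallback for an empty exe. A tiny standalone sketch of that rule, with Job and Executable objects replaced by plain strings:

# Standalone sketch of the exe fallback in ExeSplitter.split().
def resolve_exe(subjob_exe, master_exe):
    # keep the subjob's own exe if set, otherwise inherit the master job's exe
    return subjob_exe if subjob_exe else master_exe

assert resolve_exe('', '/bin/echo') == '/bin/echo'
assert resolve_exe('/bin/date', '/bin/echo') == '/bin/date'
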
Example #7
class LCGSEFile(IGangaFile):
    """LCGSEFile represents a class marking an output file to be written into LCG SE
    """

    _schema = Schema(
        Version(1, 1), {
            'namePattern':
            SimpleItem(defvalue="", doc='pattern of the file name'),
            'localDir':
            SimpleItem(
                defvalue="",
                copyable=1,
                doc=
                'local dir where the file is stored, used from get and put methods'
            ),
            'joboutputdir':
            SimpleItem(
                defvalue="",
                doc=
                'outputdir of the job with which the outputsandbox file object is associated'
            ),
            'se':
            SimpleItem(defvalue=getLCGConfig()['dest_SRM'],
                       copyable=1,
                       doc='the LCG SE hostname'),
            'se_type':
            SimpleItem(defvalue='', copyable=1, doc='the LCG SE type'),
            'se_rpath':
            SimpleItem(
                defvalue='',
                copyable=1,
                doc=
                'the relative path to the file from the VO directory on the SE'
            ),
            'lfc_host':
            SimpleItem(defvalue=getLCGConfig()['LFC_HOST'],
                       copyable=1,
                       doc='the LCG LFC hostname'),
            'srm_token':
            SimpleItem(
                defvalue='',
                copyable=1,
                doc=
                'the SRM space token, meaningful only when se_type is set to srmv2'
            ),
            'SURL':
            SimpleItem(defvalue='', copyable=1, doc='the LCG SE SURL'),
            'port':
            SimpleItem(defvalue='', copyable=1, doc='the LCG SE port'),
            'locations':
            SimpleItem(
                defvalue=[],
                copyable=1,
                typelist=[str],
                sequence=1,
                doc="list of locations where the outputfiles were uploaded"),
            'subfiles':
            ComponentItem(category='gangafiles',
                          defvalue=[],
                          hidden=1,
                          sequence=1,
                          copyable=0,
                          doc="collected files from the wildcard namePattern"),
            'failureReason':
            SimpleItem(defvalue="",
                       protected=1,
                       copyable=0,
                       doc='reason for the upload failure'),
            'compressed':
            SimpleItem(
                defvalue=False,
                typelist=[bool],
                protected=0,
                doc=
                'whether the output file should be compressed before sending somewhere'
            )
        })
    _category = 'gangafiles'
    _name = "LCGSEFile"
    _exportmethods = ["location", "setLocation", "get", "put", "getUploadCmd"]

    def __init__(self, namePattern='', localDir='', **kwds):
        """ namePattern is the pattern of the output file that has to be written into LCG SE
        """
        super(LCGSEFile, self).__init__()
        self.namePattern = namePattern
        self.localDir = localDir

        self.locations = []

        self.shell = GridShell.getShell()

    def __setattr__(self, attr, value):
        if attr == 'se_type' and value not in ['', 'srmv1', 'srmv2', 'se']:
            raise AttributeError('invalid se_type: %s' % value)
        super(LCGSEFile, self).__setattr__(attr, value)

    def _on_attribute__set__(self, obj_type, attrib_name):
        r = copy.deepcopy(self)
        if getName(obj_type) == 'Job' and attrib_name == 'outputfiles':
            r.locations = []
            r.localDir = ''
            r.failureReason = ''
        return r

    def __repr__(self):
        """Get the representation of the file."""

        return "LCGSEFile(namePattern='%s')" % self.namePattern

    def __get_unique_fname__(self):
        '''gets a unique filename'''

        import random
        import time

        uuid = (str(random.uniform(0, 100000000)) + '-' +
                str(time.time())).replace('.', '-')
        user = getConfig('Configuration')['user']

        fname = 'user.%s.%s' % (user, uuid)
        return fname

    def setLocation(self):
        """
        Sets the location of output files that were uploaded to lcg storage element from the WN
        """

        job = self.getJobObject()

        postprocessLocationsPath = os.path.join(
            job.outputdir,
            getConfig('Output')['PostProcessLocationsFileName'])
        if not os.path.exists(postprocessLocationsPath):
            return

        def lcgse_line_processor(line, lcgse_file):
            guid = line[line.find('->') + 2:]
            pattern = line.split(' ')[1]
            name = line.split(' ')[2]
            if name.endswith('.gz'):
                name = name[:-len('.gz')]

            if regex.search(lcgse_file.namePattern) is not None:
                d = LCGSEFile(namePattern=name)
                d.compressed = lcgse_file.compressed
                d.lfc_host = lcgse_file.lfc_host
                d.se = lcgse_file.se
                # todo copy also the other attributes
                lcgse_file.subfiles.append(GPIProxyObjectFactory(d))
                lcgse_line_processor(line, d)
            elif pattern == lcgse_file.namePattern:
                if guid.startswith('ERROR'):
                    logger.error("Failed to upload file to LCG SE")
                    logger.error(guid[6:])
                    lcgse_file.failureReason = guid[6:]
                    return
                lcgse_file.locations = guid

        for line in open(postprocessLocationsPath, 'r'):

            if line.strip() == '':
                continue

            if line.startswith('lcgse'):
                lcgse_line_processor(line.strip(), self)

    def location(self):
        """
        Return list with the locations of the post processed files (if they were configured to upload the output somewhere)
        """
        return self.locations

    def getUploadCmd(self):

        vo = getConfig('LCG')['VirtualOrganisation']

        cmd = 'lcg-cr --vo %s ' % vo
        if self.se != '':
            cmd = cmd + ' -d %s' % self.se
        if self.se_type == 'srmv2' and self.srm_token != '':
            cmd = cmd + ' -D srmv2 -s %s' % self.srm_token

        # specify the physical location
        if self.se_rpath != '':
            cmd = cmd + \
                ' -P %s/ganga.%s/filename' % (self.se_rpath,
                                              self.__get_unique_fname__())

        return cmd

    def put(self):
        """
        Executes the internally created command for file upload to the LCG SE; this method
        will be called on the client
        """
        import glob

        sourceDir = ''

        # if used as a stand alone object
        if self._getParent() is None:
            if self.localDir == '':
                logger.warning(
                    'localDir attribute is empty, don\'t know from which dir to take the file'
                )
                return
            else:
                sourceDir = self.localDir
        else:
            job = self.getJobObject()
            sourceDir = job.outputdir
        import os
        os.environ['LFC_HOST'] = self.lfc_host

        fileName = self.namePattern

        if self.compressed:
            fileName = '%s.gz' % self.namePattern

        if regex.search(fileName) is not None:
            for currentFile in glob.glob(os.path.join(sourceDir, fileName)):
                cmd = self.getUploadCmd()
                cmd = cmd.replace('filename', currentFile)
                cmd = cmd + ' file:%s' % currentFile

                (exitcode, output, m) = self.shell.cmd1(cmd,
                                                        capture_stderr=True)

                d = LCGSEFile(namePattern=os.path.basename(currentFile))
                d.compressed = self.compressed
                d.lfc_host = self.lfc_host
                d.se = self.se
                # todo copy also the other attributes

                if exitcode == 0:

                    match = re.search(r'(guid:\S+)', output)
                    if match:
                        d.locations = output.strip()

                    # Alex removed this as more general approach in job.py after put() is called
                    # remove file from output dir if this object is attached to a job
                    # if self._getParent() is not None:
                    #    os.system('rm %s' % os.path.join(sourceDir, currentFile))

                else:
                    d.failureReason = output
                    if self._getParent() is not None:
                        logger.error(
                            "Job %s failed. One of the job.outputfiles couldn't be uploaded because of %s"
                            %
                            (str(self._getParent().fqid), self.failureReason))
                    else:
                        logger.error(
                            "The file can't be uploaded because of %s" %
                            (self.failureReason))

                self.subfiles.append(GPIProxyObjectFactory(d))

        else:
            logger.debug("sourceDir: %s" % sourceDir)
            logger.debug("fileName: %s" % fileName)
            currentFile = os.path.join(sourceDir, fileName)
            import os.path
            if os.path.isfile(currentFile):
                logger.debug("currentFile: %s exists!" % currentFile)
            else:
                logger.debug("currentFile: %s DOES NOT exist!" % currentFile)

            cmd = self.getUploadCmd()
            cmd = cmd.replace('filename', currentFile)
            cmd = cmd + ' file:%s' % currentFile

            logger.debug("cmd is: %s" % cmd)

            (exitcode, output, m) = self.shell.cmd1(cmd, capture_stderr=True)

            if exitcode == 0:

                match = re.search(r'(guid:\S+)', output)
                if match:
                    self.locations = output.strip()

                # Alex removed this as more general approach in job.py after put() is called
                # remove file from output dir if this object is attached to a job
                # if self._getParent() is not None:
                #    os.system('rm %s' % os.path.join(sourceDir, currentFile))

            else:
                self.failureReason = output
                if self._getParent() is not None:
                    logger.error(
                        "Job %s failed. One of the job.outputfiles couldn't be uploaded because of %s"
                        % (str(self._getParent().fqid), self.failureReason))
                else:
                    logger.error("The file can't be uploaded because of %s" %
                                 (self.failureReason))

    def getWNInjectedScript(self, outputFiles, indent, patternsToZip,
                            postProcessLocationsFP):
        """
        Returns the script that has to be injected into the jobscript for postprocessing on the WN
        """
        lcgCommands = []

        for outputFile in outputFiles:
            lcgCommands.append('lcgse %s %s %s' %
                               (outputFile.namePattern, outputFile.lfc_host,
                                outputFile.getUploadCmd()))
            logger.debug("OutputFile (%s) cmd for WN script is: %s" %
                         (outputFile.namePattern, outputFile.getUploadCmd()))

        import inspect
        script_location = os.path.join(
            os.path.dirname(
                os.path.abspath(inspect.getfile(inspect.currentframe()))),
            'scripts/LCGSEFileWNScript.py')

        from Ganga.GPIDev.Lib.File import FileUtils
        script = FileUtils.loadScript(script_location, '###INDENT###')

        script = script.replace('###LCGCOMMANDS###', str(lcgCommands))
        script = script.replace('###PATTERNSTOZIP###', str(patternsToZip))
        script = script.replace('###INDENT###', indent)
        script = script.replace('###POSTPROCESSLOCATIONSFP###',
                                postProcessLocationsFP)

        return script

    def get(self):
        """
        Retrieves locally all files matching this LCGSEFile object pattern
        """
        to_location = self.localDir

        if not os.path.isdir(self.localDir):
            if self._getParent() is not None:
                to_location = self.getJobObject().outputdir
            else:
                logger.info(
                    "%s is not a valid directory.... Please set the localDir attribute"
                    % self.localDir)
                return

        # set lfc host
        os.environ['LFC_HOST'] = self.lfc_host

        vo = getConfig('LCG')['VirtualOrganisation']

        for location in self.locations:
            destFileName = os.path.join(to_location, self.namePattern)
            cmd = 'lcg-cp --vo %s %s file:%s' % (vo, location, destFileName)
            (exitcode, output, m) = self.shell.cmd1(cmd, capture_stderr=True)

            if exitcode != 0:
                logger.error(
                    'command %s failed to execute, reason for failure is %s' %
                    (cmd, output))

    def getWNScriptDownloadCommand(self, indent):

        script = """\n

###INDENT###os.environ['LFC_HOST'] = '###LFC_HOST###'
###INDENT###cwDir = os.getcwd()
###INDENT###dwnCmd = 'lcg-cp --vo ###VO### lfn:/grid/###VO###/###LOCATION###/###NAMEPATTERN### file:%s' % os.path.join(cwDir, '###NAMEPATTERN###')
###INDENT###os.system(dwnCmd)
"""

        script = script.replace('###INDENT###', indent)
        script = script.replace('###LFC_HOST###', self.lfc_host)
        script = script.replace('###VO###',
                                getConfig('LCG')['VirtualOrganisation'])
        script = script.replace('###LOCATION###', self.se_rpath)
        script = script.replace('###NAMEPATTERN###', self.namePattern)

        return script

    def processWildcardMatches(self):
        if self.subfiles:
            return self.subfiles

        from fnmatch import fnmatch

        if regex.search(self.namePattern):
            # TODO namePattern shouldn't contain slashes and se_rpath should
            # not contain wildcards
            exitcode, output, m = self.shell.cmd1(
                'lcg-ls lfn:/grid/' + getConfig('LCG')['VirtualOrganisation'] +
                '/' + self.se_rpath,
                capture_stderr=True)

            for filename in output.split('\n'):
                if fnmatch(filename, self.namePattern):
                    subfile = LCGSEFile(namePattern=filename)
                    subfile.se_rpath = self.se_rpath
                    subfile.lfc_host = self.lfc_host

                    self.subfiles.append(GPIProxyObjectFactory(subfile))
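
LCGSEFile builds its lcg-cr command around a per-file unique name. A runnable sketch of the scheme in __get_unique_fname__, with the getConfig('Configuration')['user'] lookup replaced by an explicit argument so the example is standalone:

# Runnable sketch of the unique-filename scheme from __get_unique_fname__ above.
import random
import time

def unique_fname(user):
    uuid = (str(random.uniform(0, 100000000)) + '-' + str(time.time())).replace('.', '-')
    return 'user.%s.%s' % (user, uuid)

print(unique_fname('alice'))  # e.g. user.alice.54321098-765432-1700000000-12
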
Example #8
class BKQueryDict(GangaObject):
    """Class for handling LHCb bookkeeping queries using dictionaries.

    Use BKQuery if you do not know how to use BK dictionaries!

    Example Usage:

    bkqd = BKQueryDict()
    bkqd.dict['ConfigVersion'] = 'Collision09'
    bkqd.dict['ConfigName'] = 'LHCb'
    bkqd.dict['ProcessingPass'] = '******'
    bkqd.dict['EventType'] = '90000000'
    bkqd.dict['FileType'] = 'DST'
    bkqd.dict['DataTakingConditions'] = 'Beam450GeV-VeloOpen-MagDown'
    data = bkqd.getDataset()
    """

    _bkQueryTemplate = {
        'SimulationConditions': 'All',
        'DataTakingConditions': 'All',
        'ProcessingPass': '******',
        'FileType': 'All',
        'EventType': 'All',
        'ConfigName': 'All',
        'ConfigVersion': 'All',
        'ProductionID': 0,
        'StartRun': 0,
        'EndRun': 0,
        'DataQuality': 'All'
    }

    schema = {}
    docstr = 'Dirac BK query dictionary.'
    schema['dict'] = SimpleItem(
        defvalue=_bkQueryTemplate,  # typelist=['dict'],
        doc=docstr)
    schema['credential_requirements'] = ComponentItem('CredentialRequirement',
                                                      defvalue='DiracProxy')
    _schema = Schema(Version(1, 0), schema)
    _category = ''
    _name = "BKQueryDict"
    _exportmethods = ['getDataset', 'getDatasetMetadata']

    def __init__(self):
        super(BKQueryDict, self).__init__()

    @require_credential
    def getDatasetMetadata(self):
        '''Gets the dataset from the bookkeeping for current dict.'''
        if not self.dict:
            return None
        cmd = 'bkQueryDict(%s)' % self.dict
        try:
            value = get_result(
                cmd,
                'BK query error.',
                credential_requirements=self.credential_requirements)
        except GangaDiracError as err:
            return {'OK': False, 'Value': {}}

        files = []
        if 'LFNs' in value:
            files = value['LFNs']
        metadata = {}
        if not type(files) is list:
            if 'LFNs' in files:  # i.e. a dict of LFN:Metadata
                metadata = files['LFNs'].copy()

        if metadata:
            return {'OK': True, 'Value': metadata}
        return {'OK': False, 'Value': metadata}

    @require_credential
    def getDataset(self):
        '''Gets the dataset from the bookkeeping for current dict.'''
        if not self.dict:
            return None
        cmd = 'bkQueryDict(%s)' % self.dict
        value = get_result(
            cmd,
            'BK query error.',
            credential_requirements=self.credential_requirements)

        files = []
        if 'LFNs' in value:
            files = value['LFNs']
        if not type(files) is list:
            if 'LFNs' in files:  # i.e. a dict of LFN:Metadata
                files = files['LFNs'].keys()

        from GangaDirac.Lib.Files.DiracFile import DiracFile
        this_list = [DiracFile(lfn=f) for f in files]

        from GangaLHCb.Lib.LHCbDataset import LHCbDataset
        ds = LHCbDataset(files=this_list, fromRef=True)

        return addProxy(ds)
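
Following the usage in the class docstring, a query is normally built by overriding a handful of fields of the default template. A hedged sketch using the class-level _bkQueryTemplate defined above (the field values are the ones from the docstring):

# Hedged sketch: the fields a user typically overrides, starting from the template defaults.
query = dict(BKQueryDict._bkQueryTemplate)   # copy of the 'All'/0 defaults
query.update({
    'ConfigName': 'LHCb',
    'ConfigVersion': 'Collision09',
    'EventType': '90000000',
    'FileType': 'DST',
    'DataTakingConditions': 'Beam450GeV-VeloOpen-MagDown',
})
# The docstring above shows the equivalent per-key assignments on bkqd.dict.
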
Example #9
class BKQuery(GangaObject):
    '''Class for handling LHCb bookkeeping queries.

    Currently 4 types of queries are supported: Path, RunsByDate, Run and
    Production.  These correspond to the Dirac API methods
    DiracLHCb.bkQuery<type> (see Dirac docs for details).  


    Path formats are as follows:

    type = "Path":
    /<ConfigurationName>/<Configuration Version>/\
<Sim or Data Taking Condition>/<Processing Pass>/<Event Type>/<File Type>

    type = "RunsByDate":
     /<ConfigurationName>/<Configuration Version>/<Processing Pass>/\
<Event Type>/<File Type> 

    type = "Run":
    /<Run Number>/<Processing Pass>/<Event Type>/<File Type>
    - OR -
    /<Run Number 1>-<Run Number 2>/<Processing Pass>/<Event Type>/<File Type>

    type = "Production":
    /<ProductionID>/<Processing Pass>/<Event Type>/<File Type>

    Example Usage:

    bkq = BKQuery (
    dqflag = "All" ,
    path = "/LHCb/Collision09/Beam450GeV-VeloOpen-MagDown/Real Data/\
RecoToDST-07/90000000/DST" ,
    type = "Path" 
    ) 

    bkq = BKQuery (
    startDate = "2010-05-18" ,
    selection = "Runs" ,
    endDate = "2010-05-20" ,
    dqflag = "All" ,
    path = "/LHCb/Collision10/Real Data/90000000/RAW" ,
    type = "RunsByDate" 
    ) 

    bkq = BKQuery (
    dqflag = "All" ,
    path = "111183-126823/Real Data/Reco14/Stripping20/90000000/DIMUON.DST" ,
    type = "Run" 
    ) 

    bkq = BKQuery (
    dqflag = "All" ,
    path = "/5842/Real Data/RecoToDST-07/90000000/DST" ,
    type = "Production" 
    ) 

    then (for any type) one can get the data set by doing the following:
    data = bkq.getDataset()

    This will query the bookkeeping for the up-to-date version of the data.
    N.B. BKQuery objects can be stored in your Ganga box.

    '''
    schema = {}
    docstr = 'Bookkeeping query path (type dependent)'
    schema['path'] = SimpleItem(defvalue='', doc=docstr)
    docstr = 'Start date string yyyy-mm-dd (only works for type="RunsByDate")'
    schema['startDate'] = SimpleItem(defvalue='', doc=docstr)
    docstr = 'End date string yyyy-mm-dd (only works for type="RunsByDate")'
    schema['endDate'] = SimpleItem(defvalue='', doc=docstr)
    docstr = 'Data quality flag (string or list of strings).'
    schema['dqflag'] = SimpleItem(defvalue='OK',
                                  typelist=['str', 'list'],
                                  doc=docstr)
    docstr = 'Type of query (Path, RunsByDate, Run, Production)'
    schema['type'] = SimpleItem(defvalue='Path', doc=docstr)
    docstr = 'Selection criteria: Runs, ProcessedRuns, NotProcessed (only works for type="RunsByDate")'
    schema['selection'] = SimpleItem(defvalue='', doc=docstr)
    schema['credential_requirements'] = ComponentItem('CredentialRequirement',
                                                      defvalue='DiracProxy')
    _schema = Schema(Version(1, 2), schema)
    _category = 'query'
    _name = "BKQuery"
    _exportmethods = ['getDataset', 'getDatasetMetadata']

    def __init__(self, path=''):
        super(BKQuery, self).__init__()
        self.path = path

    @require_credential
    def getDatasetMetadata(self):
        '''Gets the dataset from the bookkeeping for current path, etc.'''
        if not self.path:
            return None
        if self.type not in ['Path', 'RunsByDate', 'Run', 'Production']:
            raise GangaException('Type="%s" is not valid.' % self.type)
        if self.type != 'RunsByDate':
            if self.startDate:
                msg = 'startDate not supported for type="%s".' % self.type
                raise GangaException(msg)
            if self.endDate:
                msg = 'endDate not supported for type="%s".' % self.type
                raise GangaException(msg)
            if self.selection:
                msg = 'selection not supported for type="%s".' % self.type
                raise GangaException(msg)
        cmd = "getDataset('%s','%s','%s','%s','%s','%s')" % (
            self.path, self.dqflag, self.type, self.startDate, self.endDate,
            self.selection)
        from Ganga.GPIDev.Lib.GangaList.GangaList import GangaList
        knownLists = [tuple, list, GangaList]
        if isType(self.dqflag, knownLists):
            cmd = "getDataset('%s',%s,'%s','%s','%s','%s')" % (
                self.path, self.dqflag, self.type, self.startDate,
                self.endDate, self.selection)

        try:
            value = get_result(
                cmd,
                'BK query error.',
                credential_requirements=self.credential_requirements)
        except GangaDiracError as err:
            return {'OK': False, 'Value': str(err)}

        files = []
        metadata = {}
        if 'LFNs' in value:
            files = value['LFNs']
        if not type(files) is list:  # i.e. a dict of LFN:Metadata
            # if 'LFNs' in files: # i.e. a dict of LFN:Metadata
            metadata = files.copy()

        if metadata:
            return {'OK': True, 'Value': metadata}

        return {'OK': False, 'Value': metadata}

    @require_credential
    def getDataset(self):
        '''Gets the dataset from the bookkeeping for current path, etc.'''
        if not self.path:
            return None
        if self.type not in ['Path', 'RunsByDate', 'Run', 'Production']:
            raise GangaException('Type="%s" is not valid.' % self.type)
        if self.type != 'RunsByDate':
            if self.startDate:
                msg = 'startDate not supported for type="%s".' % self.type
                raise GangaException(msg)
            if self.endDate:
                msg = 'endDate not supported for type="%s".' % self.type
                raise GangaException(msg)
            if self.selection:
                msg = 'selection not supported for type="%s".' % self.type
                raise GangaException(msg)
        cmd = "getDataset('%s','%s','%s','%s','%s','%s')" % (
            self.path, self.dqflag, self.type, self.startDate, self.endDate,
            self.selection)
        from Ganga.GPIDev.Lib.GangaList.GangaList import GangaList
        knownLists = [tuple, list, GangaList]
        if isType(self.dqflag, knownLists):
            cmd = "getDataset('%s',%s,'%s','%s','%s','%s')" % (
                self.path, self.dqflag, self.type, self.startDate,
                self.endDate, self.selection)
        result = get_result(
            cmd,
            'BK query error.',
            credential_requirements=self.credential_requirements)

        logger.debug("Finished Running Command")

        files = []
        value = result
        if 'LFNs' in value:
            files = value['LFNs']
        if not type(files) is list:  # i.e. a dict of LFN:Metadata
            # if 'LFNs' in files: # i.e. a dict of LFN:Metadata
            files = files.keys()

        logger.debug("Creating DiracFile objects")

        ## Doesn't work not clear why
        from GangaDirac.Lib.Files.DiracFile import DiracFile
        #new_files = []
        #def _createDiracLFN(this_file):
        #    return DiracFile(lfn = this_file)
        #GangaObject.__createNewList(new_files, files, _createDiracLFN)

        logger.debug("Creating new list")
        new_files = [DiracFile(lfn=f) for f in files]

        #new_files = [DiracFile(lfn=_file) for _file in files]
        #for f in files:
        #    new_files.append(DiracFile(lfn=f))
        #ds.extend([DiracFile(lfn = f)])

        logger.info("Constructing LHCbDataset")

        from GangaLHCb.Lib.LHCbDataset import LHCbDataset
        logger.debug("Imported LHCbDataset")
        ds = LHCbDataset(files=new_files, fromRef=True)

        logger.debug("Returning Dataset")

        return addProxy(ds)
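
getDataset() and getDatasetMetadata() build the same DIRAC command string, quoting dqflag when it is a plain string and interpolating it unquoted when it is a list or tuple. A standalone sketch of that branch (the function name and flag values are illustrative):

# Standalone sketch of the command-string branch in getDataset()/getDatasetMetadata().
def build_bk_cmd(path, dqflag, qtype, start='', end='', selection=''):
    if isinstance(dqflag, (tuple, list)):
        return "getDataset('%s',%s,'%s','%s','%s','%s')" % (
            path, dqflag, qtype, start, end, selection)
    return "getDataset('%s','%s','%s','%s','%s','%s')" % (
        path, dqflag, qtype, start, end, selection)

print(build_bk_cmd('/5842/Real Data/RecoToDST-07/90000000/DST', 'OK', 'Production'))
print(build_bk_cmd('/5842/Real Data/RecoToDST-07/90000000/DST', ['OK', 'UNCHECKED'], 'Production'))
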
Example #10
class ICredential(GangaObject):
    """
    Interface class for working with credentials
    """

    _schema = Schema(
        Version(1, 0), {
            "maxTry":
            SimpleItem(
                defvalue=1,
                typelist=[int],
                doc=
                "Number of password attempts allowed when creating credential"
            ),
            "minValidity":
            SimpleItem(defvalue="00:15",
                       typelist=[str],
                       doc="Default minimum validity"),
            "validityAtCreation":
            SimpleItem(defvalue="24:00",
                       typelist=[str],
                       doc="Default credential validity at creation"),
            "command":
            ComponentItem(
                category="credential_commands",
                defvalue="ICommandSet",
                doc=
                "Set of commands to be used for credential-related operations")
        })

    _category = "credentials"
    _name = "ICredential"
    _hidden = 1

    _exportmethods = [
        "create", "destroy", "isAvailable", "isValid", "location", "renew",
        "timeleft"
    ]

    def __init__(self):
        super(ICredential, self).__init__()
        self.shell = Shell()
        self.inputPW_Widget = None
        return

    def create(self, validity="", maxTry=0, minValidity="", check=False):
        """
        Create credential.

        Arguments other than self:
           validity    - Validity with which credential should be created,
                         specified as string of format "hh:mm"
                         [ Defaults to value of self.validityAtCreation ]
           maxTry      - Number of password attempts allowed
                         [ Defaults to value of self.maxTry ]
           minValidity - Minimum validity in case checking of
                         pre-existing credential is performed,
                         specified as string of format "hh:mm"
                         [ Defaults to value of self.minValidity ]
           check       - Flag to request checking of pre-existing
                         credential; if flag is set to true, then new
                         credential is created only if the validity of
                         any pre-existing credential is less than the
                         value of minValidity
                         [ Default: False ] 

        Note: create is the same as renew, except for the default value of check

        Return value: True if credential is created successfully, and False
        otherwise.
        """
        global logTimeStamp

        dummy = False
        if not self.command.init:
            dummy = True
        if "valid" in self.command.init_parameters:
            if not self.command.init_parameters["valid"]:
                dummy = True

        if dummy:
            logger.warning("Dummy CommandSet used - no credential created")
            return False

        if not maxTry:
            maxTry = self.maxTry

        if not minValidity:
            minValidity = self.minValidity

        if not validity:
            validity = self.validityAtCreation

        validityInSeconds = self.timeInSeconds(validity)

        if not validityInSeconds:
            logger.warning("Problems with requested validity: %s" %
                           str(validity))
            return False
        if check and self.isValid(minValidity):
            return True

        ntry = 0

        while ntry < maxTry:

            ntry = ntry + 1
            # Test if GUI widget is to be used.
            if self.inputPW_Widget:
                # Since self.inputPW_Widget is called, current arguments are
                # ignored since renew() and create() in GUI mode will not be
                # called with any arguments.
                #proxy_obj = self._proxyObject ## This is removed to get rid of ref to _proxyObject
                proxy_obj = self
                if self.inputPW_Widget.ask(proxy_obj):
                    logger.dg(
                        "Proceeding to retrieve password from inputPW_Widget.")
                    __pw = self.inputPW_Widget.getPassword(proxy_obj)
                    if not __pw:
                        logger.warning("Password/passphrase expected!")
                        return False
                    try:
                        tFile = tempfile.NamedTemporaryFile()
                        tFile.write(__pw)
                        tFile.flush()
                    except:
                        del __pw
                        logger.warning(
                            "Could not create secure temporary file for password!"
                        )
                        return False
                    del __pw
                else:
                    # Current credential modification denied for various reasons.
                    # see GangaGUI.customDialogs.ask() method for more details.
                    return False
                # self.inputPW_Widget.ask() may have modified parameters.
                # Calling buildOpts() to take them into account.
                self.buildOpts(self.command.init, False)
                # Create initialisation list with the 'pipe' parameter
                initList = [
                    self.command.init, self.command.init_parameters["pipe"]
                ]
                # Append option value pairs
                for optName, optVal in self.command.currentOpts.iteritems():
                    initList.append("%s %s" % (optName, optVal))
                status = self.shell.system("cat %s|%s" %
                                           (tFile.name, " ".join(initList)))
                tFile.close()
                # self.inputPW_Widget dialog postprocessing.
                # E.g. disable autorenew mechanism if status != 0.
                self.inputPW_Widget.renewalStatus(proxy_obj, status)
                if status == 0:
                    logger.info("%s creation/renewal successful." % self._name)
                    return True
                else:
                    logger.warning("%s creation/renewal failed [%s]." %
                                   (self._name, status))
                    return False
            else:  # Non-GUI credential renewal/creation
                # Check if renewal is from main process (i.e. by bootstrap or
                # user)
                if threading.currentThread().getName() == 'MainThread' or\
                        threading.currentThread().getName().startswith('GANGA_Update_Thread_Ganga_Worker_'):
                    if "valid" in self.command.init_parameters:
                        self.command.currentOpts[
                            self.command.init_parameters['valid']] = validity
                    initList = [self.command.init]
                    # Append option value pairs
                    for optName, optVal in self.command.currentOpts.iteritems(
                    ):
                        initList.append("%s %s" % (optName, optVal))
                    status = self.shell.system(" ".join(initList))
                    if status == 0:
                        logger.info("%s creation/renewal successful." %
                                    self._name)
                        return True
                    else:
                        logger.warning("%s creation/renewal failed [%s]." %
                                       (self._name, status))
                # create initiated from worker thread from monitoring
                # component.
                else:
                    currTime = time.time()
                    if currTime - logTimeStamp >= logRepeatDuration:
                        logTimeStamp = currTime

                        # Check validity but print logging messages this time
                        self.isValid("", True)
                        _credentialObject = self._name[0].lower(
                        ) + self._name[1:]
                        logger.warning(
                            "Renew by typing '%s.renew()' at the prompt." %
                            (_credentialObject))

                        # notify the Core that the credential is not valid
                        _validity = self.timeInSeconds(self.timeleft())
                        _minValidity = self.timeInSeconds(minValidity) / 2.
                        if _validity <= max(120, _minValidity):
                            Coordinator.notifyInvalidCredential(self)

                    return True

        logger.warning("%s creation/renewal attempts exceeded %s tries!" %
                       (self._name, maxTry))
        return False

    def destroy(self, allowed_exit=[0]):
        """
        Destroy credential

        Argument other than self:
           allowed_exit - List of exit codes accepted without error
                          when issuing system command for destroying credential

        Return value: False if command for destroying credential is undefined,
                      or True otherwise
        """

        if not self.command.destroy:
            logger.warning("Dummy CommandSet used - no credential created")
            return False

        destroyList = [self.command.destroy]
        for optName, optVal in self.command.destroyOpts.iteritems():
            destroyList.append("%s %s" % (optName, optVal))

        Coordinator.notifyInvalidCredential(self)

        status, output, message = \
            self.shell.cmd1(" ".join(destroyList), allowed_exit)
        proxyPath = self.location()
        if proxyPath:
            os.remove(proxyPath)
        return True

    def isAvailable(self):
        """
        Check whether credential is available with system/configuration used

        No arguments other than self

        Return value: True if credential is available, false otherwise
        """

        logger.warning("Dummy method used - this always returns True")

        return True

    def isValid(self, validity="", log=False, force_check=False):
        """
        Check validity

        Arguments other than self:
           validity    - Minimum time for which credential should be valid,
                         specified as string of format "hh:mm"
                         [ Defaults to value of self.minValidity ]

           log         - Print logger messages if credential not valid 

           force_check - Force credential check, rather than relying on cache

        Return value: True if credential is valid for required time, False
        otherwise.
        """

        valid = True

        if not validity or validity is None:
            validity = self.minValidity
        validityInSeconds = self.timeInSeconds(validity)
        timeleft = self.timeleft(force_check=force_check)

        if not timeleft:
            valid = False
        else:
            timeleftInSeconds = self.timeInSeconds(timeleft)
            if timeleftInSeconds <= validityInSeconds:
                valid = False

        if not valid and log:
            _tl = self.timeleft(force_check=force_check)
            if _tl == "-1" or _tl == "0:00:00":
                _expiryStatement = "has expired!"
            else:
                _expiryStatement = "will expire in %s!" % _tl

            itemList = []
            text = self._name[0]
            for i in range(len(self._name) - 1):
                character = self._name[i + 1]
                if character.isupper():
                    itemList.append(text)
                    text = character.lower()
                else:
                    text = "".join([text, character])
            itemList.append(text)
            _credentialName = " ".join(itemList)

            logger.warning("%s %s" % (_credentialName, _expiryStatement))

        return valid

    def location(self):
        """
        Determine credential location

        No arguments other than self

        Return value: Path to credential if found, or empty string otherwise
        """

        return ""

    def renew(self, validity="", maxTry=0, minValidity="", check=True):
        """
        Renew credential.

        Arguments other than self:
           validity    - Validity with which credential should be created,
                         specified as string of format "hh:mm"
                         [ Defaults to value of self.validityAtCreation ]
           maxTry      - Number of password attempts allowed
                         [ Defaults to value of self.maxTry ]
           minValidity - Minimum validity in case checking of
                         pre-existing credential is performed,
                         specified as string of format "hh:mm"
                         [ Defaults to value of self.minValidity ]
           check       - Flag to request checking of pre-existing
                         credential; if flag is set to true, then new
                         credential is created only if the validity of
                         any pre-existing credential is less than the
                         value of minValidity
                         [ Default: True ] 

        Note: renew is the same as create, except for the default value of check

        Return value: True if new credential is created successfully, and False
        otherwise.
        """
        status = self.create(validity, maxTry, minValidity, check)

        return status

    def timeInSeconds(self, timeString=""):
        """
        Convert time string to time in seconds

        Arguments other than self:
           timeString - Time specified as string of format "hh:mm:ss"

        Return value: Time in seconds (integer)
        """

        totalTime = 0
        timeList = timeString.split(":")
        if len(timeList) >= 1:
            totalTime = totalTime + int(timeList[0]) * 60 * 60
        if len(timeList) >= 2:
            totalTime = totalTime + int(timeList[1]) * 60
        if len(timeList) >= 3:
            totalTime = totalTime + int(timeList[2])

        return totalTime

    def timeleft(self, units="hh:mm:ss", force_check=False):
        """
        Check time for which credential is valid.

        Arguments other than self:
           units       - String specifying units in which time is returned

           force_check - Force credential check, rather than relying on cache

        Allowed values for units are:
           "hours"              - time returned as in hours
           "minutes"            - time returned in minutes
           "seconds"            - time returned in seconds
           "hh:mm:ss" [default] - time returned as hours, minutes seconds


        Return value: Credential validity as string giving time in requested
           units, or empty string if command for querying credential validity
           is unavailable
        """

        timeRemaining = self.timeleftInHMS(force_check=force_check)
        if timeRemaining not in ["", "-1"]:
            if units in ["hours", "minutes", "seconds"]:
                timeleftInSeconds = self.timeInSeconds(timeRemaining)
                if "seconds" == units:
                    timeRemaining = "%.2f" % (timeleftInSeconds)
                elif "minutes" == units:
                    timeRemaining = "%.2f" % (timeleftInSeconds / 60.)
                elif "hours" == units:
                    timeRemaining = "%.2f" % (timeleftInSeconds / (60. * 60.))

        return timeRemaining

    def timeleftInHMS(self, force_check=False):
        """
        Determine remaining validity of credential in hours, minutes and seconds

        Argument other than self:
           force_check - Force credential check, rather than relying on cache

        Return value: String giving credential validity, or empty string
           if command for querying credential validity is unavailable
        """
        logger.warning("Dummy method used - no information returned")
        return ""
Example #11
class Condor(IBackend):

    """Condor backend - submit jobs to a Condor pool.

    For more options see help on CondorRequirements.
    """

    _schema = Schema(Version(1, 0), {
        "requirements": ComponentItem(category="condor_requirements",
                                      defvalue="CondorRequirements",
                                      doc="Requirements for selecting execution host"),
        "env": SimpleItem(defvalue={},
                          doc='Environment settings for execution host'),
        "getenv": SimpleItem(defvalue="False",
                             doc='Flag to pass current environment to execution host'),
        "rank": SimpleItem(defvalue="Memory",
                           doc="Ranking scheme to be used when selecting execution host"),
        "submit_options": SimpleItem(defvalue=[], typelist=["str"],
                                     sequence=1, doc="Options passed to Condor at submission time"),
        "id": SimpleItem(defvalue="", protected=1, copyable=0,
                         doc="Condor jobid"),
        "status": SimpleItem(defvalue="", protected=1, copyable=0,
                             doc="Condor status"),
        "cputime": SimpleItem(defvalue="", protected=1, copyable=0,
                              doc="CPU time used by job"),
        "actualCE": SimpleItem(defvalue="", protected=1, copyable=0,
                               doc="Machine where job has been submitted"),
        "shared_filesystem": SimpleItem(defvalue=True,
                                        doc="Flag indicating if Condor nodes have shared filesystem"),
        "universe": SimpleItem(defvalue="vanilla",
                               doc="Type of execution environment to be used by Condor"),
        "globusscheduler": SimpleItem(defvalue="", doc="Globus scheduler to be used (required for Condor-G submission)"),
        "globus_rsl": SimpleItem(defvalue="",
                                 doc="Globus RSL settings (for Condor-G submission)"),

    })

    _category = "backends"
    _name = "Condor"
    statusDict = \
        {
            "0": "Unexpanded",
            "1": "Idle",
            "2": "Running",
            "3": "Removed",
            "4": "Completed",
            "5": "Held"
        }

    def __init__(self):
        super(Condor, self).__init__()

    def submit(self, jobconfig, master_input_sandbox):
        """Submit job to backend.

            Return value: True if job is submitted successfully,
                          or False otherwise"""

        cdfpath = self.preparejob(jobconfig, master_input_sandbox)
        status = self.submit_cdf(cdfpath)
        return status

    def submit_cdf(self, cdfpath=""):
        """Submit Condor Description File.

            Argument other than self:
               cdfpath - path to Condor Description File to be submitted

            Return value: True if job is submitted successfully,
                          or False otherwise"""

        commandList = ["condor_submit -v"]
        commandList.extend(self.submit_options)
        commandList.append(cdfpath)
        commandString = " ".join(commandList)

        status, output = commands.getstatusoutput(commandString)

        self.id = ""
        if 0 != status:
            logger.error\
                ("Tried submitting job with command: '%s'" % commandString)
            logger.error("Return code: %s" % str(status))
            logger.error("Condor output:")
            logger.error(output)
        else:
            tmpList = output.split("\n")
            for item in tmpList:
                if 1 + item.find("** Proc"):
                    localId = item.strip(":").split()[2]
                    queryCommand = " ".join\
                        (["condor_q -format \"%s\" GlobalJobId", localId])
                    qstatus, qoutput = commands.getstatusoutput(queryCommand)
                    if 0 != qstatus:
                        logger.warning\
                            ("Problem determining global id for Condor job '%s'" %
                             localId)
                        self.id = localId
                    else:
                        self.id = qoutput
                    break

        return self.id != ""

    def resubmit(self):
        """Resubmit job that has already been configured.

            Return value: True if job is resubmitted successfully,
                          or False otherwise"""

        job = self.getJobObject()

        inpDir = job.getInputWorkspace().getPath()
        outDir = job.getOutputWorkspace().getPath()

        # Delete any existing output files, and recreate output directory
        if os.path.isdir(outDir):
            shutil.rmtree(outDir)
        if os.path.exists(outDir):
            os.remove(outDir)
        os.mkdir(outDir)

        # Determine path to job's Condor Description File
        cdfpath = os.path.join(inpDir, "__cdf__")

        # Resubmit job
        if os.path.exists(cdfpath):
            status = self.submit_cdf(cdfpath)
        else:
            logger.warning\
                ("No Condor Description File for job '%s' found in '%s'" %
                 (str(job.id), inpDir))
            logger.warning("Resubmission failed")
            status = False

        return status

    def kill(self):
        """Kill running job.

           No arguments other than self

           Return value: True if job killed successfully,
                         or False otherwise"""

        job = self.getJobObject()

        if not self.id:
            logger.warning("Job %s not running" % job.id)
            return False

        idElementList = job.backend.id.split("#")
        if 3 == len(idElementList):
            if idElementList[1].find(".") != -1:
                killCommand = "condor_rm -name %s %s" % \
                    (idElementList[0], idElementList[1])
            else:
                killCommand = "condor_rm -name %s %s" % \
                    (idElementList[0], idElementList[2])
        else:
            killCommand = "condor_rm %s" % (idElementList[0])

        status, output = commands.getstatusoutput(killCommand)

        if (status != 0):
            logger.warning\
                ("Return code '%s' killing job '%s' - Condor id '%s'" %
                 (str(status), job.id, job.backend.id))
            logger.warning("Tried command: '%s'" % killCommand)
            logger.warning("Command output: '%s'" % output)
            logger.warning("Anyway continuing with job removal")

        job.backend.status = "Removed"
        killStatus = True

        return killStatus

    def preparejob(self, jobconfig, master_input_sandbox):
        """Prepare Condor description file"""

        job = self.getJobObject()
        inbox = job.createPackedInputSandbox(jobconfig.getSandboxFiles())
        inpDir = job.getInputWorkspace().getPath()
        outDir = job.getOutputWorkspace().getPath()

        infileList = []

        exeString = jobconfig.getExeString().strip()
        quotedArgList = []
        for arg in jobconfig.getArgStrings():
            quotedArgList.append("\\'%s\\'" % arg)
        exeCmdString = " ".join([exeString] + quotedArgList)

        for filePath in inbox:
            if not filePath in infileList:
                infileList.append(filePath)

        for filePath in master_input_sandbox:
            if not filePath in infileList:
                infileList.append(filePath)

        fileList = []
        for filePath in infileList:
            fileList.append(os.path.basename(filePath))

        if job.name:
            name = job.name
        else:
            name = job.application._name
        name = "_".join(name.split())
        wrapperName = "_".join(["Ganga", str(job.id), name])

        commandList = [
            "#!/usr/bin/env python",
            "from __future__ import print_function",
            "# Condor job wrapper created by Ganga",
            "# %s" % (time.strftime("%c")),
            "",
            inspect.getsource(Sandbox.WNSandbox),
            "",
            "import os",
            "import time",
            "",
            "startTime = time.strftime"
            + "( '%a %d %b %H:%M:%S %Y', time.gmtime( time.time() ) )",
            "",
            "for inFile in %s:" % str(fileList),
            "   getPackedInputSandbox( inFile )",
            "",
            "exePath = '%s'" % exeString,
            "if os.path.isfile( '%s' ):" % os.path.basename(exeString),
            "   os.chmod( '%s', 0755 )" % os.path.basename(exeString),
            "wrapperName = '%s_bash_wrapper.sh'" % wrapperName,
            "wrapperFile = open( wrapperName, 'w' )",
            "wrapperFile.write( '#!/bin/bash\\n' )",
            "wrapperFile.write( 'echo \"\"\\n' )",
            "wrapperFile.write( 'echo \"Hostname: $(hostname -f)\"\\n' )",
            "wrapperFile.write( 'echo \"\\${BASH_ENV}: ${BASH_ENV}\"\\n' )",
            "wrapperFile.write( 'if ! [ -z \"${BASH_ENV}\" ]; then\\n' )",
            "wrapperFile.write( '  if ! [ -f \"${BASH_ENV}\" ]; then\\n' )",
            "wrapperFile.write( '    echo \"*** Warning: "
            + "\\${BASH_ENV} file not found ***\"\\n' )",
            "wrapperFile.write( '  fi\\n' )",
            "wrapperFile.write( 'fi\\n' )",
            "wrapperFile.write( 'echo \"\"\\n' )",
            "wrapperFile.write( '%s\\n' )" % exeCmdString,
            "wrapperFile.write( 'exit ${?}\\n' )",
            "wrapperFile.close()",
            "os.chmod( wrapperName, 0755 )",
            "result = os.system( './%s' % wrapperName )",
            "os.remove( wrapperName )",
            "",
            "endTime = time.strftime"
              + "( '%a %d %b %H:%M:%S %Y', time.gmtime( time.time() ) )",
            "print('\\nJob start: ' + startTime)",
            "print('Job end: ' + endTime)",
            "print('Exit code: %s' % str( result ))"
        ]

        commandString = "\n".join(commandList)
        wrapper = job.getInputWorkspace().writefile\
            (FileBuffer(wrapperName, commandString), executable=1)

        infileString = ",".join(infileList)
        outfileString = ",".join(jobconfig.outputbox)

        cdfDict = \
            {
                'universe': self.universe,
                'on_exit_remove': 'True',
                'should_transfer_files': 'YES',
                'when_to_transfer_output': 'ON_EXIT_OR_EVICT',
                'executable': wrapper,
                'transfer_executable': 'True',
                'notification': 'Never',
                'rank': self.rank,
                'initialdir': outDir,
                'error': 'stderr',
                'output': 'stdout',
                'log': 'condorLog',
                'stream_output': 'false',
                'stream_error': 'false',
                'getenv': self.getenv
            }

        envList = []
        if self.env:
            for key in self.env.keys():
                value = self.env[key]
                if (isinstance(value, str)):
                    value = os.path.expandvars(value)
                else:
                    value = str(value)
                envList.append("=".join([key, value]))
        envString = ";".join(envList)
        if jobconfig.env:
            for key in jobconfig.env.keys():
                value = jobconfig.env[key]
                if (isinstance(value, str)):
                    value = os.path.expandvars(value)
                else:
                    value = str(value)
                envList.append("=".join([key, value]))
        envString = ";".join(envList)
        if envString:
            cdfDict['environment'] = envString

        if infileString:
            cdfDict['transfer_input_files'] = infileString

        if self.globusscheduler:
            cdfDict['globusscheduler'] = self.globusscheduler

        if self.globus_rsl:
            cdfDict['globus_rsl'] = self.globus_rsl

        if outfileString:
            cdfDict['transfer_output_files'] = outfileString

        cdfList = [
            "# Condor Description File created by Ganga",
            "# %s" % (time.strftime("%c")),
            ""]
        for key, value in cdfDict.iteritems():
            cdfList.append("%s = %s" % (key, value))
        cdfList.append(self.requirements.convert())
        cdfList.append("queue")
        cdfString = "\n".join(cdfList)

        return job.getInputWorkspace().writefile\
            (FileBuffer("__cdf__", cdfString))

    def updateMonitoringInformation(jobs):

        jobDict = {}
        for job in jobs:
            if job.backend.id:
                jobDict[job.backend.id] = job

        idList = jobDict.keys()

        if not idList:
            return

        queryCommand = " ".join\
            ([
                "condor_q -global" if getConfig(
                    "Condor")["query_global_queues"] else "condor_q",
                "-format \"%s \" GlobalJobId",
                "-format \"%s \" RemoteHost",
                "-format \"%d \" JobStatus",
                "-format \"%f\\n\" RemoteUserCpu"
            ])
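        # Each output line is expected to look like (assumed typical result of
        # the -format options above; RemoteHost is missing for idle jobs, which
        # is why both the 3-field and 4-field cases are handled below):
        #   "<GlobalJobId> [<RemoteHost>] <JobStatus> <RemoteUserCpu>"
        # e.g. "submit.host#123.0#1500000000 slot1@wn01 2 12.5"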
        status, output = commands.getstatusoutput(queryCommand)
        if 0 != status:
            logger.error("Problem retrieving status for Condor jobs")
            return

        if ("All queues are empty" == output):
            infoList = []
        else:
            infoList = output.split("\n")

        allDict = {}
        for infoString in infoList:
            tmpList = infoString.split()
            id, host, status, cputime = ("", "", "", "")
            if 3 == len(tmpList):
                id, status, cputime = tmpList
            if 4 == len(tmpList):
                id, host, status, cputime = tmpList
            if id:
                allDict[id] = {}
                allDict[id]["status"] = Condor.statusDict[status]
                allDict[id]["cputime"] = cputime
                allDict[id]["host"] = host

        fg = Foreground()
        fx = Effects()
        status_colours = {'submitted': fg.orange,
                          'running': fg.green,
                          'completed': fg.blue}

        for id in idList:

            printStatus = False
            if jobDict[id].status == "killed":
                continue

            localId = id.split("#")[-1]
            globalId = id

            if globalId == localId:
                queryCommand = " ".join\
                    ([
                        "condor_q -global" if getConfig(
                            "Condor")["query_global_queues"] else "condor_q",
                        "-format \"%s\" GlobalJobId",
                        id
                    ])
                status, output = commands.getstatusoutput(queryCommand)
                if 0 == status:
                    globalId = output

            if globalId in allDict.keys():
                status = allDict[globalId]["status"]
                host = allDict[globalId]["host"]
                cputime = allDict[globalId]["cputime"]
                if status != jobDict[id].backend.status:
                    printStatus = True
                    stripProxy(jobDict[id])._getWriteAccess()
                    jobDict[id].backend.status = status
                    if jobDict[id].backend.status == "Running":
                        jobDict[id].updateStatus("running")

                if host:
                    if jobDict[id].backend.actualCE != host:
                        jobDict[id].backend.actualCE = host
                jobDict[id].backend.cputime = cputime
            else:
                jobDict[id].backend.status = ""
                outDir = jobDict[id].getOutputWorkspace().getPath()
                condorLogPath = "".join([outDir, "condorLog"])
                checkExit = True
                if os.path.isfile(condorLogPath):
                    checkExit = False
                    for line in open(condorLogPath):
                        if -1 != line.find("terminated"):
                            checkExit = True
                            break
                        if -1 != line.find("aborted"):
                            checkExit = True
                            break

                if checkExit:
                    printStatus = True
                    stdoutPath = "".join([outDir, "stdout"])
                    jobStatus = "failed"
                    if os.path.isfile(stdoutPath):
                        with open(stdoutPath) as stdout:
                            lineList = stdout.readlines()
                        try:
                            exitLine = lineList[-1]
                            exitCode = exitLine.strip().split()[-1]
                        except IndexError:
                            exitCode = -1

                        if exitCode.isdigit():
                            jobStatus = "completed"
                        else:
                            logger.error("Problem extracting exit code from job %s. Line found was '%s'." % (
                                jobDict[id].fqid, exitLine))

                    jobDict[id].updateStatus(jobStatus)

            if printStatus:
                if jobDict[id].backend.actualCE:
                    hostInfo = jobDict[id].backend.actualCE
                else:
                    hostInfo = "Condor"
                status = jobDict[id].status
                if status in status_colours:
                    colour = status_colours[status]
                else:
                    colour = fg.magenta
                if "submitted" == status:
                    preposition = "to"
                else:
                    preposition = "on"

                if jobDict[id].backend.status:
                    backendStatus = "".join\
                        ([" (", jobDict[id].backend.status, ") "])
                else:
                    backendStatus = ""

                logger.info(colour + 'Job %s %s%s %s %s - %s' + fx.normal,
                            jobDict[
                                id].fqid, status, backendStatus, preposition, hostInfo,
                            time.strftime('%c'))

        return None

    updateMonitoringInformation = \
        staticmethod(updateMonitoringInformation)
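
A minimal usage sketch for the backend above, assuming an interactive Ganga (GPI) session; Job and Executable come from the GPI, and everything not shown in the code above (the echo command and its argument) is purely illustrative:

# Hedged sketch: submit a trivial job to the Condor backend defined above.
j = Job(application=Executable(exe='/bin/echo', args=['Hello World']),
        backend=Condor())
j.submit()   # calls preparejob() to write the Python wrapper and the __cdf__ file
jobs         # the monitoring loop then polls condor_q via updateMonitoringInformation()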
Example #12
class LocalFile(IGangaFile):
    """LocalFile represents base class for output files, such as MassStorageFile, LCGSEFile, etc 
    """
    _schema = Schema(
        Version(1, 1), {
            'namePattern':
            SimpleItem(defvalue="", doc='pattern of the file name'),
            'localDir':
            SimpleItem(
                defvalue="",
                doc=
                'local dir where the file is stored, used from get and put methods'
            ),
            'subfiles':
            ComponentItem(category='gangafiles',
                          defvalue=[],
                          hidden=1,
                          sequence=1,
                          copyable=0,
                          doc="collected files from the wildcard namePattern"),
            'compressed':
            SimpleItem(
                defvalue=False,
                typelist=[bool],
                protected=0,
                doc=
                'whether the output file should be compressed before sending somewhere'
            ),
        })
    _category = 'gangafiles'
    _name = "LocalFile"
    _exportmethods = ["location", "remove", "accessURL"]

    def __init__(self, namePattern='', localDir='', **kwds):
        """ name is the name of the output file that is going to be processed
            in some way defined by the derived class
        """
        super(LocalFile, self).__init__()

        self.tmp_pwd = None

        if isinstance(namePattern, str):
            self.namePattern = namePattern
            if localDir:
                self.localDir = localDir
        elif isinstance(namePattern, File):
            self.namePattern = path.basename(namePattern.name)
            self.localDir = path.dirname(namePattern.name)
        elif isinstance(namePattern, FileBuffer):
            namePattern.create()
            self.namePattern = path.basename(namePattern.name)
            self.localDir = path.dirname(namePattern.name)
        else:
            logger.error(
                "Unkown type: %s . Cannot Create LocalFile from this!" %
                type(namePattern))

    def __setattr__(self, attr, value):
        """
        This is an overloaded setter method to make sure that we're auto-expanding the filenames of files which exist.
        In the case we're assigning any other attributes the value is simply passed through
        Args:
            attr (str): This is the name of the attribute which we're assigning
            value (unknown): This is the value being assigned.
        """
        actual_value = value
        if attr == 'namePattern':
            if len(value.split(os.sep)) > 1:
                this_dir = path.dirname(value)
                if this_dir:
                    self.localDir = this_dir
                elif path.isfile(path.join(os.getcwd(), path.basename(value))):
                    self.localDir = os.getcwd()
            actual_value = path.basename(value)
        elif attr == 'localDir':
            if value:
                new_value = path.abspath(expandfilename(value))
                if path.exists(new_value):
                    actual_value = new_value

        super(LocalFile, self).__setattr__(attr, actual_value)

    def __repr__(self):
        """Get the representation of the file."""
        return "LocalFile(namePattern='%s', localDir='%s')" % (
            self.namePattern, self.localDir)

    def location(self):
        return self.getFilenameList()

    def accessURL(self):
        URLs = []
        for file in self.location():
            URLs.append('file://' + path.join(os.sep, file))
        return URLs

    def setLocation(self):
        """This collects the subfiles for wildcarded output LocalFile"""
        import glob

        fileName = self.namePattern

        if self.compressed:
            fileName = '%s.gz' % self.namePattern

        sourceDir = self.getJobObject().outputdir

        if self.localDir:
            fileName = path.join(self.localDir, fileName)

        for currentFile in glob.glob(path.join(sourceDir, fileName)):

            base_name = path.basename(currentFile)

            d = LocalFile(base_name)
            d.compressed = self.compressed

            self.subfiles.append(d)

    def processWildcardMatches(self):

        if self.subfiles:
            return self.subfiles

        import glob

        fileName = self.namePattern

        if self.compressed:
            fileName = '%s.gz' % self.namePattern

        sourceDir = self.localDir

        if regex.search(fileName) is not None:
            for currentFile in glob.glob(path.join(sourceDir, fileName)):
                d = LocalFile(namePattern=path.basename(currentFile),
                              localDir=path.dirname(currentFile))
                d.compressed = self.compressed

                self.subfiles.append(d)

    def getFilenameList(self):
        """Return the files referenced by this LocalFile"""
        filelist = []
        self.processWildcardMatches()
        if self.subfiles:
            for f in self.subfiles:
                filelist.append(path.join(f.localDir, f.namePattern))
        else:
            if path.exists(path.join(self.localDir, self.namePattern)):
                logger.debug("File: %s found, Setting localDir: %s" %
                             (self.namePattern, self.localDir))

            filelist.append(path.join(self.localDir, self.namePattern))

        return filelist

    def hasMatchedFiles(self):
        """
        OK for checking subfiles, but if there are no wildcards we need to actually check that the file exists
        """

        # check for subfiles
        if len(self.subfiles) > 0:
            # we have subfiles so we must have actual files associated
            return True
        else:
            if self.containsWildcards():
                return False

        # check if single file exists (no locations field to try)
        job = self.getJobObject()
        fname = self.namePattern
        if self.compressed:
            fname += ".gz"

        if path.isfile(path.join(job.getOutputWorkspace().getPath(), fname)):
            return True

        return False

    def remove(self):

        for this_file in self.getFilenameList():
            _actual_delete = False
            keyin = None
            while keyin is None:
                keyin = raw_input(
                    "Do you want to remove the LocalFile: %s ? ([y]/n) " %
                    this_file)
                if keyin.lower() in ['y', '']:
                    _actual_delete = True
                elif keyin.lower() == 'n':
                    _actual_delete = False
                else:
                    logger.warning("y/n please!")
                    keyin = None
            if _actual_delete:
                if not path.exists(this_file):
                    logger.warning("File %s did not exist, can't delete" %
                                   this_file)
                else:
                    logger.info("Deleting: %s" % this_file)

                    import time
                    remove_filename = this_file + "_" + str(
                        time.time()) + '__to_be_deleted_'
                    try:
                        os.rename(this_file, remove_filename)
                    except Exception as err:
                        logger.warning(
                            "Error in first stage of removing file: %s" %
                            this_file)
                        remove_filename = this_file

                    try:
                        os.remove(remove_filename)
                    except OSError as err:
                        if err.errno != errno.ENOENT:
                            logger.error("Error in removing file: %s" %
                                         remove_filename)
                            raise
                        pass

        return

    def internalCopyTo(self, targetPath):
        """
        Copy the file to local storage using the get mechanism
        Args:
            targetPath (str): Target path where the file is to copied to
        """
        for currentFile in glob.glob(
                os.path.join(self.localDir, self.namePattern)):
            shutil.copy(currentFile,
                        path.join(targetPath, path.basename(currentFile)))

    def get(self):
        """
        Method to get the Local file and/or to check that a file exists locally
        """
        # Deliberately do nothing.

    def put(self):
        """
        Copy the file to the destination (in the case of LocalFile, the localDir)
        """
        # This is useful for placing the LocalFile in a subdir at the end of a job

        #FIXME this method should be written to work with some other parameter than localDir for job outputs but for now this 'works'
        if self.localDir:
            try:
                job = self.getJobObject()
            except AssertionError as err:
                return

            # Copy to 'destination'

            if path.isfile(path.join(job.outputdir, self.namePattern)):
                if not path.exists(path.join(job.outputdir, self.localDir)):
                    os.makedirs(path.join(job.outputdir, self.localDir))
                shutil.copy(
                    path.join(job.outputdir, self.namePattern),
                    path.join(job.outputdir, self.localDir, self.namePattern))

    def cleanUpClient(self):
        """
        This performs the cleanup method on the client output workspace to remove temporary files
        """
        # For LocalFile this is where the file is stored so don't remove it
        pass

    def getWNScriptDownloadCommand(self, indent):

        # create symlink
        shortScript = """
# create symbolic links for LocalFiles
for f in ###FILELIST###:
    if not os.path.exists(os.path.basename(f)):
        os.symlink(f, os.path.basename(f))
"""
        from Ganga.GPIDev.Lib.File import FileUtils
        shortScript = FileUtils.indentScript(shortScript, '###INDENT###')

        shortScript = shortScript.replace('###FILELIST###',
                                          "%s" % self.getFilenameList())

        return shortScript

    def getWNInjectedScript(self, outputFiles, indent, patternsToZip,
                            postProcessLocationsFP):

        cp_template = """
###INDENT###os.system("###CP_COMMAND###")
"""
        script = ""

        j = self.getJobObject()
        output_dir = j.getOutputWorkspace(create=True).getPath()

        for this_file in outputFiles:
            filename = this_file.namePattern
            cp_cmd = 'cp %s %s' % (filename, quote(output_dir))

            this_cp = cp_template

            replace_dict = {'###INDENT###': indent, '###CP_COMMAND###': cp_cmd}

            for k, v in replace_dict.iteritems():
                this_cp = this_cp.replace(k, v)

            script = this_cp
            break

        return script
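
A short, hedged usage sketch for the LocalFile above (GPI session assumed; the file names are illustrative):

f = LocalFile('histos.root')            # namePattern is set; localDir is filled in by
                                        # __setattr__ when the file already exists locally
j = Job()
j.outputfiles = [LocalFile('*.root')]   # wildcard pattern: matching files are collected
                                        # as subfiles by setLocation()/processWildcardMatches()
print(f.location())                     # paths resolved through getFilenameList()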
Example #13
class MultiPostProcessor(IPostProcessor):

    """
    Contains and executes many postprocessors. This is the object which is attached to a job.
    Should behave like a list to the user.
    """

    _category = 'postprocessor'
    #_exportmethods = ['__add__', '__get__', '__str__', '__getitem__', 'append', 'remove']
    _exportmethods = ['__add__', '__get__', '__getitem__', '__len__', 'append', 'remove']
    _name = 'MultiPostProcessor'
    _schema = Schema(Version(1, 0), {
        'process_objects': ComponentItem('postprocessor', defvalue=[], hidden=1, doc='A list of Processors to run', sequence=1)
    })

    def __init__(self, *args):
        super(MultiPostProcessor, self).__init__()

        for process in args:
            if isinstance(process, MultiPostProcessor):
                for process_ in process.process_objects:
                    self.addProcess(process_)
            elif isinstance(process, (list, tuple, GangaList)):
                for process_ in process:
                    self.addProcess(process_)
            else:
                self.addProcess(process)

        if hasattr(self.process_objects, 'order'):
            self.process_objects = sorted(self.process_objects, key=lambda process: process.order)

    def __str__(self):
        if not isType(self.process_objects, GangaObject):
            return str(self.process_objects)
        else:
            return str(GPIProxyObjectFactory(self.process_objects))

    def append(self, value):
        self.addProcess(value)
        self.process_objects = sorted(
            self.process_objects, key=lambda process: process.order)

    def remove(self, value):
        for process in self.process_objects:
            if (isType(value, type(process)) == True):
                self.process_objects.remove(process)
                break

    def __get__(self):
        return GPIProxyObjectFactory(self.process_objects)

    def __getitem__(self, i):
        return GPIProxyObjectFactory(self.process_objects[i])

    def execute(self, job, newstatus, **options):
        # run the merger objects one at a time
        process_results = []
        for p in self.process_objects:
            # stop infinite recursion
            if p is self:
                continue
            # execute all postprocessors
            process_result = p.execute(job, newstatus, **options)
            if process_result == False:
                newstatus = 'failed'
            process_results.append(process_result)
        # if one fails then we all fail
        return not False in process_results

    def addProcess(self, process_object):
        """Adds a process object to the list of processes to be done."""
        self.process_objects.append(process_object)

    def __len__(self):
        return len(self.process_objects)

    def printSummaryTree(self, level=0, verbosity_level=0, whitespace_marker='', out=None, selection='', interactive=False):
        """If this method is overridden, the following should be noted:

        level: the hierarchy level we are currently at in the object tree.
        verbosity_level: How verbose the print should be. Currently this is always 0.
        whitespace_marker: If printing on multiple lines, this allows the default indentation to be replicated.
                           The first line should never use this, as the substitution is 'name = %s' % printSummaryTree()
        out: An output stream to print to. The last line of output should be printed without a newline.
        selection: See VPrinter for an explanation of this.
        """
        out.write(str(self.process_objects))
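
A hedged sketch of the list-like behaviour implemented above; TextMerger and CustomChecker are used only as stand-ins for any object in the 'postprocessor' category:

pp = MultiPostProcessor(TextMerger(files=['stdout']), CustomChecker(module='check.py'))
len(pp)                                  # 2, via __len__
pp[0]                                    # first processor (proxy-wrapped), via __getitem__
pp.append(TextMerger(files=['stderr']))  # append() re-sorts process_objects by 'order'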
Example #14
class LogicalFile(DiracFile):
    #  Logical File schema
    # Observing the 'old' 1.0 schema whilst preserving backwards compatibility
    # with the fact that we're translating the object into a DiracFile in this
    # case
    _schema = Schema(
        Version(1, 0), {
            'name':
            SimpleItem(
                defvalue="",
                doc='the LFN filename a LogicalFile is constructed with'),
            'namePattern':
            SimpleItem(
                defvalue="", doc='pattern of the file name', transient=1),
            'localDir':
            SimpleItem(
                defvalue=None,
                copyable=1,
                typelist=['str', 'type(None)'],
                doc=
                'local dir where the file is stored, used from get and put methods',
                transient=1),
            'remoteDir':
            SimpleItem(
                defvalue="",
                doc=
                'remote directory where the LFN is to be placed in the dirac base directory by the put method.',
                transient=1),
            'locations':
            SimpleItem(
                defvalue=[],
                copyable=1,
                typelist=['str'],
                sequence=1,
                doc="list of SE locations where the outputfiles are uploaded",
                transient=1),
            'compressed':
            SimpleItem(
                defvalue=False,
                typelist=['bool'],
                protected=0,
                doc=
                'whether the output file should be compressed before sending somewhere',
                transient=1),
            'lfn':
            SimpleItem(
                defvalue='',
                copyable=1,
                typelist=['str'],
                doc=
                'return the logical file name/set the logical file name to use if not '
                'using wildcards in namePattern',
                transient=1),
            'guid':
            SimpleItem(
                defvalue='',
                copyable=1,
                typelist=['str'],
                doc=
                'return the GUID/set the GUID to use if not using wildcards in the namePattern.',
                transient=1),
            'subfiles':
            ComponentItem(category='gangafiles',
                          defvalue=[],
                          hidden=1,
                          sequence=1,
                          copyable=0,
                          typelist=['GangaDirac.Lib.Files.DiracFile'],
                          doc="collected files from the wildcard namePattern",
                          transient=1),
            'failureReason':
            SimpleItem(defvalue="",
                       protected=1,
                       copyable=0,
                       doc='reason for the upload failure',
                       transient=1)
        })
    _name = "LogicalFile"

    # TODO:  Add warning to User NOT to create these objects themselves and that they should
    #       only be used for backwards compatibility to load old jobs

    def __init__(self, name=""):

        super(LogicalFile, self).__init__(lfn=name)

        self.name = name

        logger.warning(
            "!!! LogicalFile has been deprecated, this is now just a wrapper to the DiracFile object"
        )
        logger.warning(
            "!!! Please update your scripts before LogicalFile is removed")

        self._setLFNnamePattern(_lfn=self.name, _namePattern='')

    def __setstate__(self, dict):
        super(LogicalFile, self).__setstate__(dict)
        self._setLFNnamePattern(_lfn=self.name, _namePattern='')

    def __construct__(self, args):

        if len(args) >= 1:
            self.name = args[0]
            self._setLFNnamePattern(_lfn=self.name, _namePattern='')

        if (len(args) != 1) or (type(args[0]) is not type('')):
            super(LogicalFile, self).__construct__(args)
        else:
            self.name = strip_filename(args[0])

    def __setattr__(self, name, value):

        if name == "name":
            #elf.name = value
            self.lfn = value
            import os.path
            self.namePattern = os.path.basename(value)
            self.remoteDir = os.path.dirname(value)
        super(LogicalFile, self).__setattr__(name, value)

    def _attribute_filter__set__(self, attrib_name, value):
        if attrib_name == "name":
            self._setLFNnamePattern(lfn=value, namePattern='')
        return super(LogicalFile,
                     self)._attribute_filter__set__(attrib_name, value)
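
A brief compatibility sketch: constructing the deprecated LogicalFile above simply forwards to the underlying DiracFile fields via __setattr__ (the LFN used here is purely illustrative):

lf = LogicalFile('/some/grid/path/data.dst')   # logs the deprecation warnings
print(lf.lfn)                                  # the full name, mirrored into 'lfn'
print(lf.namePattern)                          # 'data.dst' (basename of the name)
print(lf.remoteDir)                            # '/some/grid/path' (dirname of the name)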
Example #15
class IUnit(GangaObject):
    _schema = Schema(
        Version(1, 0), {
            'status':
            SimpleItem(defvalue='new',
                       protected=1,
                       copyable=0,
                       doc='Status - running, pause or completed',
                       typelist=["str"]),
            'name':
            SimpleItem(defvalue='Simple Unit',
                       doc='Name of the unit (cosmetic)',
                       typelist=["str"]),
            'application':
            ComponentItem('applications',
                          defvalue=None,
                          optional=1,
                          load_default=False,
                          doc='Application of the Transform.'),
            'inputdata':
            ComponentItem('datasets',
                          defvalue=None,
                          optional=1,
                          load_default=False,
                          doc='Input dataset'),
            'outputdata':
            ComponentItem('datasets',
                          defvalue=None,
                          optional=1,
                          load_default=False,
                          doc='Output dataset'),
            'active':
            SimpleItem(defvalue=False, hidden=1, doc='Is this unit active'),
            'active_job_ids':
            SimpleItem(defvalue=[],
                       typelist=['int'],
                       sequence=1,
                       hidden=1,
                       doc='Active job ids associated with this unit'),
            'prev_job_ids':
            SimpleItem(defvalue=[],
                       typelist=['int'],
                       sequence=1,
                       hidden=1,
                       doc='Previous job ids associated with this unit'),
            'minor_resub_count':
            SimpleItem(defvalue=0, hidden=1, doc='Number of minor resubmits'),
            'major_resub_count':
            SimpleItem(defvalue=0, hidden=1, doc='Number of major resubmits'),
            'req_units':
            SimpleItem(
                defvalue=[],
                typelist=['str'],
                sequence=1,
                hidden=1,
                doc=
                'List of units that must complete for this to start (format TRF_ID:UNIT_ID)'
            ),
            'start_time':
            SimpleItem(
                defvalue=0,
                hidden=1,
                doc='Start time for this unit. Allows a delay to be put in'),
            'copy_output':
            ComponentItem(
                'datasets',
                defvalue=None,
                load_default=0,
                optional=1,
                doc=
                'The dataset to copy the output of this unit to, e.g. Grid dataset -> Local Dataset'
            ),
            'merger':
            ComponentItem('mergers',
                          defvalue=None,
                          load_default=0,
                          optional=1,
                          doc='Merger to be run after this unit completes.'),
            'splitter':
            ComponentItem('splitters',
                          defvalue=None,
                          optional=1,
                          load_default=False,
                          doc='Splitter used on each unit of the Transform.'),
            'postprocessors':
            ComponentItem(
                'postprocessor',
                defvalue=None,
                doc='list of postprocessors to run after job has finished'),
            'inputsandbox':
            FileItem(defvalue=[],
                     typelist=['str', 'Ganga.GPIDev.Lib.File.File.File'],
                     sequence=1,
                     doc="list of File objects shipped to the worker node "),
            'inputfiles':
            GangaFileItem(
                defvalue=[],
                typelist=[
                    'str', 'Ganga.GPIDev.Adapters.IGangaFile.IGangaFile'
                ],
                sequence=1,
                doc=
                "list of file objects that will act as input files for a job"),
            'outputfiles':
            GangaFileItem(
                defvalue=[],
                typelist=[
                    'str', 'Ganga.GPIDev.Adapters.IGangaFile.IGangaFile'
                ],
                sequence=1,
                doc="list of OutputFile objects to be copied to all jobs"),
            'info':
            SimpleItem(defvalue=[],
                       typelist=['str'],
                       protected=1,
                       sequence=1,
                       doc="Info showing status transitions and unit info"),
            'id':
            SimpleItem(defvalue=-1,
                       protected=1,
                       doc='ID of the Unit',
                       typelist=["int"]),
        })

    _category = 'units'
    _name = 'IUnit'
    _exportmethods = []
    _hidden = 0

    # Special methods:
    def __init__(self):
        super(IUnit, self).__init__()
        self.updateStatus("new")

    def _readonly(self):
        """A unit is read-only if the status is not new."""
        if self.status == "new":
            return 0
        return 1

    def validate(self):
        """Validate that this unit is OK and set it to active"""
        self.active = True
        return True

    def getID(self):
        """Get the ID of this unit within the transform"""

        # if the id isn't already set, use the index from the parent Task
        if self.id < 0:
            trf = self._getParent()
            if not trf:
                raise ApplicationConfigurationError(
                    None,
                    "This unit has not been associated with a transform and so there is no ID available"
                )
            self.id = trf.units.index(self)

        return self.id

    def updateStatus(self, status):
        """Update status hook"""
        addInfoString(
            self, "Status change from '%s' to '%s'" % (self.status, status))
        self.status = status

    def createNewJob(self):
        """Create any jobs required for this unit"""
        pass

    def checkCompleted(self, job):
        """Check if this unit is complete"""
        if job.status == "completed":
            return True
        else:
            return False

    def checkForSubmission(self):
        """Check if this unit should submit a job"""

        # check the delay
        if time.time() < self.start_time:
            return False

        # check if we already have a job
        if len(self.active_job_ids) != 0:
            return False

        # if we're using threads, check the max number
        if self._getParent(
        ).submit_with_threads and GPI.queues.totalNumUserThreads(
        ) > self._getParent().max_active_threads:
            return False

        return True

    def checkForResubmission(self):
        """check if this unit should be resubmitted"""

        # check if we already have a job
        if len(self.active_job_ids) == 0:
            return False
        else:
            job = GPI.jobs(self.active_job_ids[0])
            if job.status in ["failed", "killed"]:
                return True

            return False

    def checkParentUnitsAreComplete(self):
        """Check to see if the parent units are complete"""
        req_ok = True
        task = self._getParent()._getParent()
        for req in self.req_units:
            req_trf_id = int(req.split(":")[0])

            if req.find("ALL") == -1:
                req_unit_id = int(req.split(":")[1])
                if task.transforms[req_trf_id].units[
                        req_unit_id].status != "completed":
                    req_ok = False

            else:
                # need all units from this trf
                for u in task.transforms[req_trf_id].units:
                    if u.status != "completed":
                        req_ok = False

        return req_ok

    def checkMajorResubmit(self, job):
        """check if this job needs to be fully rebrokered or not"""
        pass

    def majorResubmit(self, job):
        """perform a mjor resubmit/rebroker"""
        self.prev_job_ids.append(job.id)
        self.active_job_ids.remove(job.id)

    def minorResubmit(self, job):
        """perform just a minor resubmit"""
        try:
            trf = self._getParent()
        except Exception as err:
            logger.debug("GetParent exception!\n%s" % str(err))
            trf = None
        if trf is not None and trf.submit_with_threads:
            addInfoString(self, "Attempting job re-submission with queues...")
            GPI.queues.add(job.resubmit)
        else:
            addInfoString(self, "Attempting job re-submission...")
            job.resubmit()

    def update(self):
        """Update the unit and (re)submit jobs as required"""
        #logger.warning("Entered Unit %d update function..." % self.getID())

        # if we're complete, then just return
        if self.status in ["completed", "recreating"] or not self.active:
            return 0

        # check if submission is needed
        task = self._getParent()._getParent()
        trf = self._getParent()
        maxsub = task.n_tosub()

        # check parent unit(s)
        req_ok = self.checkParentUnitsAreComplete()

        # set the start time if not already set
        if len(self.req_units) > 0 and req_ok and self.start_time == 0:
            self.start_time = time.time() + trf.chain_delay * 60 - 1

        if req_ok and self.checkForSubmission() and maxsub > 0:

            # create job and submit
            addInfoString(self, "Creating Job...")
            j = self.createNewJob()
            if j.name == '':
                j.name = "T%i:%i U%i" % (task.id, trf.getID(), self.getID())

            try:
                if trf.submit_with_threads:
                    addInfoString(self,
                                  "Attempting job submission with queues...")
                    GPI.queues.add(j.submit)
                else:
                    addInfoString(self, "Attempting job submission...")
                    j.submit()

            except Exception as err:
                logger.debug("update Err: %s" % str(err))
                addInfoString(self, "Failed Job Submission")
                addInfoString(self, "Reason: %s" % (formatTraceback()))
                logger.error("Couldn't submit the job. Deactivating unit.")
                self.prev_job_ids.append(j.id)
                self.active = False
                trf._setDirty()  # ensure everything's saved
                return 1

            self.active_job_ids.append(j.id)
            self.updateStatus("running")
            trf._setDirty()  # ensure everything's saved

            if trf.submit_with_threads:
                return 0

            return 1

        # update any active jobs
        for jid in self.active_job_ids:

            # we have an active job so see if this job is OK and resubmit if
            # not
            try:
                job = GPI.jobs(jid)
            except Exception as err:
                logger.debug("Update2 Err: %s" % str(err))
                logger.warning(
                    "Cannot find job with id %d. Maybe reset this unit with: tasks(%d).transforms[%d].resetUnit(%d)"
                    % (jid, task.id, trf.getID(), self.getID()))
                continue

            if job.status == "completed":

                # check if actually completed
                if not self.checkCompleted(job):
                    return 0

                # check for DS copy
                if trf.unit_copy_output:
                    if not self.copy_output:
                        trf.createUnitCopyOutputDS(self.getID())

                    if not self.copyOutput():
                        return 0

                # check for merger
                if trf.unit_merger:
                    if not self.merger:
                        self.merger = trf.createUnitMerger(self.getID())

                    if not self.merge():
                        return 0

                # all good so mark unit as completed
                self.updateStatus("completed")

            elif job.status == "failed" or job.status == "killed":

                # check for too many resubs
                if self.minor_resub_count + self.major_resub_count > trf.run_limit - 1:
                    logger.error(
                        "Too many resubmits (%i). Deactivating unit." %
                        (self.minor_resub_count + self.major_resub_count))
                    addInfoString(
                        self, "Deactivating unit. Too many resubmits (%i)" %
                        (self.minor_resub_count + self.major_resub_count))
                    self.active = False
                    return 0

                rebroker = False

                if self.minor_resub_count > trf.minor_run_limit - 1:
                    if self._getParent().rebroker_on_job_fail:
                        rebroker = True
                    else:
                        logger.error(
                            "Too many minor resubmits (%i). Deactivating unit."
                            % self.minor_resub_count)
                        addInfoString(
                            self,
                            "Deactivating unit. Too many resubmits (%i)" %
                            (self.minor_resub_count + self.major_resub_count))
                        self.active = False
                        return 0

                if self.major_resub_count > trf.major_run_limit - 1:
                    logger.error(
                        "Too many major resubmits (%i). Deactivating unit." %
                        self.major_resub_count)
                    addInfoString(
                        self, "Deactivating unit. Too many resubmits (%i)" %
                        (self.minor_resub_count + self.major_resub_count))
                    self.active = False
                    return 0

                # check the type of resubmit
                if rebroker or self.checkMajorResubmit(job):

                    self.major_resub_count += 1
                    self.minor_resub_count = 0

                    try:
                        addInfoString(self, "Attempting major resubmit...")
                        self.majorResubmit(job)
                    except Exception as err:
                        logger.debug("Update Err3: %s" % str(err))
                        logger.error(
                            "Couldn't resubmit the job. Deactivating unit.")
                        addInfoString(self, "Failed Job resubmission")
                        addInfoString(self, "Reason: %s" % (formatTraceback()))
                        self.active = False

                    # break the loop now because we've probably changed the
                    # active jobs list
                    return 1
                else:
                    self.minor_resub_count += 1
                    try:
                        addInfoString(self, "Attempting minor resubmit...")
                        self.minorResubmit(job)
                    except Exception as err:
                        logger.debug("Update Err4: %s" % str(err))
                        logger.error(
                            "Couldn't resubmit the job. Deactivating unit.")
                        addInfoString(self, "Failed Job resubmission")
                        addInfoString(self, "Reason: %s" % (formatTraceback()))
                        self.active = False
                        return 1

    def reset(self):
        """Reset the unit completely"""
        addInfoString(self, "Reseting Unit...")
        self.minor_resub_count = 0
        self.major_resub_count = 0
        if len(self.active_job_ids) > 0:
            self.prev_job_ids += self.active_job_ids
        self.active_job_ids = []

        self.active = True

        # if has parents, set to recreate
        if len(self.req_units) > 0:
            self.updateStatus("recreating")
        else:
            self.updateStatus("running")

    # Info routines
    def n_active(self):

        if self.status == 'completed':
            return 0

        tot_active = 0
        active_states = ['submitted', 'running']

        for jid in self.active_job_ids:

            try:
                job = GPI.jobs(jid)
            except Exception as err:
                logger.debug("n_active Err: %s" % str(err))
                task = self._getParent()._getParent()
                trf = self._getParent()
                logger.warning(
                    "Cannot find job with id %d. Maybe reset this unit with: tasks(%d).transforms[%d].resetUnit(%d)"
                    % (jid, task.id, trf.getID(), self.getID()))
                continue

            j = stripProxy(job)

            # try to preserve lazy loading
            if hasattr(j, 'getNodeIndexCache') and j.getNodeIndexCache(
            ) and 'subjobs:status' in j.getNodeIndexCache():
                if len(j.getNodeIndexCache()['subjobs:status']) > 0:
                    for sj_stat in j.getNodeIndexCache()['subjobs:status']:
                        if sj_stat in active_states:
                            tot_active += 1
                else:
                    if j.getNodeIndexCache()['status'] in active_states:
                        tot_active += 1
            else:
                #logger.warning("WARNING: (active check) No index cache for job object %d" % jid)
                if j.status in active_states:
                    if j.subjobs:
                        for sj in j.subjobs:
                            if sj.status in active_states:
                                tot_active += 1
                    else:
                        tot_active += 1

        return tot_active

    def n_status(self, status):
        tot_active = 0
        for jid in self.active_job_ids:

            try:
                job = GPI.jobs(jid)
            except Exception as err:
                logger.debug("n_status Err: %s" % str(err))
                task = self._getParent()._getParent()
                trf = self._getParent()
                logger.warning(
                    "Cannot find job with id %d. Maybe reset this unit with: tasks(%d).transforms[%d].resetUnit(%d)"
                    % (jid, task.id, trf.getID(), self.getID()))
                continue

            j = stripProxy(job)

            # try to preserve lazy loading
            if hasattr(j, 'getNodeIndexCache') and j.getNodeIndexCache(
            ) and 'subjobs:status' in j.getNodeIndexCache():
                if len(j.getNodeIndexCache()['subjobs:status']) > 0:
                    for sj_stat in j.getNodeIndexCache()['subjobs:status']:
                        if sj_stat == status:
                            tot_active += 1
                else:
                    if j.getNodeIndexCache()['status'] == status:
                        tot_active += 1

            else:
                #logger.warning("WARNING: (status check) No index cache for job object %d" % jid)
                if j.subjobs:
                    for sj in j.subjobs:
                        if sj.status == status:
                            tot_active += 1
                else:
                    if j.status == status:
                        tot_active += 1

        return tot_active

    def n_all(self):
        total = 0
        for jid in self.active_job_ids:

            try:
                job = GPI.jobs(jid)
            except Exception as err:
                logger.debug("n_all Err: %s" % str(err))
                task = self._getParent()._getParent()
                trf = self._getParent()
                logger.warning(
                    "Cannot find job with id %d. Maybe reset this unit with: tasks(%d).transforms[%d].resetUnit(%d)"
                    % (jid, task.id, trf.getID(), self.getID()))
                continue

            j = stripProxy(job)

            # try to preserve lazy loading
            if hasattr(j, 'getNodeIndexCache') and j.getNodeIndexCache(
            ) and 'subjobs:status' in j.getNodeIndexCache():
                if len(j.getNodeIndexCache()['subjobs:status']) != 0:
                    total += len(j.getNodeIndexCache()['subjobs:status'])
                else:
                    total += 1
            else:
                #logger.warning("WARNING: (status check) No index cache for job object %d" % jid)
                if j.subjobs:
                    total += len(j.subjobs)
                else:
                    total += 1

        return total

    def overview(self):
        """Print an overview of this unit"""
        o = "    Unit %d: %s        " % (self.getID(), self.name)

        for s in ["submitted", "running", "completed", "failed", "unknown"]:
            o += markup("%i   " % self.n_status(s), overview_colours[s])

        print(o)

    def copyOutput(self):
        """Copy any output to the given dataset"""
        logger.error(
            "No default implementation for Copy Output - contact plugin developers"
        )
        return False
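
A small, hedged illustration of the req_units format consumed by checkParentUnitsAreComplete() above ("TRF_ID:UNIT_ID", or "TRF_ID:ALL" to require every unit of that transform):

req = "0:2"
req_trf_id = int(req.split(":")[0])    # transform 0
req_unit_id = int(req.split(":")[1])   # unit 2 must be 'completed'

req = "1:ALL"
wants_all = req.find("ALL") != -1      # True: all units of transform 1 must be 'completed'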
Example #16
class Remote(IBackend):

    """Remote backend - submit jobs to a Remote pool.

    The remote backend works as an SSH tunnel to a remote site
    where a ganga session is opened and the job submitted there
    using the specified remote_backend. It is (in theory!)
    transparent to the user and should allow submission of any jobs
    to any backends that are already possible in Ganga.

    NOTE: Due to the file transfers required, there can be some slow
    down during submission and monitoring


    E.g. 1 - Hello World example submitted to local backend:

    j = Job(application=Executable(exe='/bin/echo',args=['Hello World']), backend="Remote")
    j.backend.host = "bluebear.bham.ac.uk"                  # Host name
    j.backend.username = "******"                         # User name
    j.backend.ganga_cmd = "/bb/projects/Ganga/runGanga"     # Ganga Command line on remote site
    j.backend.ganga_dir = "/bb/phy/slatermw/gangadir/remote_jobs"  # Where to store the jobs
    j.backend.remote_backend = Local()
    j.submit()


    E.g. 2 - Root example submitted to PBS backend:

    r = Root()
    r.version = '5.14.00'
    r.script = 'gengaus.C'

    j = Job(application=r,backend="Remote")
    j.backend.host = "bluebear.bham.ac.uk"
    j.backend.username = "******"
    j.backend.ganga_cmd = "/bb/projects/Ganga/runGanga"
    j.backend.ganga_dir = "/bb/phy/slatermw/gangadir/remote_jobs"
    j.outputsandbox = ['gaus.txt']
    j.backend.remote_backend = PBS()
    j.submit()


    E.g. 3 - Athena example submitted to LCG backend
    NOTE: you don't need a grid certificate (or UI) available on the local machine,
    just the remote machine

    j = Job()
    j.name='Ex3_2_1'
    j.application=Athena()
    j.application.prepare(athena_compile=False)
    j.application.option_file='/disk/f8b/home/mws/athena/testarea/13.0.40/PhysicsAnalysis/AnalysisCommon/UserAnalysis/run/AthExHelloWorld_jobOptions.py'

    j.backend = Remote()
    j.backend.host = "bluebear.bham.ac.uk"
    j.backend.username = "******"
    j.backend.ganga_cmd = "/bb/projects/Ganga/runGanga"
    j.backend.ganga_dir = "/bb/phy/slatermw/gangadir/remote_jobs"   
    j.backend.environment = {'ATLAS_VERSION' : '13.0.40'}     # Additional environment variables
    j.backend.remote_backend = LCG()
    j.backend.remote_backend.CE = 'epgce2.ph.bham.ac.uk:2119/jobmanager-lcgpbs-short'

    j.submit()

    E.g. 4 - Hello World submitted at CERN on LSF using atlas startup

    j = Job()
    j.backend = Remote()
    j.backend.host = "lxplus.cern.ch"
    j.backend.username = "******"
    j.backend.ganga_cmd = "ganga"
    j.backend.ganga_dir = "/afs/cern.ch/user/m/mslater/gangadir/remote_jobs"
    j.backend.pre_script = ['source /afs/cern.ch/sw/ganga/install/etc/setup-atlas.csh'] # source the atlas setup script before running ganga
    j.backend.remote_backend = LSF()
    j.submit()

    """

    _schema = Schema(Version(1, 0), {
        "remote_backend": ComponentItem('backends', doc='specification of the resources to be used (e.g. batch system)'),
        "host": SimpleItem(defvalue="", doc="The remote host and port number ('host:port') to use. Default port is 22."),
        "ssh_key": SimpleItem(defvalue="", doc="Set to true to the location of the the ssh key to use for authentication, e.g. /home/mws/.ssh/id_rsa. Note, you should make sure 'key_type' is also set correctly."),
        "key_type": SimpleItem(defvalue="RSA", doc="Set to the type of ssh key to use (if required). Possible values are 'RSA' and 'DSS'."),
        "username": SimpleItem(defvalue="", doc="The username at the remote host"),
        "ganga_dir": SimpleItem(defvalue="", doc="The directory to use for the remote workspace, repository, etc."),
        "ganga_cmd": SimpleItem(defvalue="", doc="Command line to start ganga on the remote host"),
        "environment": SimpleItem(defvalue={}, doc="Overides any environment variables set in the job"),
        "pre_script": SimpleItem(defvalue=[''], doc="Sequence of commands to execute before running Ganga on the remote site"),
        'remote_job_id': SimpleItem(defvalue=0, protected=1, copyable=0, doc='Remote job id.'),
        'exitcode': SimpleItem(defvalue=0, protected=1, copyable=0, doc='Application exit code'),
        'actualCE': SimpleItem(defvalue=0, protected=1, copyable=0, doc='Computing Element where the job actually runs.')
    })

    _category = "backends"
    _name = "Remote"
    #_hidden = False # KUBA: temporarily disabled from the public
    _port = 22
    _transport = None
    _sftp = None
    _code = randomString()
    _transportarray = None
    _key = {}

    _exportmethods = ['setup']

    def __init__(self):
        super(Remote, self).__init__()

    def __del__(self):
        if (self._transport != None):
            self._transport.close()
            self._transport = None

    def setup(self):  # KUBA: generic setup hook
        job = self.getJobObject()
        if job.status in ['submitted', 'running', 'completing']:

            # Send a script over to the remote site that updates this jobs
            # info with the info of the remote job
            import os

            # Create a ganga script that updates the job info from the remote
            # site
            script = """#!/usr/bin/env python
from __future__ import print_function
#-----------------------------------------------------
# This is a setup script for a remote job. It
# does very little
#-----------------------------------------------------

# print a finished token
print("***_FINISHED_***")
"""

            # check for the connection
            if (self.opentransport() == False):
                return False

            # send the script
            #script_name = '/__setupscript__%s.py' % self._code
            #self._sftp.open(self.ganga_dir + script_name, 'w').write(script)

            # run the script
            #stdout, stderr = self.run_remote_script( script_name, self.pre_script )

            # remove the script
            #self._sftp.remove(self.ganga_dir + script_name)

        return True

    def opentransport(self):

        import paramiko
        import getpass
        import atexit

        if (self._transport != None):
            # transport is open
            return

        # check for a useable transport for this username and host
        if Remote._transportarray != None:
            for t in Remote._transportarray:
                if (t != None) and (t[0] == self.username) and (t[1] == self.host):

                    # check for too many retries on the same host
                    if t[2] == None or t[3] == None:
                        logger.warning("Too many retries for remote host " + self.username +
                                       "@" + self.host + ". Restart Ganga to have another go.")
                        return False

                    self._transport = t[2]
                    self._sftp = t[3]

                    # ensure that the remote dir is still there - it will crash if the dir structure
                    # changes with the sftp sill open
                    channel = self._transport.open_session()
                    channel.exec_command('mkdir -p ' + self.ganga_dir)
                    bufout = ""
                    while not channel.exit_status_ready():
                        if channel.recv_ready():
                            bufout = channel.recv(1024)

                    return

        # Ask user for password - give three tries
        num_try = 0
        password = ""
        while num_try < 3:

            try:
                temp_host = self.host
                temp_port = self._port
                if self.host.find(":") != -1:
                    # user specified port
                    temp_port = eval(self.host[self.host.find(":") + 1:])
                    temp_host = self.host[: self.host.find(":")]

                self._transport = paramiko.Transport((temp_host, temp_port))

                # avoid hang on exit by daemonising the thread
                self._transport.setDaemon(True)

                # register for proper shutdown
                atexit.register(shutdown_transport, self._transport)

                if self.ssh_key != "" and os.path.exists(os.path.expanduser(os.path.expandvars(self.ssh_key))):
                    privatekeyfile = os.path.expanduser(
                        os.path.expandvars(self.ssh_key))

                    if self.ssh_key not in Remote._key:

                        if self.key_type == "RSA":
                            password = getpass.getpass(
                                'Enter passphrase for key \'%s\': ' % (self.ssh_key))
                            Remote._key[self.ssh_key] = paramiko.RSAKey.from_private_key_file(
                                privatekeyfile, password=password)
                        elif self.key_type == "DSS":
                            password = getpass.getpass(
                                'Enter passphrase for key \'%s\': ' % (self.ssh_key))
                            Remote._key[self.ssh_key] = paramiko.DSSKey.from_private_key_file(
                                privatekeyfile, password=password)
                        else:
                            logger.error(
                                "Unknown ssh key_type '%s'. Unable to connect." % self.key_type)
                            return False

                    self._transport.connect(
                        username=self.username, pkey=Remote._key[self.ssh_key])
                else:
                    logger.debug("SSH key: %s" % self.ssh_key)
                    if os.path.exists(os.path.expanduser(os.path.expandvars(self.ssh_key))):
                        logger.debug(
                            "PATH: %s Exists" % os.path.expanduser(os.path.expandvars(self.ssh_key)))
                    else:
                        logger.debug("PATH: %s Does NOT Exist" % os.path.expanduser(
                            os.path.expandvars(self.ssh_key)))

                    if self.username != "" and self.host != "":
                        password = getpass.getpass(
                            'Password for %s@%s: ' % (self.username, self.host))
                        self._transport.connect(
                            username=self.username, password=password)
                    elif self.username == "":
                        logger.error("ERROR: USERNAME NOT DEFINED!!!")
                        return False
                    elif self.host == "":
                        logger.error("ERROR: HOSTNAME NOT DEFINED!!!")
                        return False
                    else:
                        pass

                # blank the password just in case
                password = "******"

                channel = self._transport.open_session()
                channel.exec_command('mkdir -p ' + self.ganga_dir)
                self._sftp = paramiko.SFTPClient.from_transport(
                    self._transport)

                # Add to the transport array
                Remote._transportarray = [Remote._transportarray,
                                          [self.username, self.host, self._transport, self._sftp]]
                num_try = 1000

            except Exception as err:
                logger.debug("Err: %s" %str(err))
                logger.warning("Error when comunicating with remote host. Retrying...")
                self._transport = None
                self._sftp = None
                if self.ssh_key in Remote._key:
                    del Remote._key[self.ssh_key]

            num_try = num_try + 1

        if num_try == 3:
            logger.error("Could not logon to remote host " + self.username + "@" +
                         self.host + " after three attempts. Restart Ganga to have another go.")
            Remote._transportarray = [Remote._transportarray,
                                      [self.username, self.host, None, None]]
            return False

        return True

    def run_remote_script(self, script_name, pre_script):
        """Run a ganga script on the remote site"""

        import getpass

        # Set up a command file to source. This gets around a silly alias
        # problem
        cmd_str = ""
        for c in pre_script:
            cmd_str += c + '\n'

        cmd_str += self.ganga_cmd + \
            " -o\'[Configuration]gangadir=" + self.ganga_dir + "\' "
        cmd_str += self.ganga_dir + script_name + '\n'
        cmd_file = os.path.join(
            self.ganga_dir, "__gangacmd__" + randomString())
        self._sftp.open(cmd_file, 'w').write(cmd_str)

        # run ganga command
        channel = self._transport.open_session()
        channel.exec_command("source " + cmd_file)

        # Read the output after command
        stdout = bufout = ""
        stderr = buferr = ""
        grid_ok = False

        while not channel.exit_status_ready():

            if channel.recv_ready():
                bufout = channel.recv(1024)
                stdout += bufout

            if channel.recv_stderr_ready():
                buferr = channel.recv_stderr(1024)
                stderr += buferr

            if stdout.find("***_FINISHED_***") != -1:
                break

            if (bufout.find("GRID pass") != -1 or buferr.find("GRID pass") != -1):
                grid_ok = True
                password = getpass.getpass('Enter GRID pass phrase: ')
                channel.send(password + "\n")
                password = ""

            bufout = buferr = ""

        self._sftp.remove(cmd_file)

        return stdout, stderr

    def submit(self, jobconfig, master_input_sandbox):
        """Submit the job to the remote backend.

            Return value: True if job is submitted successfully,
                          or False otherwise"""

        import os
        import getpass

        # First some sanity checks...
        fail = 0
        if self.remote_backend == None:
            logger.error("No backend specified for remote host.")
            fail = 1
        if self.host == "":
            logger.error("No remote host specified.")
            fail = 1
        if self.username == "":
            logger.error("No username specified.")
            fail = 1
        if self.ganga_dir == "":
            logger.error("No remote ganga directory specified.")
            fail = 1
        if self.ganga_cmd == "":
            logger.error("No ganga command specified.")
            fail = 1

        if fail:
            return 0

        # initiate the connection
        if self.opentransport() == False:
            return 0

        # Tar up the input sandbox and copy to the remote cluster
        job = self.getJobObject()
        subjob_input_sandbox = job.createPackedInputSandbox(
            jobconfig.getSandboxFiles())
        input_sandbox = subjob_input_sandbox + master_input_sandbox

        # send the sandbox
        sbx_name = '/__subjob_input_sbx__%s' % self._code
        self._sftp.put(subjob_input_sandbox[0], self.ganga_dir + sbx_name)
        sbx_name = '/__master_input_sbx__%s' % self._code
        self._sftp.put(master_input_sandbox[0], self.ganga_dir + sbx_name)

        # run the submit script on the remote cluster
        scriptpath = self.preparejob(jobconfig, master_input_sandbox)

        # send the script
        data = open(scriptpath, 'r').read()
        script_name = '/__jobscript_run__%s.py' % self._code
        self._sftp.open(self.ganga_dir + script_name, 'w').write(data)

        # run the script
        stdout, stderr = self.run_remote_script(script_name, self.pre_script)

        # delete the jobscript
        self._sftp.remove(self.ganga_dir + script_name)

        # Copy the job object
        if stdout.find("***_FINISHED_***") != -1:
            status, outputdir, id, be = self.grabremoteinfo(stdout)

            self.remote_job_id = id
            if hasattr(self.remote_backend, 'exitcode'):
                self.exitcode = be.exitcode
            if hasattr(self.remote_backend, 'actualCE'):
                self.actualCE = be.actualCE

            # copy each variable in the schema
            # Please can someone tell me why I can't just do
            # self.remote_backend = be?
            for o in be._schema.allItems():
                exec("self.remote_backend." + o[0] + " = be." + o[0])

            return 1
        else:
            logger.error("Problem submitting the job on the remote site.")
            logger.error("<last 1536 bytes of stderr>")
            cut = stderr[len(stderr) - 1536:]

            for ln in cut.splitlines():
                logger.error(ln)

            logger.error("<end of last 1536 bytes of stderr>")

        return 0

    def kill(self):
        """Kill running job.

           No arguments other than self

           Return value: True if job killed successfully,
                         or False otherwise"""

        script = """#!/usr/bin/env python
from __future__ import print_function
#-----------------------------------------------------
# This is a kill script for a remote job. It
# attempts to kill the given job and returns
#-----------------------------------------------------
import os,os.path,shutil,tempfile
import sys,popen2,time,traceback

############################################################################################

###INLINEMODULES###

############################################################################################

code = ###CODE###
jid = ###JOBID###

j = jobs( jid )
j.kill()

# Start pickle token
print("***_START_PICKLE_***")

# pickle the job
import pickle
print(j.outputdir)
print(pickle.dumps(j._impl))
print(j)

# print a finished token
print("***_END_PICKLE_***")
print("***_FINISHED_***")
"""

        script = script.replace('###CODE###', repr(self._code))
        script = script.replace('###JOBID###', str(self.remote_job_id))

        # check for the connection
        if (self.opentransport() == False):
            return 0

        # send the script
        script_name = '/__jobscript_kill__%s.py' % self._code
        self._sftp.open(self.ganga_dir + script_name, 'w').write(script)

        # run the script
        stdout, stderr = self.run_remote_script(script_name, self.pre_script)

        # Copy the job object
        if stdout.find("***_FINISHED_***") != -1:
            status, outputdir, id, be = self.grabremoteinfo(stdout)

            if status == 'killed':
                return True

        return False

    def remove(self):
        """Remove the selected job from the remote site

           No arguments other than self

           Return value: True if job removed successfully,
                         or False otherwise"""

        script = """#!/usr/bin/env python
from __future__ import print_function
#-----------------------------------------------------
# This is a remove script for a remote job. It
# attempts to remove the given job and returns
#-----------------------------------------------------
import os,os.path,shutil,tempfile
import sys,popen2,time,traceback

############################################################################################

###INLINEMODULES###

############################################################################################

code = ###CODE###
jid = ###JOBID###

j = jobs( jid )
j.remove()

jobs( jid )

# print a finished token
print("***_FINISHED_***")
"""

        script = script.replace('###CODE###', repr(self._code))
        script = script.replace('###JOBID###', str(self.remote_job_id))

        # check for the connection
        if (self.opentransport() == False):
            return 0

        # send the script
        script_name = '/__jobscript_remove__%s.py' % self._code
        self._sftp.open(self.ganga_dir + script_name, 'w').write(script)

        # run the script
        stdout, stderr = self.run_remote_script(script_name, self.pre_script)

        # Copy the job object
        if stdout.find("***_FINISHED_***") != -1:
            return True

        return False

    def resubmit(self):
        """Resubmit the job.

           No arguments other than self

           Return value: 1 if job was resubmitted,
                         or 0 otherwise"""

        script = """#!/usr/bin/env python
from __future__ import print_function
#-----------------------------------------------------
# This is a resubmit script for a remote job. It
# attempts to resubmit the given job and returns
#-----------------------------------------------------
import os,os.path,shutil,tempfile
import sys,popen2,time,traceback

############################################################################################

###INLINEMODULES###

############################################################################################

code = ###CODE###
jid = ###JOBID###

j = jobs( jid )
j.resubmit()

# Start pickle token
print("***_START_PICKLE_***")

# pickle the job
import pickle
print(j.outputdir)
print(pickle.dumps(j._impl))
print(j)

# print a finished token
print("***_END_PICKLE_***")
print("***_FINISHED_***")
"""

        script = script.replace('###CODE###', repr(self._code))
        script = script.replace('###JOBID###', str(self.remote_job_id))

        # check for the connection
        if (self.opentransport() == False):
            return 0

        # send the script
        script_name = '/__jobscript_resubmit__%s.py' % self._code
        self._sftp.open(self.ganga_dir + script_name, 'w').write(script)

        # run the script
        stdout, stderr = self.run_remote_script(script_name, self.pre_script)

        # Copy the job object
        if stdout.find("***_FINISHED_***") != -1:
            status, outputdir, id, be = self.grabremoteinfo(stdout)

            if status == 'submitted' or status == 'running':
                return 1

        return 0

    def grabremoteinfo(self, out):

        import pickle

        # Find the start and end of the pickle
        start = out.find("***_START_PICKLE_***") + len("***_START_PICKLE_***")
        stop = out.find("***_END_PICKLE_***")
        outputdir = out[start + 1:out.find("\n", start + 1) - 1]
        pickle_str = out[out.find("\n", start + 1) + 1:stop]

        # Now unpickle and recreate the job
        j = pickle.loads(pickle_str)

        return j.status, outputdir, j.id, j.backend

    def preparejob(self, jobconfig, master_input_sandbox):
        """Prepare the script to create the job on the remote host"""

        import tempfile

        workdir = tempfile.mkdtemp()
        job = self.getJobObject()

        script = """#!/usr/bin/env python
from __future__ import print_function
#-----------------------------------------------------
# This job wrapper script is automatically created by
# GANGA Remote backend handler.
#
# It controls:
# 1. unpack input sandbox
# 2. create the new job
# 3. submit it
#-----------------------------------------------------
import os,os.path,shutil,tempfile
import sys,popen2,time,traceback
import tarfile

############################################################################################

###INLINEMODULES###

############################################################################################

j = Job()

output_sandbox = ###OUTPUTSANDBOX###
input_sandbox = ###INPUTSANDBOX###
appexec = ###APPLICATIONEXEC###
appargs = ###APPLICATIONARGS###
back_end = ###BACKEND###
ganga_dir = ###GANGADIR###
code = ###CODE###
environment = ###ENVIRONMENT###
user_env = ###USERENV###

if user_env != None:
   for env_var in user_env:
      environment[env_var] = user_env[env_var]

j.outputsandbox = output_sandbox
j.backend = back_end

# Unpack the input sandboxes
shutil.move(os.path.expanduser(ganga_dir + "/__subjob_input_sbx__" + code), j.inputdir+"/__subjob_input_sbx__")
shutil.move(os.path.expanduser(ganga_dir + "/__master_input_sbx__" + code), j.inputdir+"/__master_input_sbx__")

# Add the files in the sandbox to the job
inputsbx = []
fullsbxlist = []
try:
   tar = tarfile.open(j.inputdir+"/__master_input_sbx__")
   filelist = tar.getnames()
   print(filelist)
   
   for f in filelist:
      fullsbxlist.append( f )
      inputsbx.append( j.inputdir + "/" + f )

except:
   print("Unable to open master input sandbox")

try:
   tar = tarfile.open(j.inputdir+"/__subjob_input_sbx__")
   filelist = tar.getnames()

   for f in filelist:
      fullsbxlist.append( f )
      inputsbx.append( j.inputdir + "/" + f )

except:
   print("Unable to open subjob input sandbox")

# sort out the path of the exe
if appexec in fullsbxlist:
   j.application = Executable ( exe = File(os.path.join(j.inputdir, appexec)), args = appargs, env = environment )
   print("Script found: %s" % appexec)
else:
   j.application = Executable ( exe = appexec, args = appargs, env = environment )

   
j.inputsandbox = inputsbx

getPackedInputSandbox(j.inputdir+"/__subjob_input_sbx__", j.inputdir + "/.")
getPackedInputSandbox(j.inputdir+"/__master_input_sbx__", j.inputdir + "/.")

# submit the job
j.submit()

# Start pickle token
print("***_START_PICKLE_***")

# pickle the job
import pickle
print(j.outputdir)
print(pickle.dumps(j._impl))

# print a finished token
print("***_END_PICKLE_***")
print("***_FINISHED_***")
"""
        import inspect
        import Ganga.Core.Sandbox as Sandbox
        script = script.replace('###ENVIRONMENT###', repr(jobconfig.env))
        script = script.replace('###USERENV###', repr(self.environment))
        script = script.replace(
            '###INLINEMODULES###', inspect.getsource(Sandbox.WNSandbox))
        script = script.replace(
            '###OUTPUTSANDBOX###', repr(jobconfig.outputbox))
        script = script.replace(
            '###APPLICATIONEXEC###', repr(os.path.basename(jobconfig.getExeString())))
        script = script.replace(
            '###APPLICATIONARGS###', repr(jobconfig.getArgStrings()))

        # get a string describing the required backend
        import cStringIO
        be_out = cStringIO.StringIO()
        job.backend.remote_backend.printTree(be_out, "copyable")
        be_str = be_out.getvalue()
        script = script.replace('###BACKEND###', be_str)

        script = script.replace('###GANGADIR###', repr(self.ganga_dir))
        script = script.replace('###CODE###', repr(self._code))

        sandbox_list = jobconfig.getSandboxFiles()

        str_list = "[ "
        for fname in sandbox_list:
            str_list += "j.inputdir + '/' + " + \
                repr(os.path.basename(fname.name))
            str_list += ", "

        str_list += "j.inputdir + '/__master_input_sbx__' ]"

        script = script.replace('###INPUTSANDBOX###', str_list)
        return job.getInputWorkspace().writefile(FileBuffer('__jobscript__.py', script), executable=0)

    @staticmethod
    def updateMonitoringInformation(jobs):

        # Send a script over to the remote site that updates this jobs
        # info with the info of the remote job
        import os
        import getpass

        # first, loop over the jobs and sort by host, username, gangadir and
        # pre_script
        jobs_sort = {}
        for j in jobs:
            host_str = j.backend.username + "@" + j.backend.host + ":" + \
                j.backend.ganga_dir + "+" + ';'.join(j.backend.pre_script)
            if host_str not in jobs_sort:
                jobs_sort[host_str] = []

            jobs_sort[host_str].append(j)

        for host_str in jobs_sort:
            # Create a ganga script that updates the job info for all jobs at
            # this remote site
            script = """#!/usr/bin/env python
from __future__ import print_function
#-----------------------------------------------------
# This is a monitoring script for a remote job. It
# outputs some useful job info and exits
#-----------------------------------------------------
import os,os.path,shutil,tempfile
import sys,popen2,time,traceback

############################################################################################

###INLINEMODULES###

############################################################################################

code = ###CODE###
jids = ###JOBID###

runMonitoring()

import pickle

for jid in jids:

    j = jobs( jid )

    # Start pickle token
    print("***_START_PICKLE_***")

    # pickle the job
    print(j.outputdir)
    print(pickle.dumps(j._impl))
    print(j)

    # print a finished token
    print("***_END_PICKLE_***")

print("***_FINISHED_***")
"""

            mj = jobs_sort[host_str][0]
            script = script.replace('###CODE###', repr(mj.backend._code))
            rem_ids = []
            for j in jobs_sort[host_str]:
                rem_ids.append(j.backend.remote_job_id)
            script = script.replace('###JOBID###', str(rem_ids))

            # check for the connection
            if (mj.backend.opentransport() == False):
                return 0

            # send the script
            script_name = '/__jobscript__%s.py' % mj.backend._code
            mj.backend._sftp.open(
                mj.backend.ganga_dir + script_name, 'w').write(script)

            # run the script
            stdout, stderr = mj.backend.run_remote_script(
                script_name, mj.backend.pre_script)

            # Copy the job object
            if stdout.find("***_FINISHED_***") != -1:

                start_pos = stdout.find("***_START_PICKLE_***")
                end_pos = stdout.find(
                    "***_END_PICKLE_***") + len("***_END_PICKLE_***")

                while start_pos != -1 and end_pos != -1:
                    pickle_str = stdout[start_pos:end_pos + 1]

                    status, outputdir, id, be = mj.backend.grabremoteinfo(
                        pickle_str)

                    # find the job and update it
                    found = False
                    for j in jobs_sort[host_str]:

                        if (id == j.backend.remote_job_id):
                            found = True
                            if status != j.status:
                                j.updateStatus(status)

                            if hasattr(j.backend.remote_backend, 'exitcode'):
                                j.backend.exitcode = be.exitcode
                            if hasattr(j.backend.remote_backend, 'actualCE'):
                                j.backend.actualCE = be.actualCE

                            for o in be._schema.allItems():
                                setattr(j.backend.remote_backend, o[0],
                                        getattr(be, o[0]))

                            # check for completed or failed and pull the output
                            # if required
                            if j.status == 'completed' or j.status == 'failed':

                                # we should have output, so get the file list
                                # first
                                filelist = j.backend._sftp.listdir(outputdir)

                                # go through and sftp them back
                                for fname in filelist:
                                    data = j.backend._sftp.open(
                                        outputdir + '/' + fname, 'r').read()
                                    open(
                                        j.outputdir + '/' + os.path.basename(fname), 'w').write(data)

                    if not found:
                        logger.warning(
                            "Couldn't match remote id %d with monitored job. Serious problems in Remote monitoring." % id)

                    start_pos = stdout.find("***_START_PICKLE_***", end_pos)
                    end_pos = stdout.find(
                        "***_END_PICKLE_***", end_pos) + len("***_END_PICKLE_***")

            # remove the script
            mj.backend._sftp.remove(mj.backend.ganga_dir + script_name)

        return None
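
The submit/kill/resubmit and monitoring scripts embedded above all frame their reply between ***_START_PICKLE_*** and ***_END_PICKLE_*** and finish with ***_FINISHED_***; grabremoteinfo() and updateMonitoringInformation() then slice the remote output directory and the pickled job out of that frame. Below is a minimal, standalone sketch of the same slicing; the sample payload and the path inside it are invented for illustration.

# Minimal sketch of the token framing parsed by the Remote backend above.
# Only the marker strings are taken from the code; the payload is made up.
sample = ("banner text from the remote ganga session\n"
          "***_START_PICKLE_***\n"
          "/remote/gangadir/workspace/user/LocalXML/7/output/\n"
          "<pickled job bytes would appear here>\n"
          "***_END_PICKLE_***\n"
          "***_FINISHED_***\n")

start = sample.find("***_START_PICKLE_***") + len("***_START_PICKLE_***")
stop = sample.find("***_END_PICKLE_***")

# the first line after the start marker is the remote output directory ...
outputdir = sample[start + 1:sample.find("\n", start + 1)]
# ... and everything up to the end marker is the pickled job object
pickle_str = sample[sample.find("\n", start + 1) + 1:stop]

print(outputdir)                      # the remote output directory line
print("***_FINISHED_***" in sample)   # True -> the remote script completed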
Example #17
0
class GridSandboxCache(GangaObject):
    '''
    Helper class for uploading/downloading/deleting sandbox files on a grid cache.

    @author: Hurng-Chun Lee 
    @contact: [email protected]
    '''

    _schema = Schema(
        Version(1, 1), {
            'protocol':
            SimpleItem(defvalue='', copyable=1, doc='file transfer protocol'),
            'max_try':
            SimpleItem(defvalue=1,
                       doc='max. number of tries in case of failures'),
            'timeout':
            SimpleItem(defvalue=180,
                       copyable=0,
                       hidden=1,
                       doc='transfer timeout in seconds'),
            'uploaded_files':
            ComponentItem('GridFileIndex',
                          defvalue=[],
                          sequence=1,
                          protected=1,
                          copyable=0,
                          hidden=1,
                          doc='a repository record for the uploaded files')
        })

    _category = 'GridSandboxCache'
    _name = 'GridSandboxCache'
    _exportmethods = [
        'upload', 'download', 'delete', 'get_cached_files',
        'list_cached_files', 'cleanup'
    ]

    logger = getLogger()

    def __init__(self):
        super(GridSandboxCache, self).__init__()

    def upload(self, cred_req, files=[], opts=''):
        """
        Uploads multiple files to a remote grid storage.

        @param files is a list of local files to be uploaded to the grid.
               Each element can be either a file path or a File object.

        @return True if files are successfully uploaded; otherwise it returns False
        """
        status = False

        paths = []
        for f in files:
            if getName(f) == 'File':
                paths.append('file://%s' % f.name)
            elif getName(f) == 'str':
                paths.append('file://%s' % f)
            else:
                self.logger.warning('unknown file expression: %s' % repr(f))

        uploaded_files = self.impl_upload(cred_req=cred_req,
                                          files=paths,
                                          opts=opts)

        if len(uploaded_files) == len(files):
            status = self.impl_bookkeepUploadedFiles(uploaded_files,
                                                     append=True,
                                                     opts=opts)
        else:
            status = False

        return status

    def download(self, cred_req, files=[], dest_dir=None, opts=''):
        """
        Downloads multiple files from remote grid storages to 
        a local directory.

        If the file is successfully downloaded, the local file path would be:

            - os.path.join(dest_dir, os.path.basename(local_fname_n))

        @param files is a list of files to be downloaded from the grid.
               The data format of it should be:
               - [index_grid_file_1, index_grid_file_2, ...]

        @param dest_dir is a local destination directory to store the downloaded files.

        @return True if files are successfully downloaded; otherwise it returns False
        """
        status = False
        myFiles = self.__get_file_index_objects__(files)
        downloadedFiles = self.impl_download(cred_req=cred_req,
                                             files=myFiles,
                                             dest_dir=dest_dir,
                                             opts=opts)

        if len(downloadedFiles) == len(myFiles):
            status = True
        else:
            self.logger.warning('some files not successfully downloaded')

        return status

    def delete(self, cred_req, files=[], opts=''):
        """
        Deletes multiple files from remote grid storages.

        @param files is a list of files to be deleted from the grid.
               The data format of it should be:
               - [index_grid_file_1, index_grid_file_2, ...]

        @return True if files are successfully deleted; otherwise it returns False
        """
        status = False
        myFiles = self.__get_file_index_objects__(files)
        deletedFiles = self.impl_delete(cred_req=cred_req,
                                        files=myFiles,
                                        opts=opts)

        if len(deletedFiles) == len(myFiles):
            status = True
        else:
            self.logger.warning('some files not successfully deleted')

        return status

    def cleanup(self, cred_req, opts=''):
        """
        Cleans up the uploaded files.

        @return True if all grid files listed in the index file are successfully deleted.
        """
        status = False

        all_files = self.get_cached_files()

        f_ids = []
        for f in all_files:
            f_ids.append(f.id)

        return self.delete(cred_req=cred_req, files=f_ids)

    def get_cached_files(self, opts=''):
        """
        Gets the indexes of the uploaded files on the grid. 

        @return the dictionary indexing the uploaded files on the grid.
                The key of the dictionary should be the main index (e.g. GUID) of the grid files.
        """
        return self.impl_getUploadedFiles(opts=opts)

    def list_cached_files(self, loop=True, opts=''):
        """
        Lists the uploaded files.

        If loop is True, it also lists the uploaded files associated with subjobs.
        """

        fc = 0
        ds = ''

        doColoring = True

        fg = Foreground()
        fx = Effects()

        status_colors = {'inuse': fg.orange, 'free': fg.blue, 'gone': fg.red}

        status_mapping = {
            'new': 'inuse',
            'submitted': 'inuse',
            'submitting': 'inuse',
            'running': 'inuse',
            'completed': 'free',
            'completing': 'free',
            'failed': 'free',
            'killed': 'free'
        }

        if doColoring:
            markup = ANSIMarkup()
        else:
            markup = NoMarkup()

        def __markup_by_status__(fileIndex, counter, status):

            fmtStr = '\n%4d\t%-30s\t%-12s\t%s' % (counter, fileIndex.name,
                                                  status, fileIndex.id)

            try:
                return markup(fmtStr, status_colors[status])
            except KeyError:
                return markup(fmtStr, fx.normal)

        j = self.getJobObject()

        for f in self.get_cached_files(opts=opts):

            my_status = 'unknown'

            if j:
                try:
                    my_status = status_mapping[j.status]
                except KeyError:
                    pass

            ds += __markup_by_status__(f, fc, my_status)

            fc += 1

        if j and loop:
            for sj in j.subjobs:
                for f in sj.backend.sandboxcache.get_cached_files(opts=opts):

                    my_status = 'unknown'

                    try:
                        my_status = status_mapping[sj.status]
                    except KeyError:
                        pass

                    ds += __markup_by_status__(f, fc, my_status)

                    fc += 1

        return ds

    # methods to be implemented in the child classes
    def impl_upload(self, cred_req, files=[], opts=''):
        """
        Uploads multiple files to a remote grid storage.

        @param files is a list of files in URL format (i.e. file://...)

        @return a list of successfully uploaded files represented by GridFileIndex objects
        """
        raise NotImplementedError

    def impl_download(self, cred_req, files=[], dest_dir=None, opts=''):
        """
        Downloads multiple files from remote grid storages to 
        a local directory.

        @param files is a list of files represented by GridFileIndex objects 
        @param dest_dir is a local destination directory to store the downloaded files.

        @return a list of successfully downloaded files represented by GridFileIndex objects
        """
        raise NotImplementedError

    def impl_delete(self, cred_req, files=[], opts=''):
        """
        Deletes multiple files from remote grid storages. 

        @param files is a list of files represented by GridFileIndex objects 
        @return a list of successfully deleted files represented by GridFileIndex objects
        """
        raise NotImplementedError

    def impl_bookkeepUploadedFiles(self, files=[], append=True, opts=''):
        """
        basic implementation for bookkeeping the uploaded files.
        It simply keeps the GridFileIndex objects in the job repository.

        @param files is a list of files represented by GridFileIndex objects 
        @return True if files are successfully logged in the local index file 
        """

        self.uploaded_files = files

        return True

    def impl_getUploadedFiles(self, opts=''):
        """
        basic implementation for getting the previously uploaded files from the
        job repository.

        @return a list of files represented by GridFileIndex objects
        """
        files = self.uploaded_files

        return files

    # private methods
    def __get_file_index_objects__(self, files=[]):
        '''Gets file index object according to the given file list
             - try to get the GridFileIndex object from the local index file.  

        @param files is a list of file indexes
        @return a list of files represented by GridFileIndex objects
        '''

        cachedFiles = self.get_cached_files()
        myFiles = []
        for f in cachedFiles:
            if f.id in files:
                myFiles.append(f)

        return myFiles

    def __get_unique_fname__(self):
        '''gets a unique filename'''
        fname = 'user.%s' % (get_uuid())
        return fname

    def __cmd_retry_loop__(self, shell, cmd, maxRetry=3):
        '''Executing system command with retry feature'''
        i = 0
        rc = 0
        output = None
        m = None
        try_again = True
        while try_again:
            i = i + 1
            self.logger.debug('run cmd: %s' % cmd)
            rc, output, m = shell.cmd1(cmd, allowed_exit=[0, 255])
            if rc in [0, 255]:
                try_again = False
            elif i == maxRetry:
                try_again = False
            else:
                self.logger.warning("trial %d: error: %s" % (i, output))

        return (rc, output, m)
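
GridSandboxCache above is an abstract helper: upload(), download() and delete() handle argument normalisation and bookkeeping, while the actual transfers are delegated to the impl_* hooks that a concrete cache (a later example in this document uses a gridftp-based one) must provide. The plain-Python stand-in below sketches that contract without the GangaObject/Schema machinery; DummyFileIndex and DummySandboxCache are invented names used only for illustration.

# Hypothetical stand-in showing the impl_upload / impl_bookkeepUploadedFiles contract.
class DummyFileIndex(object):
    def __init__(self, id, name, md5sum=''):
        self.id = id          # main index of the grid copy (e.g. a GUID or URL)
        self.name = name      # original file name
        self.md5sum = md5sum


class DummySandboxCache(object):
    def __init__(self):
        self.uploaded_files = []

    def impl_upload(self, cred_req, files=[], opts=''):
        # pretend every file:// URL was copied to the grid successfully
        return [DummyFileIndex(id='guid-%d' % i, name=f.split('/')[-1])
                for i, f in enumerate(files)]

    def impl_bookkeepUploadedFiles(self, files=[], append=True, opts=''):
        # keep the index objects, as the base class does via the job repository
        self.uploaded_files = files
        return True


cache = DummySandboxCache()
indices = cache.impl_upload(cred_req=None, files=['file:///tmp/data.tar.gz'])
cache.impl_bookkeepUploadedFiles(indices)
print([f.id for f in cache.uploaded_files])   # -> ['guid-0']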
Example #18
0
class BKTestQuery(BKQuery):
    ##     schema = {}
    ##     docstr = 'Bookkeeping query path (type dependent)'
    ##     schema['path'] = SimpleItem(defvalue='' ,doc=docstr)
    ##     docstr = 'Start date string yyyy-mm-dd (only works for type="RunsByDate")'
    ##     schema['startDate'] = SimpleItem(defvalue='' ,doc=docstr)
    ##     docstr = 'End date string yyyy-mm-dd (only works for type="RunsByDate")'
    ##     schema['endDate'] = SimpleItem(defvalue='' ,doc=docstr)
    ##     docstr = 'Data quality flag (string or list of strings).'
    ##     schema['dqflag'] = SimpleItem(defvalue='All',typelist=['str','list'],doc=docstr)
    ##     docstr = 'Type of query (Path, RunsByDate, Run, Production)'
    ##     schema['type'] = SimpleItem(defvalue='Path',doc=docstr)
    ##     docstr = 'Selection criteria: Runs, ProcessedRuns, NotProcessed (only works for type="RunsByDate")'
    ##     schema['selection'] = SimpleItem(defvalue='',doc=docstr)
    _schema = BKQuery._schema.inherit_copy()
    _schema.datadict['dataset'] = ComponentItem('datasets',
                                                defvalue=None,
                                                optional=1,
                                                load_default=False,
                                                doc='dataset',
                                                hidden=0)
    _schema.datadict['fulldataset'] = ComponentItem('datasets',
                                                    defvalue=None,
                                                    optional=1,
                                                    load_default=False,
                                                    doc='dataset',
                                                    hidden=1)
    _schema.datadict['fulldatasetptr'] = SimpleItem(
        defvalue=0,
        optional=0,
        load_default=True,
        doc='dataset position pointer',
        hidden=1,
        typeList=['int'])
    _schema.datadict['filesToRelease'] = SimpleItem(
        defvalue=3,
        optional=0,
        load_default=True,
        doc='number of files to release at a time',
        hidden=0,
        typeList=['int'])
    _category = 'query'
    _name = "BKTestQuery"
    _exportmethods = BKQuery._exportmethods
    _exportmethods += ['removeData']

    def getDataset(self):
        if self.fulldataset is None:
            self.fulldataset = LHCbDataset(
                super(BKTestQuery, self).getDataset().files)
        if self.dataset is None:
            self.dataset = LHCbDataset(
                self.fulldataset.files[:self.filesToRelease])
            self.fulldatasetptr = self.filesToRelease
        else:
            self.dataset.files += self.fulldataset.files[
                self.fulldatasetptr:self.fulldatasetptr + self.filesToRelease]
            self.fulldatasetptr += self.filesToRelease
        return self.dataset

    def removeData(self):
        if len(self.dataset):
            del self.dataset.files[0]
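
getDataset() above drip-feeds the bookkeeping result: on the first call it caches the full query output in fulldataset and releases the first filesToRelease files, and every later call appends the next filesToRelease files while advancing fulldatasetptr. A standalone sketch of that pointer arithmetic on a plain list is given below; the file names are invented.

# Sketch of the incremental release performed by BKTestQuery.getDataset() above,
# using a plain list instead of an LHCbDataset.
full = ['LFN:/lhcb/test/file_%02d.dst' % i for i in range(10)]  # pretend query result
released = []
ptr = 0
files_to_release = 3   # mirrors the filesToRelease schema item

for call in range(4):
    released += full[ptr:ptr + files_to_release]
    ptr += files_to_release
    print(call, len(released))   # prints 3, 6, 9 and finally 10 released files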
Example #19
0
class ITask(GangaObject):
    """This is the framework of a task without special properties"""
    _schema = Schema(
        Version(1, 0), {
            'transforms':
            ComponentItem('transforms',
                          defvalue=[],
                          sequence=1,
                          copyable=0,
                          doc='list of transforms'),
            'id':
            SimpleItem(
                defvalue=-1, protected=1, doc='ID of the Task', typelist=[int
                                                                          ]),
            'name':
            SimpleItem(defvalue='NewTask',
                       copyable=1,
                       doc='Name of the Task',
                       typelist=[str]),
            'comment':
            SimpleItem(
                '', protected=0, doc='comment of the task', typelist=[str]),
            'status':
            SimpleItem(defvalue='new',
                       protected=1,
                       doc='Status - new, running, pause or completed',
                       typelist=[str]),
            'float':
            SimpleItem(defvalue=0,
                       copyable=1,
                       doc='Number of Jobs run concurrently',
                       typelist=[int]),
            'metadata':
            ComponentItem('metadata',
                          defvalue=MetadataDict(),
                          doc='the metadata',
                          protected=1),
            'creation_date':
            SimpleItem(defvalue="19700101",
                       copyable=0,
                       protected=1,
                       doc='Creation date of the task',
                       typelist=[str]),
            'check_all_trfs':
            SimpleItem(
                defvalue=True,
                doc='Check all Transforms during each monitoring loop cycle'),
        })

    _category = 'tasks'
    _name = 'ITask'
    _exportmethods = [
        'run', 'appendTransform', 'overview', 'getJobs', 'remove', 'clone',
        'pause', 'check', 'setBackend', 'setParameter', 'insertTransform',
        'removeTransform', 'table', 'resetUnitsByStatus', 'removeUnusedJobs',
        'n_all', 'n_status'
    ]

    _tasktype = "ITask"

    default_registry = "tasks"

    # Special methods:
    def _auto__init__(self, registry=None):
        if registry is None:
            from Ganga.Core.GangaRepository import getRegistry
            registry = getRegistry(self.default_registry)
        # register the job (it will also commit it)
        # job gets its id now
        registry._add(self)
        self.creation_date = time.strftime('%Y%m%d%H%M%S')
        self.startup()
        self.status = 'new'

    def startup(self):
        """Startup function on Ganga startup"""
        for t in self.transforms:
            t.startup()

    def getTransform(self, trf):
        """Get transform using either index or name"""
        if isinstance(trf, str):
            for trfid in range(0, len(self.transforms)):
                if trf == self.transforms[trfid].name:
                    return self.transforms[trfid]
            logger.warning("Couldn't find transform with name '%s'." % trf)
        elif isinstance(trf, int):
            if trf < 0 or trf >= len(self.transforms):
                logger.warning("Transform number '%d' out of range" % trf)
            else:
                return self.transforms[trf]
        else:
            logger.warning(
                'Incorrect type for transform referral. Allowed types are int or string.'
            )

        return None

    def update(self):
        """Called by the monitoring thread. Base class just calls update on each Transform"""

        # if we're new, then do nothing
        if self.status == "new":
            return

        # loop over all transforms and call update
        for trf in self.transforms:
            if trf.status != "running":
                continue

            if trf.update() and not self.check_all_trfs:
                break

        # update status and check
        self.updateStatus()

# Public methods:
#
# - remove() a task
# - clone() a task
# - check() a task (if updated)
# - run() a task to start processing
# - pause() to interrupt processing
# - setBackend(be) for all transforms
# - setParameter(myParam=True) for all transforms
# - insertTransform(id, tf) insert a new processing step
# - removeTransform(id) remove a processing step

    def remove(self, remove_jobs="do_nothing"):
        """Delete the task"""

        # make sure the task isn't running
        if self.status.find("running") != -1:
            logger.error(
                "Task is still running. Please pause before removing!")
            return

        if remove_jobs not in [True, False]:
            logger.info("You want to remove the task %i named '%s'." %
                        (self.id, self.name))
            logger.info(
                "Since this operation cannot be easily undone, please call this command again:"
            )
            logger.info(
                " * as tasks(%i).remove(remove_jobs=True) if you want to remove all associated jobs,"
                % (self.id))
            logger.info(
                " * as tasks(%i).remove(remove_jobs=False) if you want to keep the jobs."
                % (self.id))
            return
        if remove_jobs:

            for trf in self.transforms:
                for unit in trf.units:
                    for jid in unit.active_job_ids:
                        try:
                            j = getJobByID(jid)
                            j.remove()
                        except Exception as err:
                            logger.debug("Remove Err: %s" % str(err))
                            pass

                    for jid in unit.prev_job_ids:
                        try:
                            j = getJobByID(jid)
                            j.remove()
                        except Exception as err2:
                            logger.debug("Remove Err2: %s" % str(err2))
                            pass

        self._getRegistry()._remove(self, auto_removed=1)
        logger.info("Task #%s deleted" % self.id)

    def clone(self):
        c = super(ITask, self).clone()
        for tf in c.transforms:
            tf.status = "new"
        c.check()
        return c

    def check(self):
        """This function is called by run() or manually by the user"""
        if self.status != "new":
            logger.error(
                "The check() function may modify a task and can therefore only be called on new tasks!"
            )
            return
        try:
            for t in self.transforms:
                t.check()
        finally:
            self.updateStatus()
        return True

    def run(self):
        """Confirms that this task is fully configured and ready to be run."""
        if self.status == "new":
            self.check()

        if self.status != "completed":
            if self.float == 0:
                logger.warning(
                    "The 'float', the number of jobs this task may run, is still zero. Type 'tasks(%i).float = 5' to allow this task to submit 5 jobs at a time"
                    % self.id)
            try:
                for tf in self.transforms:
                    if tf.status != "completed":
                        tf.run(check=False)

            finally:
                self.updateStatus()
        else:
            logger.info("Task is already completed!")

    def pause(self):
        """Pause the task - the background thread will not submit new jobs from this task"""
        float_cache = self.float
        self.float = 0
        if self.status != "completed":
            for tf in self.transforms:
                tf.pause()
            self.status = "pause"
        else:
            logger.info("Transform is already completed!")
        self.float = float_cache

    def insertTransform(self, id, tf):
        """Insert transfrm tf before index id (counting from 0)"""
        if self.status != "new" and id < len(self.transforms):
            logger.error(
                "You can only insert transforms at the end of the list. Only if a task is new it can be freely modified!"
            )
            return
        # self.transforms.insert(id,tf.copy()) # this would be safer, but
        # breaks user expectations
        # this means that t.insertTransform(0,t2.transforms[0]) will cause
        # Great Breakage
        self.transforms.insert(id, tf)
        stripProxy(tf).id = id

    def appendTransform(self, tf):
        """Append transform"""
        return self.insertTransform(len(self.transforms), tf)

    def removeTransform(self, id):
        """Remove the transform with the index id (counting from 0)"""
        if self.status != "new":
            logger.error("You can only remove transforms if the task is new!")
            return
        del self.transforms[id]

    def getJobs(self):
        """ Get the job slice of all jobs that process this task """
        jobslice = JobRegistrySlice("tasks(%i).getJobs()" % (self.id))
        for trf in self.transforms:
            for jid in trf.getJobs():
                jobslice.objects[getJobByID(jid).fqid] = stripProxy(
                    getJobByID(jid))

        return JobRegistrySliceProxy(jobslice)

# Internal methods

    def updateStatus(self):
        """Updates status based on transform status.
           Called from check() or if status of a transform changes"""
        # Calculate status from transform status:
        states = [tf.status for tf in self.transforms]
        if "running" in states and "pause" in states:
            new_status = "running/pause"
        elif "running" in states:
            new_status = "running"
        elif "pause" in states:
            new_status = "pause"
        elif "new" in states:
            new_status = "new"
        elif "completed" in states:
            new_status = "completed"
        else:
            new_status = "new"  # no tranforms
        # Handle status changes here:
        if self.status != new_status:
            if new_status == "running/pause":
                logger.info(
                    "Some Transforms of Task %i '%s' have been paused. Check tasks.table() for details!"
                    % (self.id, self.name))
            elif new_status == "completed":
                logger.info("Task %i '%s' has completed!" %
                            (self.id, self.name))
            elif self.status == "completed":
                logger.warning("Task %i '%s' has been reopened!" %
                               (self.id, self.name))
        self.status = new_status
        return self.status

    # Information methods
    def n_tosub(self):
        return self.float - sum([t.n_active() for t in self.transforms])

    def n_all(self):
        return sum([t.n_all() for t in self.transforms])

    def n_status(self, status):
        return sum([t.n_status(status) for t in self.transforms])

    def table(self):
        from Ganga.Core.GangaRepository import getRegistryProxy
        t = getRegistryProxy('tasks').table(id=self.id)

    def overview(self, status=''):
        """ Show an overview of the Task """
        if status and status not in [
                'bad', 'hold', 'running', 'completed', 'new'
        ]:
            logger.error(
                "Not a valid status for unitOverview. Possible options are: 'bad', 'hold', 'running', 'completed', 'new'."
            )
            return

        print(
            "Lists the units in each transform and give the state of the subjobs"
        )
        print('')
        print(" " * 41 + "Active\tSub\tRun\tComp\tFail\tMinor\tMajor")
        for trfid in range(0, len(self.transforms)):
            print(
                "----------------------------------------------------------------------------------------------------------------------"
            )
            print("----   Transform %d:  %s" %
                  (trfid, self.transforms[trfid].name))
            print('')
            self.transforms[trfid].overview(status)
            print('')

    def info(self):
        for t in self.transforms:
            t.info()

    def help(self):
        print("This is a Task without special properties")

    def resetUnitsByStatus(self, status='bad'):
        """Reset all units of the given status"""
        for trf in self.transforms:
            trf.resetUnitsByStatus(status)

    def removeUnusedJobs(self):
        """Remove any unused jobs"""
        for trf in self.transforms:
            trf.removeUnusedJobs()
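
updateStatus() above derives the task status purely from the states of its transforms, with a fixed precedence: running together with pause wins, then running, pause, new, completed, and 'new' again when there are no transforms at all. The same rule is restated below as a standalone function for quick reference; aggregate_status is an invented name.

# Standalone restatement of the aggregation rule in ITask.updateStatus() above.
def aggregate_status(states):
    if "running" in states and "pause" in states:
        return "running/pause"
    elif "running" in states:
        return "running"
    elif "pause" in states:
        return "pause"
    elif "new" in states:
        return "new"
    elif "completed" in states:
        return "completed"
    return "new"   # no transforms at all


print(aggregate_status(["completed", "running"]))   # running
print(aggregate_status(["new", "completed"]))       # new
print(aggregate_status([]))                         # new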
Example #20
0
class ARC(IBackend):
    '''ARC backend - direct job submission to an ARC CE'''
    _schema = Schema(
        Version(1, 0), {
            'CE':
            SimpleItem(defvalue='', doc='ARC CE endpoint'),
            'jobtype':
            SimpleItem(defvalue='Normal', doc='Job type: Normal, MPICH'),
            'requirements':
            ComponentItem('LCGRequirements',
                          doc='Requirements for the resource selection'),
            'sandboxcache':
            ComponentItem(
                'GridSandboxCache',
                copyable=1,
                doc='Interface for handling oversized input sandbox'),
            'id':
            SimpleItem(defvalue='',
                       typelist=[str, list],
                       protected=1,
                       copyable=0,
                       doc='Middleware job identifier'),
            'status':
            SimpleItem(defvalue='',
                       typelist=[str, dict],
                       protected=1,
                       copyable=0,
                       doc='Middleware job status'),
            'exitcode':
            SimpleItem(defvalue='',
                       protected=1,
                       copyable=0,
                       doc='Application exit code'),
            'exitcode_arc':
            SimpleItem(defvalue='',
                       protected=1,
                       copyable=0,
                       doc='Middleware exit code'),
            'actualCE':
            SimpleItem(defvalue='',
                       protected=1,
                       copyable=0,
                       doc='The ARC CE where the job actually runs.'),
            'reason':
            SimpleItem(defvalue='',
                       protected=1,
                       copyable=0,
                       doc='Reason of causing the job status'),
            'workernode':
            SimpleItem(defvalue='',
                       protected=1,
                       copyable=0,
                       doc='The worker node on which the job actually runs.'),
            'isbURI':
            SimpleItem(defvalue='',
                       protected=1,
                       copyable=0,
                       doc='The input sandbox URI on ARC CE'),
            'osbURI':
            SimpleItem(defvalue='',
                       protected=1,
                       copyable=0,
                       doc='The output sandbox URI on ARC CE'),
            'verbose':
            SimpleItem(defvalue=False,
                       doc='Use verbose options for ARC commands')
        })

    _category = 'backends'

    _name = 'ARC'

    def __init__(self):
        super(ARC, self).__init__()

        # dynamic requirement object loading
        try:
            reqName1 = config['Requirements']
            reqName = config['Requirements'].split('.').pop()
            reqModule = __import__(reqName1, globals(), locals(), [reqName1])
            reqClass = vars(reqModule)[reqName]
            self.requirements = reqClass()

            logger.debug('load %s as LCGRequirements' % reqName)
        except:
            logger.debug('load default LCGRequirements')
            pass

        # dynamic sandbox cache object loading
        # force to use GridftpSandboxCache
        self.sandboxcache = GridftpSandboxCache()
        try:
            scName1 = config['SandboxCache']
            scName = config['SandboxCache'].split('.').pop()
            scModule = __import__(scName1, globals(), locals(), [scName1])
            scClass = vars(scModule)[scName]
            self.sandboxcache = scClass()
            logger.debug('load %s as SandboxCache' % scName)
        except:
            logger.debug('load default SandboxCache')
            pass

    def __refresh_jobinfo__(self, job):
        '''Refresh the lcg jobinfo. It will be called after resubmission.'''
        job.backend.status = ''
        job.backend.reason = ''
        job.backend.actualCE = ''
        job.backend.exitcode = ''
        job.backend.exitcode_arc = ''
        job.backend.workernode = ''
        job.backend.isbURI = ''
        job.backend.osbURI = ''

    def __setup_sandboxcache__(self, job):
        '''Sets up the sandbox cache object to adopt the runtime configuration of the LCG backend'''

        re_token = re.compile('^token:(.*):(.*)$')

        self.sandboxcache.vo = config['VirtualOrganisation']
        self.sandboxcache.timeout = config['SandboxTransferTimeout']

        if self.sandboxcache._name == 'LCGSandboxCache':
            if not self.sandboxcache.lfc_host:
                self.sandboxcache.lfc_host = Grid.__get_lfc_host__()

            if not self.sandboxcache.se:

                token = ''
                se_host = config['DefaultSE']
                m = re_token.match(se_host)
                if m:
                    token = m.group(1)
                    se_host = m.group(2)

                self.sandboxcache.se = se_host

                if token:
                    self.sandboxcache.srm_token = token

            if (self.sandboxcache.se_type
                    in ['srmv2']) and (not self.sandboxcache.srm_token):
                self.sandboxcache.srm_token = config['DefaultSRMToken']

        return True

    def __check_and_prestage_inputfile__(self, file):
        '''Checks the size of the given input file and, if it is
           over "BoundSandboxLimit", prestages it to a grid SE.

           The argument is a path of the local file.

           It returns a dictionary containing information to refer to the file:

               idx = {'lfc_host': lfc_host,
                      'local': [the local file paths],
                      'remote': {'fname1': 'remote index1', 'fname2': 'remote index2', ... }
                     }

           If prestaging fails, None is returned.

           If the file has been previously uploaded (according to md5sum),
           the prestaging is skipped and the index of the previously uploaded
           file is returned.
           '''

        idx = {'lfc_host': '', 'local': [], 'remote': {}}

        job = self.getJobObject()

        # read-in the previously uploaded files
        uploadedFiles = []

        # getting the uploaded file list from the master job
        if job.master:
            uploadedFiles += job.master.backend.sandboxcache.get_cached_files()

        # set and get the $LFC_HOST for uploading oversized sandbox
        self.__setup_sandboxcache__(job)

        uploadedFiles += self.sandboxcache.get_cached_files()

        lfc_host = None

        # for LCGSandboxCache, take the one specified in the sandboxcache object.
        # the value is exactly the same as the one from the local grid shell env.
        # if it is not specified explicitly.
        if self.sandboxcache._name == 'LCGSandboxCache':
            lfc_host = self.sandboxcache.lfc_host

        # or in general, query it from the Grid object
        if not lfc_host:
            lfc_host = Grid.__get_lfc_host__()

        idx['lfc_host'] = lfc_host

        abspath = os.path.abspath(file)
        fsize = os.path.getsize(abspath)

        if fsize > config['BoundSandboxLimit']:

            md5sum = get_md5sum(abspath, ignoreGzipTimestamp=True)

            doUpload = True
            for uf in uploadedFiles:
                if uf.md5sum == md5sum:
                    # the same file has been uploaded to the iocache
                    idx['remote'][os.path.basename(file)] = uf.id
                    doUpload = False
                    break

            if doUpload:

                logger.warning(
                    'The size of %s is larger than the sandbox limit (%d bytes). Please wait while pre-staging ...'
                    % (file, config['BoundSandboxLimit']))

                if self.sandboxcache.upload([abspath]):
                    remote_sandbox = self.sandboxcache.get_cached_files()[-1]
                    idx['remote'][remote_sandbox.name] = remote_sandbox.id
                else:
                    logger.error(
                        'Oversized sandbox not successfully pre-staged')
                    return None
        else:
            idx['local'].append(abspath)

        return idx
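    # Illustrative sketch (hypothetical values, not from the original source):
    # the index returned by __check_and_prestage_inputfile__ has one of two shapes.
    #
    #   # file below BoundSandboxLimit, shipped in the local sandbox
    #   {'lfc_host': 'lfc.example.org', 'local': ['/abs/path/small.tgz'], 'remote': {}}
    #
    #   # oversized file, pre-staged to the grid SE via the sandbox cache
    #   {'lfc_host': 'lfc.example.org', 'local': [],
    #    'remote': {'big.tgz': '<remote index of the cached file>'}}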

    def __mt_job_prepare__(self, rjobs, subjobconfigs, masterjobconfig):
        '''preparing jobs in multiple threads'''

        logger.warning('preparing %d subjobs ... it may take a while' %
                       len(rjobs))

        # prepare the master job (i.e. create shared inputsandbox, etc.)
        master_input_sandbox = IBackend.master_prepare(self, masterjobconfig)

        # upload the master input sandbox files if they exceed the sandbox size limit
        for f in master_input_sandbox:
            master_input_idx = self.__check_and_prestage_inputfile__(f)

            if not master_input_idx:
                logger.error('master input sandbox preparation failed: %s' % f)
                return None

        # the algorithm for preparing a single bulk job
        class MyAlgorithm(Algorithm):
            def __init__(self):
                Algorithm.__init__(self)

            def process(self, sj_info):
                my_sc = sj_info[0]
                my_sj = sj_info[1]

                try:
                    logger.debug("preparing job %s" % my_sj.getFQID('.'))
                    jdlpath = my_sj.backend.preparejob(my_sc,
                                                       master_input_sandbox)

                    if (not jdlpath) or (not os.path.exists(jdlpath)):
                        raise GangaException('job %s not properly prepared' %
                                             my_sj.getFQID('.'))

                    self.__appendResult__(my_sj.id, jdlpath)
                    return True
                except Exception as x:
                    log_user_exception()
                    return False

        mt_data = []
        for sc, sj in zip(subjobconfigs, rjobs):
            mt_data.append([sc, sj])

        myAlg = MyAlgorithm()
        myData = Data(collection=mt_data)

        runner = MTRunner(name='lcg_jprepare',
                          algorithm=myAlg,
                          data=myData,
                          numThread=10)
        runner.start()
        runner.join(-1)

        if len(runner.getDoneList()) < len(mt_data):
            return None
        else:
            # return a JDL file dictionary with subjob ids as keys, JDL file
            # paths as values
            return runner.getResults()

    def __mt_bulk_submit__(self, node_jdls):
        '''submitting jobs in multiple threads'''

        job = self.getJobObject()

        logger.warning('submitting %d subjobs ... it may take a while' %
                       len(node_jdls))

        # the algorithm for submitting a single bulk job
        class MyAlgorithm(Algorithm):
            def __init__(self, masterInputWorkspace, ce, arcverbose):
                Algorithm.__init__(self)
                self.inpw = masterInputWorkspace
                self.ce = ce
                self.arcverbose = arcverbose

            def process(self, jdl_info):
                my_sj_id = jdl_info[0]
                my_sj_jdl = jdl_info[1]

                #my_sj_jid = self.gridObj.arc_submit(my_sj_jdl, self.ce, self.verbose)
                my_sj_jid = Grid.arc_submit(my_sj_jdl, self.ce,
                                            self.arcverbose)

                if not my_sj_jid:
                    return False
                else:
                    self.__appendResult__(my_sj_id, my_sj_jid)
                    return True

        mt_data = []
        for id, jdl in node_jdls.items():
            mt_data.append((id, jdl))

        myAlg = MyAlgorithm(masterInputWorkspace=job.getInputWorkspace(),
                            ce=self.CE,
                            arcverbose=self.verbose)
        myData = Data(collection=mt_data)

        runner = MTRunner(name='arc_jsubmit',
                          algorithm=myAlg,
                          data=myData,
                          numThread=config['SubmissionThread'])
        runner.start()
        runner.join(timeout=-1)

        if len(runner.getDoneList()) < len(mt_data):
            # not all bulk jobs were successfully submitted; cancel the
            # already submitted jobs immediately
            logger.error(
                'some bulk jobs not successfully (re)submitted, cancelling the submitted jobs'
            )
            Grid.arc_cancelMultiple(runner.getResults().values())
            return None
        else:
            return runner.getResults()
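    # Illustrative note (hypothetical values): on success __mt_bulk_submit__
    # returns the MTRunner results, a dictionary keyed by subjob id and holding
    # the ARC job id collected via __appendResult__, e.g.
    #
    #   {0: 'gsiftp://ce.example.org:2811/jobs/abc123', 1: '...', ...}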

    def __jobWrapperTemplate__(self):
        '''Create job wrapper'''

        script = """#!/usr/bin/env python
#-----------------------------------------------------
# This job wrapper script is automatically created by
# GANGA LCG backend handler.
#
# It controls:
# 1. unpack input sandbox
# 2. invoke application executable
# 3. invoke monitoring client
#-----------------------------------------------------
import os,os.path,shutil,tempfile
import sys,popen2,time,traceback

#bugfix #36178: subprocess.py crashes if python 2.5 is used
#try to import subprocess from local python installation before an
#import from PYTHON_DIR is attempted some time later
try:
    import subprocess
except ImportError:
    pass

## Utility functions ##
def timeString():
    return time.strftime('%a %b %d %H:%M:%S %Y',time.gmtime(time.time()))

def printInfo(s):
    out.write(timeString() + '  [Info]' +  ' ' + str(s) + os.linesep)
    out.flush()

def printError(s):
    out.write(timeString() + ' [Error]' +  ' ' + str(s) + os.linesep)
    out.flush()

def lcg_file_download(vo,guid,localFilePath,timeout=60,maxRetry=3):
    cmd = 'lcg-cp -t %d --vo %s %s file://%s' % (timeout,vo,guid,localFilePath)

    printInfo('LFC_HOST set to %s' % os.environ['LFC_HOST'])
    printInfo('lcg-cp timeout: %d' % timeout)

    i         = 0
    rc        = 0
    isDone    = False
    try_again = True

    while try_again:
        i = i + 1
        try:
            ps = os.popen(cmd)
            status = ps.close()

            if not status:
                isDone = True
                printInfo('File %s downloaded from iocache' % os.path.basename(localFilePath))
            else:
                raise IOError("Download file %s from iocache failed with error code: %d, trial %d." % (os.path.basename(localFilePath), status, i))

        except IOError as e:
            isDone = False
            printError(str(e))

        if isDone:
            try_again = False
        elif i == maxRetry:
            try_again = False
        else:
            try_again = True

    return isDone

## system command executor with subprocess
def execSyscmdSubprocess(cmd, wdir=os.getcwd()):

    import os, subprocess

    global exitcode

    outfile   = file('stdout','w')
    errorfile = file('stderr','w')

    try:
        child = subprocess.Popen(cmd, cwd=wdir, shell=True, stdout=outfile, stderr=errorfile)

        while 1:
            exitcode = child.poll()
            if exitcode is not None:
                break
            else:
                outfile.flush()
                errorfile.flush()
                time.sleep(0.3)
    finally:
        pass

    outfile.flush()
    errorfile.flush()
    outfile.close()
    errorfile.close()

    return True

## system command executor with multi-thread
## stderr/stdout handler
def execSyscmdEnhanced(cmd, wdir=os.getcwd()):

    import os, threading

    cwd = os.getcwd()

    isDone = False

    try:
        ## change to the working directory
        os.chdir(wdir)

        child = popen2.Popen3(cmd,1)
        child.tochild.close() # don't need stdin

        class PipeThread(threading.Thread):

            def __init__(self,infile,outfile,stopcb):
                self.outfile = outfile
                self.infile = infile
                self.stopcb = stopcb
                self.finished = 0
                threading.Thread.__init__(self)

            def run(self):
                stop = False
                while not stop:
                    buf = self.infile.read(10000)
                    self.outfile.write(buf)
                    self.outfile.flush()
                    time.sleep(0.01)
                    stop = self.stopcb()
                #FIXME: should we do self.infile.read() here? This would make sure that
                #all the output is read (if more than a buffer size of output was produced)
                self.finished = 1

        def stopcb(poll=False):
            global exitcode
            if poll:
                exitcode = child.poll()
            return exitcode != -1

        out_thread = PipeThread(child.fromchild, sys.stdout, stopcb)
        err_thread = PipeThread(child.childerr, sys.stderr, stopcb)

        out_thread.start()
        err_thread.start()
        while not out_thread.finished and not err_thread.finished:
            stopcb(True)
            time.sleep(0.3)

        sys.stdout.flush()
        sys.stderr.flush()

        isDone = True

    except Exception as e:
        isDone = False

    ## return to the original directory
    os.chdir(cwd)

    return isDone

############################################################################################

###INLINEMODULES###

############################################################################################

## Main program ##

outputsandbox = ###OUTPUTSANDBOX###
input_sandbox = ###INPUTSANDBOX###
wrapperlog = ###WRAPPERLOG###
appexec = ###APPLICATIONEXEC###
appargs = ###APPLICATIONARGS###
appenvs = ###APPLICATIONENVS###
timeout = ###TRANSFERTIMEOUT###

exitcode=-1

import sys, stat, os, os.path, commands, mimetypes  # mimetypes is needed below to guess sandbox compression

# Change to scratch directory if provided
scratchdir = ''
tmpdir = ''

orig_wdir = os.getcwd()

# prepare log file for job wrapper
out = open(os.path.join(orig_wdir, wrapperlog),'w')

if os.getenv('EDG_WL_SCRATCH'):
    scratchdir = os.getenv('EDG_WL_SCRATCH')
elif os.getenv('TMPDIR'):
    scratchdir = os.getenv('TMPDIR')

if scratchdir:
    (status, tmpdir) = commands.getstatusoutput('mktemp -d %s/gangajob_XXXXXXXX' % (scratchdir))
    if status == 0:
        os.chdir(tmpdir)
    else:
        ## if status != 0, tmpdir should contain the error message, so print it to stderr
        printError('Error making ganga job scratch dir: %s' % tmpdir)
        printInfo('Unable to create ganga job scratch dir in %s. Run directly in: %s' % ( scratchdir, os.getcwd() ) )

        ## reset scratchdir and tmpdir to disable the usage of Ganga scratch dir
        scratchdir = ''
        tmpdir = ''

wdir = os.getcwd()

if scratchdir:
    printInfo('Changed working directory to scratch directory %s' % tmpdir)
    try:
        os.system("ln -s %s %s" % (os.path.join(orig_wdir, 'stdout'), os.path.join(wdir, 'stdout')))
        os.system("ln -s %s %s" % (os.path.join(orig_wdir, 'stderr'), os.path.join(wdir, 'stderr')))
    except Exception as e:
        printError(sys.exc_info()[0])
        printError(sys.exc_info()[1])
        str_traceback = traceback.format_tb(sys.exc_info()[2])
        for str_tb in str_traceback:
            printError(str_tb)
        printInfo('Linking stdout & stderr to original directory failed. Looking at stdout during job run may not be possible')

os.environ['PATH'] = '.:'+os.environ['PATH']

vo = os.environ['GANGA_LCG_VO']

try:
    printInfo('Job Wrapper start.')

#   download inputsandbox from remote cache
    for f,guid in input_sandbox['remote'].iteritems():
        if not lcg_file_download(vo, guid, os.path.join(wdir,f), timeout=int(timeout)):
            raise IOError('Download remote input %s:%s failed.' % (guid,f) )
        else:
            if mimetypes.guess_type(f)[1] in ['gzip', 'bzip2']:
                getPackedInputSandbox(f)
            else:
                shutil.copy(f, os.path.join(os.getcwd(), os.path.basename(f)))

    printInfo('Download inputsandbox from iocache passed.')

#   unpack inputsandbox from wdir
    for f in input_sandbox['local']:
        if mimetypes.guess_type(f)[1] in ['gzip', 'bzip2']:
            getPackedInputSandbox(os.path.join(orig_wdir,f))

    printInfo('Unpack inputsandbox passed.')

    #get input files
    ###DOWNLOADINPUTFILES###

    printInfo('Loading Python modules ...')

    sys.path.insert(0,os.path.join(wdir,PYTHON_DIR))

    # check the python library path
    try:
        printInfo(' ** PYTHON_DIR: %s' % os.environ['PYTHON_DIR'])
    except KeyError:
        pass

    try:
        printInfo(' ** PYTHONPATH: %s' % os.environ['PYTHONPATH'])
    except KeyError:
        pass

    for lib_path in sys.path:
        printInfo(' ** sys.path: %s' % lib_path)

#   execute application

    ## convert appenvs into an environment setup script to be 'sourced' before executing the user executable

    printInfo('Prepare environment variables for application executable')

    env_setup_script = os.path.join(os.getcwd(), '__ganga_lcg_env__.sh')

    f = open( env_setup_script, 'w')
    f.write('#!/bin/sh' + os.linesep )
    f.write('##user application environment setup script generated by Ganga job wrapper' + os.linesep)
    for k,v in appenvs.items():

        str_env = 'export %s="%s"' % (k, v)

        printInfo(' ** ' + str_env)
        
        f.write(str_env + os.linesep)
    f.close()

    try: #try to make shipped executable executable
        os.chmod('%s/%s'% (wdir,appexec),stat.S_IXUSR|stat.S_IRUSR|stat.S_IWUSR)
    except:
        pass

    status = False
    try:
        # use subprocess to run the user's application if the module is available on the worker node
        import subprocess
        printInfo('Load application executable with subprocess module')
        status = execSyscmdSubprocess('source %s; %s %s' % (env_setup_script, appexec, appargs), wdir)
    except ImportError as err:
        # otherwise, use separate threads to control process IO pipes
        printInfo('Load application executable with separate threads')
        status = execSyscmdEnhanced('source %s; %s %s' % (env_setup_script, appexec, appargs), wdir)

    os.system("cp %s/stdout stdout.1" % orig_wdir)
    os.system("cp %s/stderr stderr.1" % orig_wdir)

    printInfo('GZipping stdout and stderr...')

    os.system("gzip stdout.1 stderr.1")

    # move them to the original wdir so they can be picked up
    os.system("mv stdout.1.gz %s/stdout.gz" % orig_wdir)
    os.system("mv stderr.1.gz %s/stderr.gz" % orig_wdir)

    if not status:
        raise OSError('Application execution failed.')
    printInfo('Application execution passed with exit code %d.' % exitcode)      

    ###OUTPUTUPLOADSPOSTPROCESSING###

    for f in os.listdir(os.getcwd()):
        command = "cp %s %s" % (os.path.join(os.getcwd(),f), os.path.join(orig_wdir,f))
        os.system(command)            

    createPackedOutputSandbox(outputsandbox,None,orig_wdir)

#   pack outputsandbox
#    printInfo('== check output ==')
#    for line in os.popen('pwd; ls -l').readlines():
#        printInfo(line)

    printInfo('Pack outputsandbox passed.')

    # Clean up after us - All log files and packed outputsandbox should be in "wdir"
    if scratchdir:
        os.chdir(orig_wdir)
        os.system("rm %s -rf" % wdir)
except Exception as e:
    printError(sys.exc_info()[0])
    printError(sys.exc_info()[1])
    str_traceback = traceback.format_tb(sys.exc_info()[2])
    for str_tb in str_traceback:
        printError(str_tb)

printInfo('Job Wrapper stop.')

out.close()

# always return exit code 0 so that, in the case of application failure,
# one can still get stdout and stderr back to the UI for debugging.
sys.exit(0)
"""
        return script

    def preparejob(self, jobconfig, master_job_sandbox):
        '''Prepare the JDL'''

        script = self.__jobWrapperTemplate__()

        job = self.getJobObject()
        inpw = job.getInputWorkspace()

        wrapperlog = '__jobscript__.log'

        import Ganga.Core.Sandbox as Sandbox

        # FIXME: check what happens if 'stdout','stderr' are specified here
        script = script.replace('###OUTPUTSANDBOX###',
                                repr(jobconfig.outputbox))

        script = script.replace('###APPLICATION_NAME###',
                                getName(job.application))
        script = script.replace('###APPLICATIONEXEC###',
                                repr(jobconfig.getExeString()))
        script = script.replace('###APPLICATIONARGS###',
                                repr(jobconfig.getArguments()))

        from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles

        script = script.replace('###OUTPUTUPLOADSPOSTPROCESSING###',
                                getWNCodeForOutputPostprocessing(job, '    '))

        script = script.replace('###DOWNLOADINPUTFILES###',
                                getWNCodeForDownloadingInputFiles(job, '    '))

        if jobconfig.env:
            script = script.replace('###APPLICATIONENVS###',
                                    repr(jobconfig.env))
        else:
            script = script.replace('###APPLICATIONENVS###', repr({}))

        script = script.replace('###WRAPPERLOG###', repr(wrapperlog))
        import inspect
        script = script.replace('###INLINEMODULES###',
                                inspect.getsource(Sandbox.WNSandbox))

        mon = job.getMonitoringService()

        self.monInfo = None

        # set the monitoring file by default to the stdout
        if isinstance(self.monInfo, dict):
            self.monInfo['remotefile'] = 'stdout'

        # try to print out the monitoring service information in debug mode
        try:
            logger.debug('job info of monitoring service: %s' %
                         str(self.monInfo))
        except:
            pass

#       prepare input/output sandboxes
        import Ganga.Utility.files
        from Ganga.GPIDev.Lib.File import File
        from Ganga.Core.Sandbox.WNSandbox import PYTHON_DIR
        import inspect

        fileutils = File(inspect.getsourcefile(Ganga.Utility.files),
                         subdir=PYTHON_DIR)
        packed_files = jobconfig.getSandboxFiles() + [fileutils]
        sandbox_files = job.createPackedInputSandbox(packed_files)

        # sandbox of child jobs should include master's sandbox
        sandbox_files.extend(master_job_sandbox)

        # check the input file size and pre-upload larger inputs to the iocache
        lfc_host = ''

        input_sandbox_uris = []
        input_sandbox_names = []

        ick = True

        max_prestaged_fsize = 0
        for f in sandbox_files:

            idx = self.__check_and_prestage_inputfile__(f)

            if not idx:
                logger.error('input sandbox preparation failed: %s' % f)
                ick = False
                break
            else:

                if idx['lfc_host']:
                    lfc_host = idx['lfc_host']

                if idx['remote']:
                    abspath = os.path.abspath(f)
                    fsize = os.path.getsize(abspath)

                    if fsize > max_prestaged_fsize:
                        max_prestaged_fsize = fsize

                    input_sandbox_uris.append(
                        idx['remote'][os.path.basename(f)])

                    input_sandbox_names.append(os.path.basename(
                        urlparse(f)[2]))

                if idx['local']:
                    input_sandbox_uris += idx['local']
                    input_sandbox_names.append(os.path.basename(f))

        if not ick:
            logger.error('stop job submission')
            return None

        # determine the lcg-cp timeout according to the max_prestaged_fsize
        # - using the assumption of 1 MB/sec.
        max_prestaged_fsize = 0
        lfc_host = ''
        transfer_timeout = config['SandboxTransferTimeout']
        predict_timeout = int(math.ceil(max_prestaged_fsize / 1000000.0))

        if predict_timeout > transfer_timeout:
            transfer_timeout = predict_timeout

        if transfer_timeout < 60:
            transfer_timeout = 60

        script = script.replace('###TRANSFERTIMEOUT###',
                                '%d' % transfer_timeout)

        # update the job wrapper with the inputsandbox list
        script = script.replace(
            '###INPUTSANDBOX###',
            repr({
                'remote': {},
                'local': input_sandbox_names
            }))

        # write out the job wrapper and put job wrapper into job's inputsandbox
        scriptPath = inpw.writefile(FileBuffer(
            '__jobscript_%s__' % job.getFQID('.'), script),
                                    executable=1)
        input_sandbox = input_sandbox_uris + [scriptPath]

        for isb in input_sandbox:
            logger.debug('ISB URI: %s' % isb)

        # compose output sandbox to include by default the following files:
        # - gzipped stdout (transferred only when the JobLogHandler is WMS)
        # - gzipped stderr (transferred only when the JobLogHandler is WMS)
        # - __jobscript__.log (job wrapper's log)
        output_sandbox = [wrapperlog]

        from Ganga.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatterns
        for outputSandboxPattern in getOutputSandboxPatterns(job):
            output_sandbox.append(outputSandboxPattern)

        if config['JobLogHandler'] in ['WMS']:
            output_sandbox += ['stdout.gz', 'stderr.gz']

        if len(jobconfig.outputbox):
            output_sandbox += [Sandbox.OUTPUT_TARBALL_NAME]

        # compose ARC XRSL
        xrsl = {
            #'VirtualOrganisation' : config['VirtualOrganisation'],
            'executable': os.path.basename(scriptPath),
            'environment': {
                'GANGA_LCG_VO': config['VirtualOrganisation'],
                'GANGA_LOG_HANDLER': config['JobLogHandler'],
                'LFC_HOST': lfc_host
            },
            #'stdout'                : 'stdout',
            #'stderr'                : 'stderr',
            'inputFiles': input_sandbox,
            'outputFiles': output_sandbox,
            #'OutputSandboxBaseDestURI': 'gsiftp://localhost'
        }

        xrsl['environment'].update({'GANGA_LCG_CE': self.CE})
        #xrsl['Requirements'] = self.requirements.merge(jobconfig.requirements).convert()

        # if self.jobtype.upper() in ['NORMAL','MPICH']:
        #xrsl['JobType'] = self.jobtype.upper()
        # if self.jobtype.upper() == 'MPICH':
        #xrsl['Requirements'].append('(other.GlueCEInfoTotalCPUs >= NodeNumber)')
        # xrsl['Requirements'].append('Member("MPICH",other.GlueHostApplicationSoftwareRunTimeEnvironment)')
        #xrsl['NodeNumber'] = self.requirements.nodenumber
        # else:
        #    logger.warning('JobType "%s" not supported' % self.jobtype)
        #    return

        #       additional settings from the job
        if jobconfig.env:
            xrsl['environment'].update(jobconfig.env)

        xrslText = Grid.expandxrsl(xrsl)

        # append any additional requirements from the requirements object
        xrslText += '\n'.join(self.requirements.other)

        logger.debug('subjob XRSL: %s' % xrslText)
        return inpw.writefile(FileBuffer('__xrslfile__', xrslText))

    def kill(self):
        '''Kill the job'''
        job = self.getJobObject()

        logger.info('Killing job %s' % job.getFQID('.'))

        if not self.id:
            logger.warning('Job %s is not running.' % job.getFQID('.'))
            return False

        return Grid.arc_cancel([self.id])

    def master_kill(self):
        '''kill the master job to the grid'''

        job = self.getJobObject()

        if not job.master and len(job.subjobs) == 0:
            return IBackend.master_kill(self)
        elif job.master:
            return IBackend.master_kill(self)
        else:
            return self.master_bulk_kill()

    def master_bulk_kill(self):
        '''ARC bulk kill'''

        job = self.getJobObject()

        # killing the individually re-submitted subjobs
        logger.debug('cancelling running/submitted subjobs.')

        # 1. collect job ids
        ids = []
        for sj in job.subjobs:
            if sj.status in ['submitted', 'running'] and sj.backend.id:
                ids.append(sj.backend.id)

        # 2. cancel the collected jobs
        ck = Grid.arc_cancelMultiple(ids)
        if not ck:
            logger.warning('Job cancellation failed')
            return False
        else:
            for sj in job.subjobs:
                if sj.backend.id in ids:
                    sj.updateStatus('killed')

            return True

    def master_bulk_submit(self, rjobs, subjobconfigs, masterjobconfig):
        '''submit multiple subjobs in parallel, by default using 10 concurrent threads'''

        from Ganga.Utility.logic import implies
        assert (implies(rjobs, len(subjobconfigs) == len(rjobs)))

        # prepare the subjobs, jdl repository before bulk submission
        node_jdls = self.__mt_job_prepare__(rjobs, subjobconfigs,
                                            masterjobconfig)

        if not node_jdls:
            logger.error('Some jobs not successfully prepared')
            return False

        # set all subjobs to submitting status
        for sj in rjobs:
            sj.updateStatus('submitting')

        node_jids = self.__mt_bulk_submit__(node_jdls)

        status = False

        if node_jids:
            for sj in rjobs:
                if sj.id in node_jids.keys():
                    sj.backend.id = node_jids[sj.id]
                    sj.backend.CE = self.CE
                    sj.backend.actualCE = sj.backend.CE
                    sj.updateStatus('submitted')
                    sj.info.submit_counter += 1
                else:
                    logger.warning('subjob %s not successfully submitted' %
                                   sj.getFQID('.'))

            status = True

        return status

    def master_bulk_resubmit(self, rjobs):
        '''ARC bulk resubmission'''

        from Ganga.Utility.logging import log_user_exception

        #        job = self.getJobObject()

        # collect the JDL files previously generated for the subjobs
        node_jdls = {}
        for sj in rjobs:
            jdlpath = os.path.join(sj.inputdir, '__jdlfile__')
            node_jdls[sj.id] = jdlpath

        # set all subjobs to submitting status
        for sj in rjobs:
            sj.updateStatus('submitting')

        node_jids = self.__mt_bulk_submit__(node_jdls)

        status = False

        if node_jids:
            for sj in rjobs:
                if sj.id in node_jids.keys():
                    self.__refresh_jobinfo__(sj)
                    sj.backend.id = node_jids[sj.id]
                    sj.backend.CE = self.CE
                    sj.backend.actualCE = sj.backend.CE
                    sj.updateStatus('submitted')
                    sj.info.submit_counter += 1
                else:
                    logger.warning('subjob %s not successfully submitted' %
                                   sj.getFQID('.'))

            status = True


#            # set all subjobs to submitted status
#            # NOTE: this is just a workaround to avoid the unexpected transition
#            #       that turns the master job's status from 'submitted' to 'submitting'.
#            #       As this transition should be allowed to simulate a lock mechanism in Ganga 4, the workaround
#            #       is to set all subjobs' status to 'submitted' so that the transition can be avoided.
#            #       A more clear solution should be implemented with the lock mechanism introduced in Ganga 5.
#            for sj in rjobs:
#                sj.updateStatus('submitted')
#                sj.info.submit_counter += 1

        return status

    def master_submit(self, rjobs, subjobconfigs, masterjobconfig):
        '''Submit the master job to the grid'''

        profiler = ElapsedTimeProfiler(getLogger(name='Profile.LCG'))
        profiler.start()

        job = self.getJobObject()

        # finding ARC CE endpoint for job submission
        #allowed_celist = []
        # try:
        #    allowed_celist = self.requirements.getce()
        #    if not self.CE and allowed_celist:
        #        self.CE = allowed_celist[0]
        # except:
        #    logger.warning('ARC CE assigment from ARCRequirements failed.')

        # if self.CE and allowed_celist:
        #    if self.CE not in allowed_celist:
        #        logger.warning('submission to CE not allowed: %s, use %s instead' % ( self.CE, allowed_celist[0] ) )
        #        self.CE = allowed_celist[0]

        # use arc info to check for any endpoints recorded in the config file
        rc, output = Grid.arc_info()

        if not self.CE and rc != 0:
            raise GangaException(
                "ARC CE endpoint not set and no default settings in '%s'. " %
                config['ArcConfigFile'])
        elif self.CE:
            logger.info('ARC CE endpoint set to: ' + str(self.CE))
        else:
            logger.info("Using ARC CE endpoints defined in '%s'" %
                        config['ArcConfigFile'])

        # doing massive job preparation
        if len(job.subjobs) == 0:
            ick = IBackend.master_submit(self, rjobs, subjobconfigs,
                                         masterjobconfig)
        else:
            ick = self.master_bulk_submit(rjobs, subjobconfigs,
                                          masterjobconfig)

        profiler.check('==> master_submit() elapsed time')

        return ick

    def submit(self, subjobconfig, master_job_sandbox):
        '''Submit the job to the grid'''

        ick = False

        xrslpath = self.preparejob(subjobconfig, master_job_sandbox)

        if xrslpath:
            self.id = Grid.arc_submit(xrslpath, self.CE, self.verbose)

            if self.id:
                self.actualCE = self.CE
                ick = True

        return ick

    def master_auto_resubmit(self, rjobs):
        """
        Resubmit each subjob individually as bulk resubmission will overwrite
        previous master job statuses
        """

        # check for master failure - in which case bulk resubmit
        mj = self._getParent()
        if mj.status == 'failed':
            return self.master_resubmit(rjobs)

        for j in rjobs:
            if not j.backend.master_resubmit([j]):
                return False

        return True

    def master_resubmit(self, rjobs):
        '''Resubmit the master job to the grid'''

        profiler = ElapsedTimeProfiler(getLogger(name='Profile.LCG'))
        profiler.start()

        job = self.getJobObject()

        ick = False

        if not job.master and len(job.subjobs) == 0:
            # case 1: master job normal resubmission
            logger.debug('rjobs: %s' % str(rjobs))
            logger.debug('mode: master job normal resubmission')
            ick = IBackend.master_resubmit(self, rjobs)

        elif job.master:
            # case 2: individual subjob resubmission
            logger.debug('mode: individual subjob resubmission')
            ick = IBackend.master_resubmit(self, rjobs)

        else:
            # case 3: master job bulk resubmission
            logger.debug('mode: master job resubmission')

            ick = self.master_bulk_resubmit(rjobs)
            if not ick:
                raise GangaException('ARC bulk submission failure')

        profiler.check('job re-submission elapsed time')

        return ick

    def resubmit(self):
        '''Resubmit the job'''

        ick = False

        job = self.getJobObject()

        jdlpath = job.getInputWorkspace().getPath("__jdlfile__")

        if jdlpath:
            self.id = Grid.arc_submit(jdlpath, self.CE, self.verbose)

            if self.id:
                # refresh the lcg job information
                self.__refresh_jobinfo__(job)
                self.actualCE = self.CE
                ick = True

        return ick

    @staticmethod
    def updateMonitoringInformation(jobs):
        '''Monitoring loop for normal jobs'''

        import datetime

        backenddict = {}
        jobdict = {}
        for j in jobs:
            if j.backend.id and (
                (datetime.datetime.utcnow() - j.time.timestamps["submitted"]
                 ).seconds > config["ArcWaitTimeBeforeStartingMonitoring"]):
                jobdict[j.backend.id] = j
                backenddict[j.backend.actualCE] = j

        if len(jobdict.keys()) == 0:
            return

        jobInfoDict = Grid.arc_status(jobdict.keys(), backenddict.keys())
        jidListForPurge = []

        # update job information for those available in jobInfoDict
        for id, info in jobInfoDict.items():

            if info:

                job = jobdict[id]

                if job.backend.actualCE != urlparse(id)[1].split(":")[0]:
                    job.backend.actualCE = urlparse(id)[1].split(":")[0]

                if job.backend.status != info['State']:

                    doStatusUpdate = True

                    # no need to update Ganga job status if backend status is
                    # not changed
                    if info['State'] == job.backend.status:
                        doStatusUpdate = False

                    # download output sandboxes if final status is reached
                    elif info['State'] in [
                            'Finished', '(FINISHED)', 'Finished (FINISHED)'
                    ]:

                        # grab output sandbox
                        if Grid.arc_get_output(
                                job.backend.id,
                                job.getOutputWorkspace(create=True).getPath()):
                            (ick, app_exitcode) = Grid.__get_app_exitcode__(
                                job.getOutputWorkspace(create=True).getPath())
                            job.backend.exitcode = app_exitcode

                            jidListForPurge.append(job.backend.id)

                        else:
                            logger.error('failed to download job output: %s' %
                                         jobdict[id].getFQID('.'))

                    if doStatusUpdate:
                        job.backend.status = info['State']
                        if 'Exit Code' in info:
                            try:
                                job.backend.exitcode_arc = int(
                                    info['Exit Code'])
                            except:
                                job.backend.exitcode_arc = 1

                        if 'Job Error' in info:
                            try:
                                job.backend.reason = info['Job Error']
                            except:
                                pass

                        job.backend.updateGangaJobStatus()
            else:
                logger.warning('failed to retrieve job information: %s' %
                               jobdict[id].getFQID('.'))

        # purge the jobs whose output has been fetched locally
        if jidListForPurge:
            if not Grid.arc_purgeMultiple(jidListForPurge):
                logger.warning("Failed to purge all ARC jobs.")

    def updateGangaJobStatus(self):
        '''map backend job status to Ganga job status'''

        job = self.getJobObject()

        if self.status.startswith('Running') or self.status.startswith(
                'Finishing'):
            job.updateStatus('running')
        elif self.status.startswith('Finished'):
            if job.backend.exitcode and job.backend.exitcode != 0:
                job.backend.reason = 'non-zero app. exit code: %s' % repr(
                    job.backend.exitcode)
                job.updateStatus('failed')
            elif job.backend.exitcode_arc and job.backend.exitcode_arc != 0:
                job.backend.reason = 'non-zero ARC job exit code: %s' % repr(
                    job.backend.exitcode_arc)
                job.updateStatus('failed')
            else:
                job.updateStatus('completed')

        elif self.status in ['DONE-FAILED', 'ABORTED', 'UNKNOWN', 'Failed']:
            job.updateStatus('failed')

        elif self.status in ['CANCELLED']:
            job.updateStatus('killed')

        elif self.status.startswith('Queuing'):
            pass

        else:
            logger.warning('Unexpected job status "%s"', self.status)
Example #21
0
class MassStorageFile(IGangaFile):
    """MassStorageFile represents a class marking a file to be written into mass storage (like Castor at CERN)
    """
    _schema = Schema(Version(1, 1), {'namePattern': SimpleItem(defvalue="", doc='pattern of the file name'),
                                     'localDir': SimpleItem(defvalue="", copyable=1, doc='local dir where the file is stored, used from get and put methods'),
                                     'joboutputdir': SimpleItem(defvalue="", doc='outputdir of the job with which the outputsandbox file object is associated'),
                                     'locations': SimpleItem(defvalue=[], copyable=1, typelist=['str'], sequence=1, doc="list of locations where the outputfiles are uploaded"),
                                     'outputfilenameformat': SimpleItem(defvalue=None, typelist=['str', 'type(None)'], protected=0,
                                                    doc="keyword path to where the output should be uploaded, i.e. /some/path/here/{jid}/{sjid}/{fname}; "
                                                        "if this field is not set, the output will go in {jid}/{sjid}/{fname} or in {jid}/{fname} "
                                                        "depending on whether the job is split or not"),
                                     'inputremotedirectory': SimpleItem(defvalue=None, typelist=['str', 'type(None)'], protected=0, doc="Directory on mass storage where the file is stored"),
                                     'subfiles': ComponentItem(category='gangafiles', defvalue=[], hidden=1, typelist=['Ganga.GPIDev.Lib.File.MassStorageFile'], sequence=1, copyable=0,\
                                                    doc="collected files from the wildcard namePattern"),
                                     'failureReason': SimpleItem(defvalue="", protected=1, copyable=0, doc='reason for the upload failure'),
                                     'compressed': SimpleItem(defvalue=False, typelist=['bool'], protected=0, doc='whether the output file should be compressed before sending somewhere')
                                     })

    _category = 'gangafiles'
    _name = "MassStorageFile"
    _exportmethods = [
        "location", "get", "put", "setLocation", "remove", "accessURL"
    ]
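    # Illustrative usage sketch (hypothetical job setup, not part of the class):
    #
    #   f = MassStorageFile(namePattern='*.root')
    #   f.outputfilenameformat = 'analysis/{jid}/{sjid}/{fname}'
    #   j = Job()
    #   j.outputfiles = [f]
    #   j.submit()
    #
    # After the job completes, f.location() would list where the matching
    # output files were uploaded under the configured mass storage path.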

    def __init__(self, namePattern='', localDir='', **kwds):
        """ namePattern is the pattern of the output file that has to be written into mass storage
        """
        super(MassStorageFile, self).__init__()
        self._setNamePath(_namePattern=namePattern, _localDir=localDir)
        self.locations = []

        self.shell = Shell.Shell()

    def __construct__(self, args):
        if len(args) == 1 and isinstance(args[0], str):
            self._setNamePath(args[0], '')
        elif len(args) == 2 and isinstance(args[0], str) and isinstance(
                args[1], str):
            self._setNamePath(args[0], args[1])
        self.locations = []
        self.shell = Shell.Shell()

    def _setNamePath(self, _namePattern='', _localDir=''):
        if _namePattern != '' and _localDir == '':
            import os.path
            self.namePattern = os.path.basename(_namePattern)
            self.localDir = os.path.dirname(_namePattern)
        elif _namePattern != '' and _localDir != '':
            self.namePattern = _namePattern
            self.localDir = _localDir

    def _on_attribute__set__(self, obj_type, attrib_name):
        r = copy.deepcopy(self)
        if getName(obj_type) == 'Job' and attrib_name == 'outputfiles':
            r.locations = []
            r.localDir = ''
            r.failureReason = ''
        return r

    def __repr__(self):
        """Get the representation of the file."""

        return "MassStorageFile(namePattern='%s')" % self.namePattern

    def setLocation(self):
        """
        Sets the location of output files that were uploaded to mass storage from the WN
        """
        job = self.getJobObject()

        postprocessLocationsPath = os.path.join(
            job.outputdir,
            getConfig('Output')['PostProcessLocationsFileName'])
        if not os.path.exists(postprocessLocationsPath):
            return

        def mass_line_processor(line, mass_file):
            lineParts = line.split(' ')
            pattern = lineParts[1]
            outputPath = lineParts[2]
            # str.strip('.gz') strips characters, not the suffix; remove a
            # trailing '.gz' explicitly instead
            name = os.path.basename(outputPath)
            if name.endswith('.gz'):
                name = name[:-len('.gz')]
            if regex.search(mass_file.namePattern) is not None:
                if outputPath == 'ERROR':
                    logger.error("Failed to upload file to mass storage")
                    logger.error(line[line.find('ERROR') + 5:])
                    d = MassStorageFile(namePattern=pattern)
                    d.compressed = mass_file.compressed
                    d.failureReason = line[line.find('ERROR') + 5:]
                    mass_file.subfiles.append(GPIProxyObjectFactory(d))
                else:
                    d = MassStorageFile(namePattern=name)
                    d.compressed = mass_file.compressed
                    d.outputfilenameformat = mass_file.outputfilenameformat
                    mass_file.subfiles.append(GPIProxyObjectFactory(d))
                    mass_line_processor(line, d)
            elif name == mass_file.namePattern:
                if outputPath == 'ERROR':
                    logger.error("Failed to upload file to mass storage")
                    logger.error(line[line.find('ERROR') + 5:])
                    mass_file.failureReason = line[line.find('ERROR') + 5:]
                    return
                mass_file.locations = [outputPath.strip('\n')]

        for line in open(postprocessLocationsPath, 'r'):

            if line.strip() == '':
                continue

            if line.startswith('massstorage'):
                mass_line_processor(line.strip(), self)
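        # Illustrative sketch (hypothetical content): a line consumed by
        # mass_line_processor above is expected to look like
        #
        #   massstorage <namePattern> <outputPath>
        #   e.g. "massstorage *.root /castor/cern.ch/user/s/someuser/101/0/hist.root"
        #
        # with 'ERROR <message>' in the path field signalling a failed upload.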

    def location(self):
        """
        Return list with the locations of the post processed files (if they were configured to upload the output somewhere)
        """
        tmpLocations = []
        if self.locations == []:
            if self.subfiles != []:
                for i in self.subfiles:
                    for j in i.location():
                        tmpLocations.append(j)
        else:
            tmpLocations = self.locations
        return tmpLocations

    def get(self):
        """
        Retrieves locally all files matching this MassStorageFile object pattern
        """

        to_location = self.localDir

        if not os.path.isdir(self.localDir):
            if self._getParent() is not None:
                to_location = self.getJobObject().outputdir
            else:
                logger.error(
                    "%s is not a valid directory.... Please set the localDir attribute"
                    % self.localDir)
                return

        cp_cmd = getConfig(
            'Output')['MassStorageFile']['uploadOptions']['cp_cmd']

        for location in self.locations:
            targetLocation = os.path.join(to_location,
                                          os.path.basename(location))
            os.system('%s %s %s' % (cp_cmd, location, targetLocation))

    def getWNScriptDownloadCommand(self, indent):
        ## FIXME fix me for the situation of multiple files?

        script = """\n

###INDENT###os.system(\'###CP_COMMAND###\')

"""
        cp_cmd = '%s %s .' % (getConfig('Output')['MassStorageFile']
                              ['uploadOptions']['cp_cmd'], self.locations[0])

        replace_dict = {'###INDENT###': indent, '###CP_COMMAND###': cp_cmd}

        for k, v in replace_dict.iteritems():
            script = script.replace(str(k), str(v))

        return script

    def put(self):
        """
        Creates and executes commands for file upload to mass storage (Castor), this method will
        be called on the client
        """
        import glob
        import re

        sourceDir = ''

        # if used as a stand alone object
        if self._getParent() is None:
            if self.localDir == '':
                import os
                _CWD = os.getcwd()
                if os.path.isfile(os.path.join(_CWD, self.namePattern)):
                    sourceDir = _CWD
                else:
                    logger.warning(
                        'localDir attribute is empty, don\'t know from which dir to take the file'
                    )
                    return
            else:
                sourceDir = self.localDir

                (result, message) = self.validate()

                if result == False:
                    logger.warning(message)
                    return

        else:
            job = self.getJobObject()
            sourceDir = job.outputdir

            # if there are subjobs, the put method will be called on every subjob
            # and will upload the resulting output file
            if len(job.subjobs) > 0:
                return

        massStorageConfig = getConfig(
            'Output')['MassStorageFile']['uploadOptions']

        mkdir_cmd = massStorageConfig['mkdir_cmd']
        cp_cmd = massStorageConfig['cp_cmd']
        ls_cmd = massStorageConfig['ls_cmd']
        massStoragePath = massStorageConfig['path']

        # create the last directory (if it does not exist) from the config path
        import os.path
        pathToDirName = os.path.dirname(massStoragePath)
        dirName = os.path.basename(massStoragePath)

        (exitcode, mystdout, mystderr) = self.execSyscmdSubprocess(
            '%s %s' % (ls_cmd, pathToDirName))
        if exitcode != 0:
            self.handleUploadFailure(mystderr)
            return

        directoryExists = False
        for directory in mystdout.split('\n'):
            if directory.strip() == dirName:
                directoryExists = True
                break

        if not directoryExists:
            (exitcode, mystdout, mystderr) = self.execSyscmdSubprocess(
                '%s %s' % (mkdir_cmd, massStoragePath))
            if exitcode != 0:
                self.handleUploadFailure(mystderr)
                return

        # the folder part of self.outputfilenameformat
        folderStructure = ''
        # the file name part of self.outputfilenameformat
        filenameStructure = ''

        if self._getParent() != None:
            jobfqid = self.getJobObject().fqid

            jobid = jobfqid
            subjobid = ''

            if (jobfqid.find('.') > -1):
                jobid = jobfqid.split('.')[0]
                subjobid = jobfqid.split('.')[1]

            if self.outputfilenameformat is None:
                filenameStructure = '{fname}'
                # create jid/sjid directories
                folderStructure = jobid
                if subjobid != '':
                    folderStructure = os.path.join(jobid, subjobid)

            else:
                filenameStructure = os.path.basename(self.outputfilenameformat)
                filenameStructure = filenameStructure.replace('{jid}', jobid)

                folderStructure = os.path.dirname(self.outputfilenameformat)
                folderStructure = folderStructure.replace('{jid}', jobid)

                if subjobid != '':
                    filenameStructure = filenameStructure.replace(
                        '{sjid}', subjobid)
                    folderStructure = folderStructure.replace(
                        '{sjid}', subjobid)
        else:
            if self.outputfilenameformat != None:
                folderStructure = os.path.dirname(self.outputfilenameformat)
                filenameStructure = os.path.basename(self.outputfilenameformat)
            else:
                filenameStructure = '{fname}'

        # create the folder structure
        if folderStructure != '':

            folderStructure = folderStructure.strip('/')
            massStoragePath = os.path.join(massStoragePath, folderStructure)
            command = '%s -p %s' % (mkdir_cmd, massStoragePath)
            (exitcode, mystdout, mystderr) = self.execSyscmdSubprocess(command)
            if exitcode != 0:
                self.handleUploadFailure(mystderr)
                return

        # at this point filenameStructure has {jid} and {sjid} (if any) already
        # substituted; the only remaining keyword is {fname}
        fileName = self.namePattern
        if self.compressed:
            fileName = '%s.gz' % self.namePattern

        if regex.search(fileName) is not None:
            for currentFile in glob.glob(os.path.join(sourceDir, fileName)):
                finalFilename = filenameStructure.replace(
                    '{fname}', os.path.basename(currentFile))
                (exitcode, mystdout, mystderr) = self.execSyscmdSubprocess(
                    '%s %s %s' %
                    (cp_cmd, currentFile,
                     os.path.join(massStoragePath, finalFilename)))

                d = MassStorageFile(namePattern=os.path.basename(currentFile))
                d.compressed = self.compressed

                if exitcode != 0:
                    self.handleUploadFailure(mystderr)
                else:
                    logger.info(
                        '%s successfully uploaded to mass storage as %s' %
                        (currentFile,
                         os.path.join(massStoragePath, finalFilename)))
                    d.locations = os.path.join(massStoragePath,
                                               os.path.basename(finalFilename))

                    # Alex removed this as more general approach in job.py after put() is called
                    # remove file from output dir if this object is attached to a job
                    # if self._getParent() != None:
                    #    os.system('rm %s' % os.path.join(sourceDir, currentFile))

                self.subfiles.append(GPIProxyObjectFactory(d))
        else:
            currentFile = os.path.join(sourceDir, fileName)
            finalFilename = filenameStructure.replace(
                '{fname}', os.path.basename(currentFile))
            (exitcode, mystdout, mystderr) = self.execSyscmdSubprocess(
                '%s %s %s' % (cp_cmd, currentFile,
                              os.path.join(massStoragePath, finalFilename)))
            if exitcode != 0:
                self.handleUploadFailure(mystderr)
            else:
                logger.info('%s successfully uploaded to mass storage as %s' %
                            (currentFile,
                             os.path.join(massStoragePath, finalFilename)))
                location = os.path.join(massStoragePath,
                                        os.path.basename(finalFilename))
                if location not in self.locations:
                    self.locations.append(location)

                # Alex removed this as more general approach in job.py after put() is called
                # remove file from output dir if this object is attached to a job
                # if self._getParent() != None:
                #    os.system('rm %s' % os.path.join(sourceDir, currentFile))

    def validate(self):

        # if the user has set outputfilenameformat, validate for presence of
        # jid, sjid and fname keywords depending on job type - split or
        # non-split
        if self.outputfilenameformat != None:

            searchFor = ['{fname}']
            isJob = False
            isSplitJob = False

            if self._getParent() != None:

                isJob = True

                if stripProxy(self.getJobObject()).master is not None:

                    isSplitJob = True
                    searchFor.append('{sjid}')

            missingKeywords = []

            for item in searchFor:
                if self.outputfilenameformat.find(item) == -1:
                    missingKeywords.append(item)

            if len(missingKeywords):
                return (
                    False,
                    'Error in MassStorageFile.outputfilenameformat field : missing keywords %s '
                    % ','.join(missingKeywords))

            if isSplitJob == False and self.outputfilenameformat.find(
                    '{sjid}') > -1:
                return (
                    False,
                    'Error in MassStorageFile.outputfilenameformat field :  job is non-split, but {\'sjid\'} keyword found'
                )

            if isJob == False and self.outputfilenameformat.find(
                    '{sjid}') > -1:
                return (
                    False,
                    'Error in MassStorageFile.outputfilenameformat field :  no parent job, but {\'sjid\'} keyword found'
                )

            if isJob == False and self.outputfilenameformat.find('{jid}') > -1:
                return (
                    False,
                    'Error in MassStorageFile.outputfilenameformat field :  no parent job, but {\'jid\'} keyword found'
                )

            invalidUnixChars = ['"', ' ']
            test = self.outputfilenameformat.replace('{jid}', 'a').replace(
                '{sjid}', 'b').replace('{fname}', 'c')

            for invalidUnixChar in invalidUnixChars:
                if test.find(invalidUnixChar) > -1:
                    return (
                        False,
                        'Error in MassStorageFile.outputfilenameformat field :  invalid char %s found'
                        % invalidUnixChar)

        return (True, '')
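    # Illustrative sketch (hypothetical values): results of validate() for a
    # few outputfilenameformat values, assuming the file is attached to a
    # subjob of a split job.
    #
    #   '{jid}/{sjid}/{fname}'          -> (True, '')
    #   'fixedname.root'                -> (False, '... missing keywords {fname},{sjid} ...')
    #   '{jid}/{sjid}/my file {fname}'  -> (False, '... invalid char   found')  # space not allowed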

    def handleUploadFailure(self, error):

        self.failureReason = error
        if self._getParent() != None:
            logger.error(
                "Job %s failed. One of the job.outputfiles couldn't be uploaded because of %s"
                % (str(self._getParent().fqid), self.failureReason))
        else:
            logger.error("The file can't be uploaded because of %s" %
                         (self.failureReason))

    def getWNInjectedScript(self, outputFiles, indent, patternsToZip,
                            postProcessLocationsFP):
        """
        Returns the script that has to be injected into the jobscript for postprocessing on the WN
        """
        massStorageCommands = []

        massStorageConfig = getConfig(
            'Output')['MassStorageFile']['uploadOptions']

        for outputFile in outputFiles:

            outputfilenameformat = 'None'
            if outputFile.outputfilenameformat != None and outputFile.outputfilenameformat != '':
                outputfilenameformat = outputFile.outputfilenameformat

            massStorageCommands.append([
                'massstorage', outputFile.namePattern, outputfilenameformat,
                massStorageConfig['mkdir_cmd'], massStorageConfig['cp_cmd'],
                massStorageConfig['ls_cmd'], massStorageConfig['path']
            ])

        import inspect
        script_location = os.path.join(
            os.path.dirname(
                os.path.abspath(inspect.getfile(inspect.currentframe()))),
            'scripts/MassStorageFileWNScript.py')

        from Ganga.GPIDev.Lib.File import FileUtils
        script = FileUtils.loadScript(script_location, '###INDENT###')

        jobfqid = self.getJobObject().fqid

        jobid = jobfqid
        subjobid = ''

        if (jobfqid.find('.') > -1):
            jobid = jobfqid.split('.')[0]
            subjobid = jobfqid.split('.')[1]

        replace_dict = {
            '###MASSSTORAGECOMMANDS###': repr(massStorageCommands),
            '###PATTERNSTOZIP###': str(patternsToZip),
            '###INDENT###': indent,
            '###POSTPROCESSLOCATIONSFP###': postProcessLocationsFP,
            '###FULLJOBDIR###': str(jobfqid.replace('.', os.path.sep)),
            '###JOBDIR###': str(jobid),
            '###SUBJOBDIR###': str(subjobid)
        }

        for k, v in replace_dict.iteritems():
            script = script.replace(str(k), str(v))

        return script
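
    # Hedged sketch of the worker-node templating above (command strings and the
    # target path are illustrative assumptions, not values from this module).
    # Each output file contributes one entry of the form
    #
    #   ['massstorage', '*.root', '{jid}/{fname}', 'mkdir -p', 'cp', 'ls', '/eos/user/x/xyz']
    #
    # and the ###...### placeholders in MassStorageFileWNScript.py are replaced by
    # repr(massStorageCommands), the indent string and the job/subjob directories.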

    def processWildcardMatches(self):
        if self.subfiles:
            return self.subfiles

        from fnmatch import fnmatch

        if regex.search(self.namePattern):
            ls_cmd = getConfig(
                'Output')['MassStorageFile']['uploadOptions']['ls_cmd']
            exitcode, output, m = self.shell.cmd1(ls_cmd + ' ' +
                                                  self.inputremotedirectory,
                                                  capture_stderr=True)

            for filename in output.split('\n'):
                if fnmatch(filename, self.namePattern):
                    subfile = MassStorageFile(namePattern=filename)
                    subfile.inputremotedirectory = self.inputremotedirectory

                    self.subfiles.append(GPIProxyObjectFactory(subfile))

    def remove(self, force=False, removeLocal=False):
        """
        Removes file from remote storage ONLY by default
        """
        massStorageConfig = getConfig(
            'Output')['MassStorageFile']['uploadOptions']
        rm_cmd = massStorageConfig['rm_cmd']

        if force == True:
            _auto_delete = True
        else:
            _auto_delete = False

        # iterate over a copy so that entries can be removed safely below
        for i in list(self.locations):

            if not _auto_delete:

                keyin = None

                while keyin is None:
                    keyin = raw_input(
                        "Do you want to delete file %s at Location: %s ? [y/n] "
                        % (str(self.namePattern), str(i)))
                    if keyin == 'y':
                        _delete_this = True
                    elif keyin == 'n':
                        _delete_this = False
                    else:
                        logger.warning("y/n please!")
                        keyin = None
            else:
                _delete_this = True

            if _delete_this:
                logger.info("Deleting File at Location: %s" % str(i))
                self.execSyscmdSubprocess('%s %s' % (rm_cmd, i))
                self.locations.remove(i)

        if removeLocal:

            sourceDir = ''
            if self.localDir == '':
                import os
                _CWD = os.getcwd()
                if os.path.isfile(os.path.join(_CWD, self.namePattern)):
                    sourceDir = _CWD
            else:
                sourceDir = self.localDir

            _localFile = os.path.join(sourceDir, self.namePattern)

            if os.path.isfile(_localFile):

                if force:
                    _actual_delete = True
                else:

                    keyin = None
                    while keyin is None:
                        keyin = raw_input(
                            "Do you want to remove the local File: %s ? ([y]/n) "
                            % str(_localFile))
                        if keyin in ['y', '']:
                            _actual_delete = True
                        elif keyin == 'n':
                            _actual_delete = False
                        else:
                            logger.warning("y/n please!")
                            keyin = None

                if _actual_delete:
                    import time
                    remove_filename = _localFile + "_" + str(
                        time.time()) + '__to_be_deleted_'

                    try:
                        os.rename(_localFile, remove_filename)
                    except OSError as err:
                        logger.warning(
                            "Error in first stage of removing file: %s" %
                            remove_filename)
                        remove_filename = _localFile

                    try:
                        os.remove(remove_filename)
                    except OSError as err:
                        if err.errno != errno.ENOENT:
                            logger.error("Error in removing file: %s" %
                                         str(remove_filename))
                            raise
                        pass
        return
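
    # Hedged usage sketch (job id and options are illustrative):
    #
    #   f = jobs(42).outputfiles[0]                # a MassStorageFile with uploaded copies
    #   f.remove()                                 # prompts once per remote location
    #   f.remove(force=True, removeLocal=True)     # no prompts, also deletes the local copy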

    def accessURL(self):

        # Need to come up with a prescription based upon the server address and
        # file on EOS or elsewhere to return a full URL which we can pass to
        # ROOT...

        protoPath = getConfig('Output')['MassStorageFile']['defaultProtocol']

        myLocations = self.location()

        accessURLs = []

        import os
        for location in myLocations:
            accessURLs.append(protoPath + os.path.join(os.sep, location))

        return accessURLs
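
    # Hedged example (protocol and path are illustrative assumptions): with
    # defaultProtocol = 'root://eosuser.cern.ch/' and a stored location of
    # '/eos/user/x/xyz/hist.root', accessURL() returns something like
    # ['root://eosuser.cern.ch//eos/user/x/xyz/hist.root'], which can be opened by ROOT.
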
Example #22
0
class Transform(GangaObject):
    _schema = Schema(
        Version(1, 0), {
            'status':
            SimpleItem(defvalue='new',
                       protected=1,
                       copyable=0,
                       doc='Status - running, pause or completed',
                       typelist=["str"]),
            'name':
            SimpleItem(defvalue='Simple Transform',
                       doc='Name of the transform (cosmetic)',
                       typelist=["str"]),
            'application':
            ComponentItem(
                'applications',
                defvalue=None,
                optional=1,
                load_default=False,
                filter="checkTaskApplication",
                doc=
                'Application of the Transform. Must be a Task-Supporting application.'
            ),
            'inputsandbox':
            FileItem(defvalue=[],
                     typelist=['str', 'Ganga.GPIDev.Lib.File.File.File'],
                     sequence=1,
                     doc="list of File objects shipped to the worker node "),
            'outputsandbox':
            SimpleItem(
                defvalue=[],
                typelist=['str'],
                sequence=1,
                doc="list of filenames or patterns shipped from the worker node"
            ),
            'inputdata':
            ComponentItem('datasets',
                          defvalue=None,
                          optional=1,
                          load_default=False,
                          doc='Input dataset'),
            'outputdata':
            ComponentItem('datasets',
                          defvalue=None,
                          optional=1,
                          load_default=False,
                          doc='Output dataset'),
            'backend':
            ComponentItem('backends',
                          defvalue=None,
                          optional=1,
                          load_default=False,
                          doc='Backend of the Transform.'),
            'run_limit':
            SimpleItem(
                defvalue=4,
                doc='Number of times a partition is tried to be processed.',
                protected=1,
                typelist=["int"]),
            '_partition_status':
            SimpleItem(defvalue={},
                       hidden=1,
                       copyable=0,
                       doc='Map (only necessary) partitions to their status'),
            '_app_partition':
            SimpleItem(defvalue={},
                       hidden=1,
                       copyable=0,
                       doc='Map of applications to partitions'),
            '_app_status':
            SimpleItem(defvalue={},
                       hidden=1,
                       copyable=0,
                       doc='Map of applications to status'),
            '_next_app_id':
            SimpleItem(defvalue=0,
                       hidden=1,
                       copyable=0,
                       doc='Next ID used for the application',
                       typelist=["int"]),
        })

    _category = 'transforms'
    _name = 'Transform'
    _exportmethods = [
        'run',
        'pause',  # Operations
        'setPartitionStatus',
        'setRunlimit',
        'setFailed',  # Control Partitions
        'getPartitionStatus',
        'getJobs',
        'getPartitionJobs',
        # Info
        'overview',
        'info',
        'n_all',
        'n_status',
        'retryFailed'
    ]

    #   _app_status = {}
    _partition_apps = None

    # possible partition status values:
    # ignored, hold, ready, running, completed, attempted, failed, bad

    # Special methods:
    def __init__(self):
        super(Transform, self).__init__()
        self.initialize()

    def _readonly(self):
        """A transform is read-only if the status is not new."""
        if self.status == "new":
            return 0
        return 1

    def initialize(self):
        from Ganga import GPI
        self.backend = stripProxy(GPI.Local())

    def check(self):
        pass

    def startup(self):
        """This function is used to set the status after restarting Ganga"""
        # Make sure that no partitions are kept "running" from previous
        # sessions
        clist = self._partition_status.keys()
        for c in clist:
            self.updatePartitionStatus(c)
        # At this point the applications still need to notify the Transformation of their status
        # Search jobs for task-supporting applications
        id = "%i:%i" % (self._getParent().id,
                        self._getParent().transforms.index(self))
        for j in GPI.jobs:
            if "tasks_id" in stripProxy(j.application).getNodeData():
                # print "tasks_id of jobid ", j.fqid,
                # stripProxy(j.application).getNodeAttribute("tasks_id"), id
                if stripProxy(j.application).getNodeAttribute(
                        "tasks_id").endswith(id):
                    try:
                        if j.subjobs:
                            for sj in j.subjobs:
                                app = stripProxy(sj.application)
                                stripProxy(app.getTransform()).setAppStatus(
                                    app,
                                    app._getParent().status)
                        else:
                            app = stripProxy(j.application)
                            stripProxy(app.getTransform()).setAppStatus(
                                app,
                                app._getParent().status)
                    except AttributeError as e:
                        logger.error("%s", e)

    def getPartitionApps(self):
        if self._partition_apps is None:
            # Create the reverse map _partition_apps from _app_partition
            self._partition_apps = {}
            for (app, partition) in self._app_partition.iteritems():
                if partition in self._partition_apps:
                    if not app in self._partition_apps[partition]:
                        self._partition_apps[partition].append(app)
                else:
                    self._partition_apps[partition] = [app]
        return self._partition_apps

    def fix(self):
        """This function fixes inconsistencies in application status"""
        # Create the reverse map _partition_apps from _app_partition
        self._app_status = {}
        # Make sure that no partitions are kept "running" from previous
        # sessions
        clist = self._partition_status.keys()
        for c in clist:
            self.updatePartitionStatus(c)
        # At this point the applications still need to notify the Transformation of their status
        # Search jobs for task-supporting applications

        id = "%i:%i" % (self._getParent().id,
                        self._getParent().transforms.index(self))
        for j in GPI.jobs:
            if "tasks_id" in stripProxy(j.application).getNodeData():
                if stripProxy(
                        j.application).getNodeAttribute("tasks_id") == id:
                    try:
                        if j.subjobs:
                            for sj in j.subjobs:
                                app = stripProxy(sj.application)
                                stripProxy(app.getTransform()).setAppStatus(
                                    app,
                                    app._getParent().status)
                        else:
                            app = stripProxy(j.application)
                            stripProxy(app.getTransform()).setAppStatus(
                                app,
                                app._getParent().status)
                    except AttributeError as e:
                        logger.error("%s", e)

# Public methods

    def run(self, check=True):
        """Sets this transform to running status"""
        if self.status == "new" and check:
            self.check()
        if self.status != "completed":
            self.updateStatus("running")
            #self.status = "running"
            # Check if this transform has completed in the meantime
            is_complete = True
            for s in self._partition_status.values():
                if s != "completed" and s != "bad":
                    is_complete = False
                    break
            if is_complete:
                self.updateStatus("completed")
                #self.status = "completed"
            task = self._getParent()
            if task:
                task.updateStatus()
        else:
            logger.warning("Transform is already completed!")

    def pause(self):
        """Pause the task - the background thread will not submit new jobs from this task"""
        if self.status != "completed":
            self.updateStatus("pause")
            #self.status = "pause"
            task = self._getParent()
            if task:
                task.updateStatus()
        else:
            logger.debug("Transform is already completed!")

    def setRunlimit(self, newRL):
        """Set the number of times a job should be resubmitted before the transform is paused"""
        self.run_limit = newRL
        cs = self._partition_status.items()
        for (c, s) in cs:
            if s in ["attempted", "failed"]:
                failures = self.getPartitionFailures(c)
                if failures >= newRL:
                    self._partition_status[c] = "failed"
                else:
                    self._partition_status[c] = "attempted"
        logger.debug("Runlimit set to %i", newRL)
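
    # Hedged usage sketch (task/transform indices are illustrative):
    #
    #   t = tasks(0).transforms[0]
    #   t.setRunlimit(8)   # partitions marked 'failed' fall back to 'attempted'
    #                      # if they have fewer than 8 recorded failures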

    def setPartitionStatus(self, partition, status):
        """ Set the Status of the given partition to "ready", "hold", "bad" or "completed".
            The status is then updated to the status indicated by the applications"""
        self.setPartitionsStatus([partition], status)

    def getJobs(self):
        """ Get the job slice of all jobs for this transform """
        return self.getPartitionJobs(None)

    def getPartitionJobs(self, partition):
        """ Get the job slice that processed the given partition. Iterates over the job list. """
        task = self._getParent()
        id = task.transforms.index(self)
        if partition is None:
            sname = "tasks(%i).transforms[%i].getJobs()" % (task.id, id)
        else:
            sname = "tasks(%i).transforms[%i].getPartitionJobs(%s)" % (
                task.id, id, partition)
        jobslice = JobRegistrySlice(sname)

        def addjob(j):
            if partition is None or self._app_partition[
                    j.application.id] == partition:
                jobslice.objects[j.fqid] = stripProxy(j)

        for j in GPI.jobs:
            try:
                stid = j.application.tasks_id.split(":")
                if int(stid[-2]) == task.id and int(stid[-1]) == id:
                    if j.subjobs:
                        for sj in j.subjobs:
                            addjob(sj)
                    else:
                        addjob(j)
            except Exception as err:
                logger.debug("getPartitionJobs Exception:\n%s" % str(err))
                pass
        return JobRegistrySliceProxy(jobslice)
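
    # Hedged usage sketch (indices are illustrative):
    #
    #   tasks(0).transforms[0].getJobs()             # slice of all jobs of this transform
    #   tasks(0).transforms[0].getPartitionJobs(3)   # only the jobs that processed partition 3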

    def setFailed(self, partition):
        """ Tells Tasks that all Applications that have executed this partition have actually failed."""
        for aid in self._app_partition:
            if aid in self._app_status and self._app_status[aid] == "removed":
                continue
            # Save the status
            self._app_status[aid] = "failed"
            # Update the corresponding partition status
        self.setPartitionStatus(partition, "ready")

    def retryFailed(self):
        """Retry all failed partitions (forget about failed jobs)"""
        for aid in self._app_partition:
            if aid in self._app_status and self._app_status[aid] == "failed":
                self._app_status[aid] = "removed"
        clist = self._partition_status.keys()
        for c in clist:
            self.updatePartitionStatus(c)

# Internal methods

    def finalise(self):
        """Finalise the transform - no-op by default"""
        return

    def submitJobs(self, n):
        """Create Ganga Jobs for the next N partitions that are ready and submit them."""
        next = self.getNextPartitions(n)
        if len(next) == 0:
            return 0
        numjobs = 0
        for j in self.getJobsForPartitions(next):
            stripProxy(j.application).transition_update("submitting")
            try:
                j.submit()
            except JobError:
                logger.error(
                    "Error on job submission! The current transform will be paused until this problem is fixed."
                )
                logger.error(
                    "type tasks(%i).run() to continue after the problem has been fixed.",
                    self._getParent().id)
                self.pause()
            numjobs += 1
        return numjobs

    def checkTaskApplication(self, app):
        """warns the user if the application is not compatible """
        if app is None:
            return None
        if not "tasks_id" in stripProxy(app).getNodeData():
            return taskApp(app)
        return app

    def setAppStatus(self, app, new_status):
        """Reports status changes in application jobs
           possible status values: 
           normal   : (new, submitting,) submitted, running, completing, completed
           failures : killed, failed
           transient: incomplete (->new), unknown, removed"""

        # Check if we know the occurring application...
        if app.id == -1:
            return
        if not app.id in self._app_partition:
            logger.warning("%s was contacted by an unknown application %i.",
                           self.fqn(), app.id)
            return
        # Silently ignore message if the application is already removed or
        # completed
        if app.id in self._app_status and self._app_status[app.id] in [
                "removed", "completed", "failed"
        ]:
            return
        # Check the status
        if new_status == "completed" and not self.checkCompletedApp(app):
            logger.error("%s app %i failed despite listed as completed!",
                         self.fqn(), app.id)
            new_status = "failed"
        # Save the status
        self._app_status[app.id] = new_status
        # Update the corresponding partition status
        self.updatePartitionStatus(self._app_partition[app.id])

    def setMasterJobStatus(self, job, new_status):
        """hook for a master job status update"""
        return

    def updatePartitionStatus(self, partition):
        """ Calculate the correct status of the given partition.
            "completed" and "bad" are never changed here;
            "hold" is only changed to "completed" here. """
        # print "updatePartitionStatus ", partition, " transform ", self.id
        # If the partition has status, and is not in a fixed state, check it!

        if partition in self._partition_status and (
                not self._partition_status[partition] in ["bad", "completed"]):
            # if we have no applications, we are in "ready" state
            if not partition in self.getPartitionApps():
                if self._partition_status[partition] != "hold":
                    self._partition_status[partition] = "ready"
            else:
                status = [
                    self._app_status[app]
                    for app in self.getPartitionApps()[partition]
                    if app in self._app_status
                    and not self._app_status[app] in ["removed", "killed"]
                ]
                # Check if we have completed this partition
                if "completed" in status:
                    self._partition_status[partition] = "completed"
                # Check if we are not on hold
                elif self._partition_status[partition] != "hold":
                    # Check if we are running
                    running = False
                    for stat in [
                            "completing", "running", "submitted", "submitting"
                    ]:
                        if stat in status:
                            self._partition_status[partition] = "running"
                            running = True
                            break
                    if not running:
                        # Check if we failed
                        #failures = len([stat for stat in status if stat in ["failed","new"]])
                        failures = self.getPartitionFailures(partition)

                        if failures >= self.run_limit:
                            self._partition_status[partition] = "failed"
                        elif failures > 0:
                            self._partition_status[partition] = "attempted"
                        else:
                            # Here we only have some "unknown" applications
                            # This could prove difficult when launching new applications. Care has to be taken
                            # to get the applications out of the "unknown" state as quickly as possible, to avoid double submissions.
                            #logger.warning("Partition with only unknown applications encountered. This is probably not a problem.")
                            self._partition_status[partition] = "ready"
        # Notify the next transform (if any) of the change in input status
        self.notifyNextTransform(partition)

        # Update the Tasks status if necessary
        task = self._getParent()
        if partition in self._partition_status and self._partition_status[
                partition] in ["completed", "bad"
                               ] and self.status == "running":
            for s in self._partition_status.values():
                if s != "completed" and s != "bad":
                    return
            #self.status = "completed"
            self.updateStatus("completed")
            if task:
                task.updateStatus()
        elif self.status == "completed":
            for s in self._partition_status.values():
                if s != "completed" and s != "bad":
                    self.updateStatus("running")
                    #self.status = "running"
                    if task:
                        task.updateStatus()
                    return
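
    # Hedged worked example of the roll-up above (status lists are illustrative):
    # with run_limit=4, application statuses ['failed', 'failed', 'running'] make the
    # partition 'running'; ['failed', 'failed'] (2 failures < 4) gives 'attempted';
    # 4 or more failures give 'failed'; any 'completed' application completes the
    # partition outright, and 'hold' partitions are only ever promoted to 'completed'.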

    def notifyNextTransform(self, partition):
        """ Notify any dependant transforms of the input update """
        task = self._getParent()
        if task and (task.transforms.index(self) + 1 < len(task.transforms)):
            task.transforms[task.transforms.index(self) + 1].updateInputStatus(
                self, partition)

    def setPartitionsStatus(self, partitions, status):
        """ Set the status of the partitions to "ready", "hold", "bad" or "completed".
            The status is then updated to the status indicated by the applications.
            "bad" and "completed" are never changed except to "ignored"; "hold" is only changed to "completed". """
        if status == "ignored":
            [
                self._partition_status.pop(c) for c in partitions
                if c in self._partition_status
            ]
        elif status in ["ready", "hold", "bad", "completed"]:
            for c in partitions:
                self._partition_status[c] = status
        else:
            logger.error(
                "setPartitionsStatus called with invalid status string %s",
                status)
        for c in partitions:
            self.updatePartitionStatus(c)

    def setPartitionsLimit(self, limitpartition):
        """ Set all partitions from and including limitpartition to ignored """
        partitions = [c for c in self._partition_status if c >= limitpartition]
        self.setPartitionsStatus(partitions, "ignored")

    def getPartitionStatus(self, partition):
        if partition in self._partition_status:
            return self._partition_status[partition]
        else:
            return "ignored"

    def getNextPartitions(self, n):
        """Returns the N next partitions to process"""
        partitionlist = sorted(c for c, v in self._partition_status.items()
                               if v in ["ready", "attempted"])
        return partitionlist[:n]

    def getNewAppID(self, partition):
        """ Returns a new application ID and associates this ID with the partition given. """
        id = self._next_app_id
        self._app_partition[id] = partition
        if partition in self.getPartitionApps():
            self.getPartitionApps()[partition].append(id)
        else:
            self.getPartitionApps()[partition] = [id]
        self._next_app_id += 1
        return id

    def createNewJob(self, partition):
        """ Returns a new job initialized with the transforms application, backend and name """
        task = self._getParent(
        )  # this works because createNewJob is only called by a task
        id = task.transforms.index(self)
        j = GPI.Job()
        stripProxy(j).backend = self.backend.clone()
        stripProxy(j).application = self.application.clone()
        stripProxy(j).application.tasks_id = "%i:%i" % (task.id, id)
        stripProxy(j).application.id = self.getNewAppID(partition)
        j.inputdata = self.inputdata
        j.outputdata = self.outputdata
        j.inputsandbox = self.inputsandbox
        j.outputsandbox = self.outputsandbox
        j.name = "T%i:%i C%i" % (task.id, id, partition)
        return j
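
    # Hedged sketch of what createNewJob produces (ids are illustrative):
    # for task 0, transform 1 and partition 5 the new job is created with
    #   j.application.tasks_id == "0:1"
    #   j.name == "T0:1 C5"
    # and a fresh application id registered against partition 5 via getNewAppID().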

# Methods that can/should be overridden by derived classes

    def checkCompletedApp(self, app):
        """Can be overridden to improve application completeness checking"""
        return True

    def updateInputStatus(self, ltf, partition):
        """Is called by the last transform (ltf) if the partition 'partition' changes status"""
        # per default no dependencies exist
        pass

    def getJobsForPartitions(self, partitions):
        """This is only an example, this class should be overridden by derived classes"""
        return [self.createNewJob(p) for p in partitions]

# Information methods

    def fqn(self):
        task = self._getParent()
        if task:
            return "Task %i Transform %i" % (task.id,
                                             task.transforms.index(self))
        else:
            return "Unassigned Transform '%s'" % (self.name)

    def n_all(self):
        return len(self._partition_status)

    def n_status(self, status):
        return len(
            [cs for cs in self._partition_status.values() if cs == status])

    def overview(self):
        """ Get an ascii art overview over task status. Can be overridden """
        task = self._getParent()
        if not task is None:
            id = str(task.transforms.index(self))
        else:
            id = "?"
        o = markup("#%s: %s '%s'\n" % (id, getName(self), self.name),
                   status_colours[self.status])
        i = 0
        partitions = sorted(self._partition_status.keys())
        for c in partitions:
            s = self._partition_status[c]
            if c in self.getPartitionApps():
                failures = self.getPartitionFailures(c)
                o += markup("%i:%i " % (c, failures), overview_colours[s])
            else:
                o += markup("%i " % c, overview_colours[s])
            i += 1
            if i % 20 == 0:
                o += "\n"
        logger.info(o)

    def info(self):
        logger.info(
            markup("%s '%s'" % (getName(self), self.name),
                   status_colours[self.status]))
        logger.info("* backend: %s" % getName(self.backend))
        logger.info("Application:")
        self.application.printTree()

    def getPartitionFailures(self, partition):
        """Return the number of failures for this partition"""
        return len([
            1 for app in self.getPartitionApps()[partition]
            if app in self._app_status
            and self._app_status[app] in ["new", "failed"]
        ])

    def updateStatus(self, status):
        """Update the transform status"""
        self.status = status
Example #23
0
class PhysicalFile(LocalFile):
    '''Class for handling physical files (i.e. PFNs)

    Example Usage:
    pfn = PhysicalFile("/some/pfn.file")
    pfn.upload("/some/lfn.file","CERN-USER") # upload the PFN to LFC
    [...etc...]
    '''
    _schema = Schema(
        Version(1, 0), {
            'name':
            SimpleItem(defvalue='', doc='PFN'),
            'namePattern':
            SimpleItem(
                defvalue="", doc='pattern of the file name', transient=1),
            'localDir':
            SimpleItem(
                defvalue="",
                doc=
                'local dir where the file is stored, used from get and put methods',
                transient=1),
            'subfiles':
            ComponentItem(category='gangafiles',
                          defvalue=[],
                          hidden=1,
                          typelist=['Ganga.GPIDev.Lib.File.LocalFile'],
                          sequence=1,
                          copyable=0,
                          doc="collected files from the wildcard namePattern",
                          transient=1),
            'compressed':
            SimpleItem(
                defvalue=False,
                typelist=['bool'],
                protected=0,
                doc=
                'whether the output file should be compressed before sending somewhere',
                transient=1)
        })
    _category = 'gangafiles'
    _name = 'PhysicalFile'
    _exportmethods = ['location', 'upload']

    def __init__(self, name=''):
        val = full_expand_filename(name)
        super(PhysicalFile, self).__init__(namePattern=val)
        self.namePattern = os.path.basename(name)
        self.localDir = os.path.dirname(val)
        self.name = val
        logger.warning(
            "!!! PhysicalFile has been deprecated, this is now just a wrapper to the LocalFile object"
        )
        logger.warning(
            "!!! Please update your scripts before PhysicalFile is removed")

    def __construct__(self, args):
        if (len(args) != 1) or (type(args[0]) is not type('')):
            super(PhysicalFile, self).__construct__(args)
        else:
            self.name = full_expand_filename(args[0])
            val = full_expand_filename(args[0])
            self.localDir = os.path.dirname(val)
            self.namePattern = os.path.basename(val)

    def _attribute_filter__set__(self, n, v):
        if n == 'name':
            import os.path
            val = full_expand_filename(v)
            self.name = val
            self.namePattern = os.path.basename(val)
            self.localDir = os.path.dirname(val)
            return val
        return v

    def upload(self, lfn, diracSE, guid=None):

        from GangaDirac.Lib.Files.DiracFile import DiracFile
        diracFile = DiracFile(namePattern=self.name, lfn=lfn)

        diracFile.put(force=True)

        return diracFile
Example #24
0
class DiracFile(IGangaFile):
    """
    File stored on a DIRAC storage element

    Usage:

        Some common use cases:

        1) Uploading a file and sending jobs to run over it
        2) Uploading a file to be sent to where your jobs are running
        3) Uploading and removing a file
        4) Removing an existing file from Dirac storage
        5) Change the path of LFN produced by a ganga job.
        6) Accessing a (potentially remote) file known to Dirac through an LFN


    1)
        To upload a file and submit a job to use it as inputdata:

        df = DiracFile('/path/to/some/local/file')
        df.put()

        j=Job( ... )
        j.inputdata=[df.lfn]

        (The file is now accessible via data.py at the site)

    2)
        To upload a file and make it available on a workernode:

        df = DiracFile('/path/to/some/local/file')
        df.put(uploadSE = 'CERN-USER')

        j=Job( ... )
        j.inputfiles = [df]
        j.submit()

    3)
        To upload and then remove a file:

        df = DiracFile('/path/to/some/local/file')
        df.put()
        df.remove()

    4)
        To remove an existing file already in Dirac storage
        
        df = DiracFile('LFN:/some/lfn/path')
        df.remove()

        or:

        df = DiracFile(lfn='/some/lfn/path')
        df.remove()

    5)
        To change an LFN path structure which is produced by Ganga:

        j=Job( ... )
        j.outputfiles=[DiracFile('myAwesomeLFN.ext', remoteDir='myPath_{jid}_{sjid}')]
        j.submit()

        This will produce LFN similar to:

        /lhcb/user/<u>/<user>/myPath_1_2/2017_01/123456/123456789/myAwesomeLFN.ext

        Other possibilities may look like:

        j.outputfiles=[DiracFile('myData.ext', remoteDir='myProject/job{jid}_sj{sjid}')]
         =>
           /lhcb/user/<u>/<user>/myProject/job1_sj2/2017_01/123456/123456789/myData.ext
        
        j.outputfiles=[DiracFile('myData.ext', remoteDir='myProject')]
         =>
           /lhcb/user/<u>/<user>/myProject/2017_01/123456/123456789/myData.ext
        

        Alternatively you may change in your .gangarc:
        [DIRAC]
        useGangaPath=True

        This will give you LFN like:

        /lhcb/user/<u>/<user>/GangaJob_13/OutputFiles/2017_01/123456/123456789/myFile.ext

        for all future jobs while this is in your .gangarc config.

    6)
        Accessing a (potentially remote) file locally known to DIRAC:

        df = DiracFile(lfn='/some/lfn/path')
        ganga_path = df.accessURL()
        **exit ganga**

        root ganga_path # to stream a file over xrootd://

    """
    _schema = Schema(
        Version(1, 1),
        {
            'namePattern':
            SimpleItem(defvalue="", doc='pattern of the file name'),
            'localDir':
            SimpleItem(
                defvalue=None,
                copyable=1,
                typelist=['str', 'type(None)'],
                doc=
                'local dir where the file is stored, used from get and put methods'
            ),
            'locations':
            SimpleItem(
                defvalue=[],
                copyable=1,
                typelist=['str'],
                sequence=1,
                doc="list of SE locations where the outputfiles are uploaded"),
            'compressed':
            SimpleItem(
                defvalue=False,
                typelist=['bool'],
                protected=0,
                doc=
                'whether the output file should be compressed before sending somewhere'
            ),
            'lfn':
            SimpleItem(
                defvalue='',
                copyable=1,
                typelist=['str'],
                doc=
                'return the logical file name/set the logical file name to use if not '
                'using wildcards in namePattern'),
            'remoteDir':
            SimpleItem(
                defvalue="",
                doc='remote directory where the LFN is to be placed within '
                'this is the relative path of the LFN which is put between the user LFN base and the filename.'
            ),
            'guid':
            SimpleItem(
                defvalue='',
                copyable=1,
                typelist=['str'],
                doc=
                'return the GUID/set the GUID to use if not using wildcards in the namePattern.'
            ),
            'subfiles':
            ComponentItem(
                category='gangafiles',
                defvalue=[],
                sequence=1,
                copyable=0,  # hidden=1,
                typelist=['GangaDirac.Lib.Files.DiracFile'],
                doc="collected files from the wildcard namePattern"),
            'defaultSE':
            SimpleItem(
                defvalue='',
                copyable=1,
                doc=
                "defaultSE where the file is to be accessed from or uploaded to"
            ),
            'failureReason':
            SimpleItem(defvalue="",
                       protected=1,
                       copyable=0,
                       doc='reason for the upload failure'),
            'credential_requirements':
            ComponentItem('CredentialRequirement', defvalue='DiracProxy'),
        })

    _env = None

    _category = 'gangafiles'
    _name = "DiracFile"
    _exportmethods = [
        "get", "getMetadata", "getReplicas", 'getSubFiles', 'remove',
        'removeReplica', "replicate", 'put', 'locations', 'location',
        'accessURL', '_updateRemoteURLs', 'hasMatchedFiles'
    ]

    _additional_slots = ['_have_copied', '_remoteURLs', '_storedReplicas']

    def __init__(self,
                 namePattern='',
                 localDir=None,
                 lfn='',
                 remoteDir=None,
                 **kwds):
        """
        name is the name of the output file that has to be written ...
        """

        super(DiracFile, self).__init__()
        self.locations = []

        self._setLFNnamePattern(lfn, namePattern)

        if localDir is not None:
            self.localDir = localDir
        if remoteDir is not None:
            self.remoteDir = remoteDir

        self._have_copied = False
        self._remoteURLs = {}
        self._storedReplicas = {}

    def __setattr__(self, attr, value):
        """
        This is an overloaded setter method to make sure that we're auto-expanding the filenames of files which exist.
        In the case where we're assigning any other attribute, the value is simply passed through.
        Args:
            attr (str): This is the name of the attribute which we're assigning
            value (unknown): This is the value being assigned.
        """
        actual_value = value
        if attr == "namePattern":
            this_dir, actual_value = os.path.split(value)
            if this_dir:
                self.localDir = this_dir
        elif attr == 'localDir':
            if value:
                new_value = os.path.abspath(expandfilename(value))
                if os.path.exists(new_value):
                    actual_value = new_value

        super(DiracFile, self).__setattr__(attr, actual_value)

    def _attribute_filter__set__(self, name, value):

        if value != "" and value is not None:
            #   Do some checking of the filenames in a subprocess
            if name == 'lfn':
                this_dir, self.namePattern = os.path.split(value)
                if this_dir:
                    self.remoteDir = this_dir
                return value

            elif name == 'namePattern':
                self.localDir, this_name = os.path.split(value)
                return this_name

            elif name == 'localDir':
                if value:
                    return expandfilename(value)
                else:
                    return value

        return value

    def locations(self):

        return self.locations

    def _setLFNnamePattern(self, lfn="", namePattern=""):

        if hasattr(self, 'defaultSE') and self.defaultSE != "":
            ## TODO REPLACE THIS WITH IN LIST OF VONAMES KNOWN
            # Check for /lhcb/some/path or /gridpp/some/path
            if namePattern.split(os.pathsep)[0] == self.defaultSE \
                or (len(namePattern) > 3 and namePattern[0:4].upper() == "LFN:"\
                    or len(namePattern.split(os.pathsep)) > 1 and namePattern.split(os.pathsep)[1] == self.defaultSE):
                # Check for LFN:/gridpp/some/path or others...
                lfn = namePattern
                namePattern = ""

        if lfn:
            if len(lfn) > 3 and lfn[0:4].upper() == "LFN:":
                lfn = lfn[4:]
        elif namePattern:
            if len(namePattern) > 3 and namePattern[0:4].upper() == 'LFN:':
                lfn = namePattern[4:]

        if lfn != "" and namePattern != "":
            self.lfn = lfn
            self.namePattern = namePattern

        elif lfn != "" and namePattern == "":
            self.lfn = lfn

        elif namePattern != "" and lfn == "":
            self.namePattern = namePattern

    def _attribute_filter__get__(self, name):

        # Avoid spending too long loading un-needed objects into memory just
        # to read the job status
        if name == 'lfn':
            if not self.lfn:
                logger.warning("Do NOT have an LFN, for file: %s" %
                               self.namePattern)
                logger.warning(
                    "If file exists locally try first using the method put()")
            return object.__getattribute__(self, 'lfn')
        elif name in ['guid', 'locations']:
            if configDirac['DiracFileAutoGet']:
                if name == 'guid':
                    if not self.guid:
                        if self.lfn:
                            self.getMetadata()
                            return object.__getattribute__(self, 'guid')
                elif name == 'locations':
                    if self.locations == []:
                        if self.lfn:
                            self.getMetadata()
                            return object.__getattribute__(self, 'locations')

        return object.__getattribute__(self, name)

    def __repr__(self):
        """Get the representation of the file."""

        return "DiracFile(namePattern='%s', lfn='%s', localDir='%s')" % (
            self.namePattern, self.lfn, self.localDir)

    def getSubFiles(self):
        """
        Returns the subfiles for this instance
        """
        if self.lfn:
            self.setLocation()
        return self.subfiles

    def dirac_line_processor(self, line, dirac_file, localPath):
        """
            Function to interpret the post processor lines.
            This returns False when everything went OK and True on an ERROR
        """
        logger.debug("Calling dirac_line_processor")
        tokens = line.strip().split(':::')
        logger.debug("dirac_line_processor: %s" % tokens)
        pattern = tokens[1].split('->')[0].split('&&')[0]
        name = tokens[1].split('->')[0].split('&&')[1]
        lfn = tokens[1].split('->')[1]
        guid = tokens[3]
        try:
            locations = eval(tokens[2])
        except Exception as err:
            logger.debug("line_process err: %s" % err)
            locations = tokens[2]

        if pattern == name:
            logger.debug("pattern == name")
            logger.error("Failed to parse outputfile data for file '%s'" %
                         name)
            return True

        #   This is the case that multiple files were requested
        if pattern == dirac_file.namePattern:
            logger.debug("pattern == dirac_file.namePattern")
            d = DiracFile(namePattern=name, lfn=lfn)
            d.compressed = dirac_file.compressed
            d.guid = guid
            d.locations = locations
            d.localDir = localPath
            dirac_file.subfiles.append(d)
            #dirac_line_processor(line, d)
            return False

        #   This is the case that an individual file was requested
        elif name == dirac_file.namePattern:
            logger.debug("name == dirac_file.namePattern")
            if lfn == '###FAILED###':
                dirac_file.failureReason = tokens[2]
                logger.error("Failed to upload file '%s' to Dirac: %s" %
                             (name, dirac_file.failureReason))
                return True
            dirac_file.lfn = lfn
            dirac_file.locations = locations
            dirac_file.guid = guid
            dirac_file.localDir = localPath
            return False

        else:
            logger.debug("False")
            return False
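
    # Hedged sketch of the post-processor line format assumed by the parser above
    # (field values are illustrative):
    #
    #   DiracFile:::*.root&&hist.root->/lhcb/user/x/xyz/123/hist.root:::['CERN-USER']:::A1B2-C3D4
    #
    # i.e. ':::'-separated fields: marker, "pattern&&name->lfn", repr of the SE list
    # (or the failure reason when the lfn field is ###FAILED###), and the guid.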

    def setLocation(self):
        """
        Parses the post-processing locations file written in the job output
        directory and fills in lfn, guid and locations for this DiracFile.
        """

        logger.debug("DiracFile: setLocation")

        if not stripProxy(self).getJobObject():
            logger.error("No job associated with DiracFile: %s" % str(self))
            return

        job = self.getJobObject()
        postprocessLocationsPath = os.path.join(
            job.outputdir,
            getConfig('Output')['PostProcessLocationsFileName'])

        postprocesslocations = None

        try:
            postprocesslocations = open(postprocessLocationsPath, 'r')
            self.subfiles = []
            ## NB remember only do this once as it leaves the 'cursor' at the end of the file - rcurrie
            all_lines = postprocesslocations.readlines()
            logger.debug("lines:\n%s" % all_lines)
            for line in all_lines:
                logger.debug("This line: %s" % line)
                if line.startswith('DiracFile'):
                    if self.dirac_line_processor(
                            line, self,
                            os.path.dirname(postprocessLocationsPath)
                    ) and regex.search(self.namePattern) is None:
                        logger.error(
                            "Error processing line:\n%s\nAND: namePattern: %s is NOT matched"
                            % (str(line), str(self.namePattern)))
                    else:
                        logger.debug("Parsed the Line")
                else:
                    logger.debug("Skipping the Line")

        except Exception as err:
            logger.warning("unexpected Error: %s" % str(err))
        finally:
            if postprocesslocations is not None:
                postprocesslocations.close()

    def _auto_remove(self):
        """
        Remove called when job is removed as long as config option allows
        """
        if self.lfn != '':
            self.remove()

    @require_credential
    def remove(self):
        """
        Remove this lfn and all replicas from DIRAC LFC/SEs
        """
        if self.lfn == "":
            raise GangaFileError(
                'Can\'t remove a file from DIRAC SE without an LFN.')
        logger.info('Removing file %s' % self.lfn)
        stdout = execute('removeFile("%s")' % self.lfn,
                         cred_req=self.credential_requirements)

        self.lfn = ""
        self.locations = []
        self.guid = ''
        return True

    @require_credential
    def removeReplica(self, SE):
        """
        Remove the replica from the given SE
        """
        self.getReplicas()
        if SE not in self.locations:
            raise GangaFileError("No replica at supplied SE: %s" % SE)
        try:
            logger.info("Removing replica at %s for LFN %s" % (SE, self.lfn))
            stdout = execute('removeReplica("%s", "%s")' % (self.lfn, SE),
                             cred_req=self.credential_requirements)
            self.locations.remove(SE)
        except GangaDiracError as err:
            raise err

        return True

    @require_credential
    def getMetadata(self):
        """
        Get the metadata associated with this file's lfn. This method will also
        try to automatically set the file's guid attribute.
        """

        if self.lfn == "":
            self._optionallyUploadLocalFile()

        # check that it has a replica
        if not self.getReplicas():
            raise GangaFileError("No replica found for this file!")

        # eval again here as datetime not included in dirac_ganga_server

        ret = execute('getMetadata("%s")' % self.lfn,
                      cred_req=self.credential_requirements)

        if self.guid != ret.get('Successful', {}).get(self.lfn, {}).get(
                'GUID', False):
            self.guid = ret['Successful'][self.lfn]['GUID']

        reps = self.getReplicas()
        ret['Successful'][self.lfn].update({'replicas': self.locations})

        return ret
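
    # Hedged usage sketch (LFN and fields are illustrative):
    #
    #   df = DiracFile(lfn='/lhcb/user/x/xyz/hist.root')
    #   md = df.getMetadata()
    #   md['Successful'][df.lfn]['GUID']       # also copied onto df.guid
    #   md['Successful'][df.lfn]['replicas']   # list of SEs, mirrored in df.locations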

    def _optionallyUploadLocalFile(self):
        """
        If this file only exists locally (no LFN yet), interactively offer to
        upload it to the grid so that an LFN becomes available.
        """

        if self.lfn != "":
            return

        if self.namePattern != "" and self.lfn == "":

            logger.info(
                "I have a local DiracFile, however you're requesting its location on the grid"
            )
            logger.info("Shall I upload it to the grid before I continue?")
            decision = raw_input('[y] / n:')
            while not (decision.lower() in ['y', 'n']
                       or decision.lower() == ''):
                decision = raw_input('[y] / n:')

            if decision.lower() in ['y', '']:
                # upload namePattern to grid
                logger.debug("Uploading the file first")
                self.put()
            elif decision == 'n':
                logger.debug("Not uploading now")
                return
            else:
                # do Nothing
                logger.debug("Continuing without uploading file")

            if self.lfn == "":
                raise GangaFileError('Uploading of namePattern: %s failed' %
                                     self.namePattern)

        if self.namePattern == "" and self.lfn == "":
            raise GangaFileError(
                'Cannot do anything if I don\'t have an lfn or a namePattern!')

        return

    @require_credential
    def getReplicas(self, forceRefresh=False):
        """
        Get the list of all SEs where this file has a replica.
        This relies on an internally cached list of replicas unless forceRefresh=True,
        in which case the replica information is re-queried from DIRAC.
        """

        if self.lfn == '':
            self._optionallyUploadLocalFile()
        if self.lfn == '':
            raise GangaFileError(
                "Can't find replicas for file which has no LFN!")

        these_replicas = None

        if len(self.subfiles) != 0:

            allReplicas = []
            for i in self.subfiles:
                allReplicas.append(i.getReplicas())

            these_replicas = allReplicas

        else:
            # deep copy just before we change it, in case we're pointing to the
            # data stored in the original from a copy
            if self._have_copied:
                self._storedReplicas = copy.deepcopy(self._storedReplicas)
            if (self._storedReplicas == {}
                    and len(self.subfiles) == 0) or forceRefresh:

                try:
                    self._storedReplicas = execute(
                        'getReplicas("%s")' % self.lfn,
                        cred_req=self.credential_requirements)
                except GangaDiracError as err:
                    logger.error("Couldn't find replicas for: %s" %
                                 str(self.lfn))
                    self._storedReplicas = {}
                    raise

                try:
                    self._storedReplicas = self._storedReplicas['Successful']
                except Exception as err:
                    logger.error("Unknown Error: %s from %s" %
                                 (str(err), self._storedReplicas))
                    raise

                logger.debug("getReplicas: %s" % str(self._storedReplicas))

                if self.lfn in self._storedReplicas:
                    self._updateRemoteURLs(self._storedReplicas)

                    these_replicas = [self._storedReplicas[self.lfn]]
                else:
                    these_replicas = {}
            elif self._storedReplicas != {}:
                these_replicas = [self._storedReplicas[self.lfn]]

        return these_replicas
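
    # Hedged usage sketch (LFN and SE names are illustrative):
    #
    #   df = DiracFile(lfn='/lhcb/user/x/xyz/hist.root')
    #   df.getReplicas()                    # e.g. [{'CERN-USER': '<surl>', 'RAL-USER': '<surl>'}]
    #   df.getReplicas(forceRefresh=True)   # bypass the cached replica information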

    def _updateRemoteURLs(self, reps):
        """
        Internal function used for storing all replica information about this LFN at different sites
        """
        if len(self.subfiles) != 0:
            for i in self.subfiles:
                i._updateRemoteURLs(reps)
        else:
            if self.lfn not in reps:
                return
            if self.locations != reps[self.lfn].keys():
                self.locations = reps[self.lfn].keys()
            #logger.debug( "locations: %s" % str( self.locations ) )
            # deep copy just before we change it, in case we're pointing to the
            # data stored in the original from a copy
            if self._have_copied:
                self._remoteURLs = copy.deepcopy(self._remoteURLs)
            for site in self.locations:
                #logger.debug( "site: %s" % str( site ) )
                self._remoteURLs[site] = reps[self.lfn][site]
                #logger.debug("Adding _remoteURLs[site]: %s" % str(self._remoteURLs[site]))

    def location(self):
        """
        Return a list of LFN locations for this DiracFile
        """
        if len(self.subfiles) == 0:
            if self.lfn == "":
                self._optionallyUploadLocalFile()
            else:
                return [self.lfn]
        else:
            # 1 LFN per DiracFile
            LFNs = []
            for this_file in self.subfiles:
                these_LFNs = this_file.location()
                for this_url in these_LFNs:
                    LFNs.append(this_url)
            return LFNs

    @require_credential
    def accessURL(self, thisSE='', protocol=''):
        """
        Attempt to find an accessURL which corresponds to the specified SE. If no SE is specified then
        return a random one from all the replicas. Also use the specified protocol - if none then use 
        the default. 
        """
        lfns = []
        if len(self.subfiles) == 0:
            lfns.append(self.lfn)
        else:
            for i in self.subfiles:
                lfns.append(i.lfn)
        return getAccessURLs(lfns, thisSE, protocol,
                             self.credential_requirements)
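
    # Hedged usage sketch (SE and protocol names are illustrative):
    #
    #   df.accessURL()                      # URL from a random replica, default protocol
    #   df.accessURL(thisSE='CERN-USER')    # prefer a replica at a specific SE
    #   df.accessURL(protocol='xroot')      # ask for a specific access protocol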

    @require_credential
    def internalCopyTo(self, targetPath):
        """
        Retrieves locally the file matching this DiracFile object pattern.
        If localPath is specified
        Args:
            targetPath(str): The path the file should be placed at locally
        """

        to_location = targetPath

        if self.lfn == "":
            raise GangaFileError('Can\'t download a file without an LFN.')

        logger.info("Getting file %s" % self.lfn)
        stdout = execute('getFile("%s", destDir="%s")' %
                         (self.lfn, to_location),
                         cred_req=self.credential_requirements)

        if self.namePattern == "":
            name = os.path.basename(self.lfn)
            if self.compressed:
                name = name[:-3]
            self.namePattern = name

        if self.guid == "" or not self.locations:
            self.getMetadata()

        return True

    @require_credential
    def replicate(self, destSE, sourceSE=''):
        """
        Replicate an LFN to another SE

        Args:
            destSE (str): the SE to replicate the file to
            sourceSE (str): the SE to use as a source for the file
        """

        if not self.lfn:
            raise GangaFileError('Must supply an lfn to replicate')

        logger.info("Replicating file %s to %s" % (self.lfn, destSE))
        stdout = execute('replicateFile("%s", "%s", "%s")' %
                         (self.lfn, destSE, sourceSE),
                         cred_req=self.credential_requirements)

        if destSE not in self.locations:
            self.locations.append(destSE)
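
    # A hypothetical, comment-only usage sketch of the download/replication helpers
    # above (kept as comments so the class body remains valid Python; the LFN and
    # SE names are placeholders, not real endpoints):
    #
    #   f = DiracFile(lfn='/somevo/user/u/username/example.root')
    #   urls = f.accessURL(thisSE='SOME-USER-SE', protocol='xroot')
    #   f.internalCopyTo('/tmp/downloads')      # the download hook shown above
    #   f.replicate(destSE='ANOTHER-USER-SE')   # add a second replica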

    def processWildcardMatches(self):
        if regex.search(self.namePattern) is not None:
            raise GangaFileError(
                "No wildcards in inputfiles for DiracFile just yet. Dirac are exposing this in API soon."
            )

    @require_credential
    def put(self, lfn='', force=False, uploadSE="", replicate=False):
        """
        Try to upload file sequentially to storage elements defined in configDirac['allDiracSE'].
        File will be uploaded to the first SE that the upload command succeeds for.

        The file is uploaded to the SE described by the DiracFile.defaultSE attribute

        Alternatively, the user can specify an uploadSE which contains an SE
        which the file is to be uploaded to.

        If the user wants to replicate this file(s) across all SE then they should state replicate = True.

        Return value will be either the stdout from the dirac upload command if not
        using the wildcard characters '*?[]' in the namePattern.
        If the wildcard characters are used then the return value will be a list containing
        newly created DiracFile objects which were the result of glob-ing the wildcards.

        The objects in this list will have been uploaded or had their failureReason attribute populated if the
        upload failed.
        """

        if self.lfn != "" and force == False and lfn == '':
            logger.warning(
                "Warning you're about to 'put' this DiracFile: %s on the grid as it already has an lfn: %s"
                % (self.namePattern, self.lfn))
            decision = raw_input('y / [n]:')
            while not (decision.lower() in ['y', 'n']
                       or decision.lower() == ''):
                decision = raw_input('y / [n]:')

            if decision.lower() == 'y':
                pass
            else:
                return

        if (lfn != '' and self.lfn != '') and force == False:
            logger.warning(
                "Warning you're attempting to put this DiracFile: %s" %
                self.namePattern)
            logger.warning("It currently has an LFN associated with it: %s" %
                           self.lfn)
            logger.warning(
                "Do you want to continue and attempt to upload to: %s" % lfn)
            decision = raw_input('y / [n]:')
            while not (decision.lower() in ['y', 'n', '']):
                decision = raw_input('y / [n]:')

            if decision.lower() == 'y':
                pass
            else:
                return

        if lfn and os.path.basename(lfn) != self.namePattern:
            logger.warning(
                "Changing namePattern from: '%s' to '%s' during put operation"
                % (self.namePattern, os.path.basename(lfn)))

        if lfn:
            self.lfn = lfn

        # It looks like this will only be needed for the interactive uploading of jobs.
        # Also, if any backend needs a Dirac upload on the client, then once downloaded
        # this will upload and then delete the file.

        if self.namePattern == "":
            if self.lfn != '':
                logger.warning(
                    "'Put'-ing a file with ONLY an existing LFN makes no sense!"
                )
            raise GangaFileError(
                'Can\'t upload a file without a local file name.')

        sourceDir = self.localDir
        if self.localDir is None:
            sourceDir = os.getcwd()
            # attached to a job, use the joboutputdir
            if self._parent != None and os.path.isdir(
                    self.getJobObject().outputdir):
                sourceDir = self.getJobObject().outputdir

        if not os.path.isdir(sourceDir):
            raise GangaFileError(
                'localDir attribute is not a valid directory, cannot determine which directory to take the file from'
            )

        if regex.search(self.namePattern) is not None:
            if self.lfn != "":
                logger.warning(
                    "Cannot specify a single lfn for a wildcard namePattern")
                logger.warning("LFN will be generated automatically")
                self.lfn = ""

        if not self.remoteDir:
            try:
                job = self.getJobObject()
                lfn_folder = os.path.join("GangaJob_%s" % job.getFQID('/'),
                                          "OutputFiles")
            except AssertionError:
                t = datetime.datetime.now()
                this_date = t.strftime("%H.%M_%A_%d_%B_%Y")
                lfn_folder = os.path.join('GangaFiles_%s' % this_date)
            lfn_base = os.path.join(
                DiracFile.diracLFNBase(self.credential_requirements),
                lfn_folder)

        else:
            lfn_base = os.path.join(
                DiracFile.diracLFNBase(self.credential_requirements),
                self.remoteDir)

        if uploadSE == "":
            if self.defaultSE != "":
                storage_elements = [self.defaultSE]
            else:
                if configDirac['allDiracSE']:
                    storage_elements = [
                        random.choice(configDirac['allDiracSE'])
                    ]
                else:
                    raise GangaFileError(
                        "Can't upload a file without a valid defaultSE or storageSE, please provide one"
                    )
        elif isinstance(uploadSE, list):
            storage_elements = uploadSE
        else:
            storage_elements = [uploadSE]

        outputFiles = GangaList()
        for this_file in glob.glob(os.path.join(sourceDir, self.namePattern)):
            name = this_file

            if not os.path.exists(name):
                if not self.compressed:
                    raise GangaFileError(
                        'Cannot upload file. File "%s" must exist!' % name)
                name += '.gz'
                if not os.path.exists(name):
                    raise GangaFileError('File "%s" must exist!' % name)
            else:
                if self.compressed:
                    os.system('gzip -c %s > %s.gz' % (name, name))
                    name += '.gz'
                    if not os.path.exists(name):
                        raise GangaFileError('File "%s" must exist!' % name)

            lfn = os.path.join(lfn_base, os.path.basename(this_file))

            d = DiracFile()
            d.namePattern = os.path.basename(name)
            d.compressed = self.compressed
            d.localDir = sourceDir
            stderr = ''
            stdout = ''
            logger.info('Uploading file \'%s\' to \'%s\' as \'%s\'' %
                        (name, storage_elements[0], lfn))
            logger.debug('execute: uploadFile("%s", "%s", %s)' %
                         (lfn, os.path.join(sourceDir,
                                            name), str([storage_elements[0]])))
            try:
                stdout = execute('uploadFile("%s", "%s", %s)' %
                                 (lfn, os.path.join(sourceDir, name),
                                  str([storage_elements[0]])),
                                 cred_req=self.credential_requirements)
            except GangaDiracError as err:
                logger.warning("Couldn't upload file '%s': \'%s\'" %
                               (os.path.basename(name), err))
                failureReason = "Error in uploading file '%s' : '%s'" % (
                    os.path.basename(name), err)
                if regex.search(self.namePattern) is not None:
                    d.failureReason = failureReason
                    outputFiles.append(d)
                    continue
                self.failureReason += '\n' + failureReason
                continue

            stdout_temp = stdout.get('Successful')

            if not stdout_temp:
                msg = "Couldn't upload file '%s': \'%s\'" % (
                    os.path.basename(name), stdout)
                logger.warning(msg)
                if regex.search(self.namePattern) is not None:
                    d.failureReason = msg
                    outputFiles.append(d)
                    continue
                self.failureReason = msg
                continue
            else:
                lfn_out = stdout_temp[lfn]

            # when doing the two step upload delete the temp file
            if self.compressed or self._parent != None:
                os.remove(name)
            # need another eval as datetime needs to be included.
            guid = lfn_out.get('GUID', '')
            if regex.search(self.namePattern) is not None:
                d.lfn = lfn
                d.remoteDir = os.path.dirname(lfn)
                d.locations = lfn_out.get('allDiracSE', '')
                d.guid = guid
                outputFiles.append(d)
                continue
            else:
                self.lfn = lfn
                self.remoteDir = os.path.dirname(lfn)
                self.locations = lfn_out.get('allDiracSE', '')
                self.guid = guid

        if replicate == True:

            if len(outputFiles) == 1 or len(outputFiles) == 0:
                storage_elements.pop(0)
                for se in storage_elements:
                    self.replicate(se)
            else:
                storage_elements.pop(0)
                for this_file in outputFiles:
                    for se in storage_elements:
                        this_file.replicate(se)

        if len(outputFiles) > 0:
            return outputFiles
        else:
            outputFiles.append(self)
            return outputFiles

    def getWNScriptDownloadCommand(self, indent):

        script_location = os.path.join(
            os.path.dirname(
                os.path.abspath(inspect.getfile(inspect.currentframe()))),
            'downloadScript.py.template')

        download_script = FileUtils.loadScript(script_location, '')

        script = """\n
download_script='''\n###DOWNLOAD_SCRIPT###'''
import subprocess
dirac_env=###DIRAC_ENV###
subprocess.Popen('''python -c "import sys\nexec(sys.stdin.read())"''', shell=True, env=dirac_env, stdin=subprocess.PIPE).communicate(download_script)
"""
        script = '\n'.join(
            [str(indent + str(line)) for line in script.split('\n')])

        replace_dict = {
            '###DOWNLOAD_SCRIPT###': download_script,
            '###DIRAC_ENV###': self._getDiracEnvStr(),
            '###LFN###': self.lfn
        }

        for k, v in replace_dict.iteritems():
            script = script.replace(str(k), str(v))

        return script

    def _getDiracEnvStr(self):
        diracEnv = str(getDiracEnv(self.credential_requirements.dirac_env))
        return diracEnv

    def _WN_wildcard_script(self, namePattern, lfnBase, compressed):
        wildcard_str = """
for f in glob.glob('###NAME_PATTERN###'):
    processes.append(uploadFile(os.path.basename(f), '###LFN_BASE###', ###COMPRESSED###, '###NAME_PATTERN###'))
"""
        wildcard_str = FileUtils.indentScript(wildcard_str, '###INDENT###')

        replace_dict = {
            '###NAME_PATTERN###': namePattern,
            '###LFN_BASE###': lfnBase,
            '###COMPRESSED###': compressed
        }

        for k, v in replace_dict.iteritems():
            wildcard_str = wildcard_str.replace(str(k), str(v))

        return wildcard_str

    def getWNInjectedScript(self, outputFiles, indent, patternsToZip,
                            postProcessLocationsFP):
        """
        Returns the script that has to be injected into the jobscript for postprocessing on the WN
        """

        script_path = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        script_location = os.path.join(script_path, 'uploadScript.py.template')

        upload_script = FileUtils.loadScript(script_location, '')

        WNscript_location = os.path.join(script_path,
                                         'WNInjectTemplate.py.template')
        script = FileUtils.loadScript(WNscript_location, '')

        if not self.remoteDir:
            try:
                job = self.getJobObject()
                lfn_folder = os.path.join("GangaJob_%s" % job.getFQID('.'),
                                          "OutputFiles")
            except AssertionError:
                t = datetime.datetime.now()
                this_date = t.strftime("%H.%M_%A_%d_%B_%Y")
                lfn_folder = os.path.join('GangaFiles_%s' % this_date)
            lfn_base = os.path.join(
                DiracFile.diracLFNBase(self.credential_requirements),
                lfn_folder)
        else:
            lfn_base = os.path.join(
                DiracFile.diracLFNBase(self.credential_requirements),
                self.remoteDir)

        for this_file in outputFiles:
            isCompressed = this_file.namePattern in patternsToZip

            if not regex.search(this_file.namePattern) is None:
                script += self._WN_wildcard_script(this_file.namePattern,
                                                   lfn_base, str(isCompressed))
            else:
                script += '###INDENT###print("Uploading: %s as: %s")\n' % (
                    this_file.namePattern,
                    str(os.path.join(lfn_base, this_file.namePattern)))
                script += '###INDENT###processes.append(uploadFile("%s", "%s", %s))\n' % (
                    this_file.namePattern, lfn_base, str(isCompressed))

        if stripProxy(self)._parent is not None and stripProxy(
                self).getJobObject() and getName(
                    stripProxy(self).getJobObject().backend) != 'Dirac':
            script_env = self._getDiracEnvStr()
        else:
            script_env = str(None)

        script = '\n'.join(
            [str('###INDENT###' + str(line)) for line in script.split('\n')])

        replace_dict = {
            '###UPLOAD_SCRIPT###': upload_script,
            '###STORAGE_ELEMENTS###': str(configDirac['allDiracSE']),
            '###INDENT###': indent,
            '###LOCATIONSFILE###': postProcessLocationsFP,
            '###DIRAC_ENV###': script_env
        }

        for k, v in replace_dict.iteritems():
            script = script.replace(str(k), str(v))

        return script

    def hasMatchedFiles(self):

        if self.lfn != "" and self.namePattern != "":
            if self.namePattern == os.path.basename(self.lfn):
                return True
            else:
                logger.error("LFN doesn't match namePattern for file: %s" %
                             str(self.namePattern))
                return False
        elif len(self.subfiles) > 0 and regex.search(
                self.namePattern) is not None:
            return True
        else:
            logger.error("Failed to Match file:\n%s" % str(self))
            return False

    @staticmethod
    def diracLFNBase(credential_requirements):
        """
        Compute a sensible default LFN base name.
        If ``DiracLFNBase`` has been defined, use that.
        Otherwise, construct one from the user name and the user VO.
        Args:
            credential_requirements (DiracProxy): This is the credential which governs how we should format the path
        """
        if configDirac['DiracLFNBase']:
            return configDirac['DiracLFNBase']
        user = DiracProxyInfo(credential_requirements).username
        return '/{0}/user/{1}/{2}'.format(configDirac['userVO'], user[0], user)
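
A minimal usage sketch of the upload path implemented by put() above. It is illustrative only: it assumes an interactive Ganga session with the DIRAC plugin configured and a valid grid proxy, and the storage element name and local path are placeholders rather than guaranteed endpoints.

# Hypothetical sketch; 'MY-USER-SE' and '/tmp/myanalysis' are placeholders.
f = DiracFile()
f.namePattern = 'ntuple.root'
f.localDir = '/tmp/myanalysis'
uploaded = f.put(uploadSE='MY-USER-SE')    # returns a list of DiracFile objects
for df in uploaded:
    # LFNs are created under diracLFNBase(); failureReason is set on errors
    print("%s %s %s" % (df.lfn, df.locations, df.failureReason))
print(f.accessURL(thisSE='MY-USER-SE'))    # protocol URL(s) for the new replica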
Example #25
0
class ITransform(GangaObject):
    _schema = Schema(Version(1, 0), {
        'status': SimpleItem(defvalue='new', protected=1, copyable=1, doc='Status - running, pause or completed', typelist=[str]),
        'name': SimpleItem(defvalue='Simple Transform', doc='Name of the transform (cosmetic)', typelist=[str]),
        'application': ComponentItem('applications', defvalue=None, optional=1, load_default=False, doc='Application of the Transform.'),
        'inputsandbox': FileItem(defvalue=[], sequence=1, doc="list of File objects shipped to the worker node "),
        'outputsandbox': SimpleItem(defvalue=[], typelist=[str], sequence=1, doc="list of filenames or patterns shipped from the worker node"),
        'backend': ComponentItem('backends', defvalue=None, optional=1, load_default=False, doc='Backend of the Transform.'),
        'splitter': ComponentItem('splitters', defvalue=None, optional=1, load_default=False, doc='Splitter used on each unit of the Transform.'),
        'postprocessors': ComponentItem('postprocessor', defvalue=None, doc='list of postprocessors to run after job has finished'),
        'merger': ComponentItem('mergers', defvalue=None, hidden=1, copyable=0, load_default=0, optional=1, doc='Merger to be done over all units when complete.'),
        'unit_merger': ComponentItem('mergers', defvalue=None, load_default=0, optional=1, doc='Merger to be copied and run on each unit separately.'),
        'copy_output': ComponentItem('datasets', defvalue=None, load_default=0, optional=1, doc='The dataset to copy all units output to, e.g. Grid dataset -> Local Dataset'),
        'unit_copy_output': ComponentItem('datasets', defvalue=None, load_default=0, optional=1, doc='The dataset to copy each individual unit output to, e.g. Grid dataset -> Local Dataset'),
        'run_limit': SimpleItem(defvalue=8, doc='Number of times a partition is tried to be processed.', protected=1, typelist=[int]),
        'minor_run_limit': SimpleItem(defvalue=3, doc='Number of times a unit can be resubmitted', protected=1, typelist=[int]),
        'major_run_limit': SimpleItem(defvalue=3, doc='Number of times a unit can be rebrokered', protected=1, typelist=[int]),
        'units': ComponentItem('units', defvalue=[], sequence=1, copyable=1, doc='list of units'),
        'inputdata': ComponentItem('datasets', defvalue=[], sequence=1, protected=1, optional=1, load_default=False, doc='Input datasets to run over'),
        'outputdata': ComponentItem('datasets', defvalue=None, optional=1, load_default=False, doc='Output dataset template'),
        'inputfiles': GangaFileItem(defvalue=[], sequence=1, doc="list of file objects that will act as input files for a job"),
        'outputfiles' : GangaFileItem(defvalue=[], sequence=1, doc="list of OutputFile objects to be copied to all jobs"),
        'metadata': ComponentItem('metadata', defvalue=MetadataDict(), doc='the metadata', protected=1),
        'rebroker_on_job_fail': SimpleItem(defvalue=True, doc='Rebroker if too many minor resubs'),
        'abort_loop_on_submit': SimpleItem(defvalue=True, doc='Break out of the Task Loop after submissions'),
        'required_trfs': SimpleItem(defvalue=[], typelist=[int], sequence=1, doc="IDs of transforms that must complete before this unit will start. NOTE DOESN'T COPY OUTPUT DATA TO INPUT DATA. Use TaskChainInput Dataset for that."),
        'chain_delay': SimpleItem(defvalue=0, doc='Minutes delay between a required/chained unit completing and starting this one', protected=0, typelist=[int]),
        'submit_with_threads': SimpleItem(defvalue=False, doc='Use Ganga Threads for submission'),
        'max_active_threads': SimpleItem(defvalue=10, doc='Maximum number of Ganga Threads to use. Note that the number of simultaneous threads is controlled by the queue system (default is 5)'),
        'info' : SimpleItem(defvalue=[],typelist=[str],protected=1,sequence=1,doc="Info showing status transitions and unit info"),
        'id': SimpleItem(defvalue=-1, protected=1, doc='ID of the Transform', typelist=[int]),
        #'force_single_unit' : SimpleItem(defvalue=False, doc='Force all input data into one Unit'),
    })

    _category = 'transforms'
    _name = 'ITransform'
    _exportmethods = ['addInputData', 'resetUnit', 'setRunLimit', 'getJobs', 'setMinorRunLimit',
                      'setMajorRunLimit', 'getID', 'overview', 'resetUnitsByStatus', 'removeUnusedJobs',
                      'showInfo', 'showUnitInfo', 'pause', 'n_all', 'n_status' ]
    _hidden = 0

    def showInfo(self):
        """Print out the info in a nice way"""
        print("\n".join( self.info ))

    def showUnitInfo(self, uid):
        """Print out the given unit info in a nice way"""
        self.units[uid].showInfo()

    def getJobs(self):
        """Return a list of the currently active job ids"""
        joblist = []
        for u in self.units:
            joblist += u.active_job_ids
        return joblist

    def setMinorRunLimit(self, newRL):
        """Set the number of times a job will be resubmitted before a major resubmit is attempted"""
        self.minor_run_limit = newRL

    def setMajorRunLimit(self, newRL):
        """Set the number of times a job will be rebrokered before the transform is paused"""
        self.major_run_limit = newRL

    def setRunLimit(self, newRL):
        """Set the total (minor+major) number of times a job should be resubmitted before the transform is paused"""
        self.run_limit = newRL

    def overview(self, status=''):
        """Show the status of the units in this transform"""
        for unit in self.units:
            # display colour given state
            o = ""
            o += ("%d:  " % self.units.index(unit)) + unit.name

            # is unit active?
            if unit.active:
                o += " " * (40 - len(o) + 3) + "*"
            else:
                o += " " * (40 - len(o) + 3) + "-"

            # sub job status
            o += "\t %i" % unit.n_status("submitted")
            o += "\t %i" % unit.n_status("running")
            o += "\t %i" % unit.n_status("completed")
            o += "\t %i" % unit.n_status("failed")
            o += "\t %i" % unit.minor_resub_count
            o += "\t %i" % unit.major_resub_count

            # change colour on state
            if unit.status == 'completed':
                o = markup(o, overview_colours["completed"])
            elif not unit.active:
                o = markup(o, overview_colours["bad"])
            elif unit.status == "recreating":
                o = markup(o, overview_colours["attempted"])
            elif len(unit.active_job_ids) == 0:
                o = markup(o, overview_colours["hold"])
            else:
                o = markup(o, overview_colours["running"])

            print(o)


# Special methods:
    def __init__(self):
        super(ITransform, self).__init__()
        self.initialize()

    def _auto__init__(self):
        self.status = 'new'

    def _readonly(self):
        """A transform is read-only if the status is not new."""
        if self.status == "new":
            return 0
        return 1

    def initialize(self):
        from Ganga.Lib.Localhost.Localhost import Localhost
        self.backend = Localhost()

    def check(self):
        """Check this transform has valid data, etc. and has the correct units"""

        # ignore anything but new transforms
        if self.status != "new":
            return

        # first, validate the transform
        if not self.validate():
            raise ApplicationConfigurationError(
                None, "Validate failed for Transform %s" % self.name)

        self.updateStatus("running")

    def startup(self):
        """This function is used to set the status after restarting Ganga"""
        pass

# Public methods
    def resetUnit(self, uid):
        """Reset the given unit"""
        addInfoString( self, "Reseting Unit %i" % ( uid ) )

        for u in self.units:
            if u.getID() == uid:
                u.reset()
                break

        # find any chained units and mark for recreation
        for trf in self._getParent().transforms:
            for u2 in trf.units:
                for req in u2.req_units:
                    if req == "%d:%d" % (self.getID(), u.getID()) or req == "%d:ALL" % (self.getID()):
                        trf.resetUnit(u2.getID())

        self.updateStatus("running")

    def getID(self):
        """Return the index of this trf in the parent task"""

        # if the id isn't already set, use the index from the parent Task
        if self.id < 0:
            task = self._getParent()
            if not task:
                raise ApplicationConfigurationError(
                    None, "This transform has not been associated with a task and so there is no ID available")
            self.id = task.transforms.index(self)
        
        return self.id

    def run(self, check=True):
        """Sets this transform to running status"""
        if self.status == "new" and check:
            self.check()
        if self.status != "completed":
            self.updateStatus("running")
            task = self._getParent()
            if task:
                task.updateStatus()
        else:
            logger.warning("Transform is already completed!")

    def update(self):
        """Called by the parent task to check for status updates, submit jobs, etc."""
        if self.status == "pause" or self.status == "new":
            return 0

        # check for complete required units
        task = self._getParent()
        for trf_id in self.required_trfs:
            if task.transforms[trf_id].status != "completed":
                return 0

        # set the start time if not already set
        if len(self.required_trfs) > 0 and self.units[0].start_time == 0:
            for unit in self.units:
                unit.start_time = time.time() + self.chain_delay * 60 - 1

        # report the info for this transform
        unit_status = { "new":0, "hold":0, "running":0, "completed":0, "bad":0, "recreating":0 }
        for unit in self.units:
            unit_status[unit.status] += 1
         
        info_str = "Unit overview: %i units, %i new, %i hold, %i running, %i completed, %i bad. to_sub %i" % (len(self.units), unit_status["new"], unit_status["hold"],
                                                                                                              unit_status["running"], unit_status["completed"],
                                                                                                              unit_status["bad"], self._getParent().n_tosub())
      
        addInfoString(self, info_str)
                
        # ask the unit splitter if we should create any more units given the
        # current data
        self.createUnits()

        # loop over units and update them ((re)submits will be called here)
        old_status = self.status
        unit_status_list = []

        # find submissions first
        unit_update_list = []
        for unit in self.units:

            if not unit.checkForSubmission() and not unit.checkForResubmission():
                unit_update_list.append(unit)
                continue

            if unit.update() and self.abort_loop_on_submit:
                logger.info("Unit %d of transform %d, Task %d has aborted the loop" % (
                    unit.getID(), self.getID(), task.id))
                return 1

            unit_status_list.append(unit.status)

        # now check for download
        for unit in unit_update_list:
            if unit.update() and self.abort_loop_on_submit:
                logger.info("Unit %d of transform %d, Task %d has aborted the loop" % (
                    unit.getID(), self.getID(), task.id))
                return 1

            unit_status_list.append(unit.status)

        from Ganga.GPIDev.Lib.Tasks.TaskChainInput import TaskChainInput
        # check for any TaskChainInput completions
        for ds in self.inputdata:
            if isType(ds, TaskChainInput) and ds.input_trf_id != -1:
                if task.transforms[ds.input_trf_id].status != "completed":
                    return 0

        # update status and check
        for state in ['running', 'hold', 'bad', 'completed']:
            if state in unit_status_list:
                if state == 'hold':
                    state = "running"
                if state != self.status:
                    self.updateStatus(state)
                break

    def createUnits(self):
        """Create new units if required given the inputdata"""

        from Ganga.GPIDev.Lib.Tasks.TaskChainInput import TaskChainInput
        # check for chaining
        for ds in self.inputdata:
            if isType(ds, TaskChainInput) and ds.input_trf_id != -1:

                # check for single unit
                if ds.single_unit:

                    # is there a unit already linked?
                    done = False
                    rec_unit = None
                    for out_unit in self.units:
                        if '%d:ALL' % (ds.input_trf_id) in out_unit.req_units:
                            done = True
                            # check if the unit is being recreated
                            if out_unit.status == "recreating":
                                rec_unit = out_unit
                            break

                    if not done or rec_unit:
                        new_unit = self.createChainUnit(
                            self._getParent().transforms[ds.input_trf_id].units, ds.use_copy_output)
                        if new_unit:
                            self.addChainUnitToTRF(
                                new_unit, ds, -1, prev_unit=rec_unit)

                else:

                    # loop over units in parent trf and create units as
                    # required
                    for in_unit in self._getParent().transforms[ds.input_trf_id].units:

                        # is there a unit already linked?
                        done = False
                        rec_unit = None
                        for out_unit in self.units:
                            if '%d:%d' % (ds.input_trf_id, in_unit.getID()) in out_unit.req_units:
                                done = True
                                # check if the unit is being recreated
                                if out_unit.status == "recreating":
                                    rec_unit = out_unit
                                break

                        if not done or rec_unit:
                            new_unit = self.createChainUnit(
                                [in_unit], ds.use_copy_output)
                            if new_unit:
                                self.addChainUnitToTRF(
                                    new_unit, ds, in_unit.getID(), prev_unit=rec_unit)

    def createChainUnit(self, parent_units, use_copy_output=True):
        """Create a chained unit given the parent outputdata"""
        return IUnit()

    def addChainUnitToTRF(self, unit, inDS, unit_id=-1, prev_unit=None):
        """Add a chained unit to this TRF. Override for more control"""
        if unit_id == -1:
            unit.req_units.append('%d:ALL' % (inDS.input_trf_id))
            unit.name = "Parent: TRF %d, All Units" % (inDS.input_trf_id)
        else:
            unit.req_units.append('%d:%d' % (inDS.input_trf_id, unit_id))
            unit.name = "Parent: TRF %d, Unit %d" % (
                inDS.input_trf_id, unit_id)

        self.addUnitToTRF(unit, prev_unit)

    def addInputData(self, inDS):
        """Add the given input dataset to the list"""
        self.inputdata.append(inDS)

    def pause(self):
        """Pause the task - the background thread will not submit new jobs from this task"""
        if self.status != "completed":
            self.updateStatus("pause")
            #self.status = "pause"
            task = self._getParent()
            if task:
                task.updateStatus()
        else:
            logger.debug("Transform is already completed!")

    def setRunlimit(self, newRL):
        """Set the number of times a job should be resubmitted before the transform is paused"""
        self.run_limit = newRL
        logger.debug("Runlimit set to %i", newRL)

# Methods that can/should be overridden by derived classes
    def validate(self):
        """Override this to validate that the transform is OK"""

        from Ganga.GPIDev.Lib.Tasks.TaskLocalCopy import TaskLocalCopy
        # make sure a path has been selected for any local downloads
        if self.unit_copy_output is not None and isType(self.unit_copy_output, TaskLocalCopy):
            if self.unit_copy_output.local_location == '':
                logger.error("No path selected for Local Output Copy")
                return False

        if self.copy_output is not None and isType(self.copy_output, TaskLocalCopy):
            if self.copy_output.local_location == '':
                logger.error("No path selected for Local Output Copy")
                return False

        # this is a generic trf so assume the application and splitter will do
        # all the work
        return True

    def addUnitToTRF(self, unit, prev_unit=None):
        """Add a unit to this Transform given the input and output data"""
        if not unit:
            raise ApplicationConfigurationError(None, "addUnitTOTRF failed for Transform %d (%s): No unit specified" % (self.getID(), self.name))

        addInfoString( self, "Adding Unit to TRF...")
        unit.updateStatus("hold")
        unit.active = True
        if prev_unit:
            unit.prev_job_ids += prev_unit.prev_job_ids
            self.units[prev_unit.getID()] = unit
        else:
            self.units.append(unit)
            stripProxy(unit).id = len(self.units) - 1

# Information methods
    def fqn(self):
        task = self._getParent()
        if task:
            return "Task %i Transform %i" % (task.id, task.transforms.index(self))
        else:
            return "Unassigned Transform '%s'" % (self.name)

    def n_active(self):
        return sum([u.n_active() for u in self.units])

    def n_all(self):
        return sum([u.n_all() for u in self.units])

    def n_status(self, status):
        return sum([u.n_status(status) for u in self.units])

    def info(self):
        logger.info(markup("%s '%s'" % (getName(self), self.name), status_colours[self.status]))
        logger.info("* backend: %s" % getName(self.backend))
        logger.info("Application:")
        self.application.printTree()

    def updateStatus(self, status):
        """Update the transform status"""
        self.status = status

    def createUnitCopyOutputDS(self, unit_id):
        """Create a the Copy Output dataset to use with this unit. Overload to handle more than the basics"""

        from Ganga.GPIDev.Lib.Tasks.TaskLocalCopy import TaskLocalCopy
        if isType(self.unit_copy_output, TaskLocalCopy):
            logger.warning("Default implementation of createUnitCopyOutputDS can't handle datasets of type '%s'" % getName(self.unit_copy_output))
            return

        # create copies of the Copy Output DS and add Unit name to path
        self.units[unit_id].copy_output = self.unit_copy_output.clone()
        self.units[unit_id].copy_output.local_location = os.path.join(
            self.unit_copy_output.local_location, self.units[unit_id].name.replace(":", "_").replace(" ", "").replace(",", "_"))

    def __setattr__(self, attr, value):

        if attr == 'outputfiles':

            if value != []:
                if self.outputdata is not None:
                    logger.error(
                        'ITransform.outputdata is set, you can\'t set ITransform.outputfiles')
                    return
                elif self.outputsandbox != []:
                    logger.error(
                        'ITransform.outputsandbox is set, you can\'t set ITransform.outputfiles')
                    return

            # reduce duplicate values here, leave only duplicates for LCG,
            # where we can have replicas
            uniqueValuesDict = []
            uniqueValues = []

            for val in value:
                key = '%s%s' % (getName(val), val.namePattern)
                if key not in uniqueValuesDict:
                    uniqueValuesDict.append(key)
                    uniqueValues.append(val)
                elif getName(val) == 'LCGSEFile':
                    uniqueValues.append(val)

            super(ITransform, self).__setattr__(attr, uniqueValues)

        elif attr == 'inputfiles':

            if value != []:
                if self.inputsandbox != []:
                    logger.error(
                        'ITransform.inputsandbox is set, you can\'t set ITransform.inputfiles')
                    return

            super(ITransform, self).__setattr__(attr, value)

        elif attr == 'outputsandbox':

            if value != []:

                if getConfig('Output')['ForbidLegacyOutput']:
                    logger.error(
                        'Use of ITransform.outputsandbox is forbidden, please use ITransform.outputfiles')
                    return

                if self.outputfiles != []:
                    logger.error(
                        'ITransform.outputfiles is set, you can\'t set ITransform.outputsandbox')
                    return

            super(ITransform, self).__setattr__(attr, value)

        elif attr == 'inputsandbox':

            if value != []:

                if getConfig('Output')['ForbidLegacyInput']:
                    logger.error(
                        'Use of ITransform.inputsandbox is forbidden, please use ITransform.inputfiles')
                    return

                if self.inputfiles != []:
                    logger.error(
                        'ITransform.inputfiles is set, you can\'t set ITransform.inputsandbox')
                    return

            super(ITransform, self).__setattr__(attr, value)

        elif attr == 'outputdata':

            if value is not None:

                if getConfig('Output')['ForbidLegacyOutput']:
                    logger.error(
                        'Use of ITransform.outputdata is forbidden, please use ITransform.outputfiles')
                    return

                if self.outputfiles != []:
                    logger.error(
                        'ITransform.outputfiles is set, you can\'t set ITransform.outputdata')
                    return
            super(ITransform, self).__setattr__(attr, value)

        else:
            super(ITransform, self).__setattr__(attr, value)

    def resetUnitsByStatus(self, status='bad'):
        """Reset all units of a given status"""
        for unit in self.units:
            if unit.status == status:
                logger.info("Resetting Unit %d, Transform %d..." %
                            (unit.getID(), self.getID()))
                self.resetUnit(unit.getID())

    def checkUnitsAreCompleted(self, parent_units):
        """Check the given parent units are complete"""
        for parent in parent_units:
            if len(parent.active_job_ids) == 0 or parent.status != "completed":
                return False

        return True

    def getChainInclExclMasks(self, parent_units):
        """return the include/exclude masks from the TaskChainInput"""
        incl_pat_list = []
        excl_pat_list = []
        from Ganga.GPIDev.Lib.Tasks.TaskChainInput import TaskChainInput
        for parent in parent_units:
            for inds in self.inputdata:
                if isType(inds, TaskChainInput) and inds.input_trf_id == parent._getParent().getID():
                    incl_pat_list += inds.include_file_mask
                    excl_pat_list += inds.exclude_file_mask

        return incl_pat_list, excl_pat_list

    def getParentUnitJobs(self, parent_units, include_subjobs=True):
        """Return the list of parent jobs"""
        job_list = []
        for parent in parent_units:
            job = getJobByID(parent.active_job_ids[0])
            if job.subjobs:
                job_list += job.subjobs
            else:
                job_list += [job]

        return job_list

    def removeUnusedJobs(self):
        """Remove all jobs that aren't being used, e.g. failed jobs"""
        for unit in self.units:
            for jid in unit.prev_job_ids:
                try:
                    logger.warning("Removing job '%d'..." % jid)
                    job = getJobByID(jid)
                    job.remove()
                except Exception as err:
                    logger.debug("removeUnused: %s" % str(err))
                    logger.error("Problem removing job '%d'" % jid)
Example #26
0
class Task(GangaObject):
    """This is a Task without special properties"""
    _schema = Schema(
        Version(1, 0), {
            'transforms':
            ComponentItem('transforms',
                          defvalue=[],
                          sequence=1,
                          copyable=1,
                          doc='list of transforms'),
            'id':
            SimpleItem(defvalue=-1,
                       protected=1,
                       doc='ID of the Task',
                       typelist=["int"]),
            'name':
            SimpleItem(defvalue='NewTask',
                       copyable=1,
                       doc='Name of the Task',
                       typelist=["str"]),
            'comment':
            SimpleItem(
                '', protected=0, doc='comment of the task', typelist=["str"]),
            'status':
            SimpleItem(defvalue='new',
                       protected=1,
                       doc='Status - new, running, pause or completed',
                       typelist=["str"]),
            'float':
            SimpleItem(defvalue=0,
                       copyable=1,
                       doc='Number of Jobs run concurrently',
                       typelist=["int"]),
            'resub_limit':
            SimpleItem(
                defvalue=0.9,
                copyable=1,
                doc=
                'Resubmit only if the number of running jobs is less than "resub_limit" times the float. This makes the job table clearer, since more jobs can be submitted as subjobs.',
                typelist=["float"]),
            'creation_date':
            SimpleItem(defvalue="19700101",
                       copyable=0,
                       hidden=1,
                       doc='Creation date of the task (used in dq2 datasets)',
                       typelist=["str"]),
        })

    _category = 'tasks'
    _name = 'Task'
    _exportmethods = [
        # Settings
        'setBackend',
        'setParameter',
        'insertTransform',
        'appendTransform',
        'removeTransform',
        'check',
        'run',
        'pause',
        'remove',  # Operations
        # Info
        'overview',
        'info',
        'n_all',
        'n_status',
        'help',
        'getJobs',
        'table',
        'float_all',
        'run_all'  # Helper
    ]

    default_registry = "tasks"

    # Special methods:
    def _auto__init__(self, registry=None):
        if registry is None:
            from Ganga.Core.GangaRepository import getRegistry
            registry = getRegistry(self.default_registry)
        # register the job (it will also commit it)
        # job gets its id now
        registry._add(self)
        self.creation_date = time.strftime('%Y%m%d%H%M%S')
        self.initialize()
        self.startup()
        self._setDirty()

    def initialize(self):
        pass

    def startup(self):
        """Startup function on Ganga startup"""
        for t in self.transforms:
            t.startup()

#   def _readonly(self):
#      """A task is read-only if the status is not new."""
#      if self.status == "new":
#         return 0
#      return 1

# Public methods:
#
# - remove() a task
# - clone() a task
# - check() a task (if updated)
# - run() a task to start processing
# - pause() to interrupt processing
# - setBackend(be) for all transforms
# - setParameter(myParam=True) for all transforms
# - insertTransform(id, tf) insert a new processing step
# - removeTransform(id) remove a processing step

    def remove(self, remove_jobs="do_nothing"):
        """Delete the task"""
        if not remove_jobs in [True, False]:
            logger.info("You want to remove the task %i named '%s'." %
                        (self.id, self.name))
            logger.info(
                "Since this operation cannot be easily undone, please call this command again:"
            )
            logger.info(
                " * as tasks(%i).remove(remove_jobs=True) if you want to remove all associated jobs,"
                % (self.id))
            logger.info(
                " * as tasks(%i).remove(remove_jobs=False) if you want to keep the jobs."
                % (self.id))
            return
        if remove_jobs:
            for j in GPI.jobs:
                try:
                    stid = j.application.tasks_id.split(":")
                    if int(stid[-2]) == self.id:
                        j.remove()
                except Exception as err:
                    logger.debug("Task remove_jobs task split Error!")
                    logger.debug("Error:\n%s" % str(err))
                    pass
        self._getRegistry()._remove(self)
        logger.info("Task #%s deleted" % self.id)

    def clone(self):
        c = super(Task, self).clone()
        for tf in c.transforms:
            tf.status = "new"
            # This is cleared separately since it is not in the schema
            tf._partition_apps = {}
        # self._getParent().register(c)
        c.check()
        return c

    def check(self):
        """This function is called by run() or manually by the user"""
        if self.status != "new":
            logger.error(
                "The check() function may modify a task and can therefore only be called on new tasks!"
            )
            return
        try:
            for t in self.transforms:
                t.check()
        finally:
            self.updateStatus()
        return True

    def run(self):
        """Confirms that this task is fully configured and ready to be run."""
        if self.status == "new":
            self.check()

        if self.status != "completed":
            if self.float == 0:
                logger.warning(
                    "The 'float', the number of jobs this task may run, is still zero. Type 'tasks(%i).float = 5' to allow this task to submit 5 jobs at a time"
                    % self.id)
            try:
                for tf in self.transforms:
                    if tf.status != "completed":
                        tf.run(check=False)

            finally:
                self.updateStatus()
        else:
            logger.info("Task is already completed!")

    def pause(self):
        """Pause the task - the background thread will not submit new jobs from this task"""
        float_cache = self.float
        self.float = 0
        if self.status != "completed":
            for tf in self.transforms:
                tf.pause()
            self.status = "pause"
        else:
            logger.info("Transform is already completed!")
        self.float = float_cache

    def setBackend(self, backend):
        """Sets the backend on all transforms"""
        for tf in self.transforms:
            if backend is None:
                tf.backend = None
            else:
                tf.backend = stripProxy(backend).clone()

    def setParameter(self, **args):
        """Use: setParameter(processName="HWW") to set the processName in all applications to "HWW"
           Warns if applications are not affected because they lack the parameter"""
        for name, parm in args.iteritems():
            for tf in [t for t in self.transforms if t.application]:
                if name in tf.application.getNodeData():
                    addProxy(tf.application).__setattr__(name, parm)
                else:
                    logger.warning("Transform %i was not affected!", tf.name)

    def insertTransform(self, id, tf):
        """Insert transfrm tf before index id (counting from 0)"""
        if self.status != "new" and id < len(self.transforms):
            logger.error(
                "You can only insert transforms at the end of the list. Only if a task is new it can be freely modified!"
            )
            return
        # self.transforms.insert(id,tf.copy()) # this would be safer, but
        # breaks user expectations
        # this means that t.insertTransform(0,t2.transforms[0]) will cause
        # Great Breakage
        self.transforms.insert(id, tf)

    def appendTransform(self, tf):
        """Append transform"""
        return self.insertTransform(len(self.transforms), tf)

    def removeTransform(self, id):
        """Remove the transform with the index id (counting from 0)"""
        if self.status != "new":
            logger.error("You can only remove transforms if the task is new!")
            return
        del self.transforms[id]

    def getJobs(self, transform=None, partition=None, only_master_jobs=True):
        """ Get the job slice of all jobs that process this task """
        if not partition is None:
            only_master_jobs = False
        jobslice = JobRegistrySlice(
            "tasks(%i).getJobs(transform=%s, partition=%s, only_master_jobs=%s)"
            % (self.id, transform, partition, only_master_jobs))

        def addjob(j):
            if transform is None or partition is None or self.transforms[int(
                    transform)]._app_partition[j.application.id] == partition:
                jobslice.objects[j.fqid] = stripProxy(j)

        for j in GPI.jobs:
            try:
                stid = j.application.tasks_id.split(":")
                if int(stid[-2]) == self.id and (transform is None or stid[-1]
                                                 == str(transform)):
                    if j.subjobs and not only_master_jobs:
                        for sj in j.subjobs:
                            addjob(sj)
                    else:
                        addjob(j)
            except Exception as err:
                logger.debug("getJobs Error!!")
                logger.debug("Error:\n%s" % str(err))
                # print x
                pass
        return JobRegistrySliceProxy(jobslice)

# Internal methods

    def finaliseTransforms(self):
        """Check for any things needing doing after a transform has completed"""
        for t in self.transforms:
            t.finalise()

    def updateStatus(self):
        """Updates status based on transform status.
           Called from check() or if status of a transform changes"""
        # Calculate status from transform status:
        states = [tf.status for tf in self.transforms]
        if "running" in states and "pause" in states:
            new_status = "running/pause"
        elif "running" in states:
            new_status = "running"
        elif "pause" in states:
            new_status = "pause"
        elif "new" in states:
            new_status = "new"
        elif "completed" in states:
            new_status = "completed"
        else:
            new_status = "new"  # no tranforms
        # Handle status changes here:
        if self.status != new_status:
            if new_status == "running/pause":
                logger.info(
                    "Some Transforms of Task %i '%s' have been paused. Check tasks.table() for details!"
                    % (self.id, self.name))
            elif new_status == "completed":
                logger.warning("Task %i '%s' has completed!" %
                               (self.id, self.name))
            elif self.status == "completed":
                logger.warning("Task %i '%s' has been reopened!" %
                               (self.id, self.name))
        self.status = new_status
        return self.status

    def submitJobs(self):
        """Submits as many jobs as necessary to maintain the float. Internal"""
        numjobs = 0
        if not self.status in ["running", "running/pause"]:
            return 0

        for i in range(len(self.transforms) - 1, -1, -1):
            tf = self.transforms[i]
            to_run = self.float - self.n_status("running")
            run = (self.resub_limit * self.float >= self.n_status("running"))
            if tf.status == "running" and to_run > 0 and run:
                numjobs += tf.submitJobs(to_run)
        return numjobs

    # Information methods
    def n_all(self):
        return sum([t.n_all() for t in self.transforms])

    def n_status(self, status):
        return sum([t.n_status(status) for t in self.transforms])

    def table(self):
        from Ganga.GPI import tasks
        tasks[self.id:self.id].table()

    def overview(self):
        """ Get an ascii art overview over task status. Can be overridden """
        logger.info("Colours: " + ", ".join([
            markup(key, overview_colours[key]) for key in [
                "hold", "ready", "running", "completed", "attempted", "failed",
                "bad", "unknown"
            ]
        ]))
        logger.info(
            "Lists the partitions of events that are processed in one job, and the number of failures to process it."
        )
        logger.info("Format: (partition number)[:(number of failed attempts)]")
        logger.info('')
        for t in self.transforms:
            t.overview()

    def info(self):
        for t in self.transforms:
            t.info()

    def help(self):
        logger.info("This is a Task without special properties")

    # Helper methods
    def float_all(self):
        self.float = self.n_all()

    def run_all(self):
        self.float_all()
        self.run()
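
A hedged sketch of the Task workflow exposed by the class above, again assuming an interactive Ganga session; CoreTask and CoreTransform are used as illustrative concrete types and are assumptions, not something this listing guarantees.

# Illustrative only; the concrete task/transform class names are assumptions.
t = CoreTask()
trf = CoreTransform()
trf.application = Executable()
trf.backend = Local()
t.appendTransform(trf)   # transforms can only be added while the task is 'new'
t.float = 5              # allow up to 5 jobs in flight at once
t.run()                  # check() the transforms and set the task running
t.overview()             # ASCII overview of units/partitions
t.table()                # summary row in the tasks table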
Example #27
0
class DiracBase(IBackend):
    """The backend that submits jobs to the Grid via DIRAC.

    The backend for jobs to be submitted to the Grid. Jobs are
    submitted through the DIRAC WMS system and then in turn submitted to the
    Grid. A few examples of usage are given below

    # Create Dirac backend object
    b = Dirac()

    # Create and submit job.
    j = Job(application=app,backend=b)
    j.submit()

    # Run a Root job on the Grid if in LHCb VO

    # Create a Root application object. See Root help text for instructions
    # on how to configure this.
    app = Root()

    # Create and submit job to Dirac using default options
    j = Job(application=app,backend=Dirac())
    j.submit()

    # Using the 'settings' attribute
    j.backend.settings['BannedSites'] = ['LCG.CERN.ch']
    j.resubmit()

    # settings can be set at any time but are only 'respected' during
    # submit and resubmit.

    """

    dirac_monitoring_is_active = True

    _schema = Schema(
        Version(3, 2), {
            'id':
            SimpleItem(
                defvalue=None,
                protected=1,
                copyable=0,
                typelist=['int', 'type(None)'],
                doc=
                'The id number assigned to the job by the DIRAC WMS. If seeking help'
                ' on jobs with the Dirac backend, please always report this id '
                'number in addition to a full description of your problem. The id '
                'can also be used to further inspect the job at '
                'https://lhcbweb.pic.es/DIRAC/info/general/diracOverview'),
            'status':
            SimpleItem(defvalue=None,
                       protected=1,
                       copyable=0,
                       typelist=['str', 'type(None)'],
                       doc='The detailed status as reported by the DIRAC WMS'),
            'actualCE':
            SimpleItem(defvalue=None,
                       protected=1,
                       copyable=0,
                       typelist=['str', 'type(None)'],
                       doc='The location where the job ran'),
            'normCPUTime':
            SimpleItem(
                defvalue=None,
                protected=1,
                copyable=0,
                typelist=['str', 'type(None)'],
                doc='The normalized CPU time reported by the DIRAC WMS'),
            'statusInfo':
            SimpleItem(defvalue='',
                       protected=1,
                       copyable=0,
                       typelist=['str', 'type(None)'],
                       doc='Minor status information from Dirac'),
            'extraInfo':
            SimpleItem(defvalue='',
                       protected=1,
                       copyable=0,
                       typelist=['str', 'type(None)'],
                       doc='Application status information from Dirac'),
            'diracOpts':
            SimpleItem(
                defvalue='',
                doc=
                'DIRAC API commands to add the job definition script. Only edit '
                'if you *really* know what you are doing'),
            'settings':
            SimpleItem(
                defvalue={'CPUTime': 2 * 86400},
                doc='Settings for DIRAC job (e.g. CPUTime, BannedSites, etc.)'
            ),
            'credential_requirements':
            ComponentItem('CredentialRequirement', defvalue=DiracProxy),
        })
    _exportmethods = [
        'getOutputData', 'getOutputSandbox', 'removeOutputData',
        'getOutputDataLFNs', 'getOutputDataAccessURLs', 'peek', 'reset',
        'debug'
    ]
    _packed_input_sandbox = True
    _category = "backends"
    _name = 'DiracBase'
    _hidden = True

    def _setup_subjob_dataset(self, dataset):
        """
        This method is used for constructing datasets on a per subjob basis when submitting parametric jobs
        Args:
            dataset (Dataset): This is a GangaDataset object (TODO: check whether this can be a list)
        """
        return None

    def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
        """
        This is the old bulk submit method which is used to construct the subjobs for a parametric job
        Args:
            dirac_ids (list): This is a list of the Dirac ids which have been created
            dirac_script (str): Name of the dirac script which contains the job jdl
        """
        f = open(dirac_script, 'r')
        parametric_datasets = get_parametric_datasets(f.read().split('\n'))
        f.close()
        if len(parametric_datasets) != len(dirac_ids):
            raise BackendError(
                'Dirac',
                'Mismatch between the number of datasets defined in the dirac API script and those returned by DIRAC'
            )

        master_job = self.getJobObject()
        master_job.subjobs = []
        for i in range(len(dirac_ids)):
            j = Job()
            j.copyFrom(master_job)
            j.splitter = None
            j.backend.id = dirac_ids[i]
            j.id = i
            j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
            j.status = 'submitted'
            j.time.timenow('submitted')
            master_job.subjobs.append(j)
        return True

    @require_credential
    def _common_submit(self, dirac_script):
        '''Submit the job via the Dirac server.
        Args:
            dirac_script (str): filename of the JDL which is to be submitted to DIRAC
        '''
        j = self.getJobObject()
        self.id = None
        self.actualCE = None
        self.status = None
        self.extraInfo = None
        self.statusInfo = ''
        j.been_queued = False
        dirac_cmd = """execfile(\'%s\')""" % dirac_script

        try:
            result = execute(dirac_cmd, cred_req=self.credential_requirements)
        except GangaDiracError as err:

            err_msg = 'Error submitting job to Dirac: %s' % str(err)
            logger.error(err_msg)
            logger.error("\n\n===\n%s\n===\n" % dirac_script)
            logger.error("\n\n====\n")
            with open(dirac_script, 'r') as file_in:
                logger.error("%s" % file_in.read())
            logger.error("\n====\n")
            raise BackendError('Dirac', err_msg)

        idlist = result
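        # A list of ids indicates a parametric (bulk) submission, so build the subjobs from it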
        if type(idlist) is list:
            return self._setup_bulk_subjobs(idlist, dirac_script)

        self.id = idlist
        return type(self.id) == int

    def _addition_sandbox_content(self, subjobconfig):
        '''any additional files that should be sent to dirac
        Args:
            subjobconfig (unknown): The configuration object for this subjob'''
        return []

    def submit(self, subjobconfig, master_input_sandbox):
        """Submit a DIRAC job
        Args:
            subjobconfig (unknown): The configuration for this subjob
            master_input_sandbox (list): file names which are in the master job's input sandbox (if any)
        """

        j = self.getJobObject()

        sboxname = j.createPackedInputSandbox(subjobconfig.getSandboxFiles())

        input_sandbox = master_input_sandbox[:]
        input_sandbox += sboxname

        input_sandbox += self._addition_sandbox_content(subjobconfig)

        ## Add LFN to the inputfiles section of the file
        input_sandbox_userFiles = []
        for this_file in j.inputfiles:
            if isType(this_file, DiracFile):
                input_sandbox_userFiles.append('LFN:' + str(this_file.lfn))
        if j.master:
            for this_file in j.master.inputfiles:
                if isType(this_file, DiracFile):
                    input_sandbox_userFiles.append('LFN:' + str(this_file.lfn))

        for this_file in input_sandbox_userFiles:
            input_sandbox.append(this_file)

        logger.debug("dirac_script: %s" % str(subjobconfig.getExeString()))
        logger.debug("sandbox_cont:\n%s" % str(input_sandbox))

        # This is a workaround for the fact that DIRAC doesn't like whitespace in sandbox filenames
        ### START_WORKAROUND
        tmp_dir = tempfile.mkdtemp()

        # Loop through all files and, if the filename contains a ' ', copy it to a location whose path doesn't contain one.
        # This has the limitation that file basenames must not contain a ' ' character;
        # however, we don't create any such files in Ganga as of 20/09/16.
        sandbox_str = '['
        for file_ in input_sandbox:
            if ' ' in str(file_):
                new_name = os.path.join(tmp_dir, os.path.basename(file_))
                shutil.copy(file_, new_name)
                file_ = new_name
            sandbox_str += '\'' + str(file_) + '\', '
        sandbox_str += ']'
        logger.debug("sandbox_str: %s" % sandbox_str)
        ### FINISH_WORKAROUND

        dirac_script = subjobconfig.getExeString().replace(
            '##INPUT_SANDBOX##', sandbox_str)

        dirac_script_filename = os.path.join(j.getInputWorkspace().getPath(),
                                             'dirac-script.py')
        with open(dirac_script_filename, 'w') as f:
            f.write(dirac_script)

        try:
            return self._common_submit(dirac_script_filename)
        finally:
            # CLEANUP after workaround
            shutil.rmtree(tmp_dir, ignore_errors=True)

    def master_auto_resubmit(self, rjobs):
        '''Duplicate of the IBackend.master_resubmit but hooked into auto resubmission
        such that the monitoring server is used rather than the user server
        Args:
            rjobs (list): This is a list of jobs which are to be auto-resubmitted'''

        incomplete = 0

        def handleError(x):
            if incomplete:
                raise x
            else:
                return 0

        try:
            for sj in rjobs:
                fqid = sj.getFQID('.')
                logger.info("resubmitting job %s to %s backend", fqid,
                            getName(sj.backend))
                try:
                    b = sj.backend
                    sj.updateStatus('submitting')
                    result = b._resubmit()
                    if result:
                        sj.updateStatus('submitted')
                        # sj._commit() # PENDING: TEMPORARY DISABLED
                        incomplete = 1
                    else:
                        return handleError(
                            IncompleteJobSubmissionError(
                                fqid, 'resubmission failed'))
                except Exception as x:
                    log_user_exception(logger,
                                       debug=isType(x, GangaDiracError))
                    return handleError(
                        IncompleteJobSubmissionError(fqid, str(x)))
        finally:
            master = self.getJobObject().master
            if master:
                master.updateMasterJobStatus()
        return 1

    def resubmit(self):
        """Resubmit a DIRAC job"""
        return self._resubmit()

    def _resubmit(self):
        """Resubmit a DIRAC job"""
        j = self.getJobObject()
        parametric = False
        script_path = os.path.join(j.getInputWorkspace().getPath(),
                                   'dirac-script.py')
        # Check old script
        if j.master is None and not os.path.exists(script_path):
            raise BackendError('Dirac',
                               'No "dirac-script.py" found in j.inputdir')

        if j.master is not None and not os.path.exists(script_path):
            script_path = os.path.join(j.master.getInputWorkspace().getPath(),
                                       'dirac-script.py')
            if not os.path.exists(script_path):
                raise BackendError(
                    'Dirac',
                    'No "dirac-script.py" found in j.inputdir or j.master.inputdir'
                )
            parametric = True

        # Read old script
        f = open(script_path, 'r')
        script = f.read()
        f.close()

        # Create new script - ##note instead of using get_parametric_dataset
        # could just use j.inputdata.
        if parametric is True:
            parametric_datasets = get_parametric_datasets(script.split('\n'))
            if j.master:
                if len(parametric_datasets) != len(j.master.subjobs):
                    raise BackendError(
                        'Dirac',
                        'number of parametric datasets defined in API script doesn\'t match number of master.subjobs'
                    )
            if j.inputdata and len(j.inputdata) > 0:
                _input_files = [
                    f for f in j.inputdata if not isType(f, DiracFile)
                ]
            else:
                _input_files = []
            if set(parametric_datasets[j.id]).symmetric_difference(
                    set([f.namePattern for f in _input_files])):
                raise BackendError(
                    'Dirac',
                    'Mismatch between dirac-script and job attributes.')
            script = script.replace(
                '.setParametricInputData(%s)' % str(parametric_datasets),
                '.setInputData(%s)' % str(parametric_datasets[j.id]))
            script = script.replace('%n', str(j.id))  # name

        start_user_settings = '# <-- user settings\n'
        new_script = script[:script.find(start_user_settings) +
                            len(start_user_settings)]

        job_ident = get_job_ident(script.split('\n'))
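        # Re-apply backend.settings to the saved script: each entry below becomes a
        # '<job_ident>.set<Key>(<value>)' line, e.g. CPUTime -> j.setCPUTime(172800) (the name 'j' is illustrative)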
        for key, value in self.settings.iteritems():
            if str(key).startswith('set'):
                _key = key[3:]
            else:
                _key = key
            if type(value) is str:
                template = '%s.set%s("%s")\n'
            else:
                template = '%s.set%s(%s)\n'
            new_script += template % (job_ident, str(_key), str(value))
        new_script += script[script.find('# user settings -->'):]

        # Save new script
        new_script_filename = os.path.join(j.getInputWorkspace().getPath(),
                                           'dirac-script.py')
        f = open(new_script_filename, 'w')
        f.write(new_script)
        f.flush()
        f.close()
        return self._common_submit(new_script_filename)

    def reset(self, doSubjobs=False):
        """Resets the state of a job back to 'submitted' so that the
        monitoring will run on it again.
        Args:
            doSubjobs (bool): Should we reset the subjobs associated with this job or not"""
        j = self.getJobObject()

        disallowed = ['submitting', 'killed']
        if j.status in disallowed:
            logger.warning("Can not reset a job in status '%s'." % j.status)
        else:
            j.getOutputWorkspace().remove(preserve_top=True)
            self.extraInfo = None
            self.statusInfo = ''
            self.status = None
            self.actualCE = None
            j.been_queued = False
            j.updateStatus('submitted')
            if j.subjobs and not doSubjobs:
                logger.info(
                    'This job has subjobs, if you would like the backends '
                    'of all the subjobs that are in status=\'completing\' or '
                    'status=\'failed\' also reset then recall reset with the '
                    'arg \'True\' i.e. job(3).backend.reset(True)')
            elif j.subjobs and doSubjobs:
                logger.info(
                    'resetting the backends of \'completing\' and \'failed\' subjobs.'
                )
                for sj in j.subjobs:
                    if sj.status == 'completing' or sj.status == 'failed':
                        sj.backend.reset()
            if j.master:
                j.master.updateMasterJobStatus()

    @require_credential
    def kill(self):
        """ Kill a Dirac jobs"""
        if not self.id:
            return None
        dirac_cmd = 'kill(%d)' % self.id
        try:
            result = execute(dirac_cmd, cred_req=self.credential_requirements)
        except GangaDiracError as err:
            raise BackendError('Dirac', 'Could not kill job: %s' % err)
        return True

    @require_credential
    def peek(self, filename=None, command=None):
        """Peek at the output of a job (Note: filename/command are ignored).
        Args:
            filename (str): Ignored but is filename of a file in the sandbox
            command (str): Ignored but is a command which could be executed"""
        dirac_cmd = 'peek(%d)' % self.id
        try:
            result = execute(dirac_cmd, cred_req=self.credential_requirements)
            logger.info(result)
        except GangaDiracError:
            logger.error("No peeking available for Dirac job '%i'.", self.id)

    @require_credential
    def getOutputSandbox(self, outputDir=None):
        """Get the outputsandbox for the job object controlling this backend
        Args:
            outputDir (str): This string represents the output dir where the sandbox is to be placed
        """
        j = self.getJobObject()
        if outputDir is None:
            outputDir = j.getOutputWorkspace().getPath()
        dirac_cmd = "getOutputSandbox(%d,'%s')" % (self.id, outputDir)
        try:
            result = execute(dirac_cmd, cred_req=self.credential_requirements)
        except GangaDiracError as err:
            msg = 'Problem retrieving output: %s' % str(err)
            logger.warning(msg)
            return False

        return True

    def removeOutputData(self):
        """
        Remove all the LFNs associated with this job.
        """
        # Note when the API can accept a list for removeFile I will change
        # this.
        j = self.getJobObject()
        if j.subjobs:
            for sj in j.subjobs:
                outputfiles_foreach(sj, DiracFile, lambda x: x.remove())
        else:
            outputfiles_foreach(j, DiracFile, lambda x: x.remove())

    def getOutputData(self, outputDir=None, names=None, force=False):
        """Retrieve data stored on SE to dir (default=job output workspace).
        If names=None, then all outputdata is downloaded otherwise names should
        be a list of files to download. If force=True then data will be redownloaded
        even if the file already exists.

        Note that if called on a master job then the output of all subjobs will be downloaded.
        If outputDir is None then each subjob's output goes into its individual
        output workspace as expected. If however a directory is specified then this is
        treated as a top-level directory and a subdirectory for each subjob is created
        below it, which avoids overwriting files with the same name from different subjobs.
        Args:
            outputDir (str): This string represents the output dir where the sandbox is to be placed
            names (list): list of names which match namePatterns in the outputfiles
            force (bool): Force the download of the data, potentially overwriting existing files
        """
        j = self.getJobObject()
        if outputDir is not None and not os.path.isdir(outputDir):
            raise GangaDiracError(
                "Designated outupt path '%s' must exist and be a directory" %
                outputDir)

        def download(dirac_file, job, is_subjob=False):
            dirac_file.localDir = job.getOutputWorkspace().getPath()
            if outputDir is not None:
                output_dir = outputDir
                if is_subjob:
                    output_dir = os.path.join(outputDir, job.fqid)
                    if not os.path.isdir(output_dir):
                        os.mkdir(output_dir)
                dirac_file.localDir = output_dir
            if os.path.exists(
                    os.path.join(dirac_file.localDir,
                                 os.path.basename(
                                     dirac_file.lfn))) and not force:
                return
            try:
                dirac_file.get()
                return dirac_file.lfn
            # TODO: the get method should really raise if it doesn't succeed
            except (GangaDiracError, GangaFileError) as e:
                logger.warning(e)

        succeeded = []
        if j.subjobs:
            for sj in j.subjobs:
                succeeded.extend([
                    download(f, sj, True)
                    for f in outputfiles_iterator(sj, DiracFile)
                    if f.lfn != '' and (
                        names is None or f.namePattern in names)
                ])
        else:
            succeeded.extend([
                download(f, j, False)
                for f in outputfiles_iterator(j, DiracFile)
                if f.lfn != '' and (names is None or f.namePattern in names)
            ])

        return filter(lambda x: x is not None, succeeded)

    def getOutputDataLFNs(self):
        """Retrieve the list of LFNs assigned to outputdata"""
        j = self.getJobObject()
        lfns = []

        if j.subjobs:
            for sj in j.subjobs:
                lfns.extend([
                    f.lfn for f in outputfiles_iterator(sj, DiracFile)
                    if f.lfn != ''
                ])
        else:
            lfns.extend([
                f.lfn for f in outputfiles_iterator(j, DiracFile)
                if f.lfn != ''
            ])
        return lfns

    def getOutputDataAccessURLs(self):
        """Retrieve the list of accessURLs assigned to outputdata for a job"""
        return getAccessURLs(self.getOutputDataLFNs())

    @require_credential
    def debug(self):
        '''Obtains some (possibly) useful DIRAC debug info. '''
        # check services
        cmd = 'getServicePorts()'
        try:
            result = execute(cmd, cred_req=self.credential_requirements)
        except GangaDiracError as err:
            logger.warning('Could not obtain services: %s' % str(err))
            return
        services = result
        for category in services:
            system, service = category.split('/')
            cmd = "ping('%s','%s')" % (system, service)
            try:
                result = execute(cmd, cred_req=self.credential_requirements)
                msg = 'OK.'
            except GangaDiracError as err:
                msg = '%s' % err
            logger.info('%s: %s' % (category, msg))

        # get pilot info for this job
        if not isinstance(self.id, int):
            return
        j = self.getJobObject()
        cwd = os.getcwd()
        debug_dir = j.getDebugWorkspace().getPath()
        cmd = "getJobPilotOutput(%d,'%s')" % (self.id, debug_dir)
        try:
            result = execute(cmd, cred_req=self.credential_requirements)
            logger.info('Pilot Info: %s/pilot_%d/std.out.' %
                        (debug_dir, self.id))
        except GangaDiracError as err:
            logger.error("%s" % err)

    @staticmethod
    def _bulk_updateStateTime(jobStateDict, bulk_time_lookup={}):
        """ This performs the same as the _getStateTime method but loops over a list of job ids within the DIRAC namespace (much faster)
        Args:
            jobStateDict (dict): A dict mapping each Dirac status to the list of jobs currently in that status
            bulk_time_lookup (dict): Dict of result of multiple calls to getBulkStateTime, performed in advance
        """
        for this_state, these_jobs in jobStateDict.iteritems():
            if bulk_time_lookup == {} or this_state not in bulk_time_lookup:
                bulk_result = execute(
                    "getBulkStateTime(%s,\'%s\')" %
                    (repr([j.backend.id for j in these_jobs]), this_state),
                    cred_req=these_jobs[0].backend.credential_requirements
                )  # TODO split jobs by cred_req
            else:
                bulk_result = bulk_time_lookup[this_state]
            for this_job in jobStateDict[this_state]:
                backend_id = this_job.backend.id
                if backend_id in bulk_result and bulk_result[backend_id]:
                    DiracBase._getStateTime(
                        this_job, this_state,
                        {this_state: bulk_result[backend_id]})
                else:
                    DiracBase._getStateTime(this_job, this_state)

    @staticmethod
    def _getStateTime(job, status, getStateTimeResult={}):
        """Returns the timestamps for 'running' or 'completed' by extracting
        their equivalent timestamps from the loggingInfo.
        Args:
            job (Job): This is the job object we want to update
            status (str): This is the Ganga status we're updating (running, completed... etc)
            getStateTimeResult (dict): The optional result of executing the appropriate getStateTime command
                                        against this job.backend.id; if not provided the command is called internally
        """
        # Now private to stop server cross-talk from the user thread, since updateStatus calls
        # this method whether it is itself called by the user thread or the monitoring thread.
        # We don't use a hook but define our own private version
        # used in the monitoring loop... messy but works.
        if job.status != status:
            b_list = ['running', 'completing', 'completed', 'failed']
            backend_final = ['failed', 'completed']
            # backend stamps
            if not job.subjobs and status in b_list:
                for childstatus in b_list:
                    if job.backend.id:
                        logger.debug("Accessing getStateTime() in diracAPI")
                        if childstatus in backend_final:
                            if childstatus in getStateTimeResult:
                                be_statetime = getStateTimeResult[childstatus]
                            else:
                                be_statetime = execute(
                                    "getStateTime(%d,\'%s\')" %
                                    (job.backend.id, childstatus),
                                    cred_req=job.backend.
                                    credential_requirements)
                            job.time.timestamps["backend_final"] = be_statetime
                            logger.debug(
                                "Wrote 'backend_final' to timestamps.")
                            break
                        else:
                            time_str = "backend_" + childstatus
                            if time_str not in job.time.timestamps:
                                if childstatus in getStateTimeResult:
                                    be_statetime = getStateTimeResult[
                                        childstatus]
                                else:
                                    be_statetime = execute(
                                        "getStateTime(%d,\'%s\')" %
                                        (job.backend.id, childstatus),
                                        cred_req=job.backend.
                                        credential_requirements)
                                job.time.timestamps["backend_" +
                                                    childstatus] = be_statetime
                            logger.debug("Wrote 'backend_%s' to timestamps.",
                                         childstatus)
                    if childstatus == status:
                        break
            logger.debug("_getStateTime(job with id: %d, '%s') called.",
                         job.id, job.status)
        else:
            logger.debug(
                "Status changed from '%s' to '%s'. No new timestamp was written",
                job.status, status)

    def timedetails(self):
        """Prints contents of the loggingInfo from the Dirac API."""
        if not self.id:
            return None
        logger.debug("Accessing timedetails() in diracAPI")
        dirac_cmd = 'timedetails(%d)' % self.id
        return execute(dirac_cmd, cred_req=self.credential_requirements)

    @staticmethod
    def job_finalisation_cleanup(job, updated_dirac_status):
        """
        Method for reverting a job back to a clean state upon a failure in the job progression
        Args:
            job (Job): The job whose status is to be reverted
            updated_dirac_status (str): The Ganga status the job was being finalised into (currently unused here)
        """
        #   Revert job back to running state if we exit uncleanly
        if job.status == "completing":
            job.updateStatus('running')
            if job.master:
                job.master.updateMasterJobStatus()
        # FIXME should I add something here to cleanup on sandboxes pulled from
        # malformed job output?

    @staticmethod
    def _internal_job_finalisation(job, updated_dirac_status):
        """
        This method performs the main job finalisation
        Args:
            job (Job): This is the job we want to finalise
            updated_dirac_status (str): String representing the Ganga finalisation state of the job failed/completed
        """

        if updated_dirac_status == 'completed':
            start = time.time()
            # firstly update job to completing
            DiracBase._getStateTime(job, 'completing')
            if job.status in ['removed', 'killed']:
                return
            elif (job.master and job.master.status in ['removed', 'killed']):
                return  # user changed it under us

            job.updateStatus('completing')
            if job.master:
                job.master.updateMasterJobStatus()

            output_path = job.getOutputWorkspace().getPath()

            logger.info('Contacting DIRAC for job: %s' % job.fqid)
            # Contact dirac which knows about the job
            job.backend.normCPUTime, getSandboxResult, file_info_dict, completeTimeResult = execute(
                "finished_job(%d, '%s')" % (job.backend.id, output_path),
                cred_req=job.backend.credential_requirements)

            now = time.time()
            logger.info(
                '%0.2fs taken to download output from DIRAC for Job %s' %
                ((now - start), job.fqid))

            #logger.info('Job ' + job.fqid + ' OutputDataInfo: ' + str(file_info_dict))
            #logger.info('Job ' + job.fqid + ' OutputSandbox: ' + str(getSandboxResult))
            #logger.info('Job ' + job.fqid + ' normCPUTime: ' + str(job.backend.normCPUTime))

            # Set DiracFile metadata
            wildcards = [
                f.namePattern for f in job.outputfiles.get(DiracFile)
                if regex.search(f.namePattern) is not None
            ]

            lfn_store = os.path.join(
                output_path,
                getConfig('Output')['PostProcessLocationsFileName'])

            # Make the file on disk with a nullop...
            if not os.path.isfile(lfn_store):
                with open(lfn_store, 'w'):
                    pass

            if job.outputfiles.get(DiracFile):

                # Now we can iterate over the contents of the file without touching it
                with open(lfn_store, 'ab') as postprocesslocationsfile:
                    if not hasattr(file_info_dict, 'keys'):
                        logger.error("Error understanding OutputDataInfo: %s" %
                                     str(file_info_dict))
                        raise GangaDiracError(
                            "Error understanding OutputDataInfo: %s" %
                            str(file_info_dict))

                    ## Caution: it is not clear at the moment whether this 'Value' key is an LHCb-ism or a bug
                    list_of_files = file_info_dict.get('Value',
                                                       file_info_dict.keys())

                    for file_name in list_of_files:
                        file_name = os.path.basename(file_name)
                        info = file_info_dict.get(file_name)
                        #logger.debug("file_name: %s,\tinfo: %s" % (str(file_name), str(info)))

                        if not hasattr(info, 'get'):
                            logger.error(
                                "Error getting OutputDataInfo for: %s" %
                                str(job.getFQID('.')))
                            logger.error(
                                "Please check the Dirac Job still exists or attempt a job.backend.reset() to try again!"
                            )
                            logger.error("Err: %s" % str(info))
                            logger.error("file_info_dict: %s" %
                                         str(file_info_dict))
                            raise GangaDiracError(
                                "Error getting OutputDataInfo")

                        valid_wildcards = [
                            wc for wc in wildcards
                            if fnmatch.fnmatch(file_name, wc)
                        ]
                        if not valid_wildcards:
                            valid_wildcards.append('')

                        for wc in valid_wildcards:
                            #logger.debug("wildcard: %s" % str(wc))

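                            # Record written per file: DiracFile:::<wildcard>&&<basename>-><LFN>:::<locations>:::<GUID>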
                            DiracFileData = 'DiracFile:::%s&&%s->%s:::%s:::%s\n' % (
                                wc, file_name,
                                info.get('LFN', 'Error Getting LFN!'),
                                str(info.get('LOCATIONS', ['NotAvailable'])),
                                info.get('GUID', 'NotAvailable'))
                            #logger.debug("DiracFileData: %s" % str(DiracFileData))
                            postprocesslocationsfile.write(DiracFileData)
                            postprocesslocationsfile.flush()

                logger.debug("Written: %s" % open(lfn_store, 'r').readlines())

            # check outputsandbox downloaded correctly
            if not result_ok(getSandboxResult):
                logger.warning('Problem retrieving outputsandbox: %s' %
                               str(getSandboxResult))
                DiracBase._getStateTime(job, 'failed')
                if job.status in ['removed', 'killed']:
                    return
                elif (job.master
                      and job.master.status in ['removed', 'killed']):
                    return  # user changed it under us
                job.updateStatus('failed')
                if job.master:
                    job.master.updateMasterJobStatus()
                raise BackendError(
                    'Dirac', 'Problem retrieving outputsandbox: %s' %
                    str(getSandboxResult))

            # finally update job to completed
            DiracBase._getStateTime(job, 'completed', completeTimeResult)
            if job.status in ['removed', 'killed']:
                return
            elif (job.master and job.master.status in ['removed', 'killed']):
                return  # user changed it under us
            job.updateStatus('completed')
            if job.master:
                job.master.updateMasterJobStatus()
            now = time.time()
            logger.debug('Job ' + job.fqid + ' Time for complete update : ' +
                         str(now - start))

        elif updated_dirac_status == 'failed':
            # firstly update status to failed
            DiracBase._getStateTime(job, 'failed')
            if job.status in ['removed', 'killed']:
                return
            if (job.master and job.master.status in ['removed', 'killed']):
                return  # user changed it under us
            job.updateStatus('failed')
            if job.master:
                job.master.updateMasterJobStatus()

            # if requested try downloading outputsandbox anyway
            if configDirac['failed_sandbox_download']:
                execute("getOutputSandbox(%d,'%s')" %
                        (job.backend.id, job.getOutputWorkspace().getPath()),
                        cred_req=job.backend.credential_requirements)
        else:
            logger.error("Job #%s Unexpected dirac status '%s' encountered" %
                         (job.getFQID('.'), updated_dirac_status))

    @staticmethod
    def job_finalisation(job, updated_dirac_status):
        """
        Attempt to finalise the job given and auto-retry 5 times on error
        Args:
            job (Job): Job object to finalise
            updated_dirac_status (str): The Ganga status to update the job to, i.e. failed/completed
        """
        count = 1
        limit = 5
        sleep_length = 2.5

        while count != limit:

            try:
                count += 1
                # Check status is sane before we start
                if job.status != "running" and (
                        job.status not in ['completed', 'killed', 'removed']):
                    job.updateStatus('submitted')
                    job.updateStatus('running')
                if job.status in ['completed', 'killed', 'removed']:
                    break
                DiracBase._internal_job_finalisation(job, updated_dirac_status)
                break
            except Exception as err:

                logger.warning("An error occured finalising job: %s" %
                               job.getFQID('.'))
                logger.warning(
                    "Attemting again (%s of %s) after %s-sec delay" %
                    (str(count), str(limit), str(sleep_length)))
                if count == limit:
                    logger.error(
                        "Unable to finalise job after %s retries due to error:\n%s"
                        % (job.getFQID('.'), str(err)))
                    job.force_status('failed')
                    raise

            time.sleep(sleep_length)

        job.been_queued = False

    @staticmethod
    def requeue_dirac_finished_jobs(requeue_jobs, finalised_statuses):
        """
        Method used to requeue jobs which are in a finalised state of some form, finished/failed/etc
        Args:
            requeue_jobs (list): This is a list of the jobs which are to be requeued to be finalised
            finalised_statuses (dict): Dict mapping Dirac statuses to the corresponding final Ganga statuses
        """

        # requeue existing completed job
        for j in requeue_jobs:
            if j.been_queued:
                continue

            if monitoring_component:
                if monitoring_component.should_stop():
                    break
            if not configDirac['serializeBackend']:
                getQueues()._monitoring_threadpool.add_function(
                    DiracBase.job_finalisation,
                    args=(j, finalised_statuses[j.backend.status]),
                    priority=5,
                    name="Job %s Finalizing" % j.fqid)
                j.been_queued = True
            else:
                DiracBase.job_finalisation(
                    j, finalised_statuses[j.backend.status])

    @staticmethod
    def monitor_dirac_running_jobs(monitor_jobs, finalised_statuses):
        """
        Method to update the configuration of jobs which are in a submitted/running state in Ganga&Dirac
        Args:
            monitor_jobs (list): Jobs which are to be monitored for their status change
            finalised_statuses (dict): Dict mapping Dirac statuses to the corresponding final Ganga statuses
        """

        # Now that we can submit in non-blocking mode, we can see jobs in 'submitting'
        # that have yet to be assigned an id, so ignore them.
        # NOT SURE THIS IS VALID NOW BULK SUBMISSION IS GONE
        # EVEN THOUGH WE COULD ADD queues.add(j.submit); WILL KEEP AN EYE ON IT
        # dirac_job_ids    = [ j.backend.id for j in monitor_jobs if j.backend.id is not None ]
        # Correction: this did become a problem for a crashed session during
        # submit, see #104454
        dead_jobs = (j for j in monitor_jobs if j.backend.id is None)
        for d in dead_jobs:
            d.updateStatus('failed')
            if d.master is not None:
                d.master.updateMasterJobStatus()

        ganga_job_status = [
            j.status for j in monitor_jobs if j.backend.id is not None
        ]
        dirac_job_ids = [
            j.backend.id for j in monitor_jobs if j.backend.id is not None
        ]

        logger.debug("GangaStatus: %s" % str(ganga_job_status))
        logger.debug("diracJobIDs: %s" % str(dirac_job_ids))

        if not dirac_job_ids:
            ## Nothing to do here stop bugging DIRAC about it!
            ## Everything else beyond here in the function depends on some ids present here, no ids means we can stop.
            return

        statusmapping = configDirac['statusmapping']
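        # statusmapping (from the Dirac config) translates raw Dirac statuses into the Ganga statuses used below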

        result, bulk_state_result = execute(
            'monitorJobs(%s, %s)' % (repr(dirac_job_ids), repr(statusmapping)),
            cred_req=monitor_jobs[0].backend.credential_requirements)

        #result = results[0]
        #bulk_state_result = results[1]

        if len(result) != len(ganga_job_status):
            logger.warning('Dirac monitoring failed for %s, result = %s' %
                           (str(dirac_job_ids), str(result)))
            logger.warning("Results: %s" % str(result))
            return

        requeue_job_list = []
        jobStateDict = {}

        jobs_to_update = {}
        master_jobs_to_update = []

        thread_handled_states = ['completed', 'failed']
        for job, state, old_state in zip(monitor_jobs, result,
                                         ganga_job_status):
            if monitoring_component:
                if monitoring_component.should_stop():
                    break

            if job.been_queued:
                continue

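            # state per job: (minor status info, Dirac status, CE where the job ran, Ganga-mapped status[, application status])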
            job.backend.statusInfo = state[0]
            job.backend.status = state[1]
            job.backend.actualCE = state[2]
            updated_dirac_status = state[3]
            try:
                job.backend.extraInfo = state[4]
            except Exception as err:
                logger.debug("gexception: %s" % str(err))
                pass
            logger.debug('Job status vector  : ' + job.fqid + ' : ' +
                         repr(state))

            if updated_dirac_status not in jobStateDict:
                jobStateDict[updated_dirac_status] = []
            jobStateDict[updated_dirac_status].append(job)

            if job.backend.status in finalised_statuses:
                if job.status != 'running':
                    if job.status in ['removed', 'killed']:
                        requeue_job_list.append(job)
                    elif (job.master
                          and job.master.status in ['removed', 'killed']):
                        continue  # user changed it under us
                    else:
                        if 'running' not in jobs_to_update:
                            jobs_to_update['running'] = []
                        jobs_to_update['running'].append(job)
                        if job.master:
                            if job.master not in master_jobs_to_update:
                                master_jobs_to_update.append(job.master)
                        requeue_job_list.append(job)

            else:
                if job.status in ['removed', 'killed']:
                    continue
                if (job.master and job.master.status in ['removed', 'killed']):
                    continue  # user changed it under us
                if job.status != updated_dirac_status:
                    if updated_dirac_status not in jobs_to_update:
                        jobs_to_update[updated_dirac_status] = []
                    jobs_to_update[updated_dirac_status].append(job)
                    if job.master:
                        if job.master not in master_jobs_to_update:
                            master_jobs_to_update.append(job.master)

        DiracBase._bulk_updateStateTime(jobStateDict, bulk_state_result)

        for status in jobs_to_update:
            for job in jobs_to_update[status]:
                job.updateStatus(status, update_master=False)

        for j in master_jobs_to_update:
            j.updateMasterJobStatus()

        DiracBase.requeue_dirac_finished_jobs(requeue_job_list,
                                              finalised_statuses)

    @staticmethod
    def updateMonitoringInformation(jobs_):
        """Check the status of jobs and retrieve output sandboxesi
        Args:
            jobs_ (list): List of the appropriate jobs to monitored
        """
        # Only those jobs in 'submitted' or 'running' are passed in here for checking.
        # If however they have already completed in Dirac, they may have been put on the queue
        # for processing last time. These should be put back on the queue without
        # querying Dirac again. Their signature is status = 'running' with job.backend.status
        # already set to Done, Failed, etc.

        jobs = [stripProxy(j) for j in jobs_]

        # Remove from consideration any jobs already in the queue. Checking this non-persisted attribute
        # is better than querying the queue, as we can't tell whether a job has just been taken off the queue and is being processed.
        # Also, by not being persistent, this attribute automatically allows queued jobs from the last session to be considered
        # for requeueing
        interesting_jobs = [j for j in jobs if not j.been_queued]
        # status that correspond to a ganga 'completed' or 'failed' (see DiracCommands.status(id))
        # if backend status is these then the job should be on the queue
        finalised_statuses = configDirac['finalised_statuses']

        monitor_jobs = [
            j for j in interesting_jobs
            if j.backend.status not in finalised_statuses
        ]
        requeue_jobs = [
            j for j in interesting_jobs
            if j.backend.status in finalised_statuses
        ]

        #logger.debug('Interesting jobs: ' + repr([j.fqid for j in interesting_jobs]))
        #logger.debug('Monitor jobs    : ' + repr([j.fqid for j in monitor_jobs]))
        #logger.debug('Requeue jobs    : ' + repr([j.fqid for j in requeue_jobs]))

        try:
            # Split all the monitorable jobs into groups based on the
            # credential used to communicate with DIRAC
            for requeue_jobs_group in group_jobs_by_backend_credential(
                    requeue_jobs):
                DiracBase.requeue_dirac_finished_jobs(requeue_jobs_group,
                                                      finalised_statuses)
            for monitor_jobs_group in group_jobs_by_backend_credential(
                    monitor_jobs):
                DiracBase.monitor_dirac_running_jobs(monitor_jobs_group,
                                                     finalised_statuses)
        except GangaDiracError as err:
            logger.warning(
                "Error in Monitoring Loop, jobs on the DIRAC backend may not update"
            )
            logger.debug(err)
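
The exported methods above can be driven interactively from the Ganga prompt once a job using a Dirac-derived backend has completed. A minimal sketch follows; the registry lookup jobs(3) and the job id are purely illustrative.

j = jobs(3)                                 # illustrative: fetch an existing, completed Dirac job from the registry
j.backend.getOutputSandbox()                # download the output sandbox into the job's output workspace
lfns = j.backend.getOutputDataLFNs()        # LFNs of any DiracFile output data registered by the job
urls = j.backend.getOutputDataAccessURLs()  # access URLs for those LFNs
j.backend.debug()                           # ping DIRAC services and fetch the pilot output for this job
j.backend.reset(True)                       # re-run monitoring on this job and its 'completing'/'failed' subjobs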
Example #28
class LHCbTransform(ITransform):
    _schema = Schema(
        Version(1, 0),
        dict(
            ITransform._schema.datadict.items() + {
                'files_per_unit':
                SimpleItem(
                    defvalue=-1,
                    doc=
                    'Maximum number of files to assign to each unit from a given input dataset. If < 1, use all files.',
                    typelist=["int"]),
                'splitter':
                ComponentItem('splitters',
                              defvalue=None,
                              optional=1,
                              load_default=False,
                              doc='Splitter to be used for units'),
                'queries':
                ComponentItem('query',
                              defvalue=[],
                              sequence=1,
                              protected=1,
                              optional=1,
                              load_default=False,
                              doc='Queries managed by this Transform'),
                'delete_chain_input':
                SimpleItem(
                    defvalue=False,
                    doc=
                    'Delete the Dirac input files/data after completion of each unit',
                    typelist=["bool"]),
                'mc_num_units':
                SimpleItem(defvalue=0,
                           doc="No. of units to create for MC generation"),
            }.items()))

    _category = 'transforms'
    _name = 'LHCbTransform'
    _exportmethods = ITransform._exportmethods + [
        'updateQuery', 'addQuery', 'removeUnusedData', 'cleanTransform'
    ]

    def __init__(self):
        super(LHCbTransform, self).__init__()

        # generally no delay needed
        self.chain_delay = 0

    def addQuery(self, bk):
        """Add a BK query to this transform"""
        # Check if the BKQuery input is correct and append/update
        if not isType(bk, BKQuery):
            raise GangaAttributeError(
                None,
                'LHCbTransform expects a BKQuery object passed to the addQuery method'
            )

        # check we don't already have inputdata
        if len(self.queries) == 0 and len(self.inputdata) > 0:
            logger.error(
                "Cannot add both input data and BK queries. Input Data already present."
            )
            return

        # add the query and update the input data
        self.queries.append(bk)
        self.updateQuery()

    def addInputQuery(self, inDS):
        """Add the given input dataset to the list but only if BK queries aren't given"""
        if len(self.queries) > 0:
            logger.error(
                "Cannot add both input data and BK queries. Query already given"
            )
            return

        super(LHCbTransform, self).addInputQuery(inDS)

    def cleanTransform(self):
        """Remove unused data and then unused jobs"""
        self.removeUnusedData()
        self.removeUnusedJobs()

    def removeUnusedData(self):
        """Remove any output data from orphaned jobs"""
        for unit in self.units:
            for jid in unit.prev_job_ids:
                try:
                    logger.warning("Removing data from job '%d'..." % jid)
                    job = getJobByID(jid)

                    jlist = []
                    if len(job.subjobs) > 0:
                        jlist = job.subjobs
                    else:
                        jlist = [job]

                    for sj in jlist:
                        for f in sj.outputfiles:
                            if isType(f, DiracFile) == "DiracFile" and f.lfn:
                                f.remove()
                except:
                    logger.error("Problem deleting data for job '%d'" % jid)
                    pass

    def createUnits(self):
        """Create new units if required given the inputdata"""

        # call parent for chaining
        super(LHCbTransform, self).createUnits()

        if len(self.inputdata) > 0:

            # check for conflicting input
            if self.mc_num_units > 0:
                logger.warning("Inputdata specified - MC Event info ignored")

            # loop over input data and see if we need to create any more units
            import copy
            for id, inds in enumerate(self.inputdata):

                if not isType(inds, LHCbDataset):
                    continue

                # go over the units and see what files have been assigned
                assigned_data = LHCbDataset()
                for unit in self.units:

                    if unit.input_datset_index != id:
                        continue

                    assigned_data.files += unit.inputdata.files

                # any new files
                new_data = LHCbDataset(
                    files=self.inputdata[id].difference(assigned_data).files)

                if len(new_data.files) == 0:
                    continue

                # Create units for these files
                step = self.files_per_unit
                if step <= 0:
                    step = len(new_data.files)

                for num in range(0, len(new_data.files), step):
                    unit = LHCbUnit()
                    unit.name = "Unit %d" % len(self.units)
                    unit.input_datset_index = id
                    self.addUnitToTRF(unit)
                    unit.inputdata = copy.deepcopy(self.inputdata[id])
                    unit.inputdata.files = []
                    unit.inputdata.files += new_data.files[num:num + step]

        elif self.mc_num_units > 0:
            if len(self.units) == 0:
                # check for appropriate splitter
                from GangaLHCb.Lib.Splitters.GaussSplitter import GaussSplitter
                if not self.splitter or not isType(self.splitter, GaussSplitter):
                    logger.warning(
                        "No GaussSplitter specified - first event info ignored"
                    )

                # create units for MC generation
                for i in range(0, self.mc_num_units):
                    unit = LHCbUnit()
                    unit.name = "Unit %d" % len(self.units)
                    self.addUnitToTRF(unit)
        else:
            import traceback
            traceback.print_stack()
            logger.error(
                "Please specify either inputdata or MC info for unit generation"
            )

    def createChainUnit(self, parent_units, use_copy_output=True):
        """Create an output unit given this output data"""

        # we need a parent job that has completed to get the output files
        incl_pat_list = []
        excl_pat_list = []
        for parent in parent_units:
            if len(parent.active_job_ids) == 0 or parent.status != "completed":
                return None

            for inds in self.inputdata:
                from Ganga.GPIDev.Lib.Tasks.TaskChainInput import TaskChainInput
                if isType(
                        inds, TaskChainInput
                ) and inds.input_trf_id == parent._getParent().getID():
                    incl_pat_list += inds.include_file_mask
                    excl_pat_list += inds.exclude_file_mask

        # go over the output files and copy the appropriates over as input
        # files
        flist = []
        import re
        for parent in parent_units:
            job = getJobByID(parent.active_job_ids[0])
            if job.subjobs:
                job_list = job.subjobs
            else:
                job_list = [job]

            for sj in job_list:
                for f in sj.outputfiles:

                    # match any dirac files that are allowed in the file mask
                    if isType(f, DiracFile):
                        if len(incl_pat_list) > 0:
                            for pat in incl_pat_list:
                                if re.search(pat, f.lfn):
                                    flist.append("LFN:" + f.lfn)
                        else:
                            flist.append("LFN:" + f.lfn)

                        if len(excl_pat_list) > 0:
                            for pat in excl_pat_list:
                                if re.search(
                                        pat,
                                        f.lfn) and "LFN:" + f.lfn in flist:
                                    flist.remove("LFN:" + f.lfn)

        # just do one unit that uses all data
        unit = LHCbUnit()
        unit.name = "Unit %d" % len(self.units)
        unit.inputdata = LHCbDataset(files=[DiracFile(lfn=f) for f in flist])

        return unit

    def updateQuery(self, resubmit=False):
        """Update the dataset information of the transforms. This will
        include any new data in the processing or re-run jobs that have data which
        has been removed."""
        if len(self.queries) == 0:
            raise GangaException(
                None,
                'Cannot call updateQuery() on an LHCbTransform without any queries'
            )

        if self._getParent() != None:
            logger.info(
                'Retrieving latest bookkeeping information for transform %i:%i, please wait...'
                % (self._getParent().id, self.getID()))
        else:
            logger.info(
                'Retrieving latest bookkeeping information for transform, please wait...'
            )

        # check we have an input DS per BK Query
        while len(self.queries) > len(self.inputdata):
            self.inputdata.append(LHCbDataset())

        # loop over the queries and add fill file lists
        for id, query in enumerate(self.queries):

            # Get the latest dataset
            latest_dataset = query.getDataset()

            # Compare to previous inputdata, get new and removed
            logger.info(
                'Checking for new and removed data for query %d, please wait...'
                % self.queries.index(query))
            dead_data = LHCbDataset()
            new_data = LHCbDataset()

            # loop over the old data and compare
            new_data.files += latest_dataset.difference(
                self.inputdata[id]).files
            dead_data.files += self.inputdata[id].difference(
                latest_dataset).files

            # for dead data, find then kill/remove any associated jobs
            # loop over units and check any associated with this DS
            # TODO: Follow through chained tasks
            for unit in self.units:
                # associated unit
                if unit.input_datset_index != id:
                    continue

                # find the job
                if len(unit.active_job_ids) == 0:
                    continue

                # check the data
                for f in dead_data.files:
                    if f in unit.inputdata.files:

                        # kill the job
                        job = getJobByID(unit.active_job_ids[0])
                        if job.status in ['submitted', 'running']:
                            job.kill()

                        # forget the job
                        unit.prev_job_ids.append(unit.active_job_ids[0])
                        unit.active_job_ids = []
                        break

            # in any case, now just set the DS files to the new set
            self.inputdata[id].files = []
            self.inputdata[id].files = latest_dataset.files
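
A brief sketch of driving an LHCbTransform from a bookkeeping query; the LHCbTask wrapper, the appendTransform call and the bookkeeping path are assumptions made for illustration, while addQuery, updateQuery and cleanTransform are the methods defined above.

t = LHCbTask()                                       # assumed task container for the transform
trf = LHCbTransform()
trf.addQuery(BKQuery('/some/bookkeeping/path.dst'))  # placeholder BK path; fills trf.inputdata from the query
t.appendTransform(trf)                               # assumed task method for attaching the transform
trf.updateQuery()                                    # refresh inputdata; jobs whose data disappeared are killed and forgotten
trf.cleanTransform()                                 # remove unused data and jobs from superseded units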
Example #29
class GoogleFile(IGangaFile):
    """
    The GoogleFile outputfile type allows for files to be directly uploaded, downloaded, removed and restored from the GoogleDrive service.
    It can be used as part of a job to output data directly to GoogleDrive, or standalone through the Ganga interface.

    example job: j=Job(application=Executable(exe=File('/home/hep/hs4011/Tests/testjob.sh'), args=[]),outputfiles=[GoogleFile('TestJob.txt')])

                 j.submit()

                 ### This job will automatically upload the outputfile 'TestJob.txt' to GoogleDrive.

    example of standalone submission:

                 g=GoogleFile('TestFile.txt')

                 g.localDir = '~/TestDirectory'        ### The file's location must be specified for standalone submission

                 g.put()                               ### The put() method uploads the file to GoogleDrive directly

    The GoogleFile outputfile is also compatible with the Dirac backend, making outputfiles from Dirac-run jobs upload directly to GoogleDrive.
    """

    _schema = Schema(
        Version(1, 1), {
            'namePattern':
            SimpleItem(defvalue="", doc='pattern of the file name'),
            'localDir':
            SimpleItem(
                defvalue="",
                copyable=1,
                doc=
                'local dir where the file is stored, used from get and put methods'
            ),
            'subfiles':
            ComponentItem(category='gangafiles',
                          defvalue=[],
                          hidden=1,
                          sequence=1,
                          copyable=0,
                          doc="collected files from the wildcard namePattern"),
            'failureReason':
            SimpleItem(
                defvalue="", copyable=1, doc='reason for the upload failure'),
            'compressed':
            SimpleItem(
                defvalue=False,
                typelist=[bool],
                protected=0,
                doc=
                'whether the output file should be compressed before sending somewhere'
            ),
            'downloadURL':
            SimpleItem(
                defvalue="",
                copyable=1,
                protected=1,
                doc=
                'download URL assigned to the file upon upload to GoogleDrive'
            ),
            'id':
            SimpleItem(
                defvalue="",
                copyable=1,
                hidden=1,
                protected=1,
                doc='GoogleFile ID assigned to file on upload to GoogleDrive'
            ),
            'title':
            SimpleItem(defvalue="",
                       copyable=1,
                       hidden=1,
                       protected=1,
                       doc='GoogleFile title of the uploaded file'),
            'GangaFolderId':
            SimpleItem(defvalue="",
                       copyable=1,
                       hidden=1,
                       protected=1,
                       doc='GoogleDrive Ganga folder ID')
        })
    _category = 'gangafiles'
    _name = 'GoogleFile'
    _exportmethods = ["get", "put", "remove", "restore", "deleteCredentials"]

    def __init__(self, namePattern=''):
        super(GoogleFile, self).__init__()
        self.namePattern = namePattern
        self.__initialized = False

        self.cred_path = os.path.join(
            getConfig('Configuration')['gangadir'], 'googlecreddata.pkl')

    def __initializeCred(self):
        while not os.path.isfile(self.cred_path):
            from oauth2client.client import OAuth2WebServerFlow

            # Copy your credentials from the APIs Console
            #            CLIENT_ID = "54459939297.apps.googleusercontent.com"
            #            CLIENT_SECRET = "mAToHx5RpXtwkeYR6nOIe_Yw"
            CLIENT_ID = '776655306197-dirtoquqsm7cpqgepvamofg5t2b5f637.apps.googleusercontent.com'
            CLIENT_SECRET = 'GpdEP-OBZZQLB3k-xxOpzFQG'
            # Check https://developers.google.com/drive/scopes for all
            # available scopes
            OAUTH_SCOPE = 'https://www.googleapis.com/auth/drive.file'

            # Redirect URI for installed apps
            REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'

            # Run through the OAuth flow and retrieve credentials
            credentials = ''
            flow = OAuth2WebServerFlow(CLIENT_ID, CLIENT_SECRET, OAUTH_SCOPE,
                                       REDIRECT_URI)
            authorize_url = flow.step1_get_authorize_url()
            try:
                import webbrowser
                webbrowser.get('macosx').open(authorize_url, 0, True)
            except:
                try:
                    import webbrowser
                    webbrowser.get('windows-default').open(
                        authorize_url, 0, True)
                except:
                    try:
                        import webbrowser
                        webbrowser.get('firefox').open(authorize_url, 0, True)
                    except Exception, err:
                        logger.error("Error: %s" % str(err))
                        pass
            logger.info('Go to the following link in your browser: ' +
                        authorize_url)
            code = raw_input('Enter verification code: ').strip()
            try:
                credentials = flow.step2_exchange(code)
            except:
                deny = raw_input(
                    'An incorrect code was entered. Have you denied Ganga access to your GoogleDrive (y/[n])?'
                )
                if deny.lower() in ['', 'n']:
                    pass
                elif deny[0:1].upper() == 'Y':
                    return None

            # Pickle credential data
            if credentials != '':
                with open(self.cred_path, "wb") as output:
                    pickle.dump(credentials, output)

                os.chmod(self.cred_path, stat.S_IWUSR | stat.S_IRUSR)
                logger.info(
                    'Your GoogleDrive credentials have been stored in the file %s and are only readable by you. '
                    'The file will give permission to modify files in your GoogleDrive. '
                    'Permission can be revoked by going to "Manage Apps" in your GoogleDrive '
                    'or by deleting the credentials through the deleteCredentials GoogleFile method.'
                    % self.cred_path)

        self.__initialized = True

        self._check_Ganga_folder()
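
# A minimal standalone usage sketch based on the docstring above. It assumes an
# active Ganga session (where GoogleFile is available without an import) and
# GoogleDrive credentials that have already been created; the file name and
# directory are the placeholders used in the docstring, not real paths.
g = GoogleFile('TestFile.txt')
g.localDir = '~/TestDirectory'  # must be set explicitly for standalone use
g.put()                         # uploads the file to GoogleDrive (fills downloadURL)
g.get()                         # get() is part of the exported API; fetches the remote copy
g.remove()                      # removes the remote copy (restore() is also exported)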
Example #30
class MassStorageFile(IGangaFile):
    """MassStorageFile represents a class marking a file to be written into mass storage (like Castor at CERN)
    """
    _schema = Schema(Version(1, 1), {'namePattern': SimpleItem(defvalue="", doc='pattern of the file name'),
                                     'localDir': SimpleItem(defvalue="", copyable=1, doc='local dir where the file is stored, used from get and put methods'),
                                     'joboutputdir': SimpleItem(defvalue="", doc='outputdir of the job with which the outputsandbox file object is associated'),
                                     'locations': SimpleItem(defvalue=[], copyable=1, typelist=[str], sequence=1, doc="list of locations where the outputfiles are uploaded"),
                                     'outputfilenameformat': SimpleItem(defvalue=None, typelist=[str, None], protected=0,
                                                                        doc="keyword path to where the output should be uploaded, i.e. /some/path/here/{jid}/{sjid}/{fname}; "
                                                                            "if this field is not set, the output will go in {jid}/{sjid}/{fname} or in {jid}/{fname} "
                                                                            "depending on whether the job is split or not"),
                                     'inputremotedirectory': SimpleItem(defvalue=None, typelist=[str, None], protected=0, doc="Directory on mass storage where the file is stored"),
                                     'subfiles': ComponentItem(category='gangafiles', defvalue=[], hidden=1, sequence=1, copyable=0,
                                                               doc="collected files from the wildcard namePattern"),
                                     'failureReason': SimpleItem(defvalue="", protected=1, copyable=0, doc='reason for the upload failure'),
                                     'compressed': SimpleItem(defvalue=False, typelist=[bool], protected=0, doc='whether the output file should be compressed before sending somewhere')
                                     })

    _category = 'gangafiles'
    _name = "MassStorageFile"
    _exportmethods = [
        "location", "get", "put", "setLocation", "remove", "accessURL"
    ]

    _additional_slots = ['shell']

    def __init__(self, namePattern='', localDir='', **kwds):
        """
        MassStorageFile construction
        Args:
            namePattern (str): is the pattern of the output file that has to be written into mass storage
            localDir (str): This is the optional local directory of a file to be uploaded to mass storage
        """
        self._checkConfig()
        super(MassStorageFile, self).__init__()
        self._setNamePath(_namePattern=namePattern, _localDir=localDir)
        self.locations = []
        self.shell = Shell.Shell()

    def __setattr__(self, attr, value):
        """
        This is an overloaded setter method to make sure that we're auto-expanding the filenames of files which exist.
        In the case we're assigning any other attributes the value is simply passed through
        Args:
            attr (str): This is the name of the attribute which we're assigning
            value (unknown): This is the value being assigned.
        """
        actual_value = value
        if attr == "namePattern":
            this_localDir, actual_value = os.path.split(value)
            if this_localDir:
                self.localDir = this_localDir
        if attr == "localDir":
            if value and (value.find(':') == -1):
                actual_value = os.path.abspath(expandfilename(value))

        super(MassStorageFile, self).__setattr__(attr, actual_value)

    def _setNamePath(self, _namePattern='', _localDir=''):
        if _namePattern != '' and _localDir == '':
            self.namePattern = os.path.basename(_namePattern)
            if not os.path.dirname(_namePattern):
                if os.path.isfile(
                        os.path.join(os.getcwd(),
                                     os.path.basename(_namePattern))):
                    self.localDir = os.getcwd()
            else:
                self.localDir = os.path.dirname(_namePattern)
        elif _namePattern != '' and _localDir != '':
            self.namePattern = _namePattern
            self.localDir = _localDir

    def _checkConfig(self):
        """
        Check that the MassStorageFile configuration is correct
        """
        if not getConfig('Output')[_getName(self)]['uploadOptions']['path']:
            raise GangaException(
                'Unable to create MassStorageFile. Check your configuration!')

    def __repr__(self):
        """Get the representation of the file."""

        return "%s(namePattern='%s')" % (_getName(self), self.namePattern)

    def mass_line_processor(self, line):
        """ This function splits the input line from the post-processing system to define where this file is:
        Args:
            line (str): This is expected to be in the format of the postprocessor file from jobs transferring files on the WN
        """
        lineParts = line.split()
        pattern = lineParts[1]
        outputPath = lineParts[2]
        split_name = os.path.splitext(outputPath)
        if split_name[1] == '.gz':
            name = split_name[0]
        else:
            name = outputPath

        if regex.search(self.namePattern) is not None:
            if outputPath == 'ERROR':
                logger.error("Failed to upload file to mass storage")
                logger.error(line[line.find('ERROR') + 5:])
                d = copy.deepcopy(self)
                d.namePattern = pattern
                d.compressed = self.compressed
                d.failureReason = line[line.find('ERROR') + 5:]
                self.subfiles.append(d)
            else:
                if pattern == self.namePattern:
                    d = copy.deepcopy(self)
                    d.namePattern = name
                    self.subfiles.append(d)
                    d.mass_line_processor(line)
        elif name == self.namePattern:
            if outputPath == 'ERROR':
                logger.error("Failed to upload file to mass storage")
                logger.error(line[line.find('ERROR') + 5:])
                self.failureReason = line[line.find('ERROR') + 5:]
                return
        self.locations = [outputPath.strip('\n')]

    def setLocation(self):
        """
        Sets the location of output files that were uploaded to mass storage from the WN
        """
        job = self.getJobObject()

        postprocessLocationsPath = os.path.join(
            job.outputdir,
            getConfig('Output')['PostProcessLocationsFileName'])
        if not os.path.exists(postprocessLocationsPath):
            return

        for line in open(postprocessLocationsPath, 'r'):

            if line.strip() == '':
                continue

            if line.startswith('massstorage'):
                self.mass_line_processor(line.strip())

    def location(self):
        """
        Return list with the locations of the post processed files (if they were configured to upload the output somewhere)
        """
        tmpLocations = []
        if self.subfiles:
            for i in self.subfiles:
                tmpLocations.append(i.locations)
        else:
            tmpLocations = self.locations
        return tmpLocations

    def internalCopyTo(self, targetPath):
        """
        Copy the file to local storage using the get mechanism
        Args:
            targetPath (str): Target path where the file is to copied to
        """
        to_location = targetPath

        cp_cmd = getConfig('Output')[_getName(self)]['uploadOptions']['cp_cmd']

        for location in self.locations:
            targetLocation = os.path.join(to_location,
                                          os.path.basename(location))
            self.execSyscmdSubprocess(
                '%s %s %s' % (cp_cmd, quote(location), quote(targetLocation)))

    def getWNScriptDownloadCommand(self, indent):
        ## FIXME fix me for the situation of multiple files?

        script = """\n

###INDENT###os.system(\'###CP_COMMAND###\')

"""
        cp_cmd = '%s %s .' % (getConfig('Output')[_getName(
            self)]['uploadOptions']['cp_cmd'], quote(self.locations[0]))

        replace_dict = {'###INDENT###': indent, '###CP_COMMAND###': cp_cmd}
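        # After substitution the injected line is roughly (illustrative only; the
        # actual cp_cmd comes from the [Output] uploadOptions configuration):
        #   <indent>os.system('<cp_cmd> <first stored location> .')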

        for k, v in replace_dict.iteritems():
            script = script.replace(str(k), str(v))

        return script

    def _mkdir(self, massStoragePath, exitIfNotExist=False):
        """
        Creates a folder on the mass Storage corresponding to the given path
        Args:
            massStoragePath (str): This is the path we want to make if it doesn't exist.
        """

        massStorageConfig = getConfig('Output')[_getName(
            self)]['uploadOptions']
        mkdir_cmd = massStorageConfig['mkdir_cmd']
        ls_cmd = massStorageConfig['ls_cmd']

        # create the last directory (if not exist) from the config path
        pathToDirName = os.path.dirname(massStoragePath)
        dirName = os.path.basename(massStoragePath)

        directoryExists = False

        (exitcode, mystdout, mystderr) = self.execSyscmdSubprocess(
            '%s %s' % (ls_cmd, quote(pathToDirName)))
        if exitcode != 0 and exitIfNotExist:
            self.handleUploadFailure(mystderr,
                                     '1) %s %s' % (ls_cmd, pathToDirName))
            raise GangaException(mystderr)

        for directory in mystdout.split('\n'):
            if directory.strip() == dirName:
                directoryExists = True
                break

        if not directoryExists:
            (exitcode, mystdout, mystderr) = self.execSyscmdSubprocess(
                '%s -p %s' % (mkdir_cmd, quote(massStoragePath)))
            if exitcode != 0:
                self.handleUploadFailure(
                    mystderr, '2) %s %s' % (mkdir_cmd, massStoragePath))
                raise GangaException(mystderr)

    def put(self):
        """
        Creates and executes commands for file upload to mass storage (Castor), this method will
        be called on the client
        """

        sourceDir = ''

        # if used as a stand alone object
        if self._getParent() is None:
            if self.localDir == '':
                _CWD = os.getcwd()
                if os.path.isfile(os.path.join(_CWD, self.namePattern)):
                    sourceDir = _CWD
                else:
                    logger.warning(
                        'localDir attribute is empty; cannot determine which directory to take the file from'
                    )
                    return
            else:
                sourceDir = self.localDir

                (result, message) = self.validate()

                if not result:
                    logger.warning(message)
                    return

        else:
            job = self.getJobObject()
            sourceDir = job.outputdir

            # if there are subjobs, the put method will be called on every subjob
            # and will upload the resulted output file
            if len(job.subjobs) > 0:
                return

        massStorageConfig = getConfig('Output')[_getName(
            self)]['uploadOptions']

        cp_cmd = massStorageConfig['cp_cmd']
        ls_cmd = massStorageConfig['ls_cmd']
        massStoragePath = massStorageConfig['path']

        try:
            self._mkdir(massStoragePath, exitIfNotExist=True)
        except GangaException:
            return

        # the folder part of self.outputfilenameformat
        folderStructure = ''
        # the file name part of self.outputfilenameformat
        filenameStructure = ''

        if not self.outputfilenameformat:
            filenameStructure = '{fname}'

            parent = self._getParent()
            if parent is not None:
                folderStructure = '{jid}'
                if parent._getParent() is not None:
                    folderStructure = os.path.join(folderStructure, '{sjid}')

        else:
            folderStructure = os.path.dirname(self.outputfilenameformat)
            filenameStructure = os.path.basename(self.outputfilenameformat)

        folderStructure = self.expandString(folderStructure)
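        # Illustrative expansion (placeholder values): for a subjob '42.3' with
        # outputfilenameformat '{jid}/{sjid}/{fname}', folderStructure becomes '42/3'
        # here, while '{fname}' is substituted per matched file further below.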

        # create the folder structure
        if folderStructure:
            massStoragePath = os.path.join(massStoragePath, folderStructure)
            try:
                self._mkdir(massStoragePath)
            except GangaException:
                return

        # here filenameStructure has replaced jid and sjid if any, and only not
        # replaced keyword is fname
        fileName = self.namePattern
        if self.compressed:
            fileName = '%s.gz' % self.namePattern

        if regex.search(fileName) is not None:
            for currentFile in glob.glob(os.path.join(sourceDir, fileName)):
                finalFilename = self.expandString(
                    filenameStructure, os.path.basename(currentFile))

                (exitcode, mystdout, mystderr) = self.execSyscmdSubprocess('%s %s %s' %\
                                                (cp_cmd, quote(currentFile), quote(os.path.join(massStoragePath, finalFilename))))

                d = copy.deepcopy(self)
                d.namePattern = os.path.basename(currentFile)
                d.localDir = os.path.dirname(currentFile)
                d.compressed = self.compressed

                if exitcode != 0:
                    self.handleUploadFailure(
                        mystderr, '4) %s %s %s' %
                        (cp_cmd, currentFile,
                         os.path.join(massStoragePath, finalFilename)))
                else:
                    logger.info(
                        '%s successfully uploaded to mass storage as %s' %
                        (currentFile,
                         os.path.join(massStoragePath, finalFilename)))
                    d.locations = os.path.join(massStoragePath,
                                               os.path.basename(finalFilename))

                self.subfiles.append(d)
        else:
            currentFile = os.path.join(sourceDir, fileName)
            finalFilename = self.expandString(filenameStructure, fileName)
            (exitcode, mystdout, mystderr) = self.execSyscmdSubprocess('%s %s %s' %\
                                                        (cp_cmd, quote(currentFile), quote(os.path.join(massStoragePath, finalFilename))))
            if exitcode != 0:
                self.handleUploadFailure(
                    mystderr, '5) %s %s %s' %
                    (cp_cmd, currentFile,
                     os.path.join(massStoragePath, finalFilename)))
            else:
                logger.info('%s successfully uploaded to mass storage as %s' %
                            (currentFile,
                             os.path.join(massStoragePath, finalFilename)))
                location = os.path.join(massStoragePath,
                                        os.path.basename(finalFilename))
                if location not in self.locations:
                    self.locations.append(location)

    def validate(self):

        # if the user has set outputfilenameformat, validate for presence of
        # jid, sjid and fname keywords depending on job type - split or
        # non-split
        if self.outputfilenameformat is not None:

            searchFor = ['{fname}']
            isJob = False
            isSplitJob = False

            if self._getParent() is not None:

                isJob = True

                if stripProxy(self.getJobObject()).master is not None:

                    isSplitJob = True
                    searchFor.append('{sjid}')

            missingKeywords = []

            for item in searchFor:
                if self.outputfilenameformat.find(item) == -1:
                    missingKeywords.append(item)

            if len(missingKeywords):
                return (
                    False,
                    'Error in %s.outputfilenameformat field : missing keywords %s '
                    % (_getName(self), ','.join(missingKeywords)))

            if isSplitJob == False and self.outputfilenameformat.find(
                    '{sjid}') > -1:
                return (
                    False,
                    'Error in %s.outputfilenameformat field :  job is non-split, but {\'sjid\'} keyword found'
                    % _getName(self))

            if isJob == False and self.outputfilenameformat.find(
                    '{sjid}') > -1:
                return (
                    False,
                    'Error in %s.outputfilenameformat field :  no parent job, but {\'sjid\'} keyword found'
                    % _getName(self))

            if isJob == False and self.outputfilenameformat.find('{jid}') > -1:
                return (
                    False,
                    'Error in %s.outputfilenameformat field :  no parent job, but {\'jid\'} keyword found'
                    % _getName(self))

            invalidUnixChars = ['"', ' ']
            test = self.outputfilenameformat.replace('{jid}', 'a').replace(
                '{sjid}', 'b').replace('{fname}', 'c')

            for invalidUnixChar in invalidUnixChars:
                if test.find(invalidUnixChar) > -1:
                    return (
                        False,
                        'Error in %s.outputfilenameformat field :  invalid char %s found'
                        % (_getName(self), invalidUnixChar))

        return (True, '')

    def handleUploadFailure(self, error, cmd_run_str=''):
        """
        Function to display what went wrong with an associated Job id if there is one and to assign failureReason for future.
        Args:
            error (str): This is the error which was given from the shell command
            cmd_run_str (str): This is a string related to but not always exactly the command run.
        """

        self.failureReason = error
        if self._getParent() is not None:
            logger.error(
                "Job %s failed. One of the job.outputfiles couldn't be uploaded because of %s"
                % (str(self._getParent().fqid), self.failureReason))
        else:
            logger.error("The file can't be uploaded because of %s" %
                         (self.failureReason))
        if cmd_run_str:
            logger.error("Attempted to run: '%s'" % (cmd_run_str))

    def getWNInjectedScript(self, outputFiles, indent, patternsToZip,
                            postProcessLocationsFP):
        """
        Returns script that have to be injected in the jobscript for postprocessing on the WN
        """
        massStorageCommands = []

        massStorageConfig = getConfig('Output')[_getName(
            self)]['uploadOptions']

        for outputFile in outputFiles:

            outputfilenameformat = 'None'
            if outputFile.outputfilenameformat is not None and outputFile.outputfilenameformat != '':
                outputfilenameformat = outputFile.outputfilenameformat

            massStorageCommands.append([
                'massstorage', outputFile.namePattern, outputfilenameformat,
                massStorageConfig['mkdir_cmd'], massStorageConfig['cp_cmd'],
                massStorageConfig['ls_cmd'], massStorageConfig['path']
            ])

        script_location = os.path.join(
            os.path.dirname(
                os.path.abspath(inspect.getfile(inspect.currentframe()))),
            'scripts/MassStorageFileWNScript.py.template')

        from Ganga.GPIDev.Lib.File import FileUtils
        script = FileUtils.loadScript(script_location, '###INDENT###')

        jobfqid = self.getJobObject().fqid

        jobid = jobfqid
        subjobid = ''

        if (jobfqid.find('.') > -1):
            jobid = jobfqid.split('.')[0]
            subjobid = jobfqid.split('.')[1]

        replace_dict = {
            '###MASSSTORAGECOMMANDS###': repr(massStorageCommands),
            '###PATTERNSTOZIP###': str(patternsToZip),
            '###INDENT###': indent,
            '###POSTPROCESSLOCATIONSFP###': postProcessLocationsFP,
            '###FULLJOBDIR###': str(jobfqid.replace('.', os.path.sep)),
            '###JOBDIR###': str(jobid),
            '###SUBJOBDIR###': str(subjobid)
        }

        for k, v in replace_dict.iteritems():
            script = script.replace(str(k), str(v))

        return script

    def processWildcardMatches(self):
        if self.subfiles:
            return self.subfiles

        if regex.search(self.namePattern):
            ls_cmd = getConfig('Output')[_getName(
                self)]['uploadOptions']['ls_cmd']
            exitcode, output, m = self.shell.cmd1(ls_cmd + ' ' +
                                                  self.inputremotedirectory,
                                                  capture_stderr=True)

            for filename in output.split('\n'):
                if fnmatch(filename, self.namePattern):
                    subfile = copy.deepcopy(self)
                    subfile.namePattern = filename
                    subfile.inputremotedirectory = self.inputremotedirectory

                    self.subfiles.append(subfile)

    def remove(self, force=False, removeLocal=False):
        """
        Removes file from remote storage ONLY by default
        """
        massStorageConfig = getConfig('Output')[_getName(
            self)]['uploadOptions']
        rm_cmd = massStorageConfig['rm_cmd']

        _auto_delete = bool(force)

        for i in list(self.locations):

            if not _auto_delete:

                keyin = None

                while keyin is None:
                    keyin = raw_input(
                        "Do you want to delete file %s at Location: %s ? [y/n] "
                        % (str(self.namePattern), str(i)))
                    if keyin.lower() == 'y':
                        _delete_this = True
                    elif keyin.lower() == 'n':
                        _delete_this = False
                    else:
                        logger.warning("y/n please!")
                        keyin = None
            else:
                _delete_this = True

            if _delete_this:
                logger.info("Deleting file at location: %s" % str(i))
                self.execSyscmdSubprocess('%s %s' % (rm_cmd, quote(i)))
                self.locations.remove(i)

        if removeLocal:

            sourceDir = ''
            if self.localDir == '':
                _CWD = os.getcwd()
                if os.path.isfile(os.path.join(_CWD, self.namePattern)):
                    sourceDir = _CWD
            else:
                sourceDir = self.localDir

            _localFile = os.path.join(sourceDir, self.namePattern)

            if os.path.isfile(_localFile):

                if force:
                    _actual_delete = True
                else:

                    keyin = None
                    while keyin is None:
                        keyin = raw_input(
                            "Do you want to remove the local File: %s ? ([y]/n) "
                            % str(_localFile))
                        if keyin.lower() in ['y', '']:
                            _actual_delete = True
                        elif keyin.lower() == 'n':
                            _actual_delete = False
                        else:
                            logger.warning("y/n please!")
                            keyin = None

                if _actual_delete:
                    remove_filename = _localFile + "_" + str(
                        time.time()) + '__to_be_deleted_'

                    try:
                        os.rename(_localFile, remove_filename)
                    except OSError as err:
                        logger.warning(
                            "Error in first stage of removing file: %s" %
                            remove_filename)
                        remove_filename = _localFile

                    try:
                        os.remove(remove_filename)
                    except OSError as err:
                        if err.errno != errno.ENOENT:
                            logger.error("Error in removing file: %s" %
                                         str(remove_filename))
                            raise
                        pass
        return

    def accessURL(self):

        # Need to come up with a prescription based upon the server address and
        # file on EOS or elsewhere to return a full URL which we can pass to
        # ROOT...

        protoPath = getConfig('Output')[_getName(self)]['defaultProtocol']
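        # Illustrative result (placeholder values): with defaultProtocol 'root://host'
        # and a stored location '/castor/cern.ch/user/x/file.root', the entry returned
        # is 'root://host/castor/cern.ch/user/x/file.root'.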

        myLocations = self.location()

        accessURLs = []

        for _file in myLocations:
            accessURLs.append(protoPath + os.path.join(os.sep, _file))

        return accessURLs