Example #1
    def __make_subjob__(self, mj, guids, names, sjob_evnts=-1, sites=None):
        
        """
        private method to create subjob object
        """
        
        logger.debug('generating subjob to run %d events in-total on files: %s' % (sjob_evnts, repr(guids)))
        j = Job()

        j.name            = mj.name
        j.inputdata       = mj.inputdata

        if j.inputdata.type in ['', 'DQ2']:
            j.inputdata.guids = guids

        j.inputdata.names = names

        j.outputdata    = mj.outputdata
        j.application   = mj.application
        if sjob_evnts != -1:
            j.application.max_events = sjob_evnts
        j.backend       = mj.backend
        
        if j.backend._name in ['LCG'] and j.backend.requirements._name == 'AtlasLCGRequirements':
            if sites:
                j.backend.requirements.sites = sites
        
        j.inputsandbox  = mj.inputsandbox
        j.outputsandbox = mj.outputsandbox

        return j
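
All of these snippets feed Ganga's splitter mechanism: a splitter attached to a master job returns the list of subjobs to submit. A minimal sketch of how that looks from the GPI, assuming an interactive Ganga session where the GPI names (Job, ArgSplitter) are already injected:

# Hedged sketch: assumes a running Ganga session; ArgSplitter is the
# stock argument-list splitter also used in Example #4 below.
j = Job()
j.splitter = ArgSplitter(args=[['a'], ['b'], ['c']])
j.submit()              # Ganga calls j.splitter.split(j) and submits subjobs
print(len(j.subjobs))   # one subjob per argument list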
Example #2
    def split(self, job):
        from Ganga.GPIDev.Lib.Job import Job
        logger.debug("AnaTaskSplitterJob split called")
        sjl = []
        transform = stripProxy(job.application.getTransform())
        transform.setAppStatus(job.application, "removed")
        # Do the splitting
        for sj in self.subjobs:
            j = Job()
            j.inputdata = transform.partitions_data[sj - 1]
            j.outputdata = job.outputdata
            j.application = job.application
            j.application.atlas_environment.append("OUTPUT_FILE_NUMBER=%i" %
                                                   sj)
            j.backend = job.backend
            if transform.partitions_sites:
                if hasattr(j.backend.requirements, 'sites'):
                    j.backend.requirements.sites = transform.partitions_sites[
                        sj - 1]
                else:
                    j.backend.site = transform.partitions_sites[sj - 1]

            j.inputsandbox = job.inputsandbox
            j.outputsandbox = job.outputsandbox
            sjl.append(j)
            # Task handling
            j.application.tasks_id = job.application.tasks_id
            j.application.id = transform.getNewAppID(sj)
            #transform.setAppStatus(j.application, "submitting")
        if not job.application.tasks_id.startswith("00"):
            job.application.tasks_id = "00:%s" % job.application.tasks_id
        return sjl
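
The site assignment above probes the backend with hasattr() because LCG-style backends carry a requirements.sites list while simpler backends expose a single backend.site value. The same dispatch pattern in isolation, using hypothetical stand-in classes:

class _LCGLikeRequirements:        # stand-in for AtlasLCGRequirements
    def __init__(self):
        self.sites = []

class _LCGBackend:                 # stand-in for the LCG backend
    def __init__(self):
        self.requirements = _LCGLikeRequirements()

class _BareRequirements:           # requirements object without 'sites'
    pass

class _SimpleBackend:              # stand-in for single-site backends
    def __init__(self):
        self.requirements = _BareRequirements()
        self.site = ''

def assign_site(backend, value):
    # Same probe as in split() above: prefer a site list, else a single site.
    if hasattr(backend.requirements, 'sites'):
        backend.requirements.sites = value
    else:
        backend.site = value

lcg, simple = _LCGBackend(), _SimpleBackend()
assign_site(lcg, ['CERN-PROD', 'RAL-LCG2'])
assign_site(simple, 'CERN-PROD')
assert lcg.requirements.sites == ['CERN-PROD', 'RAL-LCG2']
assert simple.site == 'CERN-PROD'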
Example #3
def create_gaudi_subjob(job, inputdata):
    # Job, copy_app and BesDataset are provided by the enclosing module
    # (this is a Gaudi/BOSS splitter helper).
    j = Job()
    j.name = job.name
    j.application = copy_app(job.application)
    j.backend = job.backend  # no need to deepcopy
    if inputdata:
        j.inputdata = inputdata
        if hasattr(j.application, 'extra'):
            j.application.extra.inputdata = j.inputdata
    else:
        j.inputdata = None
        if hasattr(j.application, 'extra'):
            j.application.extra.inputdata = BesDataset()
    j.outputsandbox = job.outputsandbox[:]
    j.outputdata = job.outputdata
    return j
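
The job.outputsandbox[:] slice in create_gaudi_subjob matters: plain assignment would alias the master job's list, so a mutation through one subjob would show up in all of them. A short demonstration of the difference:

master = ['stdout', 'summary.root']
alias = master       # shared list: both names see the same object
copied = master[:]   # independent shallow copy, as used above
alias.append('extra.log')
assert 'extra.log' in master        # the alias mutated the original
assert 'extra.log' not in copied    # the copy is unaffected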
Example #4
def test__setup_bulk_subjobs(tmpdir, db):
    # Relies on module-level names from the test file: pytest,
    # unittest.mock.patch, Job, Executable, ArgSplitter and the
    # script_template string; db is a backend fixture from conftest.
    from Ganga.Core.exceptions import BackendError
    from Ganga.GPIDev.Lib.Dataset.Dataset import Dataset
    from GangaDirac.Lib.Backends import Dirac

    name = str(tmpdir.join('submit_script'))
    with open(name, 'w') as fd:
        fd.write(
            script_template.replace('###PARAMETRIC_INPUTDATA###',
                                    str([['a'], ['b']])))

    with pytest.raises(BackendError):
        db._setup_bulk_subjobs([], name)

    d = Dirac()
    j = Job()
    j.id = 0  # This would normally be set by the registry if this was a proxy job
    j.application = Executable()
    j.splitter = ArgSplitter()
    j.splitter.args = [['a'], ['b'], ['c'], ['d'], ['e']]
    j.inputdata = Dataset()
    j.backend = d
    d._parent = j

    dirac_ids = [123, 456]

    def fake_setup_subjob_dataset(dataset):
        assert dataset in [['a'], ['b']], 'dataset not passed properly'

    with patch.object(d, '_setup_subjob_dataset', fake_setup_subjob_dataset):
        assert d._setup_bulk_subjobs(dirac_ids, name), "didn't run"

    assert len(j.subjobs) == len(dirac_ids), "wrong number of subjobs"
    for id_, backend_id, subjob in zip(range(len(dirac_ids)), dirac_ids,
                                       j.subjobs):
        assert id_ == subjob.id, "ids don't match"
        assert backend_id == subjob.backend.id, "backend ids don't match"
        assert isinstance(subjob.application,
                          j.application.__class__), "applications don't match"
        assert subjob.splitter is None, "splitter not removed"
        assert isinstance(subjob.backend,
                          j.backend.__class__), "backends don't match"
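
The test stubs out _setup_subjob_dataset with patch.object so the bulk-subjob machinery can run without touching real datasets, and the patch is undone when the with-block exits. The same pattern in isolation, with a hypothetical stand-in class:

from unittest.mock import patch

class _Backend:                    # hypothetical stand-in
    def _setup_subjob_dataset(self, dataset):
        raise RuntimeError('the real implementation should not run')

b = _Backend()
seen = []
with patch.object(b, '_setup_subjob_dataset', seen.append):
    b._setup_subjob_dataset(['a'])  # the stub records the call instead
assert seen == [['a']]              # the original method is restored here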
Example #5
    def split(self, job):
        from Ganga.GPIDev.Lib.Job import Job
        subjobs = []
        primeTables = job.inputdata.get_dataset()

        ## avoid creating jobs with nothing to do
        if self.numsubjobs > len(primeTables):
            self.numsubjobs = len(primeTables)

        ## create subjobs
        for i in range(self.numsubjobs):
            j = Job()
            j.application   = job.application
            j.inputdata     = job.inputdata 
            j.inputdata.table_id_lower = 1 
            j.inputdata.table_id_upper = 1
            j.outputdata    = job.outputdata 
            j.inputsandbox  = job.inputsandbox
            j.outputsandbox = job.outputsandbox
            j.backend       = job.backend
            subjobs.append(j)

        ## base chunksize of each subjob (integer division; the remainder
        ## is handed out one table at a time below)
        chunksize = len(primeTables) // self.numsubjobs

        offset = 0
        for i in range(len(subjobs)):
            my_chunksize = chunksize
            if len(primeTables) % self.numsubjobs >= i + 1:
                my_chunksize += 1

            ## set lower bound id (inclusive)
            subjobs[i].inputdata.table_id_lower = offset+1
            ## fill subjob with prime tables 
            #for j in range(my_chunksize):
            #    subjobs[i].application.addPrimeTable(primeTables[offset+j])
            offset += my_chunksize
            ## set upper bound id (inclusive)
            subjobs[i].inputdata.table_id_upper = offset

        return subjobs
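
The chunking arithmetic distributes len(primeTables) tables over numsubjobs subjobs, handing the remainder out one table at a time to the leading subjobs. The same rule as a standalone function with a quick check:

def chunk_bounds(n_items, n_subjobs):
    # Returns inclusive 1-based (lower, upper) id ranges per subjob; the
    # remainder n_items % n_subjobs is spread over the first subjobs.
    bounds, offset = [], 0
    for i in range(n_subjobs):
        size = n_items // n_subjobs
        if n_items % n_subjobs >= i + 1:
            size += 1
        bounds.append((offset + 1, offset + size))
        offset += size
    return bounds

assert chunk_bounds(10, 3) == [(1, 4), (5, 7), (8, 10)]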