def addUnit(self, outname, dsname, template=None):
    """Create a new unit based on this ds and output"""
    unit = AtlasUnit()
    if not template:
        unit.inputdata = DQ2Dataset()
    else:
        unit.inputdata = stripProxy(template)
    unit.inputdata.dataset = dsname
    unit.name = outname
    self.addUnitToTRF(unit)
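
# Illustrative usage sketch (not part of this module): how addUnit() might be
# called from the Ganga GPI once a task and transform exist. The task setup,
# dataset names and unit names below are hypothetical examples.
#
#   t = AtlasTask()
#   trf = AtlasTransform()
#   t.appendTransform(trf)
#
#   # default template: each call creates a unit with a fresh DQ2Dataset
#   trf.addUnit("Unit_periodA", "data12_8TeV.periodA.physics_Muons.merge.AOD/")
#
#   # or pass a pre-configured dataset whose settings are copied via stripProxy()
#   ds = DQ2Dataset()
#   trf.addUnit("Unit_periodB", "data12_8TeV.periodB.physics_Muons.merge.AOD/", template=ds)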

def createChainUnit(self, parent_units, use_copy_output=True):
    """Create an output unit given this output data"""

    # we need valid parent jobs
    for parent in parent_units:

        # need datasetname filled for Panda jobs
        if len(parent.active_job_ids) == 0 or \
                (GPI.jobs(parent.active_job_ids[0]).application._impl._name != "TagPrepare" and
                 GPI.jobs(parent.active_job_ids[0]).outputdata and
                 GPI.jobs(parent.active_job_ids[0]).backend._impl._name == "Panda" and
                 GPI.jobs(parent.active_job_ids[0]).outputdata.datasetname == ""):
            return None

        # need datasetList filled for Jedi jobs
        if len(parent.active_job_ids) == 0 or \
                (GPI.jobs(parent.active_job_ids[0]).application._impl._name != "TagPrepare" and
                 GPI.jobs(parent.active_job_ids[0]).outputdata and
                 GPI.jobs(parent.active_job_ids[0]).backend._impl._name == "Jedi" and
                 len(GPI.jobs(parent.active_job_ids[0]).outputdata.datasetList) == 0):
            return None

        # for local jobs, make sure units are complete
        if GPI.jobs(parent_units[0].active_job_ids[0]).outputdata._impl._name == "ATLASOutputDataset" and \
                parent.status != "completed":
            return None

    # Are we doing Local -> Local? i.e. are we going from ATLASOutputDataset?
    # Problem: Doesn't take into account merger locations...
    if GPI.jobs(parent_units[0].active_job_ids[0]).outputdata._impl._name == "ATLASOutputDataset":
        unit = AtlasUnit()
        unit.inputdata = ATLASLocalDataset()

        for parent in parent_units:
            for l in GPI.jobs(parent.active_job_ids[0]).outputdata.output:
                unit.inputdata.names += l

    # should we use the copy_output (ie. local output). Special case for TagPrepare
    elif GPI.jobs(parent_units[0].active_job_ids[0]).application._impl._name == "TagPrepare":

        # make sure all have completed before taking the tag-info
        if parent_units[0].status != "completed":
            return None

        unit = AtlasUnit()
        unit.inputdata = DQ2Dataset()
        unit.inputdata.tag_info = GPI.jobs(parent_units[0].active_job_ids[0]).application.tag_info

    elif not use_copy_output or not parent.copy_output:
        unit = AtlasUnit()
        unit.inputdata = DQ2Dataset()

        for parent in parent_units:
            # Don't just use the main datasetname as Jedi introduces separate
            # containers for logs and output files
            if GPI.jobs(parent.active_job_ids[0]).backend._impl._name == "Jedi":
                for ds in GPI.jobs(parent.active_job_ids[0]).outputdata.datasetList:
                    if not ds.endswith(".log/"):
                        unit.inputdata.dataset.append(ds)
            else:
                unit.inputdata.dataset.append(
                    GPI.jobs(parent.active_job_ids[0]).outputdata.datasetname)

    else:
        unit = AtlasUnit()
        unit.inputdata = ATLASLocalDataset()

        for parent in parent_units:
            # unit needs to have completed and downloaded
            if parent.status != "completed":
                return None

            # we should be OK so copy all output to an ATLASLocalDataset
            for f in parent.copy_output.files:
                unit.inputdata.names.append(
                    os.path.join(parent.copy_output.local_location, f))

    return unit
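
# Simplified sketch of the branching above (illustrative only; the helper name
# and string arguments are hypothetical, chosen to make the decision order
# explicit rather than to match any Ganga API):
#
#   def choose_chain_input(output_type, application, use_copy_output, has_copy_output):
#       if output_type == "ATLASOutputDataset":
#           return "ATLASLocalDataset"   # Local -> Local chaining on the parent's output files
#       if application == "TagPrepare":
#           return "DQ2Dataset"          # carry the parent's tag_info across
#       if not use_copy_output or not has_copy_output:
#           return "DQ2Dataset"          # chain on the grid containers (Jedi ".log/" skipped)
#       return "ATLASLocalDataset"       # chain on the completed, downloaded copy_output files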

def createUnits(self):
    """Create new units if required given the inputdata"""

    # call parent for chaining
    super(AtlasTransform, self).createUnits()

    # if there is no input data, just create a single unit given the application
    if len(self.inputdata) == 0 and len(self.units) == 0:
        unit = AtlasUnit()
        unit.name = "Unit %d" % len(self.units)
        self.addUnitToTRF(unit)

    # loop over input data and see if we need to create any more units
    for inds in self.inputdata:

        ok = True

        if inds._name == "DQ2Dataset":

            # check if this data is being run over
            ok = False
            for unit in self.units:
                if unit.inputdata.dataset == inds.dataset:
                    ok = True

            if not ok:
                # new unit required for this dataset
                unit = AtlasUnit()
                unit.name = "Unit %d" % len(self.units)
                self.addUnitToTRF(unit)
                unit.inputdata = inds

        elif inds._name == "ATLASLocalDataset":

            # different behaviour depending on files_per_unit
            if self.files_per_unit < 0:

                # check if this data is being run over
                ok = False
                for unit in self.units:
                    if set(unit.inputdata.names) == set(inds.names):
                        ok = True

                if not ok:
                    # new unit required for this dataset
                    unit = AtlasUnit()
                    unit.name = "Unit %d" % len(self.units)
                    self.addUnitToTRF(unit)
                    unit.inputdata = inds

            else:

                # check whether these files are already covered by existing units
                ok = False
                curr_data = []
                for unit in self.units:
                    curr_data.extend(unit.inputdata.names)

                if set(inds.names).issubset(set(curr_data)):
                    ok = True

                if not ok:
                    # new unit(s) required for this dataset, files_per_unit files each
                    num = 0
                    while num < len(inds.names):
                        unit = AtlasUnit()
                        unit.name = "Unit %d" % len(self.units)
                        self.addUnitToTRF(unit)
                        unit.inputdata = inds.clone()
                        unit.inputdata.names = inds.names[num:num + self.files_per_unit]
                        num += self.files_per_unit
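
# Illustrative sketch of the files_per_unit splitting above (not part of this
# module; the file names are hypothetical): an ATLASLocalDataset with 7 files
# and files_per_unit = 3 yields three units, mirroring the while-loop slicing.
#
#   names = ["f1.root", "f2.root", "f3.root", "f4.root", "f5.root", "f6.root", "f7.root"]
#   files_per_unit = 3
#   chunks = [names[i:i + files_per_unit] for i in range(0, len(names), files_per_unit)]
#   # -> [['f1.root', 'f2.root', 'f3.root'],
#   #     ['f4.root', 'f5.root', 'f6.root'],
#   #     ['f7.root']]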