def initializeFromDatasets(self, dataset_list):
    """For each dataset in dataset_list a unit is created. The output dataset names
    are set using the run numbers and tags of the input datasets, appended to the
    current t.analysis.outputdata.datasetname field."""
    if not isinstance(dataset_list, list):
        logger.error("dataset_list must be a python list: ['ds1','ds2',...]")
        return

    # check for primary transforms
    primary_tfs = []
    for tf in self.transforms:
        if len(tf.required_trfs) == 0:
            primary_tfs.append(tf)

    if len(primary_tfs) == 0:
        logger.error("No primary transforms specified. You need at least one before the Task can be initialised.")
        return

    unit_num = 0

    for dset in dataset_list:
        dset = dset.strip()
        try:
            if "*" in dset:
                logger.error("WARNING: Wildcards may include unexpected datasets in your processing! Please list your datasets before specifying them here!")
            try:
                if dset[-1] == "/":
                    tid_datasets = dq2.listDatasetsInContainer(dset)
                else:
                    tid_datasets = dq2.listDatasetsInContainer(dset + "/")
            except DQUnknownDatasetException:
                dslist = dq2.listDatasets(dset).keys()
                if len(dslist) == 0:
                    logger.error("Dataset %s not found!" % dset)
                    return
                tid_datasets = [ds for ds in dslist if "_tid" in ds and "_sub" not in ds]
                if len(tid_datasets) == 0:
                    if len(dslist) > 1:
                        logger.error("Found no tid dataset but multiple datasets match %s*!" % dset)
                        return
                    tid_datasets = [dslist[0]]
        except Exception as e:
            logger.error('DQ2 Error while listing dataset %s*! %s' % (dset, e))
            return

        logger.info("Found %i datasets matching %s..." % (len(tid_datasets), dset))

        if len(tid_datasets) == 0:
            logger.error("No tid datasets found from dataset list. Maybe the container '%s' is empty?" % dset)
            return

        prev_num = unit_num
        for tf in primary_tfs:
            unit_num = prev_num
            for ds in tid_datasets:
                tf.addUnit("Unit_%d" % unit_num, ds)
                unit_num += 1
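# --- Standalone sketch of the unit-numbering loop above (illustrative only) ---
# Each primary transform receives one unit per tid dataset, and the counter is
# reset to the same base value for every transform, so all primary transforms
# end up with identically numbered units. The toy transform class below is
# invented purely to demonstrate that numbering; it is not part of the Tasks API.
class _ToyTransform(object):
    def __init__(self):
        self.units = []

    def addUnit(self, name, dataset):
        self.units.append((name, dataset))

def _assign_units(primary_tfs, tid_datasets, unit_num=0):
    prev_num = unit_num
    for tf in primary_tfs:
        unit_num = prev_num
        for ds in tid_datasets:
            tf.addUnit("Unit_%d" % unit_num, ds)
            unit_num += 1
    return unit_num

tf_a, tf_b = _ToyTransform(), _ToyTransform()
_assign_units([tf_a, tf_b], ["ds1_tid001", "ds2_tid002"])
assert tf_a.units == tf_b.units == [("Unit_0", "ds1_tid001"), ("Unit_1", "ds2_tid002")]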
def checkCompletedApp(self, app):
    task = self._getParent()
    j = app._getParent()
    for odat in j.outputdata.outputdata:
        # Look out: if this is changed, there is another one like it below!
        if 0 == len([f for f in j.outputdata.output if ".".join(odat.split(".")[:-1]) in f]):
            logger.error("Job %s has not produced %s file, only: %s" % (j.id, odat, j.outputdata.output))
            return False

    # if this is the first app to complete the partition...
    if self.getPartitionStatus(self._app_partition[app.id]) != "completed":
        task_container, subtask_dsname = task.container_name, self.dataset_name

        infos = {}
        for oinfo in j.outputdata.output:
            try:
                dq2_lock.acquire()
                info = oinfo.split(",")
                # get master replica from dataset - info is not set to the SE but to ANALY_XYZ from panda
                master_replica = dq2.getMasterReplicaLocation(info[0])
                if master_replica:
                    info[5] = master_replica
                else:
                    replicas = dq2.listDatasetReplicas(info[0]).values()
                    if len(replicas) == 0:
                        try:
                            info[5] = getPandaClient().PandaSites[info[5]]["ddm"]
                        except KeyError:
                            pass
                    else:
                        complete, incomplete = replicas[0].values()
                        info[5] = (complete + incomplete)[0]
                if info[4][:3] == "ad:":
                    info[4] = info[4][3:]
            finally:
                dq2_lock.release()

            datasetname = subtask_dsname + '.' + info[5]
            info[0] = datasetname
            infos.setdefault(datasetname, []).append(",".join(info))

        for ds in infos.keys():
            outputdata = DQ2OutputDataset()
            try:
                outputdata.create_dataset(ds)
            except DQDatasetExistsException:
                pass
            try:
                outputdata.register_datasets_details(None, infos[ds])
            except DQFileExistsInDatasetException:
                pass

        # Register Container
        try:
            containerinfo = {}
            dq2_lock.acquire()
            try:
                containerinfo = dq2.listDatasets(task_container)
            except:
                containerinfo = {}

            if containerinfo == {}:
                try:
                    dq2.registerContainer(task_container)
                    logger.debug('Registered container for Task %i: %s' % (task.id, task_container))
                except Exception as x:
                    logger.error('Problem registering container for Task %i, %s : %s %s' % (task.id, task_container, x.__class__, x))

            for ds in infos.keys():
                try:
                    dq2.registerDatasetsInContainer(task_container, [ds])
                except DQContainerAlreadyHasDataset:
                    pass
                except Exception as x:
                    logger.error('Problem registering dataset %s in container %s: %s %s' % (subtask_dsname, task_container, x.__class__, x))
        finally:
            dq2_lock.release()

    return True
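# --- Illustration of the output-file check in checkCompletedApp() above ---
# An expected output name such as "myntuple.root" counts as produced when any
# entry of j.outputdata.output contains the name with its last extension
# stripped ("myntuple"). The helper and the sample output lines below are
# invented for demonstration and are not part of the module.
def _matches_expected_output(expected, produced_lines):
    stem = ".".join(expected.split(".")[:-1])
    return any(stem in line for line in produced_lines)

assert _matches_expected_output(
    "myntuple.root",
    ["some.dataset,guid-1234,myntuple.root.1,adler32,ad:token,ANALY_SITE"])
assert not _matches_expected_output(
    "histograms.root",
    ["some.dataset,guid-1234,myntuple.root.1,adler32,ad:token,ANALY_SITE"])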
def registerDataset(self): """Register in the transform container""" trf = self._getParent() trf_container = trf.getContainerName() fail = False try: containerinfo = {} dq2_lock.acquire() try: containerinfo = dq2.listDatasets(trf_container) except: containerinfo = {} if containerinfo == {}: try: dq2.registerContainer(trf_container) logger.info('Registered container for Unit %i of Transform %i: %s' % (self.getID(), trf.getID(), trf_container)) except Exception as x: logger.error('Problem registering container for Unit %i of Transform %i, %s : %s %s' % (self.getID(), trf.getID(), trf_container,x.__class__, x)) fail = True except DQException as x: logger.error('DQ2 Problem registering container for Unit %i of Transform %i, %s : %s %s' % (self.getID(), trf.getID(), trf_container,x.__class__, x)) fail = True job = GPI.jobs(self.active_job_ids[0]) ds_list = self.getOutputDatasetList() for ds in ds_list: try: dq2.registerDatasetsInContainer(trf_container, [ ds ] ) except DQContainerAlreadyHasDataset: pass except Exception as x: logger.error('Problem registering dataset %s in container %s: %s %s' %( job.outputdata.datasetname, trf_container, x.__class__, x)) fail = True except DQException as x: logger.error('DQ2 Problem registering dataset %s in container %s: %s %s' %( job.outputdata.datasetname, trf_container, x.__class__, x)) fail = True finally: dq2_lock.release() if fail: return not fail # add dataset to the task container task = trf._getParent() task_container = task.getContainerName() try: containerinfo = {} dq2_lock.acquire() try: containerinfo = dq2.listDatasets(task_container) except: containerinfo = {} if containerinfo == {}: try: dq2.registerContainer(task_container) logger.info('Registered container for Unit %i of Transform %i: %s' % (self.getID(), trf.getID(), task_container)) except Exception as x: logger.error('Problem registering container for Unit %i of Transform %i in Task %i, %s : %s %s' % (self.getID(), trf.getID(), task.getID(), task_container, x.__class__, x)) fail = True except DQException as x: logger.error('DQ2 Problem registering container for Unit %i of Transform %i in Task %i, %s : %s %s' % (self.getID(), trf.getID(), task.getID(), task_container, x.__class__, x)) fail = True ds_list = self.getOutputDatasetList() for ds in ds_list: try: dq2.registerDatasetsInContainer(task_container, [ ds ] ) except DQContainerAlreadyHasDataset: pass except Exception as x: logger.error('Problem registering dataset %s in container %s: %s %s' %( job.outputdata.datasetname, task_container, x.__class__, x)) fail = True except DQException as x: logger.error('DQ2 Problem registering dataset %s in container %s: %s %s' %( job.outputdata.datasetname, task_container, x.__class__, x)) fail = True finally: dq2_lock.release() return not fail
def unregisterDataset(self): """Register in the transform container""" trf = self._getParent() trf_container = trf.getContainerName() fail = False try: containerinfo = {} dq2_lock.acquire() try: containerinfo = dq2.listDatasets(trf_container) except: containerinfo = {} if containerinfo != {}: job = GPI.jobs(self.active_job_ids[0]) ds_list = self.getOutputDatasetList() for ds in ds_list: try: dq2.deleteDatasetsFromContainer(trf_container, [ ds ] ) except DQContainerDoesNotHaveDataset: pass except Exception as x: logger.error('Problem removing dataset %s from container %s: %s %s' %( j.outputdata.datasetname, trf_container, x.__class__, x)) fail = True except DQException as x: logger.error('DQ2 Problem removing dataset %s from container %s: %s %s' %( j.outputdata.datasetname, trf_container, x.__class__, x)) fail = True finally: dq2_lock.release() if fail: return not fail # add dataset to the task container task = trf._getParent() task_container = task.getContainerName() try: containerinfo = {} dq2_lock.acquire() try: containerinfo = dq2.listDatasets(task_container) except: containerinfo = {} if containerinfo != {}: job = GPI.jobs(self.active_job_ids[0]) ds_list = self.getOutputDatasetList() for ds in ds_list: try: dq2.deleteDatasetsFromContainer(task_container, [ ds ] ) except DQContainerDoesNotHaveDataset: pass except Exception as x: logger.error('Problem removing dataset %s from container %s: %s %s' %( j.outputdata.datasetname, task_container, x.__class__, x)) fail = True except DQException as x: logger.error('DQ2 Problem removing dataset %s from container %s: %s %s' %( j.outputdata.datasetname, task_container, x.__class__, x)) fail = True finally: dq2_lock.release() return not fail
def registerDataset(self): """Register in the transform container""" trf = self._getParent() trf_container = trf.getContainerName() fail = False try: containerinfo = {} dq2_lock.acquire() try: containerinfo = dq2.listDatasets(trf_container) except: containerinfo = {} if containerinfo == {}: try: dq2.registerContainer(trf_container) logger.info('Registered container for Unit %i of Transform %i: %s' % (self.getID(), trf.getID(), trf_container)) except Exception as x: logger.error('Problem registering container for Unit %i of Transform %i, %s : %s %s' % (self.getID(), trf.getID(), trf_container,x.__class__, x)) fail = True except DQException as x: logger.error('DQ2 Problem registering container for Unit %i of Transform %i, %s : %s %s' % (self.getID(), trf.getID(), trf_container,x.__class__, x)) fail = True job = GPI.jobs(self.active_job_ids[0]) ds_list = self.getOutputDatasetList() for ds in ds_list: try: dq2.registerDatasetsInContainer(trf_container, [ ds ] ) except DQContainerAlreadyHasDataset: pass except Exception as x: logger.error('Problem registering dataset %s in container %s: %s %s' %( job.outputdata.datasetname, trf_container, x.__class__, x)) fail = True except DQException as x: logger.error('DQ2 Problem registering dataset %s in container %s: %s %s' %( job.outputdata.datasetname, trf_container, x.__class__, x)) fail = True finally: dq2_lock.release() if fail: return not fail # add dataset to the task container task = trf._getParent() task_container = task.getContainerName() try: containerinfo = {} dq2_lock.acquire() try: containerinfo = dq2.listDatasets(task_container) except: containerinfo = {} if containerinfo == {}: try: dq2.registerContainer(task_container) logger.info('Registered container for Unit %i of Transform %i: %s' % (self.getID(), trf.getID(), task_container)) except Exception as x: logger.error('Problem registering container for Unit %i of Transform %i in Task %i, %s : %s %s' % (self.getID(), trf.getID(), task.getID(), task_container, x.__class__, x)) fail = True except DQException as x: logger.error('DQ2 Problem registering container for Unit %i of Transform %i in Task %i, %s : %s %s' % (self.getID(), trf.getID(), task.getID(), task_container, x.__class__, x)) fail = True ds_list = self.getOutputDatasetList() for ds in ds_list: try: dq2.registerDatasetsInContainer(task_container, [ ds ] ) except DQContainerAlreadyHasDataset: pass except Exception as x: logger.error('Problem registering dataset %s in container %s: %s %s' %( job.outputdata.datasetname, task_container, x.__class__, x)) fail = True except DQException as x: logger.error('DQ2 Problem registering dataset %s in container %s: %s %s' %( job.outputdata.datasetname, task_container, x.__class__, x)) fail = True finally: dq2_lock.release() return not fail
def unregisterDataset(self): """Register in the transform container""" trf = self._getParent() trf_container = trf.getContainerName() fail = False try: containerinfo = {} dq2_lock.acquire() try: containerinfo = dq2.listDatasets(trf_container) except: containerinfo = {} if containerinfo != {}: job = GPI.jobs(self.active_job_ids[0]) ds_list = self.getOutputDatasetList() for ds in ds_list: try: dq2.deleteDatasetsFromContainer(trf_container, [ ds ] ) except DQContainerDoesNotHaveDataset: pass except Exception as x: logger.error('Problem removing dataset %s from container %s: %s %s' %( j.outputdata.datasetname, trf_container, x.__class__, x)) fail = True except DQException as x: logger.error('DQ2 Problem removing dataset %s from container %s: %s %s' %( j.outputdata.datasetname, trf_container, x.__class__, x)) fail = True finally: dq2_lock.release() if fail: return not fail # add dataset to the task container task = trf._getParent() task_container = task.getContainerName() try: containerinfo = {} dq2_lock.acquire() try: containerinfo = dq2.listDatasets(task_container) except: containerinfo = {} if containerinfo != {}: job = GPI.jobs(self.active_job_ids[0]) ds_list = self.getOutputDatasetList() for ds in ds_list: try: dq2.deleteDatasetsFromContainer(task_container, [ ds ] ) except DQContainerDoesNotHaveDataset: pass except Exception as x: logger.error('Problem removing dataset %s from container %s: %s %s' %( j.outputdata.datasetname, task_container, x.__class__, x)) fail = True except DQException as x: logger.error('DQ2 Problem removing dataset %s from container %s: %s %s' %( j.outputdata.datasetname, task_container, x.__class__, x)) fail = True finally: dq2_lock.release() return not fail