def __init__(self, config, datasetExpr, datasetNick = None):
    """Preset the scanner options on *config* and initialise the base provider.

    The scanner list starts with 'OutputDirsFromWork' when *datasetExpr* is
    an existing directory, and with 'OutputDirsFromConfig' followed by
    'MetadataFromTask' otherwise. The same fixed tail of scanner names is
    appended in both cases.

    config      -- configuration object; written via ``set`` and read via
                   ``getBool`` (project type, not visible in this chunk)
    datasetExpr -- dataset expression; tested with ``os.path.isdir``
    datasetNick -- forwarded unchanged to ``GCProvider.__init__``
    """
    if os.path.isdir(datasetExpr):
        scanner_names = ['OutputDirsFromWork']
    else:
        scanner_names = ['OutputDirsFromConfig', 'MetadataFromTask']
    scanner_names += [
        'JobInfoFromOutputDir',
        'ObjectsFromCMSSW',
        'FilesFromJobInfo',
        'MetadataFromCMSSW',
        'ParentLookup',
        'SEListFromPath',
        'LFNFromPath',
        'DetermineEvents',
        'FilterEDMFiles',
    ]

    # All options are written before the base class constructor runs.
    preset_options = (
        ('scanner', ' '.join(scanner_names)),
        ('include config infos', 'True'),
        ('parent keys', 'CMSSW_PARENT_LFN CMSSW_PARENT_PFN'),
        ('events key', 'CMSSW_EVENTS_WRITE'),
    )
    for option, value in preset_options:
        config.set(option, value)

    GCProvider.__init__(self, config, datasetExpr, datasetNick)
    # Checked by the dataset naming method to delegate to the base class.
    self._discovery = config.getBool('discovery', False)
def __init__(self, config, datasetExpr, datasetNick = None, datasetID = 0):
    """Preset the scanner options on *config* and initialise the base provider.

    The head of the scanner list depends on whether *datasetExpr* is an
    existing directory ('OutputDirsFromWork') or not ('OutputDirsFromConfig'
    followed by 'MetadataFromTask'). The remaining scanner names are fixed.

    config      -- configuration object; written via ``set`` and read via
                   ``getBool`` (project type, not visible in this chunk)
    datasetExpr -- dataset expression; tested with ``os.path.isdir``
    datasetNick -- forwarded unchanged to ``GCProvider.__init__``
    datasetID   -- forwarded unchanged to ``GCProvider.__init__``
    """
    common_scanners = [
        'JobInfoFromOutputDir',
        'ObjectsFromCMSSW',
        'FilesFromJobInfo',
        'MetadataFromCMSSW',
        'ParentLookup',
        'SEListFromPath',
        'LFNFromPath',
        'DetermineEvents',
        'FilterEDMFiles',
    ]
    if os.path.isdir(datasetExpr):
        source_scanners = ['OutputDirsFromWork']
    else:
        source_scanners = ['OutputDirsFromConfig', 'MetadataFromTask']
    scanner_option = ' '.join(source_scanners + common_scanners)

    # All options are written before the base class constructor runs.
    config.set('scanner', scanner_option)
    config.set('include config infos', 'True')
    config.set('parent keys', 'CMSSW_PARENT_LFN CMSSW_PARENT_PFN')
    config.set('events key', 'CMSSW_EVENTS_WRITE')

    GCProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
    # Checked by the dataset naming method to delegate to the base class.
    self._discovery = config.getBool('discovery', False)
def _generateDatasetName(self, key, data):
    """Build the dataset name ``/primary/processed/tier`` for a dataset.

    In discovery mode the base class implementation is used unchanged.
    Otherwise the three path components are assembled from, in order of
    precedence: the user supplied name (``self._ds_name``), the entries of
    ``data['PARENT_PATH']`` and finally generated defaults
    (``'Dataset_<key>'`` and ``data['CMSSW_DATATIER']``).

    key  -- value interpolated into the fallback processed name
    data -- metadata mapping; must contain 'CMSSW_DATATIER', may contain
            'PARENT_PATH' (iterable of parent dataset paths)

    Returns the result of ``utils.replaceDict(rawDS, data)``.
    Raises DatasetError if 'CMSSW_DATATIER' is missing or if any of the
    three components could not be determined.
    """
    if self._discovery:
        return GCProvider._generateDatasetName(self, key, data)
    if 'CMSSW_DATATIER' not in data:
        raise DatasetError('Incompatible data tiers in dataset: %s' % data)

    def getPathComponents(path):
        # The split must only be evaluated for a non-empty path. Passing it
        # as an argument to a selector function evaluates ``path.strip``
        # before the selection happens and fails for ``None``.
        if path:
            return tuple(path.strip('/').split('/'))
        return ()

    userPath = getPathComponents(self._ds_name)

    (primary, processed, tier) = (None, None, None)
    # In case of a child dataset, use the parent infos to construct new path
    for parent in data.get('PARENT_PATH', []):
        if len(userPath) == 3:
            (primary, processed, tier) = userPath
        else:
            try:
                (primary, processed, tier) = getPathComponents(parent)
            except Exception:
                # Best effort: a parent that does not yield exactly three
                # components is skipped and earlier values are kept.
                clear_current_exception()
    # Without a usable parent the first user component is the primary name
    if (primary is None) and (len(userPath) > 0):
        primary = userPath[0]
        userPath = userPath[1:]

    # Remaining user components override processed name / tier
    if len(userPath) == 2:
        (processed, tier) = userPath
    elif len(userPath) == 1:
        (processed, tier) = (userPath[0], data['CMSSW_DATATIER'])
    elif len(userPath) == 0:
        (processed, tier) = ('Dataset_%s' % key, data['CMSSW_DATATIER'])

    rawDS = '/%s/%s/%s' % (primary, processed, tier)
    if None in (primary, processed, tier):
        raise DatasetError('Invalid dataset name supplied: %r\nresulting in %s' % (self._ds_name, rawDS))
    return utils.replaceDict(rawDS, data)
def generateDatasetName(self, key, data):
    """Build the dataset name ``/primary/processed/tier`` for a dataset.

    In discovery mode the base class implementation is used unchanged.
    Otherwise the three path components are assembled from, in order of
    precedence: the user supplied name (``self.nameDS``), the entries of
    ``data['PARENT_PATH']`` and finally generated defaults
    (``'Dataset_<key>'`` and ``data['CMSSW_DATATIER']``).

    key  -- value interpolated into the fallback processed name
    data -- metadata mapping; must contain 'CMSSW_DATATIER', may contain
            'PARENT_PATH' (iterable of parent dataset paths)

    Returns the result of ``utils.replaceDict(rawDS, data)``.
    Raises DatasetError if 'CMSSW_DATATIER' is missing or if any of the
    three components could not be determined.
    """
    if self._discovery:
        return GCProvider.generateDatasetName(self, key, data)
    if 'CMSSW_DATATIER' not in data:
        raise DatasetError('Incompatible data tiers in dataset: %s' % data)

    def getPathComponents(path):
        # The split must only be evaluated for a non-empty path. Passing it
        # as an argument to a selector function evaluates ``path.strip``
        # before the selection happens and fails for ``None``.
        if path:
            return tuple(path.strip('/').split('/'))
        return ()

    userPath = getPathComponents(self.nameDS)

    (primary, processed, tier) = (None, None, None)
    # In case of a child dataset, use the parent infos to construct new path
    for parent in data.get('PARENT_PATH', []):
        if len(userPath) == 3:
            (primary, processed, tier) = userPath
        else:
            try:
                (primary, processed, tier) = getPathComponents(parent)
            except Exception:
                # Deliberate best effort: a parent that does not yield
                # exactly three components is skipped and the values
                # found so far are kept.
                pass
    # Without a usable parent the first user component is the primary name
    if (primary is None) and (len(userPath) > 0):
        primary = userPath[0]
        userPath = userPath[1:]

    # Remaining user components override processed name / tier
    if len(userPath) == 2:
        (processed, tier) = userPath
    elif len(userPath) == 1:
        (processed, tier) = (userPath[0], data['CMSSW_DATATIER'])
    elif len(userPath) == 0:
        (processed, tier) = ('Dataset_%s' % key, data['CMSSW_DATATIER'])

    rawDS = '/%s/%s/%s' % (primary, processed, tier)
    if None in (primary, processed, tier):
        raise DatasetError('Invalid dataset name supplied: %r\nresulting in %s' % (self.nameDS, rawDS))
    return utils.replaceDict(rawDS, data)
def _get_dataset_name(self, metadata_dict, hash_dataset):
    """Assemble the ``/primary/processed/tier`` name for a dataset.

    Discovery mode defers to the base class. Otherwise the components come
    from ``self._dataset_pattern``, from the 'PARENT_PATH' entries of
    *metadata_dict* and from defaults ('Dataset_<hash_dataset>' and the
    'CMSSW_DATATIER' entry).

    Returns ``replace_with_dict(name, metadata_dict)``.
    Raises DatasetError when 'CMSSW_DATATIER' is missing or a component
    stays undetermined.
    """
    if self._discovery:
        return GCProvider._get_dataset_name(self, metadata_dict, hash_dataset)
    if 'CMSSW_DATATIER' not in metadata_dict:
        raise DatasetError('Incompatible data tiers in dataset: %s' % repr(metadata_dict))

    def _split_path(path):
        # Empty / missing paths have no components
        return path.strip('/').split('/') if path else []

    pattern_parts = tuple(_split_path(self._dataset_pattern))
    pattern_is_complete = len(pattern_parts) == 3

    primary = processed = tier = None
    # Child datasets inherit their path components from the parent paths
    for parent_path in metadata_dict.get('PARENT_PATH', []):
        if pattern_is_complete:
            primary, processed, tier = pattern_parts
            continue
        try:
            primary, processed, tier = tuple(_split_path(parent_path))
        except Exception:
            # Parent path unusable - keep whatever was found so far
            clear_current_exception()

    # No parent supplied a primary name: consume the first pattern part
    if primary is None and pattern_parts:
        primary, pattern_parts = pattern_parts[0], pattern_parts[1:]

    parts_left = len(pattern_parts)
    if parts_left == 0:
        processed = 'Dataset_%s' % hash_dataset
        tier = metadata_dict['CMSSW_DATATIER']
    elif parts_left == 1:
        processed = pattern_parts[0]
        tier = metadata_dict['CMSSW_DATATIER']
    elif parts_left == 2:
        processed, tier = pattern_parts

    raw_name = '/%s/%s/%s' % (primary, processed, tier)
    if primary is None or processed is None or tier is None:
        message = 'Invalid dataset name supplied: %r\nresulting in %s' % (
            self._dataset_pattern, raw_name)
        raise DatasetError(message)
    return replace_with_dict(raw_name, metadata_dict)
def _get_dataset_name(self, metadata_dict, hash_dataset):
    """Return the dataset name derived from pattern, parents and defaults.

    When ``self._discovery`` is set, the base class result is returned.
    Otherwise a ``/primary/processed/tier`` string is built from
    ``self._dataset_pattern``, the 'PARENT_PATH' entries of *metadata_dict*
    and the fallbacks 'Dataset_<hash_dataset>' / 'CMSSW_DATATIER', and then
    passed through ``replace_with_dict`` together with *metadata_dict*.

    Raises DatasetError when 'CMSSW_DATATIER' is missing from
    *metadata_dict* or when one of the three components remains unset.
    """
    if self._discovery:
        return GCProvider._get_dataset_name(self, metadata_dict, hash_dataset)
    if 'CMSSW_DATATIER' not in metadata_dict:
        raise DatasetError('Incompatible data tiers in dataset: %s' % repr(metadata_dict))

    def _components(path):
        if not path:
            return []
        return path.strip('/').split('/')

    user_parts = tuple(_components(self._dataset_pattern))
    user_gave_full_path = (len(user_parts) == 3)

    primary, processed, tier = None, None, None
    # For a child dataset the parent paths provide the name components
    for parent in metadata_dict.get('PARENT_PATH', []):
        if not user_gave_full_path:
            try:
                primary, processed, tier = tuple(_components(parent))
            except Exception:
                # Ignore parents that do not split into three components
                clear_current_exception()
        else:
            primary, processed, tier = user_parts

    # Fall back to the leading user component for the primary name
    if (primary is None) and user_parts:
        primary = user_parts[0]
        user_parts = user_parts[1:]

    remaining = len(user_parts)
    if remaining == 2:
        processed, tier = user_parts
    elif remaining == 1:
        processed, tier = user_parts[0], metadata_dict['CMSSW_DATATIER']
    elif remaining == 0:
        processed, tier = 'Dataset_%s' % hash_dataset, metadata_dict['CMSSW_DATATIER']

    candidate = '/%s/%s/%s' % (primary, processed, tier)
    if None in (primary, processed, tier):
        raise DatasetError(
            'Invalid dataset name supplied: %r\nresulting in %s'
            % (self._dataset_pattern, candidate))
    return replace_with_dict(candidate, metadata_dict)