示例#1
0
	def __init__(self, config, datasetExpr, datasetNick = None):
		"""Preset the scanner related options on config and initialise GCProvider.

		The 'scanner' option is written as a space separated list. The head of
		that list depends on whether datasetExpr is an existing directory.
		"""
		if os.path.isdir(datasetExpr):
			scanner_list = ['OutputDirsFromWork']
		else:
			scanner_list = ['OutputDirsFromConfig', 'MetadataFromTask']
		scanner_list += ['JobInfoFromOutputDir', 'ObjectsFromCMSSW', 'FilesFromJobInfo',
			'MetadataFromCMSSW', 'ParentLookup', 'SEListFromPath', 'LFNFromPath',
			'DetermineEvents', 'FilterEDMFiles']
		# The options are written in a fixed order before the base class reads config
		preset_list = [
			('scanner', ' '.join(scanner_list)),
			('include config infos', 'True'),
			('parent keys', 'CMSSW_PARENT_LFN CMSSW_PARENT_PFN'),
			('events key', 'CMSSW_EVENTS_WRITE'),
		]
		for (option, value) in preset_list:
			config.set(option, value)
		GCProvider.__init__(self, config, datasetExpr, datasetNick)
		# Read after the base class setup; defaults to False when unset
		self._discovery = config.getBool('discovery', False)
示例#2
0
	def __init__(self, config, datasetExpr, datasetNick = None, datasetID = 0):
		"""Write the scanner setup into config, then run GCProvider.__init__.

		An existing directory given as datasetExpr selects 'OutputDirsFromWork'
		as first scanner; anything else selects 'OutputDirsFromConfig' followed
		by 'MetadataFromTask'. The remaining scanners are always the same.
		"""
		common_scanners = ['JobInfoFromOutputDir', 'ObjectsFromCMSSW', 'FilesFromJobInfo',
			'MetadataFromCMSSW', 'ParentLookup', 'SEListFromPath', 'LFNFromPath',
			'DetermineEvents', 'FilterEDMFiles']
		first_scanners = ['OutputDirsFromConfig', 'MetadataFromTask']
		if os.path.isdir(datasetExpr):
			first_scanners = ['OutputDirsFromWork']
		config.set('scanner', ' '.join(first_scanners + common_scanners))
		config.set('include config infos', 'True')
		config.set('parent keys', 'CMSSW_PARENT_LFN CMSSW_PARENT_PFN')
		config.set('events key', 'CMSSW_EVENTS_WRITE')
		GCProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
		# 'discovery' is optional and treated as False when it is not set
		self._discovery = config.getBool('discovery', False)
示例#3
0
	def _generateDatasetName(self, key, data):
		"""Build a '/primary/processed/tier' dataset name from the given metadata.

		With discovery enabled, the naming is delegated to GCProvider. Otherwise
		the three components are taken from the user supplied name
		(self._ds_name), from the entries of data['PARENT_PATH'] and from
		data['CMSSW_DATATIER']; 'Dataset_<key>' is used as processed part when
		the user supplied nothing for it.

		Raises DatasetError if 'CMSSW_DATATIER' is missing from data or if one
		of the three components could not be determined.
		"""
		if self._discovery:
			return GCProvider._generateDatasetName(self, key, data)
		if 'CMSSW_DATATIER' not in data:
			raise DatasetError('Incompatible data tiers in dataset: %s' % data)

		def getPathComponents(path):
			# Call arguments are evaluated eagerly, so a ternary helper such as
			# utils.QM cannot guard path.strip() - a missing (None) path has to be
			# checked before the string methods are used.
			if path:
				return tuple(path.strip('/').split('/'))
			return ()
		userPath = getPathComponents(self._ds_name)

		(primary, processed, tier) = (None, None, None)
		# In case of a child dataset, use the parent infos to construct new path
		for parent in data.get('PARENT_PATH', []):
			if len(userPath) == 3:
				# A complete user supplied name takes precedence over the parent
				(primary, processed, tier) = userPath
			else:
				try:
					(primary, processed, tier) = getPathComponents(parent)
				except Exception:
					# Best effort: a parent that does not split into three parts is skipped
					clear_current_exception()
		if (primary is None) and (len(userPath) > 0):
			# No parent provided the primary part - consume it from the user supplied name
			primary = userPath[0]
			userPath = userPath[1:]

		# Whatever is left of the user supplied name overrides processed / tier
		if len(userPath) == 2:
			(processed, tier) = userPath
		elif len(userPath) == 1:
			(processed, tier) = (userPath[0], data['CMSSW_DATATIER'])
		elif len(userPath) == 0:
			(processed, tier) = ('Dataset_%s' % key, data['CMSSW_DATATIER'])

		# Formatted before the check, because the error message shows the result
		rawDS = '/%s/%s/%s' % (primary, processed, tier)
		if None in (primary, processed, tier):
			raise DatasetError('Invalid dataset name supplied: %r\nresulting in %s' % (self._ds_name, rawDS))
		# NOTE(review): utils.replaceDict presumably substitutes values from data - confirm
		return utils.replaceDict(rawDS, data)
示例#4
0
	def generateDatasetName(self, key, data):
		"""Build a '/primary/processed/tier' dataset name from the given metadata.

		With discovery enabled, the naming is delegated to GCProvider. Otherwise
		the three components are taken from the user supplied name
		(self.nameDS), from the entries of data['PARENT_PATH'] and from
		data['CMSSW_DATATIER']; 'Dataset_<key>' is used as processed part when
		the user supplied nothing for it.

		Raises DatasetError if 'CMSSW_DATATIER' is missing from data or if one
		of the three components could not be determined.
		"""
		if self._discovery:
			return GCProvider.generateDatasetName(self, key, data)
		if 'CMSSW_DATATIER' not in data:
			raise DatasetError('Incompatible data tiers in dataset: %s' % data)

		def getPathComponents(path):
			# Call arguments are evaluated eagerly, so a ternary helper such as
			# utils.QM cannot guard path.strip() - a missing (None) path has to be
			# checked before the string methods are used.
			if path:
				return tuple(path.strip('/').split('/'))
			return ()
		userPath = getPathComponents(self.nameDS)

		(primary, processed, tier) = (None, None, None)
		# In case of a child dataset, use the parent infos to construct new path
		for parent in data.get('PARENT_PATH', []):
			if len(userPath) == 3:
				# A complete user supplied name takes precedence over the parent
				(primary, processed, tier) = userPath
			else:
				try:
					(primary, processed, tier) = getPathComponents(parent)
				except Exception:
					# Best effort: a parent that does not split into three parts is skipped
					pass
		if (primary is None) and (len(userPath) > 0):
			# No parent provided the primary part - consume it from the user supplied name
			primary = userPath[0]
			userPath = userPath[1:]

		# Whatever is left of the user supplied name overrides processed / tier
		if len(userPath) == 2:
			(processed, tier) = userPath
		elif len(userPath) == 1:
			(processed, tier) = (userPath[0], data['CMSSW_DATATIER'])
		elif len(userPath) == 0:
			(processed, tier) = ('Dataset_%s' % key, data['CMSSW_DATATIER'])

		# Formatted before the check, because the error message shows the result
		rawDS = '/%s/%s/%s' % (primary, processed, tier)
		if None in (primary, processed, tier):
			raise DatasetError('Invalid dataset name supplied: %r\nresulting in %s' % (self.nameDS, rawDS))
		# NOTE(review): utils.replaceDict presumably substitutes values from data - confirm
		return utils.replaceDict(rawDS, data)
示例#5
0
    def _get_dataset_name(self, metadata_dict, hash_dataset):
        """Return the '/primary/processed/tier' name for a dataset.

        Delegates to GCProvider when discovery is enabled. Otherwise the name
        is assembled from self._dataset_pattern, the entries of
        metadata_dict['PARENT_PATH'] and metadata_dict['CMSSW_DATATIER'], with
        'Dataset_<hash_dataset>' as processed part if the pattern has none.

        Raises DatasetError if 'CMSSW_DATATIER' is not in metadata_dict or if
        any of the three name parts stays undetermined.
        """
        if self._discovery:
            return GCProvider._get_dataset_name(self, metadata_dict,
                                                hash_dataset)
        if 'CMSSW_DATATIER' not in metadata_dict:
            raise DatasetError('Incompatible data tiers in dataset: %s' %
                               repr(metadata_dict))

        def _split_path(path):
            # Empty / missing paths have no components
            if not path:
                return []
            return path.strip('/').split('/')

        user_parts = tuple(_split_path(self._dataset_pattern))
        user_gave_full_name = (len(user_parts) == 3)

        primary = processed = tier = None
        # Child datasets: derive the name parts from the parent paths
        for parent_path in metadata_dict.get('PARENT_PATH', []):
            if user_gave_full_name:
                (primary, processed, tier) = user_parts
                continue
            try:
                (primary, processed, tier) = tuple(_split_path(parent_path))
            except Exception:
                # Parents that do not yield exactly three parts are ignored
                clear_current_exception()

        if (primary is None) and user_parts:
            # The first user supplied part serves as primary name
            (primary, user_parts) = (user_parts[0], user_parts[1:])

        parts_left = len(user_parts)
        if parts_left == 0:
            processed = 'Dataset_%s' % hash_dataset
            tier = metadata_dict['CMSSW_DATATIER']
        elif parts_left == 1:
            processed = user_parts[0]
            tier = metadata_dict['CMSSW_DATATIER']
        elif parts_left == 2:
            (processed, tier) = user_parts

        # Built ahead of the validation since the error message reports it
        raw_dataset_name = '/%s/%s/%s' % (primary, processed, tier)
        if None in (primary, processed, tier):
            raise DatasetError(
                'Invalid dataset name supplied: %r\nresulting in %s' %
                (self._dataset_pattern, raw_dataset_name))
        return replace_with_dict(raw_dataset_name, metadata_dict)
	def _get_dataset_name(self, metadata_dict, hash_dataset):
		"""Assemble the dataset name in the form '/primary/processed/tier'.

		If discovery is enabled, GCProvider determines the name. Otherwise the
		parts are collected from self._dataset_pattern, the entries of
		metadata_dict['PARENT_PATH'] and metadata_dict['CMSSW_DATATIER'];
		'Dataset_<hash_dataset>' fills in for a missing processed part.

		Raises DatasetError if 'CMSSW_DATATIER' is absent from metadata_dict or
		if a name part is still None at the end.
		"""
		if self._discovery:
			return GCProvider._get_dataset_name(self, metadata_dict, hash_dataset)
		if 'CMSSW_DATATIER' not in metadata_dict:
			raise DatasetError('Incompatible data tiers in dataset: %s' % repr(metadata_dict))

		def _path_to_list(path):
			# Falsy paths (None, '') contribute no components
			if not path:
				return []
			return path.strip('/').split('/')
		pattern_parts = tuple(_path_to_list(self._dataset_pattern))
		pattern_is_complete = (len(pattern_parts) == 3)

		primary = None
		processed = None
		tier = None
		# For child datasets the parent paths supply the name parts
		for parent_entry in metadata_dict.get('PARENT_PATH', []):
			if not pattern_is_complete:
				try:
					(primary, processed, tier) = tuple(_path_to_list(parent_entry))
				except Exception:
					# Skip parents that cannot be split into exactly three parts
					clear_current_exception()
			else:
				(primary, processed, tier) = pattern_parts

		if (primary is None) and (len(pattern_parts) != 0):
			# Take the primary part from the front of the user pattern
			primary = pattern_parts[0]
			pattern_parts = pattern_parts[1:]

		count_left = len(pattern_parts)
		if count_left == 2:
			(processed, tier) = pattern_parts
		elif count_left == 1:
			processed = pattern_parts[0]
			tier = metadata_dict['CMSSW_DATATIER']
		elif count_left == 0:
			processed = 'Dataset_%s' % hash_dataset
			tier = metadata_dict['CMSSW_DATATIER']

		# The raw name is needed for the error message, so format it first
		raw_dataset_name = '/%s/%s/%s' % (primary, processed, tier)
		if None in (primary, processed, tier):
			raise DatasetError('Invalid dataset name supplied: %r\nresulting in %s' % (
				self._dataset_pattern, raw_dataset_name))
		return replace_with_dict(raw_dataset_name, metadata_dict)