Example #1
 def _parseFile(self, iterator):
     block = None
     for idx, line in enumerate(iterator):
         try:
             # Found start of block:
             line = line.strip()
             if line.startswith(';'):
                 continue
             elif line.startswith('['):
                 if block:
                     yield self._finishBlock(block)
                 block = self._createBlock(line)
             elif line != '':
                 tmp = lmap(
                     str.strip,
                     utils.QM('[' in line, line.split(' = ', 1),
                              rsplit(line, '=', 1)))
                 if len(tmp) != 2:
                     raise DatasetError(
                         'Malformed entry in dataset file:\n%s' % line)
                 key, value = tmp
                 handlerInfo = self._handleEntry.get(key.lower(), None)
                 if handlerInfo:
                     (prop, parser, msg) = handlerInfo
                     block[prop] = try_apply(value, parser, msg)
                 else:
                     block[DataProvider.FileList].append(
                         self._parseEntry(block, key, value))
         except Exception:
             raise DatasetError('Unable to parse %s:%d\n\t%s' %
                                (repr(self._filename), idx, repr(line)))
     if block:
         yield self._finishBlock(block)
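
A note on the helpers used above: utils.QM(cond, a, b) is used as an inline ternary that selects between the two already-computed split results, and try_apply wraps the value parser (see Example #27). A minimal sketch of such a QM helper, assuming it simply returns one of its two arguments; both alternatives are evaluated eagerly, which is why Example #2 spells the same logic out with an explicit if/else:

    def QM(condition, value_if_true, value_if_false):
        # hypothetical minimal ternary helper; unlike 'a if cond else b',
        # both alternatives are already evaluated when QM is called
        if condition:
            return value_if_true
        return value_if_false

    line = '/store/file.root = 123'
    parts = QM('[' in line, line.split(' = ', 1), line.rsplit('=', 1))
    print([piece.strip() for piece in parts])   # ['/store/file.root', '123']
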
Example #2
 def _create_blocks(self, iterable):
     block = None
     for idx, line in enumerate(iterable):
         try:
             # Found start of block:
             line = line.strip()
             if line.startswith(';'):
                 continue
             elif line.startswith('['):
                 if block:
                     yield self._finish_block(block)
                 block = self._create_block(line)
             elif line != '':
                 if '[' in line:  # metadata on this line -> enforce whitespace '/path/file = ...'
                     tmp = lmap(str.strip, line.split(' = ', 1))
                 else:  # loose whitespace convention (allow: '/path/file_var=B_test=1')
                     tmp = lmap(str.strip, rsplit(line, '=', 1))
                 if len(tmp) != 2:
                     raise DatasetError(
                         'Malformed entry in dataset file:\n%s' % line)
                 self._fill_block(block, *tmp)
         except Exception:
             raise DatasetError('Unable to parse %s:%d\n\t%s' %
                                (repr(self._filename), idx, repr(line)))
     if block:
         yield self._finish_block(block)
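
The two conventions described in the comments can be reproduced with plain str methods; a short self-contained illustration (using str.rsplit in place of the rsplit helper) of why the loose form splits on the last '=':

    # metadata on the line -> the ' = ' separator is enforced, a plain split works
    line_meta = '/path/file.root = 1000 ["key", "value"]'
    print([part.strip() for part in line_meta.split(' = ', 1)])
    # -> ['/path/file.root', '1000 ["key", "value"]']

    # loose convention -> the file name itself may contain '=',
    # so only the last '=' separates the value
    line_loose = '/path/file_var=B_test=1'
    print([part.strip() for part in line_loose.rsplit('=', 1)])
    # -> ['/path/file_var=B_test', '1']
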
Example #3
    def getBlocks(self, show_stats):
        statsProcessor = NullDataProcessor(config=None, onChange=None)
        if show_stats:
            statsProcessor = self._stats
        if self._cache_block is None:
            ec = ExceptionCollector()

            def getAllBlocks():
                for provider in self._providerList:
                    try:
                        for block in provider.getBlocksNormed():
                            yield block
                    except Exception:
                        ec.collect()
                    if utils.abort():
                        raise DatasetError('Could not retrieve all datasets!')

            try:
                self._cache_block = list(
                    statsProcessor.process(
                        self._datasetProcessor.process(getAllBlocks())))
            except Exception:
                raise DatasetError(
                    'Unable to run datasets through processing pipeline!')
            ec.raise_any(DatasetError('Could not retrieve all datasets!'))
        return self._cache_block
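
Both the try/except around list(...) and the later ec.raise_any are needed because getAllBlocks is a generator: provider failures only surface once the processing pipeline actually consumes it. A minimal illustration of that timing:

    def failing_blocks():
        yield {'name': 'block1'}
        raise RuntimeError('provider failed')   # raised lazily, not at call time

    blocks = failing_blocks()   # no exception here - the generator has not run yet
    try:
        list(blocks)            # the failure appears only while consuming the iterator
    except RuntimeError as exc:
        print('caught while materializing the pipeline:', exc)
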
Example #4
	def generateDatasetName(self, key, data):
		if self._discovery:
			return GCProvider.generateDatasetName(self, key, data)
		if 'CMSSW_DATATIER' not in data:
			raise DatasetError('Incompatible data tiers in dataset: %s' % data)
		getPathComponents = lambda path: utils.QM(path, tuple(path.strip('/').split('/')), ())
		userPath = getPathComponents(self.nameDS)

		(primary, processed, tier) = (None, None, None)
		# In case of a child dataset, use the parent info to construct the new path
		for parent in data.get('PARENT_PATH', []):
			if len(userPath) == 3:
				(primary, processed, tier) = userPath
			else:
				try:
					(primary, processed, tier) = getPathComponents(parent)
				except Exception:
					pass
		if (primary is None) and (len(userPath) > 0):
			primary = userPath[0]
			userPath = userPath[1:]

		if len(userPath) == 2:
			(processed, tier) = userPath
		elif len(userPath) == 1:
			(processed, tier) = (userPath[0], data['CMSSW_DATATIER'])
		elif len(userPath) == 0:
			(processed, tier) = ('Dataset_%s' % key, data['CMSSW_DATATIER'])

		rawDS = '/%s/%s/%s' % (primary, processed, tier)
		if None in (primary, processed, tier):
			raise DatasetError('Invalid dataset name supplied: %r\nresulting in %s' % (self.nameDS, rawDS))
		return utils.replaceDict(rawDS, data)
Example #5
 def getDatasets(self):
     if self._cache_dataset is None:
         self._cache_dataset = []
         ec = ExceptionCollector()
         for provider in self._providerList:
             try:
                 self._cache_dataset.extend(provider.getDatasets())
             except Exception:
                 ec.collect()
             if utils.abort():
                 raise DatasetError('Could not retrieve all datasets!')
         ec.raise_any(DatasetError('Could not retrieve all datasets!'))
     return self._cache_dataset
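
The ExceptionCollector lets the loop query every provider before reporting failures. A minimal sketch of such a collector, assuming only the collect/raise_any interface visible above (not the real grid-control class):

    import sys

    class MiniExceptionCollector(object):
        """Remember exceptions raised in a loop and raise a summary error later."""
        def __init__(self):
            self._collected = []

        def collect(self):
            # called from inside an 'except' block: remember the active exception
            self._collected.append(sys.exc_info()[1])

        def raise_any(self, error):
            # raise the given summary error if anything was collected
            if self._collected:
                raise error

    ec = MiniExceptionCollector()
    for provider in (lambda: ['ds1'], lambda: 1 / 0, lambda: ['ds2']):
        try:
            provider()
        except Exception:
            ec.collect()
    try:
        ec.raise_any(RuntimeError('Could not retrieve all datasets!'))
    except RuntimeError as exc:
        print(exc)   # one provider failed, but all of them were queried first
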
Example #6
    def _readBlockFromConfig(self, ds_config, datasetExpr, datasetNick,
                             datasetID):
        metadata_keys = parseJSON(
            ds_config.get('metadata', '[]', onChange=None))
        common_metadata = parseJSON(
            ds_config.get('metadata common', '[]', onChange=None))
        if len(common_metadata) > len(metadata_keys):
            raise DatasetError(
                'Unable to set %d common metadata items with %d metadata keys'
                % (len(common_metadata), len(metadata_keys)))
        common_prefix = ds_config.get('prefix', '', onChange=None)
        file_list = []
        has_events = False
        has_se_list = False
        for url in ds_config.getOptions():
            if url == 'se list':
                has_se_list = True
            elif url == 'events':
                has_events = True
            elif url not in [
                    'dataset hash', 'id', 'metadata', 'metadata common',
                    'nickname', 'prefix'
            ]:
                file_list.append(
                    self._readFileFromConfig(ds_config, url, metadata_keys,
                                             common_metadata, common_prefix))
        if not file_list:
            raise DatasetError(
                'There are no dataset files specified for dataset %r' %
                datasetExpr)

        result = {
            DataProvider.Nickname:
            ds_config.get('nickname', datasetNick, onChange=None),
            DataProvider.DatasetID:
            ds_config.getInt('id', datasetID, onChange=None),
            DataProvider.Dataset:
            datasetExpr,
            DataProvider.FileList:
            sorted(file_list, key=lambda fi: fi[DataProvider.URL]),
        }
        if metadata_keys:
            result[DataProvider.Metadata] = metadata_keys
        if has_events:
            result[DataProvider.NEntries] = ds_config.getInt('events',
                                                             -1,
                                                             onChange=None)
        if has_se_list:
            result[DataProvider.Locations] = parseList(
                ds_config.get('se list', '', onChange=None), ',')
        return result
Example #7
	def processBlock(self, block):
		blockDS = block[DataProvider.Dataset]
		oldNick = block.get(DataProvider.Nickname, '')
		newNick = self.getName(oldNick, blockDS, block)
		# Check if the nickname is used consistently in all blocks of a dataset
		if self._checkConsistency:
			if self._checkConsistencyData.setdefault(blockDS, newNick) != newNick:
				raise DatasetError('Different blocks of dataset "%s" have different nicknames: "%s" != "%s"' % (
					blockDS, self._checkConsistencyData[blockDS], newNick))
		if self._checkCollision:
			if self._checkCollisionData.setdefault(newNick, blockDS) != blockDS:
				raise DatasetError('Multiple datasets use the same nickname "%s": "%s" != "%s"' % (
					newNick, self._checkCollisionData[newNick], blockDS))
		block[DataProvider.Nickname] = newNick
		return block
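
Both checks rely on dict.setdefault: the first nickname (or dataset) recorded for a key becomes the reference, and every later block must agree with it. A compact standalone illustration:

    nickname_by_dataset = {}

    def check_nickname(dataset, nickname):
        # setdefault stores the first nickname seen for this dataset and returns it;
        # a later, different nickname therefore signals an inconsistency
        if nickname_by_dataset.setdefault(dataset, nickname) != nickname:
            raise ValueError('Different blocks of dataset "%s" have different nicknames: "%s" != "%s"'
                             % (dataset, nickname_by_dataset[dataset], nickname))

    check_nickname('/My/Dataset', 'nick_a')
    check_nickname('/My/Dataset', 'nick_a')    # consistent - no error
    # check_nickname('/My/Dataset', 'nick_b')  # would raise ValueError
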
Example #8
	def check_splitter(self, splitter):
		def _get_proposal(splitter):
			return reduce(lambda prop, prov: prov.check_splitter(prop), self._provider_list, splitter)
		prop_splitter = _get_proposal(splitter)
		if prop_splitter != _get_proposal(prop_splitter):
			raise DatasetError('Dataset providers could not agree on valid dataset splitter!')
		return prop_splitter
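
The check applies the chain of provider checks twice: if the second pass maps the proposal onto itself, a fixed point is reached and the providers agree. A small sketch with hypothetical check functions standing in for the providers:

    from functools import reduce

    # hypothetical provider checks - each may replace the proposed splitter name
    provider_check_list = [
        lambda prop: 'FileBoundarySplitter' if prop == 'EventBoundarySplitter' else prop,
        lambda prop: prop,   # this provider accepts any proposal
    ]

    def get_proposal(splitter):
        return reduce(lambda prop, check: check(prop), provider_check_list, splitter)

    proposal = get_proposal('EventBoundarySplitter')   # -> 'FileBoundarySplitter'
    assert get_proposal(proposal) == proposal          # stable: the providers agree
    print(proposal)
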
Example #9
    def _splitJobs(self, fileList, eventsPerJob, firstEvent):
        nextEvent = firstEvent
        succEvent = nextEvent + eventsPerJob
        curEvent = 0
        lastEvent = 0
        curSkip = 0
        fileListIter = iter(fileList)
        job = {
            DataSplitter.Skipped: 0,
            DataSplitter.NEntries: 0,
            DataSplitter.FileList: []
        }
        while True:
            if curEvent >= lastEvent:
                try:
                    fileObj = next(fileListIter)
                except StopIteration:
                    if job[DataSplitter.FileList]:
                        yield job
                    break

                nEvents = fileObj[DataProvider.NEntries]
                if nEvents < 0:
                    raise DatasetError(
                        'EventBoundarySplitter does not support files with a negative number of events!'
                    )
                curEvent = lastEvent
                lastEvent = curEvent + nEvents
                curSkip = 0

            if nextEvent >= lastEvent:
                curEvent = lastEvent
                continue

            curSkip += nextEvent - curEvent
            curEvent = nextEvent

            available = lastEvent - curEvent
            if succEvent - nextEvent < available:
                available = succEvent - nextEvent

            if not len(job[DataSplitter.FileList]):
                job[DataSplitter.Skipped] = curSkip

            job[DataSplitter.NEntries] += available
            nextEvent += available

            job[DataSplitter.FileList].append(fileObj[DataProvider.URL])
            if DataProvider.Metadata in fileObj:
                job.setdefault(DataSplitter.Metadata,
                               []).append(fileObj[DataProvider.Metadata])

            if nextEvent >= succEvent:
                succEvent += eventsPerJob
                yield job
                job = {
                    DataSplitter.Skipped: 0,
                    DataSplitter.NEntries: 0,
                    DataSplitter.FileList: []
                }
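
The splitter walks through the block's files once, carving out partitions of exactly eventsPerJob events and recording how many events of a partition's first file have to be skipped. A simplified standalone version with plain dictionary keys (an illustration of the idea, not the grid-control implementation), followed by a worked example:

    def split_by_events(file_list, events_per_job, first_event=0):
        # file_list: sequence of (url, n_events) pairs; yields partition dicts
        job = {'skipped': 0, 'entries': 0, 'files': []}
        file_start = 0                      # global index of the current file's first event
        next_event = first_event
        job_end = next_event + events_per_job
        for url, n_events in file_list:
            file_end = file_start + n_events
            while next_event < file_end:
                take = min(job_end, file_end) - next_event
                if not job['files']:
                    job['skipped'] = next_event - file_start
                if url not in job['files']:
                    job['files'].append(url)
                job['entries'] += take
                next_event += take
                if next_event >= job_end:
                    yield job
                    job = {'skipped': 0, 'entries': 0, 'files': []}
                    job_end += events_per_job
            file_start = file_end
        if job['files']:
            yield job

    for partition in split_by_events([('A.root', 10), ('B.root', 5)], events_per_job=4):
        print(partition)
    # {'skipped': 0, 'entries': 4, 'files': ['A.root']}
    # {'skipped': 4, 'entries': 4, 'files': ['A.root']}
    # {'skipped': 8, 'entries': 4, 'files': ['A.root', 'B.root']}
    # {'skipped': 2, 'entries': 3, 'files': ['B.root']}
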
Example #10
    def process_block(self, block):
        # Check uniqueness of URLs
        url_hash_list = []
        if self._check_url != DatasetUniqueMode.ignore:
            block[DataProvider.FileList] = list(
                self._process_fi_list(url_hash_list,
                                      block[DataProvider.FileList]))
            url_hash_list.sort()

        # Check uniqueness of blocks
        if self._check_block != DatasetUniqueMode.ignore:
            block_hash = md5_hex(
                repr((block.get(DataProvider.Dataset),
                      block[DataProvider.BlockName], url_hash_list,
                      block[DataProvider.NEntries],
                      block[DataProvider.Locations],
                      block.get(DataProvider.Metadata))))
            if block_hash in self._recorded_block:
                msg = 'Multiple occurrences of block: "%s"!' % DataProvider.get_block_id(
                    block)
                msg += ' (This check can be configured with %r)' % 'dataset check unique block'
                if self._check_block == DatasetUniqueMode.warn:
                    self._log.warning(msg)
                elif self._check_block == DatasetUniqueMode.abort:
                    raise DatasetError(msg)
                elif self._check_block == DatasetUniqueMode.skip:
                    return None
            self._recorded_block.add(block_hash)
        return block
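
Uniqueness of both URLs and blocks is decided by hashing a repr of the identifying fields. A minimal stand-in for md5_hex, assuming it simply hashes the given string, and how it flags a repeated file entry:

    import hashlib

    def md5_hex(value):
        # hypothetical minimal equivalent: hex digest of the given string
        return hashlib.md5(value.encode('utf-8')).hexdigest()

    recorded_url = set()
    file_entries = [('file:///a.root', 100, None),
                    ('file:///b.root', 50, None),
                    ('file:///a.root', 100, None)]   # duplicate of the first entry
    for entry in file_entries:
        url_hash = md5_hex(repr(entry))
        if url_hash in recorded_url:
            print('Multiple occurrences of URL: %r!' % entry[0])
        recorded_url.add(url_hash)
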
Example #11
    def _read_block(self, ds_config, dataset_expr, dataset_nick):
        metadata_name_list = parse_json(
            ds_config.get('metadata', '[]', on_change=None))
        common_metadata = parse_json(
            ds_config.get('metadata common', '[]', on_change=None))
        if len(common_metadata) > len(metadata_name_list):
            raise DatasetError('Unable to set %d common metadata items ' %
                               len(common_metadata) + 'with %d metadata keys' %
                               len(metadata_name_list))
        common_prefix = ds_config.get('prefix', '', on_change=None)
        fn_list = []
        has_events = False
        has_se_list = False
        for url in ds_config.get_option_list():
            if url == 'se list':
                has_se_list = True
            elif url == 'events':
                has_events = True
            elif url not in [
                    'dataset hash', 'metadata', 'metadata common', 'nickname',
                    'prefix'
            ]:
                fi = self._read_fi(ds_config, url, metadata_name_list,
                                   common_metadata, common_prefix)
                fn_list.append(fi)
        if not fn_list:
            raise DatasetError(
                'There are no dataset files specified for dataset %r' %
                dataset_expr)

        result = {
            DataProvider.Nickname:
            ds_config.get('nickname', dataset_nick or '', on_change=None),
            DataProvider.FileList:
            sorted(fn_list, key=lambda fi: fi[DataProvider.URL])
        }
        result.update(DataProvider.parse_block_id(dataset_expr))
        if metadata_name_list:
            result[DataProvider.Metadata] = metadata_name_list
        if has_events:
            result[DataProvider.NEntries] = ds_config.get_int('events',
                                                              -1,
                                                              on_change=None)
        if has_se_list:
            result[DataProvider.Locations] = parse_list(
                ds_config.get('se list', '', on_change=None), ',')
        return result
Example #12
    def _partition_block(self, fi_list, events_per_job, entry_first):
        event_next = entry_first
        event_succ = event_next + events_per_job
        event_current = 0
        event_prev = 0
        skip_current = 0
        fi_iter = iter(fi_list)
        proto_partition = {
            DataSplitter.Skipped: 0,
            DataSplitter.NEntries: 0,
            DataSplitter.FileList: []
        }
        while True:
            if event_current >= event_prev:
                fi = next(fi_iter, None)
                if fi is None:
                    if proto_partition[DataSplitter.FileList]:
                        yield proto_partition
                    break

                event_count = fi[DataProvider.NEntries]
                if event_count < 0:
                    raise DatasetError(
                        '%s does not support files with a negative number of events!'
                        % self.__class__.__name__)
                event_current = event_prev
                event_prev = event_current + event_count
                skip_current = 0

            if event_next >= event_prev:
                event_current = event_prev
                continue

            skip_current += event_next - event_current
            event_current = event_next

            available = event_prev - event_current
            if event_succ - event_next < available:
                available = event_succ - event_next

            if not proto_partition[DataSplitter.FileList]:
                proto_partition[DataSplitter.Skipped] = skip_current

            proto_partition[DataSplitter.NEntries] += available
            event_next += available

            proto_partition[DataSplitter.FileList].append(fi[DataProvider.URL])
            if DataProvider.Metadata in fi:
                proto_partition.setdefault(DataSplitter.Metadata, []).append(
                    fi[DataProvider.Metadata])

            if event_next >= event_succ:
                event_succ += events_per_job
                yield proto_partition
                proto_partition = {
                    DataSplitter.Skipped: 0,
                    DataSplitter.NEntries: 0,
                    DataSplitter.FileList: []
                }
Example #13
    def _get_dataset_name(self, metadata_dict, hash_dataset):
        if self._discovery:
            return GCProvider._get_dataset_name(self, metadata_dict,
                                                hash_dataset)
        if 'CMSSW_DATATIER' not in metadata_dict:
            raise DatasetError('Incompatible data tiers in dataset: %s' %
                               repr(metadata_dict))

        def _get_path_components(path):
            if path:
                return path.strip('/').split('/')
            return []

        user_dataset_part_list = tuple(
            _get_path_components(self._dataset_pattern))

        (primary, processed, tier) = (None, None, None)
        # In case of a child dataset, use the parent info to construct the new path
        for parent in metadata_dict.get('PARENT_PATH', []):
            if len(user_dataset_part_list) == 3:
                (primary, processed, tier) = user_dataset_part_list
            else:
                try:
                    (primary, processed,
                     tier) = tuple(_get_path_components(parent))
                except Exception:
                    clear_current_exception()
        if (primary is None) and (len(user_dataset_part_list) > 0):
            primary = user_dataset_part_list[0]
            user_dataset_part_list = user_dataset_part_list[1:]

        if len(user_dataset_part_list) == 2:
            (processed, tier) = user_dataset_part_list
        elif len(user_dataset_part_list) == 1:
            (processed, tier) = (user_dataset_part_list[0],
                                 metadata_dict['CMSSW_DATATIER'])
        elif len(user_dataset_part_list) == 0:
            (processed, tier) = ('Dataset_%s' % hash_dataset,
                                 metadata_dict['CMSSW_DATATIER'])

        raw_dataset_name = '/%s/%s/%s' % (primary, processed, tier)
        if None in (primary, processed, tier):
            raise DatasetError(
                'Invalid dataset name supplied: %r\nresulting in %s' %
                (self._dataset_pattern, raw_dataset_name))
        return replace_with_dict(raw_dataset_name, metadata_dict)
Example #14
 def getAllBlocks():
     for provider in self._providerList:
         try:
             for block in provider.getBlocks(silent):
                 yield block
         except Exception:
             ec.collect()
         if utils.abort():
             raise DatasetError('Could not retrieve all datasets!')
Example #15
    def checkSplitter(self, splitter):
        def getProposal(x):
            return reduce(lambda prop, prov: prov.checkSplitter(prop),
                          self._providerList, x)

        if getProposal(splitter) != getProposal(getProposal(splitter)):
            raise DatasetError(
                'Dataset providers could not agree on valid dataset splitter!')
        return getProposal(splitter)
Example #16
	def get_dataset_name_list(self):
		if self._cache_dataset is None:
			self._cache_dataset = set()
			exc = ExceptionCollector()
			for provider in self._provider_list:
				try:
					self._cache_dataset.update(provider.get_dataset_name_list())
				except Exception:
					exc.collect()
			exc.raise_any(DatasetError('Could not retrieve all datasets!'))
		return list(self._cache_dataset)
Example #17
    def _readBlockFromConfig(self, config, datasetExpr, datasetNick,
                             datasetID):
        common_metadata = parseJSON(
            config.get('metadata common', '[]', onChange=None))
        common_prefix = config.get('prefix', '', onChange=None)
        file_list = []
        has_events = False
        has_se_list = False
        for url in config.getOptions():
            if url == 'se list':
                has_se_list = True
            elif url == 'events':
                has_events = True
            elif url not in [
                    'dataset hash', 'id', 'metadata', 'metadata common',
                    'nickname', 'prefix'
            ]:
                info = config.get(url, onChange=None)
                tmp = info.split(' ', 1)
                fi = {
                    DataProvider.URL: common_prefix + url,
                    DataProvider.NEntries: int(tmp[0])
                }
                if common_metadata:
                    fi[DataProvider.Metadata] = common_metadata
                if len(tmp) == 2:
                    fi[DataProvider.Metadata] = fi.get(DataProvider.Metadata,
                                                       []) + parseJSON(tmp[1])
                file_list.append(fi)
        if not file_list:
            raise DatasetError(
                'There are no dataset files specified for dataset %r' %
                datasetExpr)

        result = {
            DataProvider.Nickname:
            config.get('nickname', datasetNick, onChange=None),
            DataProvider.DatasetID:
            config.getInt('id', datasetID, onChange=None),
            DataProvider.Dataset:
            datasetExpr,
            DataProvider.Metadata:
            parseJSON(config.get('metadata', '[]', onChange=None)),
            DataProvider.FileList:
            sorted(file_list, key=lambda fi: fi[DataProvider.URL]),
        }
        if has_events:
            result[DataProvider.NEntries] = config.getInt('events',
                                                          -1,
                                                          onChange=None)
        if has_se_list:
            result[DataProvider.Locations] = parseList(
                config.get('se list', '', onChange=None), ',')
        return result
Example #18
			def processFI(fiList):
				for fi in fiList:
					urlHash = md5_hex(repr((fi[DataProvider.URL], fi[DataProvider.NEntries], fi.get(DataProvider.Metadata))))
					if urlHash in self._recordedURL:
						msg = 'Multiple occurrences of URL: %r!' % fi[DataProvider.URL]
						msg += ' (This check can be configured with %r)' % 'dataset check unique url'
						if self._checkURL == DatasetUniqueMode.warn:
							self._log.warning(msg)
						elif self._checkURL == DatasetUniqueMode.abort:
							raise DatasetError(msg)
						elif self._checkURL == DatasetUniqueMode.skip:
							continue
					self._recordedURL.add(urlHash)
					recordedBlockURL.append(urlHash)
					yield fi
Example #19
    def processBlock(self, block):
        # Check uniqueness of URLs
        recordedBlockURL = []
        if self._checkURL != DatasetUniqueMode.ignore:

            def processFI(fiList):
                for fi in fiList:
                    urlHash = md5_hex(
                        repr((fi[DataProvider.URL], fi[DataProvider.NEntries],
                              fi.get(DataProvider.Metadata))))
                    if urlHash in self._recordedURL:
                        msg = 'Multiple occurrences of URL: %r!' % fi[
                            DataProvider.URL]
                        msg += ' (This check can be configured with %r)' % 'dataset check unique url'
                        if self._checkURL == DatasetUniqueMode.warn:
                            self._log.warning(msg)
                        elif self._checkURL == DatasetUniqueMode.abort:
                            raise DatasetError(msg)
                        elif self._checkURL == DatasetUniqueMode.skip:
                            continue
                    self._recordedURL.add(urlHash)
                    recordedBlockURL.append(urlHash)
                    yield fi

            block[DataProvider.FileList] = list(
                processFI(block[DataProvider.FileList]))
            recordedBlockURL.sort()

        # Check uniqueness of blocks
        if self._checkBlock != DatasetUniqueMode.ignore:
            blockHash = md5_hex(
                repr((block.get(DataProvider.Dataset),
                      block[DataProvider.BlockName], recordedBlockURL,
                      block[DataProvider.NEntries],
                      block[DataProvider.Locations],
                      block.get(DataProvider.Metadata))))
            if blockHash in self._recordedBlock:
                msg = 'Multiple occurrences of block: "%s"!' % DataProvider.bName(
                    block)
                msg += ' (This check can be configured with %r)' % 'dataset check unique block'
                if self._checkBlock == DatasetUniqueMode.warn:
                    self._log.warning(msg)
                elif self._checkBlock == DatasetUniqueMode.abort:
                    raise DatasetError(msg)
                elif self._checkBlock == DatasetUniqueMode.skip:
                    return None
            self._recordedBlock.add(blockHash)
        return block
Example #20
    def _iter_blocks_raw(self):
        def _filter_block(block):
            if self._filter:
                return self._filter in '/%s#' % DataProvider.get_block_id(
                    block)
            return True

        try:
            fp = SafeFile(self._filename)
        except Exception:
            raise DatasetError('Unable to open dataset file %s' %
                               repr(self._filename))
        for block in self._create_blocks(fp.iter_close()):
            if _filter_block(block):
                self._raise_on_abort()
                yield block
Example #21
	def _getBlocksInternal(self):
		def _filterBlock(block):
			if self._filter:
				name = '/%s#%s#' % (block[DataProvider.Dataset], block.get(DataProvider.BlockName, ''))
				return self._filter in name
			return True
		try:
			fp = open(self._filename, 'r')
		except Exception:
			raise DatasetError('Unable to open dataset file %s' % repr(self._filename))
		try:
			for block in self._parseFile(fp):
				if _filterBlock(block):
					yield block
			fp.close()
		except Exception:
			fp.close()
			raise
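
The filter expression is matched as a plain substring of the '/<dataset>#<block>#' identifier, so it can select a whole dataset, a single block, or anything in between. A short illustration with hypothetical names:

    dataset_name, block_name = 'PrivateSample/Run1', 'block-0001'
    block_id = '/%s#%s#' % (dataset_name, block_name)

    for filter_expr in ('/PrivateSample/', 'block-0001', '/OtherSample/'):
        print(filter_expr, '->', filter_expr in block_id)
    # /PrivateSample/ -> True
    # block-0001 -> True
    # /OtherSample/ -> False
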
Example #22
 def _readFileFromConfig(self, ds_config, url, metadata_keys,
                         common_metadata, common_prefix):
     info = ds_config.get(url, onChange=None)
     tmp = info.split(' ', 1)
     fi = {
         DataProvider.URL: common_prefix + url,
         DataProvider.NEntries: int(tmp[0])
     }
     if common_metadata:
         fi[DataProvider.Metadata] = common_metadata
     if len(tmp) == 2:
         file_metadata = parseJSON(tmp[1])
         if len(common_metadata) + len(file_metadata) > len(metadata_keys):
             raise DatasetError(
                 'Unable to set %d file metadata items with %d metadata keys (%d common metadata items)'
                 % (len(file_metadata), len(metadata_keys),
                    len(common_metadata)))
         fi[DataProvider.Metadata] = fi.get(DataProvider.Metadata,
                                            []) + file_metadata
     return fi
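
Each file option value is expected to hold the entry count, optionally followed by a JSON list of per-file metadata. A small illustration with json.loads standing in for parseJSON (an assumption about its behaviour):

    import json

    info = '5000 ["mc", 2016]'    # hypothetical config value for one file entry
    tmp = info.split(' ', 1)
    n_entries = int(tmp[0])                                        # -> 5000
    file_metadata = json.loads(tmp[1]) if len(tmp) == 2 else []    # -> ['mc', 2016]
    print(n_entries, file_metadata)
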
Example #23
    def getBlocks(self, silent=True):
        if self._cache_block is None:
            ec = ExceptionCollector()

            def getAllBlocks():
                for provider in self._providerList:
                    try:
                        for block in provider.getBlocks(silent):
                            yield block
                    except Exception:
                        ec.collect()
                    if utils.abort():
                        raise DatasetError('Could not retrieve all datasets!')

            self._cache_block = list(
                self._stats.process(
                    self._datasetProcessor.process(getAllBlocks())))
            ec.raise_any(DatasetError('Could not retrieve all datasets!'))
            logging.getLogger('user').info(
                'Summary: Running over %s distributed over %d blocks.',
                *self._stats.getStats())
        return self._cache_block
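
The processing pipeline is just nested generators: each processor's process() consumes the iterator handed to it and yields the (possibly filtered or modified) blocks, so the whole chain stays lazy until list() materializes it. A toy processor chain with the same shape:

    class ScaleProcessor(object):
        """Hypothetical processor with the same .process(iterable) interface."""
        def __init__(self, factor):
            self._factor = factor

        def process(self, block_iter):
            for block in block_iter:
                yield block * self._factor

    source = iter([1, 2, 3])
    pipeline = ScaleProcessor(2).process(ScaleProcessor(10).process(source))
    print(list(pipeline))   # [20, 40, 60] - nothing runs until list() consumes the chain
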
Example #24
 def _read_fi(self, ds_config, url, metadata_name_list, common_metadata,
              common_prefix):
     info = ds_config.get(url, on_change=None)
     tmp = info.split(' ', 1)
     fi = {
         DataProvider.URL: common_prefix + url,
         DataProvider.NEntries: int(tmp[0])
     }
     if common_metadata:
         fi[DataProvider.Metadata] = common_metadata
     if len(tmp) == 2:
         file_metadata = parse_json(tmp[1])
         if len(common_metadata) + len(file_metadata) > len(
                 metadata_name_list):
             raise DatasetError(
                 'Unable to set %d file metadata items ' %
                 len(file_metadata) +
                 'with %d metadata keys ' % len(metadata_name_list) +
                 '(%d common metadata items)' % len(common_metadata))
         fi[DataProvider.Metadata] = fi.get(DataProvider.Metadata,
                                            []) + file_metadata
     return fi
Example #25
 def _handleError(self, msg, mode):
     if mode == DatasetCheckMode.warn:
         self._log.warning(msg)
     elif mode == DatasetCheckMode.abort:
         raise DatasetError(msg)
Example #26
	def get_block_list_cached(self, show_stats):
		exc = ExceptionCollector()
		result = self._create_block_cache(show_stats, lambda: self._iter_all_blocks(exc))
		exc.raise_any(DatasetError('Could not retrieve all datasets!'))
		return result
Example #27
def _try_apply(value, fun, desc):
    try:
        return fun(value)
    except Exception:
        raise DatasetError('Unable to parse %s: %s' % (desc, repr(value)))
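
Typical usage of this wrapper (cf. try_apply in Example #1): any parser failure is turned into a DatasetError naming the offending value and what it was expected to be.

    n_events = _try_apply('12345', int, 'number of events')              # -> 12345
    se_list = _try_apply('SE1,SE2', lambda v: v.split(','), 'SE list')   # -> ['SE1', 'SE2']
    # _try_apply('abc', int, 'number of events') would raise
    # DatasetError("Unable to parse number of events: 'abc'")
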