def _read_block(self, ds_config, dataset_expr, dataset_nick):
    # Read the metadata key names and the metadata values shared by all files
    metadata_name_list = parse_json(ds_config.get('metadata', '[]', on_change=None))
    common_metadata = parse_json(ds_config.get('metadata common', '[]', on_change=None))
    if len(common_metadata) > len(metadata_name_list):
        raise DatasetError('Unable to set %d common metadata items ' % len(common_metadata) +
            'with %d metadata keys' % len(metadata_name_list))
    common_prefix = ds_config.get('prefix', '', on_change=None)
    fn_list = []
    has_events = False
    has_se_list = False
    # Any option that is not one of the reserved keys below is treated as a file entry
    for url in ds_config.get_option_list():
        if url == 'se list':
            has_se_list = True
        elif url == 'events':
            has_events = True
        elif url not in ['dataset hash', 'metadata', 'metadata common', 'nickname', 'prefix']:
            fi = self._read_fi(ds_config, url, metadata_name_list, common_metadata, common_prefix)
            fn_list.append(fi)
    if not fn_list:
        raise DatasetError('There are no dataset files specified for dataset %r' % dataset_expr)
    result = {
        DataProvider.Nickname: ds_config.get('nickname', dataset_nick or '', on_change=None),
        DataProvider.FileList: sorted(fn_list, key=lambda fi: fi[DataProvider.URL])
    }
    result.update(DataProvider.parse_block_id(dataset_expr))
    if metadata_name_list:
        result[DataProvider.Metadata] = metadata_name_list
    if has_events:
        result[DataProvider.NEntries] = ds_config.get_int('events', -1, on_change=None)
    if has_se_list:
        result[DataProvider.Locations] = parse_list(ds_config.get('se list', '', on_change=None), ',')
    return result
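
# For illustration, a sketch of the config section that _read_block consumes;
# every option name that is not one of the reserved keys ('dataset hash',
# 'metadata', 'metadata common', 'nickname', 'prefix', 'events', 'se list')
# is read as a file URL. All names and values below are invented:
#
#   nickname = example_nick
#   events = 300
#   metadata = ["RUN", "LUMI"]
#   metadata common = [2012]
#   prefix = /store/data/
#   se list = site_a, site_b
#   file_a.root = 100 [11]
#   file_b.root = 200 [12]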
def _process_json_result(self, value):
    if not value:
        raise RestError('Received empty reply')
    try:
        return parse_json(value)
    except Exception:
        raise RestError('Received invalid JSON reply: %r' % value)
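
# Hedged usage sketch for _process_json_result (inputs invented): the raw
# response body is either decoded or translated into a RestError.
#
#   self._process_json_result('{"status": "ok"}')  # returns {'status': 'ok'}
#   self._process_json_result('')                  # raises RestError: empty reply
#   self._process_json_result('<html>')            # raises RestError: invalid JSON reply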
def _read_fi(self, ds_config, url, metadata_name_list, common_metadata, common_prefix):
    # Option value format: '<number of entries> [<json list of file metadata>]'
    info = ds_config.get(url, on_change=None)
    tmp = info.split(' ', 1)
    fi = {DataProvider.URL: common_prefix + url, DataProvider.NEntries: int(tmp[0])}
    if common_metadata:
        fi[DataProvider.Metadata] = common_metadata
    if len(tmp) == 2:
        file_metadata = parse_json(tmp[1])
        if len(common_metadata) + len(file_metadata) > len(metadata_name_list):
            raise DatasetError('Unable to set %d file metadata items ' % len(file_metadata) +
                'with %d metadata keys ' % len(metadata_name_list) +
                '(%d common metadata items)' % len(common_metadata))
        # File metadata values are appended after the common metadata values
        fi[DataProvider.Metadata] = fi.get(DataProvider.Metadata, []) + file_metadata
    return fi
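
# Worked example (values invented, matching the config sketch above): the
# option 'file_a.root = 100 [11]' with common_prefix='/store/data/',
# common_metadata=[2012] and metadata_name_list=["RUN", "LUMI"] yields
#
#   {DataProvider.URL: '/store/data/file_a.root',
#    DataProvider.NEntries: 100,
#    DataProvider.Metadata: [2012, 11]}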
def __init__(self, fn):
    ParameterSource.__init__(self)
    fp = GZipTextFile(fn, 'r')
    try:
        # The first line is a commented header holding the JSON list of output variable names
        header = fp.readline().lstrip('#').strip()
        self._output_vn_list = []
        if header:
            self._output_vn_list = parse_json(header)

        def _parse_line(line):
            # Comment lines map to None; data lines are '<pnum>\t<json>\t<json>...'
            if not line.startswith('#'):
                pnum_str, stored_json = line.split('\t', 1)
                is_invalid = '!' in pnum_str  # a '!' marks an invalidated parameter point
                pnum = int(pnum_str.replace('!', ' '))
                return (is_invalid, pnum, lmap(parse_json, stored_json.strip().split('\t')))
        self._values = lmap(_parse_line, fp.readlines())
    finally:
        fp.close()
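
# Sketch of the gzipped parameter file layout this reader assumes (all
# content invented for the example); fields are tab-separated and a '!'
# next to the parameter number flags that entry as invalid:
#
#   # ["VAR_A", "VAR_B"]
#   0<TAB>"foo"<TAB>1.5
#   1!<TAB>"bar"<TAB>2.5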
def __init__(self, path):
    activity = Activity('Reading dataset partition file')
    self._fmt = DictFormat()
    try:
        self._tar = tarfile.open(path, 'r:')
        metadata = self._fmt.parse(self._tar.extractfile('Metadata').readlines(),
            key_parser={None: str})
        FilePartitionReader.__init__(self, path, metadata.pop('MaxJobs'))
        self._metadata = metadata
        activity.finish()
    except Exception:
        # Any failure while opening or parsing is reported as an unusable partition file
        raise PartitionReaderError('No valid dataset splitting found in %s' % path)
    # Parsers used to restore typed partition entries from their string form
    self._map_enum2parser = {
        None: str,
        DataSplitter.NEntries: int,
        DataSplitter.Skipped: int,
        DataSplitter.Invalid: parse_bool,
        DataSplitter.Locations: lambda x: parse_list(x, ','),
        DataSplitter.MetadataHeader: parse_json,
        DataSplitter.Metadata: lambda x: parse_json(x.strip("'"))
    }
    (self._cache_nested_fn, self._cache_nested_tar) = (None, None)
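
# Rough sketch of the archive this reader opens (structure inferred from the
# code above; concrete keys other than 'MaxJobs' are invented): an uncompressed
# tar file containing a 'Metadata' member in 'key = value' form, e.g.
#
#   MaxJobs = 10
#   SomeGlobalKey = some_value
#
# plus the per-partition payload that FilePartitionReader resolves on demand.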