def download_wiod2013(storage_folder, years=None, overwrite_existing=False,
                      satellite_urls=None):
    """ Downloads the 2013 wiod release

    Note
    ----
    Currently, pymrio only works with the 2013 release of the wiod tables.
    The more recent 2016 release so far (October 2017) lacks the
    environmental and social extensions.

    Parameters
    ----------
    storage_folder: str, valid path
        Location to store the download, folder will be created if not
        existing. If the file is already present in the folder, the download
        of the specific file will be skipped.

    years: list of int or str, optional
        If years is given only downloads the specific years. This
        only applies to the IO tables because extensions are stored
        by country and not per year.
        The years can be given in 2 or 4 digits.

    overwrite_existing: boolean, optional
        If False, skip download of file already existing in
        the storage folder (default). Set to True to replace
        files.

    satellite_urls : list of str (urls), optional
        Which satellite accounts to download.
        Default (None): all satellite urls defined in WIOD_CONFIG.
        Remove items from this list to only download a subset of extensions.

    Returns
    -------
    Meta data of the downloaded MRIOs
    """
    # None-sentinel instead of WIOD_CONFIG['satellite_urls'] as the literal
    # default: avoids sharing (and potentially mutating) the module-level
    # config list across calls, and picks up config changes at call time.
    if satellite_urls is None:
        satellite_urls = WIOD_CONFIG['satellite_urls']

    os.makedirs(storage_folder, exist_ok=True)

    if isinstance(years, (int, str)):
        years = [years]
    years = years if years else range(1995, 2012)
    # WIOD io-table file names carry two-digit years; normalize
    # 2- or 4-digit input to the last two digits (e.g. 1995 -> '95').
    years = [str(yy).zfill(2)[-2:] for yy in years]

    wiod_web_content = _get_url_datafiles(
        url_db_view=WIOD_CONFIG['url_db_view'],
        url_db_content=WIOD_CONFIG['url_db_content'],
        mrio_regex=WIOD_CONFIG['mrio_regex'])

    # Keep only the io-table urls matching the requested years;
    # satellite urls are taken as-is (they are stored per country).
    restricted_wiod_io_urls = [
        url for url in wiod_web_content.data_urls
        if re.search(r"(wiot)(\d\d)",
                     os.path.basename(url)).group(2) in years
    ]

    meta = MRIOMetaData(location=storage_folder,
                        description='WIOD metadata file for pymrio',
                        name='WIOD',
                        system='IxI',
                        version='data13')

    meta = _download_urls(url_list=restricted_wiod_io_urls + satellite_urls,
                          storage_folder=storage_folder,
                          overwrite_existing=overwrite_existing,
                          meta_handler=meta)

    meta.save()
    return meta
def download_wiod2013(storage_folder, years=None, overwrite_existing=False,
                      satellite_urls=None):
    """ Downloads the 2013 wiod release

    Note
    ----
    Currently, pymrio only works with the 2013 release of the wiod tables.
    The more recent 2016 release so far (October 2017) lacks the
    environmental and social extensions.

    Parameters
    ----------
    storage_folder: str, valid path
        Location to store the download, folder will be created if not
        existing. If the file is already present in the folder, the download
        of the specific file will be skipped.

    years: list of int or str, optional
        If years is given only downloads the specific years. This
        only applies to the IO tables because extensions are stored
        by country and not per year.
        The years can be given in 2 or 4 digits.

    overwrite_existing: boolean, optional
        If False, skip download of file already existing in
        the storage folder (default). Set to True to replace
        files.

    satellite_urls : list of str (urls), optional
        Which satellite accounts to download.
        Default (None): all satellite urls defined in WIOD_CONFIG.
        Remove items from this list to only download a subset of extensions.
    """
    # None-sentinel instead of WIOD_CONFIG['satellite_urls'] as the literal
    # default: avoids sharing the module-level config list across calls.
    if satellite_urls is None:
        satellite_urls = WIOD_CONFIG['satellite_urls']

    os.makedirs(storage_folder, exist_ok=True)

    if isinstance(years, (int, str)):
        years = [years]
    years = years if years else range(1995, 2012)
    # WIOD io-table file names carry two-digit years; normalize
    # 2- or 4-digit input to the last two digits (e.g. 1995 -> '95').
    years = [str(yy).zfill(2)[-2:] for yy in years]

    wiod_web_content = _get_url_datafiles(
        url_db_view=WIOD_CONFIG['url_db_view'],
        url_db_content=WIOD_CONFIG['url_db_content'],
        # raw string: the former plain literal triggered an
        # invalid-escape-sequence warning for \d
        mrio_regex=r'protected.*?wiot\d\d.*?xlsx')

    # Keep only the io-table urls matching the requested years.
    restricted_wiod_io_urls = [
        url for url in wiod_web_content.data_urls
        if re.search(r"(wiot)(\d\d)",
                     os.path.basename(url)).group(2) in years
    ]

    meta = MRIOMetaData(location=storage_folder,
                        description='WIOD metadata file for pymrio',
                        name='WIOD',
                        system='ixi',
                        version='data13')

    meta = _download_urls(url_list=restricted_wiod_io_urls + satellite_urls,
                          storage_folder=storage_folder,
                          overwrite_existing=overwrite_existing,
                          meta_handler=meta)

    meta.save()
    return meta
def download_oecd(storage_folder, version='v2018',
                  years=None, overwrite_existing=False):
    """ Downloads the OECD ICIO tables

    Parameters
    ----------
    storage_folder: str, valid path
        Location to store the download, folder will be created if not
        existing. If the file is already present in the folder, the download
        of the specific file will be skipped.

    version: string or int, optional
        Two versions of the ICIO OECD tables are currently available:
        Version >v2016<: based on >SNA93< / >ISIC Rev.3<
        Version >v2018<: based on >SNA08< / >ISIC Rev.4< (default)
        Pass any of the identifiers between >< to specify the version
        to be downloaded.

    years: list of int (4 digit) or str, optional
        If years is given only downloads the specific years.

    overwrite_existing: boolean, optional
        If False, skip download of file already existing in
        the storage folder (default). Set to True to replace
        files.

    Returns
    -------
    Meta data of the downloaded MRIOs
    """
    # Implementation Notes:
    # For OECD the generic download routines can not be used
    # b/c the 2018 version is coded as aspx fileview property
    # in the html source - instead a hardcoded dict is used
    # to select the url for download

    os.makedirs(storage_folder, exist_ok=True)

    if type(version) is int:
        version = str(version)

    # Normalize any of the documented identifiers to the internal tag.
    if ('8' in version) or ('4' in version):
        version = 'v2018'
    elif ('3' in version) or ('6' in version):
        version = 'v2016'
    else:
        raise ValueError('Version not understood')

    if isinstance(years, (int, str)):
        years = [years]
    if not years:
        if version == 'v2018':
            years = range(2005, 2016)
        else:
            years = range(1995, 2012)
    years = [str(yy) for yy in years]

    meta = MRIOMetaData(location=storage_folder,
                        description='OECD-ICIO download',
                        name='OECD-ICIO',
                        system='IxI',
                        version=version)

    oecd_webcontent = requests.get(OECD_CONFIG['url_db_view']).text
    for yy in years:
        if yy not in OECD_CONFIG['datafiles'][version].keys():
            raise ValueError(
                'Datafile for {} not specified or available.'.format(yy))
        # The v2016 page only lists file basenames; v2018 lists full urls.
        if version == 'v2016':
            url_to_check = os.path.basename(
                OECD_CONFIG['datafiles'][version][yy])
        else:
            url_to_check = OECD_CONFIG['datafiles'][version][yy]

        if url_to_check not in oecd_webcontent:
            # Fixed format string: previously read 'for {} ()' which
            # silently dropped the url argument, and the adjacent string
            # literals joined to 'currentOECD'.
            raise ValueError(
                'Specified datafile for {} ({}) not found in the current '
                'OECD ICIO webpage.\n'
                'Perhaps filenames have been changed - update OECD_CONFIG '
                'to the new filenames'.format(yy, url_to_check))

        filename = 'ICIO' + version.lstrip('v') + '_' + yy + '.zip'
        storage_file = os.path.join(storage_folder, filename)

        # Honor overwrite_existing - the parameter was documented
        # but previously ignored and every file was re-downloaded.
        if os.path.exists(storage_file) and not overwrite_existing:
            continue

        req = requests.get(OECD_CONFIG['datafiles'][version][yy],
                           stream=True)
        with open(storage_file, 'wb') as lf:
            for chunk in req.iter_content(1024 * 5):
                lf.write(chunk)

        meta._add_fileio('Downloaded {} to {}'.format(
            OECD_CONFIG['datafiles'][version][yy], filename))

    meta.save()
    return meta
def load(path, include_core=True, path_in_arc=''):
    """ Loads a IOSystem or Extension previously saved with pymrio

    This function can be used to load a IOSystem or Extension specified in a
    metadata file (as defined in DEFAULT_FILE_NAMES['filepara']:
    metadata.json)

    DataFrames (tables) are loaded from text or binary pickle files.
    For the latter, the extension .pkl or .pickle is assumed, in all other
    case the tables are assumed to be in .txt format.

    Parameters
    ----------
    path : pathlib.Path or string
        Path or path with para file name for the data to load. This must
        either point to the directory containing the uncompressed data or
        the location of a compressed zip file with the data. In the later
        case the parameter 'path_in_arc' need to be specific to further
        indicate the location of the data in the compressed file.

    include_core : boolean, optional
        If False the load method does not include A, L and Z matrix. This
        significantly reduces the required memory if the purpose is only
        to analyse the results calculated beforehand.

    path_in_arc: string, optional
        Path to the data in the zip file (where the fileparameters file is
        located). path_in_arc must be given without leading dot and slash;
        thus to point to the data in the root of the compressed file pass
        '', for data in e.g. the folder 'emissions' pass 'emissions/'. Only
        used if parameter 'path' points to an compressed zip file.

    Returns
    -------
    IOSystem or Extension class depending on systemtype in the json file
    None in case of errors
    """
    path = Path(path)

    if not path.exists():
        raise ReadError('Given path does not exist')
        # NOTE(review): the former 'return None' statements after each
        # raise were unreachable and have been removed.

    file_para = get_file_para(path=path, path_in_arc=path_in_arc)

    if file_para.content['systemtype'] == GENERIC_NAMES['iosys']:
        if zipfile.is_zipfile(str(path)):
            ret_system = IOSystem(meta=MRIOMetaData(
                location=path,
                path_in_arc=os.path.join(file_para.folder,
                                         DEFAULT_FILE_NAMES['metadata'])))
            ret_system.meta._add_fileio(
                "Loaded IO system from {} - {}".format(path, path_in_arc))
        else:
            ret_system = IOSystem(meta=MRIOMetaData(
                location=path / DEFAULT_FILE_NAMES['metadata']))
            ret_system.meta._add_fileio(
                "Loaded IO system from {}".format(path))

    elif file_para.content['systemtype'] == GENERIC_NAMES['ext']:
        ret_system = Extension(file_para.content['name'])

    else:
        raise ReadError('Type of system not defined in the file parameters')

    for key in file_para.content['files']:
        # Bug fix: this previously read 'key not in', which - with
        # include_core=False - skipped everything EXCEPT the core matrices,
        # the exact opposite of the documented behavior (compare the
        # ini-based loader, which correctly skips A, L and Z).
        if not include_core and key in ['A', 'L', 'Z']:
            continue

        file_name = file_para.content['files'][key]['name']
        nr_index_col = file_para.content['files'][key]['nr_index_col']
        nr_header = file_para.content['files'][key]['nr_header']

        _index_col = list(range(int(nr_index_col)))
        _header = list(range(int(nr_header)))
        # pandas expects a scalar, not [0], for a single index/header row
        _index_col = 0 if _index_col == [0] else _index_col
        _header = 0 if _header == [0] else _header

        if key == 'FY':   # Legacy code to read data saved with version < 0.4
            key = 'F_Y'

        if zipfile.is_zipfile(str(path)):
            full_file_name = os.path.join(file_para.folder, file_name)
            logging.info('Load data from {}'.format(full_file_name))

            with zipfile.ZipFile(file=str(path)) as zf:
                if (os.path.splitext(str(full_file_name))[1] == '.pkl' or
                        os.path.splitext(str(full_file_name))[1]
                        == '.pickle'):
                    setattr(ret_system, key,
                            pd.read_pickle(zf.open(full_file_name)))
                else:
                    setattr(ret_system, key,
                            pd.read_csv(zf.open(full_file_name),
                                        index_col=_index_col,
                                        header=_header,
                                        sep='\t'))
        else:
            full_file_name = path / file_name
            logging.info('Load data from {}'.format(full_file_name))

            if (os.path.splitext(str(full_file_name))[1] == '.pkl' or
                    os.path.splitext(str(full_file_name))[1] == '.pickle'):
                setattr(ret_system, key, pd.read_pickle(full_file_name))
            else:
                setattr(ret_system, key,
                        pd.read_csv(full_file_name,
                                    index_col=_index_col,
                                    header=_header,
                                    sep='\t'))

    return ret_system
def load_test():
    """ Returns a small test MRIO

    The test system contains:

        - six regions,
        - seven sectors,
        - seven final demand categories
        - two extensions (emissions and factor_inputs)

    The test system only contains Z, Y, F, F_Y. The rest can be calculated
    with calc_all()

    Notes
    -----

        For development: This function can be used as an example of
        how to parse an IOSystem

    Returns
    -------

    IOSystem

    """
    # row_header:
    #    number of rows containing header on the top of the file (for the
    #    columns)
    # col_header:
    #    number of cols containing header on the beginning of the file (for
    #    the rows)
    # row and columns header contain also the row for the units, this are
    # afterwards saved as a extra dataframe
    #
    # unit_col: column containing the unit for the table
    file_data = collections.namedtuple(
        'file_data', ['file_name', 'row_header', 'col_header', 'unit_col'])
    # file names and header specs of the system
    test_system = dict(
        Z=file_data(file_name='trade_flows_Z.txt',
                    row_header=2, col_header=3, unit_col=2),
        Y=file_data(file_name='finald_demand_Y.txt',
                    row_header=2, col_header=3, unit_col=2),
        fac=file_data(file_name='factor_input.txt',
                      row_header=2, col_header=2, unit_col=1),
        emissions=file_data(file_name='emissions.txt',
                            row_header=2, col_header=3, unit_col=2),
        FDemissions=file_data(file_name='FDemissions.txt',
                              row_header=2, col_header=3, unit_col=2),
        )

    meta_rec = MRIOMetaData(location=PYMRIO_PATH['test_mrio'])

    # read the data into a dicts as pandas.DataFrame
    data = {key: pd.read_csv(
        os.path.join(PYMRIO_PATH['test_mrio'],
                     test_system[key].file_name),
        index_col=list(range(test_system[key].col_header)),
        header=list(range(test_system[key].row_header)),
        sep='\t')
        for key in test_system}

    meta_rec._add_fileio('Load test_mrio from {}'.format(
        PYMRIO_PATH['test_mrio']))

    # distribute the data into dicts which can be passed to the IOSystem.
    # To do so, some preps are necessary:
    # - name the header data
    # - save unit in own dataframe and drop unit from the tables
    trade = dict(Z=data['Z'], Y=data['Y'])
    factor_inputs = dict(F=data['fac'])
    emissions = dict(F=data['emissions'], F_Y=data['FDemissions'])

    trade['Z'].index.names = ['region', 'sector', 'unit']
    trade['Z'].columns.names = ['region', 'sector']
    # unit is carried as the innermost index level; extract it into its
    # own one-column DataFrame and drop the level from the table itself
    trade['unit'] = (pd.DataFrame(trade['Z'].iloc[:, 0]
                     .reset_index(level='unit').unit))
    trade['Z'].reset_index(level='unit', drop=True, inplace=True)

    trade['Y'].index.names = ['region', 'sector', 'unit']
    trade['Y'].columns.names = ['region', 'category']
    trade['Y'].reset_index(level='unit', drop=True, inplace=True)

    factor_inputs['name'] = 'Factor Inputs'
    factor_inputs['F'].index.names = ['inputtype', 'unit', ]
    factor_inputs['F'].columns.names = ['region', 'sector']
    factor_inputs['unit'] = (pd.DataFrame(factor_inputs['F'].iloc[:, 0]
                             .reset_index(level='unit').unit))
    factor_inputs['F'].reset_index(level='unit', drop=True, inplace=True)

    emissions['name'] = 'Emissions'
    emissions['F'].index.names = ['stressor', 'compartment', 'unit', ]
    emissions['F'].columns.names = ['region', 'sector']
    emissions['unit'] = (pd.DataFrame(emissions['F'].iloc[:, 0]
                         .reset_index(level='unit').unit))
    emissions['F'].reset_index(level='unit', drop=True, inplace=True)
    emissions['F_Y'].index.names = ['stressor', 'compartment', 'unit']
    emissions['F_Y'].columns.names = ['region', 'category']
    emissions['F_Y'].reset_index(level='unit', drop=True, inplace=True)

    # the population data - this is optional (None can be passed if no data
    # is available)
    popdata = pd.read_csv(
        os.path.join(PYMRIO_PATH['test_mrio'], './population.txt'),
        index_col=0, sep='\t').astype(float)

    return IOSystem(Z=data['Z'],
                    Y=data['Y'],
                    unit=trade['unit'],
                    meta=meta_rec,
                    factor_inputs=factor_inputs,
                    emissions=emissions,
                    population=popdata)
def download_eora26(storage_folder, years=None, prices=['bp'],
                    overwrite_existing=False):
    """ Downloads Eora 26

    Parameters
    ----------
    storage_folder: str, valid path
        Location to store the download, folder will be created if not
        existing. If the file is already present in the folder, the download
        of the specific file will be skipped.

    years: list of int or str, optional
        If years is given only downloads the specific years. This
        only applies to the IO tables because extensions are stored
        by country and not per year.
        The years can be given in 2 or 4 digits.

    prices: list of str
        If bp (default), download basic price tables.
        If pp, download purchaser prices. ['bp', 'pp'] possible.

    overwrite_existing: boolean, optional
        If False, skip download of file already existing in
        the storage folder (default). Set to True to replace
        files.
    """
    os.makedirs(storage_folder, exist_ok=True)

    print("The Eora MRIO is free for academic (university or grant-funded) "
          "work at degree-granting institutions. "
          "All other uses require a data license before the "
          "results are shared.\n\n "
          "When using Eora, the Eora authors ask you cite "
          "these publications: \n\n "
          "Lenzen, M., Kanemoto, K., Moran, D., Geschke, A. "
          "Mapping the Structure of the World Economy (2012). "
          "Env. Sci. Tech. 46(15) pp 8374-8381. DOI:10.1021/es300171x \n\n "
          "Lenzen, M., Moran, D., Kanemoto, K., Geschke, A. (2013) "
          "Building Eora: A Global Multi-regional Input-Output Database "
          "at High Country and Sector Resolution, Economic Systems Research, "
          " 25:1, 20-49, DOI:10.1080/09535314.2013.769938\n\n ")

    agree = input("Do you agree with these conditions [y/n]: ")
    if agree.lower() != 'y':
        raise ValueError("Download of Eora not possible")

    if isinstance(years, (int, str)):
        years = [years]
    years = years if years else range(1995, 2012)
    # Eora26 archive names carry four-digit years. The docstring allows 2 or
    # 4 digit input, but the former zfill(4) turned e.g. 95 into '0095',
    # which never matched a data file. Expand short years explicitly;
    # 90-99 are taken as 19xx, everything below as 20xx (Eora starts 1990).
    _normalized = []
    for yy in years:
        yy = str(yy)
        if len(yy) < 4:
            yy = yy.zfill(2)
            yy = ('19' + yy) if int(yy) >= 90 else ('20' + yy)
        _normalized.append(yy)
    years = _normalized

    if isinstance(prices, str):
        prices = [prices]

    # Accepting the license on the Eora page sets a session cookie which is
    # required for all subsequent data requests.
    eora_cookie_str = requests.post(
        EORA26_CONFIG['url_db_content'],
        data={'licenseagree': 'true'}
    ).headers['Set-Cookie']
    _cookie_content = eora_cookie_str.split(';')[0].split('=')
    eora_access_cookie = {_cookie_content[0]: _cookie_content[1]}

    eora26_web_content = _get_url_datafiles(
        url_db_view=EORA26_CONFIG['url_db_view'],
        url_db_content=EORA26_CONFIG['url_db_content'],
        # raw string: avoids invalid-escape-sequence warnings for \d
        mrio_regex=r'Computations.*?Eora26_\d\d\d\d_.*?.zip',
        access_cookie=eora_access_cookie)

    # last version tag shown on the page, without the surrounding >...<
    version_number = re.findall(r">v\d+\.\d+<",
                                eora26_web_content.raw_text)[-1][1:-1]

    # keep only the files matching the requested years and price types
    restricted_eora_urls = [
        url for url in eora26_web_content.data_urls
        if re.search(r"(Eora26_)(\d\d\d\d)",
                     os.path.basename(url)).group(2) in years
        and re.search(r"(Eora26_\d\d\d\d_)(..)",
                      os.path.basename(url)).group(2) in prices
    ]

    meta = MRIOMetaData(location=storage_folder,
                        description='Eora metadata file for pymrio',
                        name='Eora',
                        system='ixi',
                        version=version_number)

    meta = _download_urls(url_list=restricted_eora_urls,
                          storage_folder=storage_folder,
                          overwrite_existing=overwrite_existing,
                          meta_handler=meta,
                          access_cookie=eora_access_cookie)

    meta.save()
    return meta
def load(path, include_core=True):
    """ Loads a IOSystem or Extension previously saved with pymrio

    This function can be used to load a IOSystem or Extension specified in a
    ini file. DataFrames (tables) are loaded from text or binary pickle
    files. For the latter, the extension .pkl or .pickle is assumed, in all
    other case the tables are assumed to be in .txt format.

    Parameters
    ----------
    path : string
        path or ini file name for the data to load

    include_core : boolean, optional
        If False the load method does not include A, L and Z matrix. This
        significantly reduces the required memory if the purpose is only
        to analyse the results calculated beforehand.

    Returns
    -------
    IOSystem or Extension class depending on systemtype in the json file
    None in case of errors
    """
    # NOTE(review): the former 'return None' statements following each
    # raise below were unreachable and have been removed.
    path = path.rstrip('\\')
    path = os.path.abspath(path)

    if not os.path.exists(path):
        raise ReadError('Given path does not exist')

    para_file_path = os.path.join(path, DEFAULT_FILE_NAMES['filepara'])
    if not os.path.isfile(para_file_path):
        raise ReadError('No file parameter file found')

    with open(para_file_path, 'r') as pf:
        file_para = json.load(pf)

    if file_para['systemtype'] == GENERIC_NAMES['iosys']:
        meta_file_path = os.path.join(path, DEFAULT_FILE_NAMES['metadata'])
        ret_system = IOSystem(meta=MRIOMetaData(location=meta_file_path))
        ret_system.meta._add_fileio(
            "Loaded IO system from {}".format(path))

    elif file_para['systemtype'] == GENERIC_NAMES['ext']:
        ret_system = Extension(file_para['name'])

    else:
        raise ReadError('Type of system not defined in the file parameters')

    for key in file_para['files']:
        # skip the core matrices on request to save memory
        if not include_core and key in ['A', 'L', 'Z']:
            continue

        file_name = file_para['files'][key]['name']
        full_file_name = os.path.join(path, file_name)
        nr_index_col = file_para['files'][key]['nr_index_col']
        nr_header = file_para['files'][key]['nr_header']

        logging.info('Load data from {}'.format(full_file_name))

        _index_col = list(range(int(nr_index_col)))
        _header = list(range(int(nr_header)))
        # pandas expects a scalar, not [0], for a single index/header row
        if _index_col == [0]:
            _index_col = 0
        if _header == [0]:
            _header = 0

        if (os.path.splitext(full_file_name)[1] == '.pkl' or
                os.path.splitext(full_file_name)[1] == '.pickle'):
            setattr(ret_system, key, pd.read_pickle(full_file_name))
        else:
            # pd.read_table is deprecated/removed; read_csv with sep='\t'
            # is the equivalent (read_table defaulted to tab separation)
            setattr(ret_system, key,
                    pd.read_csv(full_file_name,
                                index_col=_index_col,
                                header=_header,
                                sep='\t'))

    return ret_system
def load_test():
    """ Returns a small test MRIO

    The test system contains:

        - six regions,
        - seven sectors,
        - seven final demand categories
        - two extensions (emissions and factor_inputs)

    The test system only contains Z, Y, F, FY. The rest can be calculated
    with calc_all()

    Notes
    -----

        For development: This function can be used as an example of
        how to parse an IOSystem

    Returns
    -------

    IOSystem

    """
    # row_header:
    #    number of rows containing header on the top of the file (for the
    #    columns)
    # col_header:
    #    number of cols containing header on the beginning of the file (for
    #    the rows)
    # row and columns header contain also the row for the units, this are
    # afterwards saved as a extra dataframe
    #
    # unit_col: column containing the unit for the table
    file_data = collections.namedtuple(
        'file_data', ['file_name', 'row_header', 'col_header', 'unit_col'])
    # file names and header specs of the system
    test_system = dict(
        Z=file_data(file_name='trade_flows_Z.txt',
                    row_header=2, col_header=3, unit_col=2),
        Y=file_data(file_name='finald_demand_Y.txt',
                    row_header=2, col_header=3, unit_col=2),
        fac=file_data(file_name='factor_input.txt',
                      row_header=2, col_header=2, unit_col=1),
        emissions=file_data(file_name='emissions.txt',
                            row_header=2, col_header=3, unit_col=2),
        FDemissions=file_data(file_name='FDemissions.txt',
                              row_header=2, col_header=3, unit_col=2),
        )

    meta_rec = MRIOMetaData(location=PYMRIO_PATH['test_mrio'])

    # read the data into a dicts as pandas.DataFrame
    # pd.read_table is deprecated/removed; read_csv with sep='\t' is the
    # equivalent (read_table defaulted to tab separation)
    data = {key: pd.read_csv(
        os.path.join(PYMRIO_PATH['test_mrio'],
                     test_system[key].file_name),
        index_col=list(range(test_system[key].col_header)),
        header=list(range(test_system[key].row_header)),
        sep='\t')
        for key in test_system}

    meta_rec._add_fileio('Load test_mrio from {}'.format(
        PYMRIO_PATH['test_mrio']))

    # distribute the data into dicts which can be passed to the IOSystem.
    # To do so, some preps are necessary:
    # - name the header data
    # - save unit in own dataframe and drop unit from the tables
    trade = dict(Z=data['Z'], Y=data['Y'])
    factor_inputs = dict(F=data['fac'])
    emissions = dict(F=data['emissions'], FY=data['FDemissions'])

    trade['Z'].index.names = ['region', 'sector', 'unit']
    trade['Z'].columns.names = ['region', 'sector']
    # unit is carried as the innermost index level; extract it into its
    # own one-column DataFrame and drop the level from the table itself
    trade['unit'] = (pd.DataFrame(trade['Z'].iloc[:, 0]
                     .reset_index(level='unit').unit))
    trade['Z'].reset_index(level='unit', drop=True, inplace=True)

    trade['Y'].index.names = ['region', 'sector', 'unit']
    trade['Y'].columns.names = ['region', 'category']
    trade['Y'].reset_index(level='unit', drop=True, inplace=True)

    factor_inputs['name'] = 'Factor Inputs'
    factor_inputs['F'].index.names = ['inputtype', 'unit', ]
    factor_inputs['F'].columns.names = ['region', 'sector']
    factor_inputs['unit'] = (pd.DataFrame(factor_inputs['F'].iloc[:, 0]
                             .reset_index(level='unit').unit))
    factor_inputs['F'].reset_index(level='unit', drop=True, inplace=True)

    emissions['name'] = 'Emissions'
    emissions['F'].index.names = ['stressor', 'compartment', 'unit', ]
    emissions['F'].columns.names = ['region', 'sector']
    emissions['unit'] = (pd.DataFrame(emissions['F'].iloc[:, 0]
                         .reset_index(level='unit').unit))
    emissions['F'].reset_index(level='unit', drop=True, inplace=True)
    emissions['FY'].index.names = ['stressor', 'compartment', 'unit']
    emissions['FY'].columns.names = ['region', 'category']
    emissions['FY'].reset_index(level='unit', drop=True, inplace=True)

    # the population data - this is optional (None can be passed if no data
    # is available)
    popdata = pd.read_csv(
        os.path.join(PYMRIO_PATH['test_mrio'], './population.txt'),
        index_col=0, sep='\t').astype(float)

    return IOSystem(Z=data['Z'],
                    Y=data['Y'],
                    unit=trade['unit'],
                    meta=meta_rec,
                    factor_inputs=factor_inputs,
                    emissions=emissions,
                    population=popdata)
def download_exiobase3(
    storage_folder,
    years=None,
    system=None,
    overwrite_existing=False,
    doi="10.5281/zenodo.3583070",
):
    """ Downloads EXIOBASE 3 files from Zenodo

    Since version 3.7 EXIOBASE gets published on the Zenodo
    scientific data repository.  This function download the lastest
    available version from Zenodo, for previous version the
    corresponding DOI (parameter 'doi') needs to specified.

    Version 3.7: 10.5281/zenodo.3583071
    Version 3.8: 10.5281/zenodo.4277368

    Parameters
    ----------
    storage_folder: str, valid path
        Location to store the download, folder will be created if
        not existing. If the file is already present in the folder,
        the download of the specific file will be skipped.

    years: list of int or str, optional
        If years is given only downloads the specific years (be default
        all years will be downloaded).
        Years must be given in 4 digits.

    system: string or list of strings, optional
        'pxp': download product by product classification
        'ixi': download industry by industry classification
        ['ixi', 'pxp'] or None (default): download both classifications

    overwrite_existing: boolean, optional
        If False, skip download of file already existing in
        the storage folder (default). Set to True to replace
        files.

    doi: string, optional.
        The EXIOBASE DOI to be downloaded. By default that resolves
        to the DOI citing the latest available version. For the
        previous DOI see the block 'Versions' on the right hand side
        of https://zenodo.org/record/4277368.

    Returns
    -------
    Meta data of the downloaded MRIOs
    """
    os.makedirs(storage_folder, exist_ok=True)

    doi_url = "https://doi.org/" + doi
    # Build a per-call config copy. Previously the module-level
    # EXIOBASE3_CONFIG dict was mutated in place, so a custom doi leaked
    # into every later call in the same session.
    exio3_config = dict(EXIOBASE3_CONFIG, url_db_view=doi_url)

    exio_web_content = _get_url_datafiles(**exio3_config)

    # [12] / [pi]: the former classes [1,2] / [p,i] also matched a
    # literal comma; the intent is clearly digits 1/2 and letters p/i.
    file_pattern = re.compile(r"IOT_[12]\d\d\d_[pi]x[pi]\.zip")
    available_files = [
        file_pattern.search(url).group() for url in exio_web_content.data_urls
    ]
    available_years = {filename.split("_")[1] for filename in available_files}
    if isinstance(years, (int, str)):
        years = [years]
    years = years if years else list(available_years)

    system = system if system else ["pxp", "ixi"]
    if isinstance(system, str):
        system = [system]

    meta = MRIOMetaData(
        location=storage_folder,
        description="EXIOBASE3 metadata file for pymrio",
        name="EXIO3",
        system=",".join(system),
        version=doi,
    )

    requested_urls = []
    for file_specs in itertools.product(years, system):
        # find the file matching both the year and the system spec
        filename = list(
            filter(
                lambda x: str(file_specs[0]) in x and str(file_specs[1]) in x,
                available_files,
            ))
        if not filename:
            meta._add_fileio(
                "Could not find EXIOBASE 3 source file with >{}< and >{}<".
                format(file_specs[0], file_specs[1]))
            continue
        requested_urls += [
            u for u in exio_web_content.data_urls for f in filename if f in u
        ]

    meta = _download_urls(
        url_list=requested_urls,
        storage_folder=storage_folder,
        overwrite_existing=overwrite_existing,
        meta_handler=meta,
    )

    meta.save()
    return meta
def load_test():
    """Returns a small test MRIO

    The test system contains:

        - six regions,
        - seven sectors,
        - seven final demand categories
        - two extensions (emissions and factor_inputs)

    The test system only contains Z, Y, F, F_Y. The rest can be calculated
    with calc_all()

    Notes
    -----

        For development: This function can be used as an example of
        how to parse an IOSystem

    Returns
    -------

    IOSystem

    """
    # row_header:
    #    number of rows containing header on the top of the file (for the
    #    columns)
    # col_header:
    #    number of cols containing header on the beginning of the file (for
    #    the rows)
    # row and columns header contain also the row for the units, this are
    # afterwards saved as a extra dataframe
    #
    # unit_col: column containing the unit for the table
    file_data = collections.namedtuple(
        "file_data", ["file_name", "row_header", "col_header", "unit_col"])
    # file names and header specs of the system
    test_system = dict(
        Z=file_data(file_name="trade_flows_Z.txt",
                    row_header=2, col_header=3, unit_col=2),
        Y=file_data(file_name="finald_demand_Y.txt",
                    row_header=2, col_header=3, unit_col=2),
        fac=file_data(file_name="factor_input.txt",
                      row_header=2, col_header=2, unit_col=1),
        emissions=file_data(file_name="emissions.txt",
                            row_header=2, col_header=3, unit_col=2),
        FDemissions=file_data(file_name="FDemissions.txt",
                              row_header=2, col_header=3, unit_col=2),
    )

    meta_rec = MRIOMetaData(location=PYMRIO_PATH["test_mrio"])

    # read the data into a dicts as pandas.DataFrame
    data = {
        key: pd.read_csv(
            os.path.join(PYMRIO_PATH["test_mrio"],
                         test_system[key].file_name),
            index_col=list(range(test_system[key].col_header)),
            header=list(range(test_system[key].row_header)),
            sep="\t",
        )
        for key in test_system
    }

    meta_rec._add_fileio("Load test_mrio from {}".format(
        PYMRIO_PATH["test_mrio"]))

    # distribute the data into dicts which can be passed to the IOSystem.
    # To do so, some preps are necessary:
    # - name the header data
    # - save unit in own dataframe and drop unit from the tables
    trade = dict(Z=data["Z"], Y=data["Y"])
    factor_inputs = dict(F=data["fac"])
    emissions = dict(F=data["emissions"], F_Y=data["FDemissions"])

    trade["Z"].index.names = ["region", "sector", "unit"]
    trade["Z"].columns.names = ["region", "sector"]
    # unit is carried as the innermost index level; extract it into its
    # own one-column DataFrame and drop the level from the table itself
    trade["unit"] = pd.DataFrame(
        trade["Z"].iloc[:, 0].reset_index(level="unit").unit)
    trade["Z"].reset_index(level="unit", drop=True, inplace=True)

    trade["Y"].index.names = ["region", "sector", "unit"]
    trade["Y"].columns.names = ["region", "category"]
    trade["Y"].reset_index(level="unit", drop=True, inplace=True)

    factor_inputs["name"] = "Factor Inputs"
    factor_inputs["F"].index.names = [
        "inputtype",
        "unit",
    ]
    factor_inputs["F"].columns.names = ["region", "sector"]
    factor_inputs["unit"] = pd.DataFrame(
        factor_inputs["F"].iloc[:, 0].reset_index(level="unit").unit)
    factor_inputs["F"].reset_index(level="unit", drop=True, inplace=True)

    emissions["name"] = "Emissions"
    emissions["F"].index.names = [
        "stressor",
        "compartment",
        "unit",
    ]
    emissions["F"].columns.names = ["region", "sector"]
    emissions["unit"] = pd.DataFrame(
        emissions["F"].iloc[:, 0].reset_index(level="unit").unit)
    emissions["F"].reset_index(level="unit", drop=True, inplace=True)
    emissions["F_Y"].index.names = ["stressor", "compartment", "unit"]
    emissions["F_Y"].columns.names = ["region", "category"]
    emissions["F_Y"].reset_index(level="unit", drop=True, inplace=True)

    # the population data - this is optional (None can be passed if no data
    # is available)
    popdata = pd.read_csv(
        os.path.join(PYMRIO_PATH["test_mrio"], "./population.txt"),
        index_col=0,
        sep="\t",
    ).astype(float)

    return IOSystem(
        Z=data["Z"],
        Y=data["Y"],
        unit=trade["unit"],
        meta=meta_rec,
        factor_inputs=factor_inputs,
        emissions=emissions,
        population=popdata,
    )