Пример #1
0
def download_wiod2013(storage_folder,
                      years=None,
                      overwrite_existing=False,
                      satellite_urls=None):
    """ Downloads the 2013 wiod release

    Note
    ----
    Currently, pymrio only works with the 2013 release of the wiod tables. The
    more recent 2016 release so far (October 2017) lacks the environmental and
    social extensions.


    Parameters
    ----------
    storage_folder: str, valid path
        Location to store the download, folder will be created if
        not existing. If the file is already present in the folder,
        the download of the specific file will be skipped.


    years: list of int or str, optional
        If years is given only downloads the specific years. This
        only applies to the IO tables because extensions are stored
        by country and not per year.
        The years can be given in 2 or 4 digits.

    overwrite_existing: boolean, optional
        If False, skip download of file already existing in
        the storage folder (default). Set to True to replace
        files.

    satellite_urls : list of str (urls), optional
        Which satellite accounts to download. By default (None) all
        satellite urls defined in WIOD_CONFIG are downloaded. Pass a
        subset of that list to only download some extensions.

    Returns
    -------

    Meta data of the downloaded MRIOs

    """

    os.makedirs(storage_folder, exist_ok=True)

    if satellite_urls is None:
        # Resolve the default at call time instead of binding the config
        # list as a default argument (default arguments are evaluated only
        # once at import time, so later changes to WIOD_CONFIG would be
        # silently ignored).
        satellite_urls = WIOD_CONFIG['satellite_urls']

    if isinstance(years, (int, str)):
        years = [years]
    years = years if years else range(1995, 2012)
    # WIOD file names encode the year with the last two digits (e.g. wiot95)
    years = [str(yy).zfill(2)[-2:] for yy in years]

    wiod_web_content = _get_url_datafiles(
        url_db_view=WIOD_CONFIG['url_db_view'],
        url_db_content=WIOD_CONFIG['url_db_content'],
        mrio_regex=WIOD_CONFIG['mrio_regex'])

    # keep only the IO table urls for the requested years
    restricted_wiod_io_urls = [
        url for url in wiod_web_content.data_urls
        if re.search(r"(wiot)(\d\d)", os.path.basename(url)).group(2) in years
    ]

    meta = MRIOMetaData(location=storage_folder,
                        description='WIOD metadata file for pymrio',
                        name='WIOD',
                        system='IxI',
                        version='data13')

    meta = _download_urls(url_list=restricted_wiod_io_urls + satellite_urls,
                          storage_folder=storage_folder,
                          overwrite_existing=overwrite_existing,
                          meta_handler=meta)

    meta.save()
    return meta
Пример #2
0
def download_wiod2013(storage_folder, years=None, overwrite_existing=False,
                      satellite_urls=None):
    """ Downloads the 2013 wiod release

    Note
    ----
    Currently, pymrio only works with the 2013 release of the wiod tables. The
    more recent 2016 release so far (October 2017) lacks the environmental and
    social extensions.


    Parameters
    ----------
    storage_folder: str, valid path
        Location to store the download, folder will be created if
        not existing. If the file is already present in the folder,
        the download of the specific file will be skipped.


    years: list of int or str, optional
        If years is given only downloads the specific years. This
        only applies to the IO tables because extensions are stored
        by country and not per year.
        The years can be given in 2 or 4 digits.

    overwrite_existing: boolean, optional
        If False, skip download of file already existing in
        the storage folder (default). Set to True to replace
        files.

    satellite_urls : list of str (urls), optional
        Which satellite accounts to download. By default (None) all
        satellite urls defined in WIOD_CONFIG are downloaded. Pass a
        subset of that list to only download some extensions.

    Returns
    -------

    Meta data of the downloaded MRIOs

    """

    os.makedirs(storage_folder, exist_ok=True)

    if satellite_urls is None:
        # Resolve the default at call time; binding the config list as a
        # default argument would freeze it at import time.
        satellite_urls = WIOD_CONFIG['satellite_urls']

    if isinstance(years, (int, str)):
        years = [years]
    years = years if years else range(1995, 2012)
    # WIOD file names encode the year with the last two digits (e.g. wiot95)
    years = [str(yy).zfill(2)[-2:] for yy in years]

    wiod_web_content = _get_url_datafiles(
        url_db_view=WIOD_CONFIG['url_db_view'],
        url_db_content=WIOD_CONFIG['url_db_content'],
        # raw string: \d in a plain string is an invalid escape sequence
        mrio_regex=r'protected.*?wiot\d\d.*?xlsx')

    # keep only the IO table urls for the requested years
    restricted_wiod_io_urls = [
        url for url in wiod_web_content.data_urls
        if re.search(r"(wiot)(\d\d)", os.path.basename(url)).group(2) in years
    ]

    meta = MRIOMetaData(location=storage_folder,
                        description='WIOD metadata file for pymrio',
                        name='WIOD',
                        system='ixi',
                        version='data13')

    meta = _download_urls(url_list=restricted_wiod_io_urls + satellite_urls,
                          storage_folder=storage_folder,
                          overwrite_existing=overwrite_existing,
                          meta_handler=meta)

    meta.save()
    return meta
Пример #3
0
def download_oecd(storage_folder,
                  version='v2018',
                  years=None,
                  overwrite_existing=False):
    """ Downloads the OECD ICIO tables

    Parameters
    ----------
    storage_folder: str, valid path
        Location to store the download, folder will be created if
        not existing. If the file is already present in the folder,
        the download of the specific file will be skipped.

    version: string or int, optional
        Two versions of the ICIO OECD tables are currently available:
        Version >v2016<: based on >SNA93< / >ISIC Rev.3<
        Version >v2018<: based on >SNA08< / >ISIC Rev.4< (default)
        Pass any of the identifiers between >< to specify the
        version to be downloaded.

    years: list of int (4 digit) or str, optional
        If years is given only downloads the specific years.

    overwrite_existing: boolean, optional
        If False, skip download of file already existing in
        the storage folder (default). Set to True to replace
        files.

    Returns
    -------

    Meta data of the downloaded MRIOs

    """
    # Implementation Notes:
    # For OECD the generic download routines can not be used
    # b/c the 2018 version is coded as aspx fileview property
    # in the html source - instead a hardcoded dict is used
    # to select the url for download

    os.makedirs(storage_folder, exist_ok=True)

    if isinstance(version, int):
        version = str(version)

    # Map any documented identifier (SNA/ISIC revision or release year)
    # onto the two canonical version strings.
    if ('8' in version) or ('4' in version):
        version = 'v2018'
    elif ('3' in version) or ('6' in version):
        version = 'v2016'
    else:
        raise ValueError('Version not understood')

    if isinstance(years, (int, str)):
        years = [years]
    if not years:
        years = range(2005, 2016) if version == 'v2018' else range(1995, 2012)
    years = [str(yy) for yy in years]

    meta = MRIOMetaData(location=storage_folder,
                        description='OECD-ICIO download',
                        name='OECD-ICIO',
                        system='IxI',
                        version=version)

    oecd_webcontent = requests.get(OECD_CONFIG['url_db_view']).text
    for yy in years:
        if yy not in OECD_CONFIG['datafiles'][version].keys():
            raise ValueError(
                'Datafile for {} not specified or available.'.format(yy))
        if version == 'v2016':
            # the v2016 page links the files by bare file name only
            url_to_check = os.path.basename(
                OECD_CONFIG['datafiles'][version][yy])
        else:
            url_to_check = OECD_CONFIG['datafiles'][version][yy]
        if url_to_check not in oecd_webcontent:
            # original format string dropped url_to_check (had '()' instead
            # of '({})') and was missing a space after 'current'
            raise ValueError(
                'Specified datafile for {} ({}) not found in the current '
                'OECD ICIO webpage.\n'
                'Perhaps filenames have been changed - update OECD_CONFIG '
                'to the new filenames'.format(yy, url_to_check))

        filename = 'ICIO' + version.lstrip('v') + '_' + yy + '.zip'
        storage_file = os.path.join(storage_folder, filename)

        if os.path.exists(storage_file) and not overwrite_existing:
            # honour the documented contract: keep already downloaded files
            # unless overwrite_existing is set (the parameter was previously
            # accepted but ignored)
            continue

        # stream the zip to disk in chunks; the context manager releases the
        # connection even if writing fails (previously the response was
        # never closed)
        with requests.get(OECD_CONFIG['datafiles'][version][yy],
                          stream=True) as req:
            req.raise_for_status()
            with open(storage_file, 'wb') as lf:
                for chunk in req.iter_content(1024 * 5):
                    lf.write(chunk)

        meta._add_fileio('Downloaded {} to {}'.format(
            OECD_CONFIG['datafiles'][version][yy], filename))

    meta.save()
    return meta
Пример #4
0
def load(path, include_core=True, path_in_arc=''):
    """ Loads a IOSystem or Extension previously saved with pymrio

    This function can be used to load a IOSystem or Extension specified in a
    metadata file (as defined in DEFAULT_FILE_NAMES['filepara']: metadata.json)

    DataFrames (tables) are loaded from text or binary pickle files.
    For the latter, the extension .pkl or .pickle is assumed, in all other case
    the tables are assumed to be in .txt format.

    Parameters
    ----------
    path : pathlib.Path or string
        Path or path with para file name for the data to load. This must
        either point to the directory containing the uncompressed data or
        the location of a compressed zip file with the data. In the
        later case the parameter 'path_in_arc' need to be specific to
        further indicate the location of the data in the compressed file.

    include_core : boolean, optional
        If False the load method does not include A, L and Z matrix. This
        significantly reduces the required memory if the purpose is only
        to analyse the results calculated beforehand.

    path_in_arc: string, optional
        Path to the data in the zip file (where the fileparameters file is
        located). path_in_arc must be given without leading dot and slash;
        thus to point to the data in the root of the compressed file pass '',
        for data in e.g. the folder 'emissions' pass 'emissions/'.  Only used
        if parameter 'path' points to an compressed zip file.

    Returns
    -------

        IOSystem or Extension class depending on systemtype in the json file
        None in case of errors

    """
    path = Path(path)

    if not path.exists():
        raise ReadError('Given path does not exist')

    file_para = get_file_para(path=path, path_in_arc=path_in_arc)

    if file_para.content['systemtype'] == GENERIC_NAMES['iosys']:
        if zipfile.is_zipfile(str(path)):
            ret_system = IOSystem(meta=MRIOMetaData(
                location=path,
                path_in_arc=os.path.join(file_para.folder,
                                         DEFAULT_FILE_NAMES['metadata'])))
            ret_system.meta._add_fileio(
                "Loaded IO system from {} - {}".format(path, path_in_arc))
        else:
            ret_system = IOSystem(meta=MRIOMetaData(
                location=path / DEFAULT_FILE_NAMES['metadata']))
            ret_system.meta._add_fileio(
                "Loaded IO system from {}".format(path))

    elif file_para.content['systemtype'] == GENERIC_NAMES['ext']:
        ret_system = Extension(file_para.content['name'])

    else:
        # unreachable 'return None' after this raise was removed
        raise ReadError('Type of system not defined in the file parameters')

    for key in file_para.content['files']:
        # Skip the (large) core matrices when include_core is False, as
        # documented above.  The previous condition was inverted
        # ('key not in'), which loaded ONLY A, L and Z and dropped all
        # results - the opposite of the documented behaviour (compare the
        # older implementation of this function).
        if not include_core and key in ['A', 'L', 'Z']:
            continue

        file_name = file_para.content['files'][key]['name']
        nr_index_col = int(file_para.content['files'][key]['nr_index_col'])
        nr_header = int(file_para.content['files'][key]['nr_header'])
        # pandas accepts a plain 0 for a single index/header row
        _index_col = 0 if nr_index_col == 1 else list(range(nr_index_col))
        _header = 0 if nr_header == 1 else list(range(nr_header))

        if key == 'FY':  # Legacy code to read data saved with version < 0.4
            key = 'F_Y'

        if zipfile.is_zipfile(str(path)):
            full_file_name = os.path.join(file_para.folder, file_name)
            logging.info('Load data from {}'.format(full_file_name))

            with zipfile.ZipFile(file=str(path)) as zf:
                if os.path.splitext(full_file_name)[1] in ('.pkl', '.pickle'):
                    setattr(ret_system, key,
                            pd.read_pickle(zf.open(full_file_name)))
                else:
                    setattr(ret_system, key,
                            pd.read_csv(zf.open(full_file_name),
                                        index_col=_index_col,
                                        header=_header, sep='\t'))
        else:
            full_file_name = path / file_name
            logging.info('Load data from {}'.format(full_file_name))

            if os.path.splitext(str(full_file_name))[1] in ('.pkl',
                                                            '.pickle'):
                setattr(ret_system, key,
                        pd.read_pickle(full_file_name))
            else:
                setattr(ret_system, key,
                        pd.read_csv(full_file_name,
                                    index_col=_index_col,
                                    header=_header,
                                    sep='\t'))
    return ret_system
Пример #5
0
def load_test():
    """ Returns a small test MRIO

    The test system contains:

        - six regions,
        - seven sectors,
        - seven final demand categories
        - two extensions (emissions and factor_inputs)

    The test system only contains Z, Y, F, F_Y. The rest can be calculated with
    calc_all()

    Notes
    -----

        For development: This function can be used as an example of
        how to parse an IOSystem

    Returns
    -------

    IOSystem

    """

    # row_header:
    #    number of rows containing header on the top of the file (for the
    #    columns)
    # col_header:
    #    number of cols containing header on the beginning of the file (for the
    #    rows)
    # row and columns header contain also the row for the units, this are
    # afterwards safed as a extra dataframe
    #
    # unit_col: column containing the unit for the table
    file_data = collections.namedtuple(
            'file_data', ['file_name', 'row_header', 'col_header', 'unit_col'])

    # file names and header specs of the system
    # NOTE(review): 'finald_demand_Y.txt' looks like a typo for
    # 'final_demand_Y.txt', but the name must match the file actually
    # shipped in the test data folder - confirm before renaming.
    test_system = dict(
        Z=file_data(file_name='trade_flows_Z.txt',
                    row_header=2, col_header=3, unit_col=2),
        Y=file_data(file_name='finald_demand_Y.txt',
                    row_header=2, col_header=3, unit_col=2),
        fac=file_data(file_name='factor_input.txt',
                      row_header=2, col_header=2, unit_col=1),
        emissions=file_data(file_name='emissions.txt',
                            row_header=2, col_header=3, unit_col=2),
        FDemissions=file_data(file_name='FDemissions.txt',
                              row_header=2, col_header=3, unit_col=2),
        )

    # metadata recorder; the file reads below are logged into it
    meta_rec = MRIOMetaData(location=PYMRIO_PATH['test_mrio'])

    # read the data into a dicts as pandas.DataFrame
    data = {key: pd.read_csv(
                 os.path.join(PYMRIO_PATH['test_mrio'],
                              test_system[key].file_name),
                 index_col=list(range(test_system[key].col_header)),
                 header=list(range(test_system[key].row_header)),
                 sep='\t')
            for key in test_system}

    meta_rec._add_fileio('Load test_mrio from {}'.format(
        PYMRIO_PATH['test_mrio']))

    # distribute the data into dics which can be passed to
    # the IOSystem. To do so, some preps are necessary:
    # - name the header data
    # - save unit in own dataframe and drop unit from the tables

    # NOTE: trade['Z'] / trade['Y'] alias data['Z'] / data['Y'], so the
    # in-place unit handling below also affects the tables passed to
    # IOSystem via data[...] at the end of this function.
    trade = dict(Z=data['Z'], Y=data['Y'])
    factor_inputs = dict(F=data['fac'])
    emissions = dict(F=data['emissions'], F_Y=data['FDemissions'])

    trade['Z'].index.names = ['region', 'sector', 'unit']
    trade['Z'].columns.names = ['region', 'sector']
    # extract the 'unit' index level into its own one-column DataFrame
    # (taken from the first data column) before dropping it from the index
    trade['unit'] = (pd.DataFrame(trade['Z'].iloc[:, 0]
                     .reset_index(level='unit').unit))
    trade['Z'].reset_index(level='unit', drop=True, inplace=True)

    trade['Y'].index.names = ['region', 'sector', 'unit']
    trade['Y'].columns.names = ['region', 'category']
    trade['Y'].reset_index(level='unit', drop=True, inplace=True)

    factor_inputs['name'] = 'Factor Inputs'
    factor_inputs['F'].index.names = ['inputtype', 'unit', ]
    factor_inputs['F'].columns.names = ['region', 'sector']
    factor_inputs['unit'] = (pd.DataFrame(factor_inputs['F'].iloc[:, 0]
                             .reset_index(level='unit').unit))
    factor_inputs['F'].reset_index(level='unit', drop=True, inplace=True)

    emissions['name'] = 'Emissions'
    emissions['F'].index.names = ['stressor', 'compartment', 'unit', ]
    emissions['F'].columns.names = ['region', 'sector']
    emissions['unit'] = (pd.DataFrame(emissions['F'].iloc[:, 0]
                         .reset_index(level='unit').unit))
    emissions['F'].reset_index(level='unit', drop=True, inplace=True)
    emissions['F_Y'].index.names = ['stressor', 'compartment', 'unit']
    emissions['F_Y'].columns.names = ['region', 'category']
    emissions['F_Y'].reset_index(level='unit', drop=True, inplace=True)

    # the population data - this is optional (None can be passed if no data is
    # available)
    popdata = pd.read_csv(
            os.path.join(PYMRIO_PATH['test_mrio'], './population.txt'),
            index_col=0, sep='\t').astype(float)

    return IOSystem(Z=data['Z'],
                    Y=data['Y'],
                    unit=trade['unit'],
                    meta=meta_rec,
                    factor_inputs=factor_inputs,
                    emissions=emissions,
                    population=popdata)
Пример #6
0
def download_eora26(storage_folder, years=None, prices=('bp',),
                    overwrite_existing=False):
    """ Downloads Eora 26

    Parameters
    ----------
    storage_folder: str, valid path
        Location to store the download, folder will be created if
        not existing. If the file is already present in the folder,
        the download of the specific file will be skipped.


    years: list of int or str, optional
        If years is given only downloads the specific years. This
        only applies to the IO tables because extensions are stored
        by country and not per year.
        The years can be given in 2 or 4 digits.

    prices: list of str, optional
        If bp (default), download basic price tables.
        If pp, download purchaser prices. ['bp', 'pp'] possible.

    overwrite_existing: boolean, optional
        If False, skip download of file already existing in
        the storage folder (default). Set to True to replace
        files.

    Returns
    -------

    Meta data of the downloaded MRIOs

    """
    os.makedirs(storage_folder, exist_ok=True)

    print("The Eora MRIO is free for academic (university or grant-funded) "
          "work at degree-granting institutions. "
          "All other uses require a data license before the "
          "results are shared.\n\n "
          "When using Eora, the Eora authors ask you cite "
          "these publications: \n\n "
          "Lenzen, M., Kanemoto, K., Moran, D., Geschke, A. "
          "Mapping the Structure of the World Economy (2012). "
          "Env. Sci. Tech. 46(15) pp 8374-8381. DOI:10.1021/es300171x \n\n "
          "Lenzen, M., Moran, D., Kanemoto, K., Geschke, A. (2013) "
          "Building Eora: A Global Multi-regional Input-Output Database "
          "at High Country and Sector Resolution, Economic Systems Research, "
          # original text had a stray space in the DOI ('769 938')
          " 25:1, 20-49, DOI:10.1080/09535314.2013.769938\n\n ")

    agree = input("Do you agree with these conditions [y/n]: ")

    if agree.lower() != 'y':
        raise ValueError("Download of Eora not possible")

    if isinstance(years, (int, str)):
        years = [years]
    years = years if years else range(1995, 2012)
    # Eora file names use 4-digit years
    years = [str(yy).zfill(4) for yy in years]

    if isinstance(prices, str):
        prices = [prices]

    # Accept the license on the Eora site to obtain the session cookie
    # required for the actual data downloads.
    eora_cookie_str = requests.post(
        EORA26_CONFIG['url_db_content'],
        data={'licenseagree': 'true'}
        ).headers['Set-Cookie']

    _cookie_content = eora_cookie_str.split(';')[0].split('=')
    eora_access_cookie = {_cookie_content[0]: _cookie_content[1]}

    eora26_web_content = _get_url_datafiles(
            url_db_view=EORA26_CONFIG['url_db_view'],
            url_db_content=EORA26_CONFIG['url_db_content'],
            # raw string: \d in a plain string is an invalid escape sequence
            mrio_regex=r'Computations.*?Eora26_\d\d\d\d_.*?.zip',
            access_cookie=eora_access_cookie)

    # latest version tag (e.g. >v199.82<) shown on the download page
    version_number = re.findall(r">v\d+\.\d+<",
                                eora26_web_content.raw_text)[-1][1:-1]

    # keep only the table urls matching the requested years and price types
    restricted_eora_urls = [
        url for url in eora26_web_content.data_urls
        if (re.search(r"(Eora26_)(\d\d\d\d)",
                      os.path.basename(url)).group(2) in years and
            re.search(r"(Eora26_\d\d\d\d_)(..)",
                      os.path.basename(url)).group(2) in prices)
    ]

    meta = MRIOMetaData(location=storage_folder,
                        description='Eora metadata file for pymrio',
                        name='Eora',
                        system='ixi',
                        version=version_number)

    meta = _download_urls(url_list=restricted_eora_urls,
                          storage_folder=storage_folder,
                          overwrite_existing=overwrite_existing,
                          meta_handler=meta,
                          access_cookie=eora_access_cookie)

    meta.save()

    return meta
Пример #7
0
def load(path, include_core=True):
    """ Loads a IOSystem or Extension previously saved with pymrio

    This function can be used to load a IOSystem or Extension specified in a
    ini file. DataFrames (tables) are loaded from text or binary pickle files.
    For the latter, the extension .pkl or .pickle is assumed, in all other case
    the tables are assumed to be in .txt format.

    Parameters
    ----------

    path : string
        path or ini file name for the data to load

    include_core : boolean, optional
        If False the load method does not include A, L and Z matrix. This
        significantly reduces the required memory if the purpose is only
        to analyse the results calculated beforehand.

    Returns
    -------

        IOSystem or Extension class depending on systemtype in the json file
        None in case of errors

    """
    path = path.rstrip('\\')
    path = os.path.abspath(path)

    # unreachable 'return None' statements after the raises below were removed
    if not os.path.exists(path):
        raise ReadError('Given path does not exist')

    para_file_path = os.path.join(path, DEFAULT_FILE_NAMES['filepara'])
    if not os.path.isfile(para_file_path):
        raise ReadError('No file parameter file found')

    with open(para_file_path, 'r') as pf:
        file_para = json.load(pf)

    if file_para['systemtype'] == GENERIC_NAMES['iosys']:
        meta_file_path = os.path.join(path, DEFAULT_FILE_NAMES['metadata'])
        ret_system = IOSystem(meta=MRIOMetaData(location=meta_file_path))
        ret_system.meta._add_fileio("Loaded IO system from {}".format(path))
    elif file_para['systemtype'] == GENERIC_NAMES['ext']:
        ret_system = Extension(file_para['name'])
    else:
        raise ReadError('Type of system not defined in the file parameters')

    for key in file_para['files']:
        # skip the (large) core matrices when include_core is False
        if not include_core and key in ['A', 'L', 'Z']:
            continue

        file_name = file_para['files'][key]['name']
        full_file_name = os.path.join(path, file_name)
        nr_index_col = int(file_para['files'][key]['nr_index_col'])
        nr_header = int(file_para['files'][key]['nr_header'])

        logging.info('Load data from {}'.format(full_file_name))

        # pandas accepts a plain 0 for a single index/header row
        _index_col = 0 if nr_index_col == 1 else list(range(nr_index_col))
        _header = 0 if nr_header == 1 else list(range(nr_header))

        if os.path.splitext(full_file_name)[1] in ('.pkl', '.pickle'):
            setattr(ret_system, key, pd.read_pickle(full_file_name))
        else:
            # pd.read_table was removed in pandas 1.0; read_csv with a tab
            # separator is the direct replacement
            setattr(
                ret_system, key,
                pd.read_csv(full_file_name,
                            index_col=_index_col,
                            header=_header,
                            sep='\t'))

    return ret_system
Пример #8
0
def load_test():
    """ Returns a small test MRIO

    The test system contains:

        - six regions,
        - seven sectors,
        - seven final demand categories
        - two extensions (emissions and factor_inputs)

    The test system only contains Z, Y, F, FY. The rest can be calculated with
    calc_all()

    Notes
    -----

        For development: This function can be used as an example of
        how to parse an IOSystem

    Returns
    -------

    IOSystem

    """

    # row_header:
    #    number of rows containing header on the top of the file (for the
    #    columns)
    # col_header:
    #    number of cols containing header on the beginning of the file (for the
    #    rows)
    # row and columns header contain also the row for the units, this are
    # afterwards safed as a extra dataframe
    #
    # unit_col: column containing the unit for the table
    file_data = collections.namedtuple(
            'file_data', ['file_name', 'row_header', 'col_header', 'unit_col'])

    # file names and header specs of the system
    # NOTE(review): 'finald_demand_Y.txt' looks like a typo for
    # 'final_demand_Y.txt', but the name must match the file actually
    # shipped in the test data folder - confirm before renaming.
    test_system = dict(
        Z=file_data(file_name='trade_flows_Z.txt',
                    row_header=2, col_header=3, unit_col=2),
        Y=file_data(file_name='finald_demand_Y.txt',
                    row_header=2, col_header=3, unit_col=2),
        fac=file_data(file_name='factor_input.txt',
                      row_header=2, col_header=2, unit_col=1),
        emissions=file_data(file_name='emissions.txt',
                            row_header=2, col_header=3, unit_col=2),
        FDemissions=file_data(file_name='FDemissions.txt',
                              row_header=2, col_header=3, unit_col=2),
        )

    # metadata recorder; the file reads below are logged into it
    meta_rec = MRIOMetaData(location=PYMRIO_PATH['test_mrio'])

    # read the data into a dicts as pandas.DataFrame
    # (pd.read_table was removed in pandas 1.0; read_csv with a tab
    # separator is the direct replacement)
    data = {key: pd.read_csv(
                 os.path.join(PYMRIO_PATH['test_mrio'],
                              test_system[key].file_name),
                 index_col=list(range(test_system[key].col_header)),
                 header=list(range(test_system[key].row_header)),
                 sep='\t')
            for key in test_system}

    meta_rec._add_fileio('Load test_mrio from {}'.format(
        PYMRIO_PATH['test_mrio']))

    # distribute the data into dics which can be passed to
    # the IOSystem. To do so, some preps are necessary:
    # - name the header data
    # - save unit in own dataframe and drop unit from the tables

    # NOTE: trade['Z'] / trade['Y'] alias data['Z'] / data['Y'], so the
    # in-place unit handling below also affects the tables passed to
    # IOSystem via data[...] at the end of this function.
    trade = dict(Z=data['Z'], Y=data['Y'])
    factor_inputs = dict(F=data['fac'])
    emissions = dict(F=data['emissions'], FY=data['FDemissions'])

    trade['Z'].index.names = ['region', 'sector', 'unit']
    trade['Z'].columns.names = ['region', 'sector']
    # extract the 'unit' index level into its own one-column DataFrame
    # before dropping it from the table index
    trade['unit'] = (pd.DataFrame(trade['Z'].iloc[:, 0]
                     .reset_index(level='unit').unit))
    trade['Z'].reset_index(level='unit', drop=True, inplace=True)

    trade['Y'].index.names = ['region', 'sector', 'unit']
    trade['Y'].columns.names = ['region', 'category']
    trade['Y'].reset_index(level='unit', drop=True, inplace=True)

    factor_inputs['name'] = 'Factor Inputs'
    factor_inputs['F'].index.names = ['inputtype', 'unit', ]
    factor_inputs['F'].columns.names = ['region', 'sector']
    factor_inputs['unit'] = (pd.DataFrame(factor_inputs['F'].iloc[:, 0]
                             .reset_index(level='unit').unit))
    factor_inputs['F'].reset_index(level='unit', drop=True, inplace=True)

    emissions['name'] = 'Emissions'
    emissions['F'].index.names = ['stressor', 'compartment', 'unit', ]
    emissions['F'].columns.names = ['region', 'sector']
    emissions['unit'] = (pd.DataFrame(emissions['F'].iloc[:, 0]
                         .reset_index(level='unit').unit))
    emissions['F'].reset_index(level='unit', drop=True, inplace=True)
    emissions['FY'].index.names = ['stressor', 'compartment', 'unit']
    emissions['FY'].columns.names = ['region', 'category']
    emissions['FY'].reset_index(level='unit', drop=True, inplace=True)

    # the population data - this is optional (None can be passed if no data is
    # available)
    popdata = pd.read_csv(
            os.path.join(PYMRIO_PATH['test_mrio'], './population.txt'),
            index_col=0, sep='\t').astype(float)

    return IOSystem(Z=data['Z'],
                    Y=data['Y'],
                    unit=trade['unit'],
                    meta=meta_rec,
                    factor_inputs=factor_inputs,
                    emissions=emissions,
                    population=popdata)
Пример #9
0
def download_exiobase3(
    storage_folder,
    years=None,
    system=None,
    overwrite_existing=False,
    doi="10.5281/zenodo.3583070",
):
    """
    Downloads EXIOBASE 3 files from Zenodo

    Since version 3.7 EXIOBASE gets published on the Zenodo scientific data
    repository.  This function downloads the latest available version from
    Zenodo; for a previous version the corresponding DOI (parameter 'doi')
    needs to be specified.

    Version 3.7: 10.5281/zenodo.3583071
    Version 3.8: 10.5281/zenodo.4277368


    Parameters
    ----------
    storage_folder: str, valid path
        Location to store the download, folder will be created if
        not existing. If the file is already present in the folder,
        the download of the specific file will be skipped.


    years: list of int or str, optional
        If years is given only downloads the specific years (by default all
        years will be downloaded). Years must be given in 4 digits.

    system: string or list of strings, optional
        'pxp': download product by product classification
        'ixi': download industry by industry classification
        ['ixi', 'pxp'] or None (default): download both classifications

    overwrite_existing: boolean, optional
        If False, skip download of file already existing in
        the storage folder (default). Set to True to replace
        files.

    doi: string, optional.
        The EXIOBASE DOI to be downloaded. By default that resolves
        to the DOI citing the latest available version. For the previous DOI
        see the block 'Versions' on the right hand side of
        https://zenodo.org/record/4277368.

    Returns
    -------

    Meta data of the downloaded MRIOs

    """

    os.makedirs(storage_folder, exist_ok=True)

    doi_url = "https://doi.org/" + doi
    # NOTE(review): this mutates the module-level EXIOBASE3_CONFIG, so the
    # chosen DOI persists for subsequent calls - confirm this is intended.
    EXIOBASE3_CONFIG["url_db_view"] = doi_url

    exio_web_content = _get_url_datafiles(**EXIOBASE3_CONFIG)

    # Table archives are named e.g. IOT_2011_pxp.zip.  The character classes
    # previously contained a stray comma ('[1,2]', '[p,i]') which made the
    # comma itself a valid character.  URLs not matching the pattern
    # (checksums, documentation) are skipped instead of raising
    # AttributeError on the failed search.
    file_pattern = re.compile(r"IOT_[12]\d\d\d_[pi]x[pi]\.zip")
    available_files = [
        match.group()
        for match in map(file_pattern.search, exio_web_content.data_urls)
        if match
    ]

    available_years = {filename.split("_")[1] for filename in available_files}
    if isinstance(years, (int, str)):
        years = [years]
    years = years if years else list(available_years)

    system = system if system else ["pxp", "ixi"]
    if isinstance(system, str):
        system = [system]

    meta = MRIOMetaData(
        location=storage_folder,
        description="EXIOBASE3 metadata file for pymrio",
        name="EXIO3",
        system=",".join(system),
        version=doi,
    )

    requested_urls = []
    for year, sys_spec in itertools.product(years, system):
        # find the archive name covering this year/system combination
        filename = [
            f for f in available_files
            if str(year) in f and str(sys_spec) in f
        ]

        if not filename:
            meta._add_fileio(
                "Could not find EXIOBASE 3 source file with >{}< and >{}<".
                format(year, sys_spec))
            continue
        requested_urls += [
            u for u in exio_web_content.data_urls for f in filename if f in u
        ]

    meta = _download_urls(
        url_list=requested_urls,
        storage_folder=storage_folder,
        overwrite_existing=overwrite_existing,
        meta_handler=meta,
    )

    meta.save()
    return meta
Пример #10
0
def load_test():
    """Returns a small test MRIO

    The test system contains:

        - six regions,
        - seven sectors,
        - seven final demand categories
        - two extensions (emissions and factor_inputs)

    The test system only contains Z, Y, F, F_Y. The rest can be calculated with
    calc_all()

    Notes
    -----

        For development: This function can be used as an example of
        how to parse an IOSystem

    Returns
    -------

    IOSystem

    """
    # Per-table layout description:
    #   file_name  - name of the tab-separated data file in the test mrio dir
    #   row_header - number of header rows on top of the file (column labels)
    #   col_header - number of leading index columns (row labels); this count
    #                includes the unit column, which is split off into its own
    #                DataFrame further below
    #   unit_col   - position of the unit column within the index columns
    table_spec = collections.namedtuple(
        "file_data", ["file_name", "row_header", "col_header", "unit_col"])

    # File names and header geometry of the test system
    layout = {
        "Z": table_spec("trade_flows_Z.txt", 2, 3, 2),
        "Y": table_spec("finald_demand_Y.txt", 2, 3, 2),
        "fac": table_spec("factor_input.txt", 2, 2, 1),
        "emissions": table_spec("emissions.txt", 2, 3, 2),
        "FDemissions": table_spec("FDemissions.txt", 2, 3, 2),
    }

    meta_rec = MRIOMetaData(location=PYMRIO_PATH["test_mrio"])

    # Read every table into a pandas.DataFrame keyed like `layout`
    tables = {}
    for key, spec in layout.items():
        tables[key] = pd.read_csv(
            os.path.join(PYMRIO_PATH["test_mrio"], spec.file_name),
            index_col=list(range(spec.col_header)),
            header=list(range(spec.row_header)),
            sep="\t",
        )

    meta_rec._add_fileio("Load test_mrio from {}".format(
        PYMRIO_PATH["test_mrio"]))

    def _split_unit(df):
        # Pull the 'unit' index level into a one-column DataFrame and drop
        # that level from df in place; returns the unit DataFrame.
        unit_df = pd.DataFrame(df.iloc[:, 0].reset_index(level="unit").unit)
        df.reset_index(level="unit", drop=True, inplace=True)
        return unit_df

    # Core tables: name the index/column levels and strip the unit level.
    # The unit DataFrame of Z serves as the unit record of the whole system.
    core_Z = tables["Z"]
    core_Z.index.names = ["region", "sector", "unit"]
    core_Z.columns.names = ["region", "sector"]
    core_unit = _split_unit(core_Z)

    core_Y = tables["Y"]
    core_Y.index.names = ["region", "sector", "unit"]
    core_Y.columns.names = ["region", "category"]
    core_Y.reset_index(level="unit", drop=True, inplace=True)

    # Factor inputs extension
    factor_inputs = dict(F=tables["fac"])
    factor_inputs["name"] = "Factor Inputs"
    factor_inputs["F"].index.names = ["inputtype", "unit"]
    factor_inputs["F"].columns.names = ["region", "sector"]
    factor_inputs["unit"] = _split_unit(factor_inputs["F"])

    # Emissions extension (industry emissions F and final demand emissions F_Y)
    emissions = dict(F=tables["emissions"], F_Y=tables["FDemissions"])
    emissions["name"] = "Emissions"
    emissions["F"].index.names = ["stressor", "compartment", "unit"]
    emissions["F"].columns.names = ["region", "sector"]
    emissions["unit"] = _split_unit(emissions["F"])
    emissions["F_Y"].index.names = ["stressor", "compartment", "unit"]
    emissions["F_Y"].columns.names = ["region", "category"]
    emissions["F_Y"].reset_index(level="unit", drop=True, inplace=True)

    # Population data - optional (None can be passed if no data is available)
    popdata = pd.read_csv(
        os.path.join(PYMRIO_PATH["test_mrio"], "./population.txt"),
        index_col=0,
        sep="\t",
    ).astype(float)

    return IOSystem(
        Z=core_Z,
        Y=core_Y,
        unit=core_unit,
        meta=meta_rec,
        factor_inputs=factor_inputs,
        emissions=emissions,
        population=popdata,
    )