示例#1
0
    def process_data(cls, reader, stream, metadata):
        """
        Read every data column from ``stream`` into a time-indexed DataFrame.

        Parameters
        ----------
        reader
            Not used by this method.
        stream
            Passed unchanged to ``cls.process_variable`` once per data column.
        metadata : dict
            Must provide ``"firstyear"``, ``"lastyear"`` and ``"datacolumns"``.

        Returns
        -------
        tuple
            ``(df, metadata, columns)``. ``df`` holds one column per variable
            read and has an index named ``"time"``; ``metadata`` is the object
            that was passed in; ``columns`` maps every header key to the list
            of values collected across the variables, with ``"region"`` and
            ``"variable"`` passed through the conversion helpers.
        """
        index = np.arange(metadata["firstyear"], metadata["lastyear"] + 1)

        columns = {}
        data = []
        for _ in range(metadata["datacolumns"]):
            column_header, column_data = cls.process_variable(stream)

            # Gather each header field into a per-key list, one entry per column
            for key in column_header:
                columns.setdefault(key, []).append(column_header[key])
            data.append(column_data)

        # ``data`` is one sequence per variable, so transpose to put time on rows
        df = pd.DataFrame(np.asarray(data).T, index=index)

        # Check the dtype rather than the index class: the private path
        # ``pd.core.indexes.numeric.Float64Index`` no longer exists in
        # pandas >= 2.0, whereas the dtype check works on old and new versions.
        if pd.api.types.is_float_dtype(df.index.dtype):
            df.index = df.index.to_series().round(3)

        df.index.name = "time"

        # Convert the regions to openscm regions
        columns["region"] = convert_magicc_to_openscm_regions(columns["region"])

        # Convert the variable names to openscm variables, dropping a leading
        # "DAT_" prefix first
        columns["variable"] = [
            d[4:] if d.startswith("DAT_") else d for d in columns["variable"]
        ]
        columns["variable"] = convert_magicc6_to_magicc7_variables(columns["variable"])
        columns["variable"] = convert_magicc7_to_openscm_variables(columns["variable"])

        return df, metadata, columns
示例#2
0
    def _get_initial_nml_and_data_block(self):
        """
        Build the file-specification namelist and the relabelled data block.

        Returns
        -------
        tuple
            ``(nml, data_block)``. ``nml`` contains a single
            ``THISFILE_SPECIFICATIONS`` group; ``data_block`` is
            ``self.data_block`` with its index reset into the first column and
            its columns replaced by four header rows (variables, todos, units,
            regions).
        """
        frame = self.data_block
        header = frame.columns

        regions = convert_magicc_to_openscm_regions(
            header.get_level_values("region").tolist(), inverse=True
        )
        regions = self._ensure_file_region_type_consistency(regions)

        variables = convert_magicc7_to_openscm_variables(
            header.get_level_values("variable").tolist(), inverse=True
        )
        # "_EMIS" is incompatible for now, so it is removed from every name
        variables = [name.replace("_EMIS", "") for name in variables]

        units = convert_pint_to_fortran_safe_units(
            header.get_level_values("unit").tolist()
        )
        todos = header.get_level_values("todo").tolist()

        frame = frame.rename(columns=str).reset_index()
        frame.columns = [
            [self._variable_header_row_name] + variables,
            ["TODO"] + todos,
            ["UNITS"] + units,
            ["YEARS"] + regions,
        ]

        # After ``reset_index`` the first column carries the time axis
        times = frame.iloc[:, 0].values

        spec = "THISFILE_SPECIFICATIONS"
        nml = Namelist()
        nml[spec] = Namelist()
        # The leading YEARS column is not a data column, hence the ``- 1``
        nml[spec]["THISFILE_DATACOLUMNS"] = len(frame.columns) - 1
        nml[spec]["THISFILE_DATAROWS"] = len(frame)
        nml[spec]["THISFILE_FIRSTYEAR"] = int(np.floor(times[0]))
        nml[spec]["THISFILE_LASTYEAR"] = int(np.floor(times[-1]))

        # NOTE(review): with a single data row ``deltas`` is empty and
        # ``deltas[0]`` raises IndexError, which is not caught here.
        deltas = times[1:] - times[:-1]
        try:
            np.testing.assert_allclose(deltas, deltas[0], rtol=0.02)
        except AssertionError:
            # irregular timesteps
            annual_steps = 0
        else:
            per_year = np.round(1 / deltas[0], 1)
            annual_steps = 0 if per_year < 1 else int(per_year)
        nml[spec]["THISFILE_ANNUALSTEPS"] = annual_steps

        # A single shared unit is written out; anything else becomes "MISC"
        distinct_units = list(set(self._get_df_header_row("unit")))
        if len(distinct_units) == 1:
            file_units = convert_pint_to_fortran_safe_units(distinct_units[0])
        else:
            file_units = "MISC"
        nml[spec]["THISFILE_UNITS"] = file_units

        nml[spec].update(self._get_dattype_regionmode(regions))

        return nml, frame
示例#3
0
    def _get_region_order(self, data_block):
        """
        Return the regions of ``data_block`` in the order ``get_region_order`` gives.

        The ``"region"`` column level is read from ``data_block``, ordered with
        ``get_region_order`` (using ``self._scen_7``) and converted with
        ``convert_magicc_to_openscm_regions``.

        Raises
        ------
        ValueError
            If any region in ``data_block`` is absent from the converted order.
        """
        present = data_block.columns.get_level_values("region").tolist()
        ordered = convert_magicc_to_openscm_regions(
            get_region_order(present, self._scen_7)
        )

        unknown = set(present).difference(ordered)
        if not unknown:
            return ordered

        raise ValueError(
            "Are all of your regions OpenSCM regions? I don't "
            "recognise: {}".format(sorted(unknown))
        )
示例#4
0
 def _get_region_order(self, data_block):
     """
     Return the region order for ``data_block``, falling back to its own order.

     The ``"region"`` column level is read from ``data_block``, ordered with
     ``get_region_order`` (using ``self._scen_7``) and converted with
     ``convert_magicc_to_openscm_regions``. If either step raises
     ``ValueError``, the regions are returned exactly as read. In that case a
     warning is emitted for every region whose inverse conversion gives back
     one of the input regions or a name containing
     ``DATA_HIERARCHY_SEPARATOR``.
     """
     # Read the regions before entering the ``try`` so that the name is
     # always bound when the ``except`` handler runs.
     regions = data_block.columns.get_level_values("region").tolist()

     try:
         region_order_magicc = get_region_order(regions, self._scen_7)
         return convert_magicc_to_openscm_regions(region_order_magicc)
     except ValueError:
         abbreviations = [
             convert_magicc_to_openscm_regions(r, inverse=True)
             for r in set(regions)
         ]
         unrecognised_regions = [
             a for a in abbreviations
             if a in regions or DATA_HIERARCHY_SEPARATOR in a
         ]
         if unrecognised_regions:
             warnings.warn(
                 "Not abbreviating regions, could not find abbreviation for {}"
                 .format(unrecognised_regions))
         return regions
示例#5
0
文件: scen.py 项目: openscm/pymagicc
    def read_data_block(self):
        """
        Read consecutive per-region data blocks from ``self._stream``.

        The first entry of ``self.lines`` gives the number of data rows in each
        block. Each block is read as a region line, a variable header line, a
        units header line and then that many data rows. Reading stops at the
        first block whose variable header cannot be parsed, and the stream is
        rewound to the start of that block.

        Returns
        -------
        tuple
            ``(df, columns)``. Each newly read block is concatenated to the
            left of the blocks read before it, and its ``columns`` entries are
            likewise put in front of the earlier ones.

        Raises
        ------
        Exception
            If not a single block could be read.
        """
        number_years = int(self.lines[0].strip())

        # ``columns is None`` marks "no block read yet". This replaces relying
        # on NameError for unbound names, which would also hide real NameErrors.
        df = None
        columns = None

        # go through datablocks until there are none left
        while True:
            ch = {}
            pos_block = self._stream.tell()
            region = convert_magicc_to_openscm_regions(self._stream.readline().strip())

            try:
                variables = self._read_data_header_line(
                    self._stream, ["Years", "Year", "YEARS", "YEAR"]
                )
            except (IndexError, AssertionError):
                # IndexError: tried to get variables from empty string
                # AssertionError: tried to get variables from a notes line
                break

            variables = convert_magicc6_to_magicc7_variables(variables)
            ch["variable"] = convert_magicc7_to_openscm_variables(
                [v + "_EMIS" for v in variables]
            )

            ch["unit"] = self._read_data_header_line(self._stream, ["Yrs", "YEARS"])

            ch = self._read_units(ch)
            ch["todo"] = ["SET"] * len(variables)
            ch["region"] = [region] * len(variables)

            # Copy exactly this region's rows into their own buffer
            region_block = StringIO()
            for _ in range(number_years):
                region_block.write(self._stream.readline())
            region_block.seek(0)

            region_df = self._convert_data_block_to_df(region_block)

            if columns is None:
                df = region_df
                columns = ch
            else:
                df = pd.concat([region_df, df], axis="columns")
                columns = {key: ch[key] + columns[key] for key in columns}

        # Rewind to the start of the block that could not be parsed
        self._stream.seek(pos_block)

        if columns is None:
            error_msg = (
                "This is unexpected, please raise an issue on "
                "https://github.com/openscm/pymagicc/issues"
            )
            raise Exception(error_msg)

        return df, columns
示例#6
0
    def _get_column_headers_and_update_metadata(self, stream, metadata):
        """
        Read the column headers from ``stream`` and convert their names.

        The MAGICC7-style reader is used when ``self._magicc7_style_header()``
        is truthy, otherwise the MAGICC6-style reader. The ``"variable"`` and
        ``"region"`` entries of the headers are then passed through the
        conversion helpers.

        Returns
        -------
        tuple
            ``(column_headers, metadata)`` as returned by the chosen reader,
            with the two header entries converted.
        """
        read_header = (
            self._read_magicc7_style_header
            if self._magicc7_style_header()
            else self._read_magicc6_style_header
        )
        column_headers, metadata = read_header(stream, metadata)

        openscm_variables = convert_magicc7_to_openscm_variables(
            column_headers["variable"]
        )
        column_headers["variable"] = openscm_variables

        openscm_regions = convert_magicc_to_openscm_regions(
            column_headers["region"]
        )
        column_headers["region"] = openscm_regions

        return column_headers, metadata
示例#7
0
文件: scen.py 项目: openscm/pymagicc
    def _write_header(self, output):
        """
        Write the header lines of the file to ``output`` and return ``output``.

        The header consists of the number of rows in ``self.data_block``, the
        special scen code, then ``name``, ``description`` and ``notes`` lines
        and an empty line. These are followed by the ``"header"`` metadata
        entry, if there is one, and one ``key: value`` line for each metadata
        entry that is left.

        The ``"description"``, ``"notes"`` and ``"header"`` entries are popped
        from ``self.minput.metadata`` as they are written.
        """
        row_count = "{}".format(len(self.data_block))

        magicc_variables = _strip_emis_variables(
            convert_magicc7_to_openscm_variables(
                self._get_df_header_row("variable"), inverse=True
            )
        )
        magicc_regions = self._ensure_file_region_type_consistency(
            convert_magicc_to_openscm_regions(
                self._get_df_header_row("region"), inverse=True
            )
        )
        scen_code = get_special_scen_code(
            regions=magicc_regions, emissions=magicc_variables
        )

        scenario_name = self.minput["scenario"].unique()[0]
        description = self.minput.metadata.pop(
            "description", "metadata['description'] is written here"
        )
        notes = self.minput.metadata.pop("notes", "metadata['notes'] is written here")

        # Layout convention for a scen file (the original author notes that
        # the consumer of the file ignores lines 3 to 6):
        # - line 1 is the number of data rows
        # - line 2 is the special scen code
        # - line 3 is name
        # - line 4 is description
        # - line 5 is notes (other notes lines go at the end)
        # - line 6 is empty
        header_lines = [
            row_count,
            "{}".format(scen_code),
            "name: {}".format(scenario_name),
            "description: {}".format(description),
            "notes: {}".format(notes),
            "",
        ]

        try:
            extra_header = self.minput.metadata.pop("header")
        except KeyError:
            pass
        else:
            header_lines.append(extra_header)

        header_lines.extend(
            "{}: {}".format(key, value)
            for key, value in self.minput.metadata.items()
        )

        output.write(self._newline_char.join(header_lines))
        output.write(self._newline_char)

        return output
示例#8
0
def _get_dattype_regionmode_regions_row(regions, scen7=False):
    """
    Return a boolean mask selecting one row of ``DATTYPE_REGIONMODE_REGIONS``.

    A row is selected when the set of its ``"regions"`` entry equals the set
    of inverse-converted ``regions``, and its ``"thisfile_dattype"`` is
    ``"SCEN7"`` when ``scen7`` is truthy (or is not ``"SCEN7"`` otherwise).

    Raises
    ------
    ValueError
        If the number of selected rows is not exactly one.
    """
    target_regions = {
        convert_magicc_to_openscm_regions(region, inverse=True)
        for region in set(regions)
    }

    region_match = DATTYPE_REGIONMODE_REGIONS["regions"].apply(
        lambda candidates: set(candidates) == target_regions
    )

    is_scen7 = DATTYPE_REGIONMODE_REGIONS["thisfile_dattype"] == "SCEN7"
    if scen7:
        dattype_match = is_scen7
    else:
        dattype_match = ~is_scen7

    selected = region_match & dattype_match
    if sum(selected) == 1:
        return selected

    raise ValueError(
        "Unrecognised regions, they must be part of "
        "pymagicc.definitions.DATTYPE_REGIONMODE_REGIONS. If that doesn't make "
        "sense, please raise an issue at "
        "https://github.com/openscm/pymagicc/issues"
    )
示例#9
0
def test_convert_openscm_to_magicc_regions_one_way(magicc7, openscm):
    """Check that the forward conversion of ``magicc7`` equals ``openscm``."""
    converted = convert_magicc_to_openscm_regions(magicc7, inverse=False)
    assert converted == openscm
示例#10
0
def test_convert_openscm_to_magicc_regions(magicc7, openscm):
    """Check the conversion between ``magicc7`` and ``openscm`` in both directions."""
    forwards = convert_magicc_to_openscm_regions(magicc7, inverse=False)
    assert forwards == openscm

    backwards = convert_magicc_to_openscm_regions(openscm, inverse=True)
    assert backwards == magicc7
示例#11
0
    def _convert_compact_table_to_df_metadata_column_headers(
            self, compact_table):
        """
        Reshape a compact table into a year-indexed frame plus column headers.

        Columns of ``compact_table`` whose name contains ``"__"`` are treated
        as timeseries entries named ``<variable>__<region>__<year>``; all
        other columns are treated as parameters. The index of
        ``compact_table`` is used as the ``run_id``.

        Parameters
        ----------
        compact_table : pandas.DataFrame
            Table holding both kinds of column, as described above.

        Returns
        -------
        tuple
            ``(metadata, df, column_headers)``. ``metadata`` is always an
            empty dict. ``df`` has the years on its index and one column for
            each combination of the remaining identifiers.
            ``column_headers`` maps the lower-cased name of each column level
            of ``df`` to that level's values.

        Raises
        ------
        NotImplementedError
            If any year label is not exactly four characters long.
        """
        ts_cols = [c for c in compact_table if "__" in c]
        para_cols = [c for c in compact_table if "__" not in c]

        # After the transpose, each row is one timeseries id and each column
        # is one row of ``compact_table``
        ts = compact_table[ts_cols]
        ts = ts.T

        # Split the ids into their parts, removing "DAT_" from the variable
        ts["variable"] = ts.index.map(
            lambda x: convert_magicc7_to_openscm_variables(
                x.split("__")[0].replace("DAT_", "")))
        ts["region"] = ts.index.map(
            lambda x: convert_magicc_to_openscm_regions(x.split("__")[1]))

        ts["year"] = ts.index.map(lambda x: x.split("__")[2])
        # Make sure all the year strings are four characters long. Not the best test,
        # but as good as we can do for now.
        if not (ts["year"].apply(len)
                == 4).all():  # pragma: no cover # safety valve
            raise NotImplementedError("Non-annual data not yet supported")

        ts["year"] = ts["year"].astype(int)

        ts = ts.reset_index(drop=True)

        # Go to long form (one row per variable/region/year/run_id), then
        # spread the years back out over the columns
        id_cols = {"variable", "region", "year"}
        run_cols = set(ts.columns) - id_cols
        ts = ts.melt(value_vars=run_cols, var_name="run_id", id_vars=id_cols)

        # The compact table carries no unit information
        ts["unit"] = "unknown"

        new_index = list(set(ts.columns) - {"value"})
        ts = ts.set_index(new_index)["value"].unstack("year")

        paras = compact_table[para_cols]
        paras.index.name = "run_id"

        cols_to_merge = find_parameter_groups(paras.columns.tolist())

        paras_clean = paras.copy()
        # Aggregate the columns: each group of component columns is replaced
        # by a single column named after the group
        for new_col, components in cols_to_merge.items():
            components = sorted(components)
            paras_clean.loc[:,
                            new_col] = tuple(paras[components].values.tolist())
            paras_clean = paras_clean.drop(columns=components)

        # Attach the parameters to every timeseries of the same run
        years = ts.columns.tolist()
        ts = ts.reset_index().set_index("run_id")
        out = pd.merge(ts, paras_clean, left_index=True,
                       right_index=True).reset_index()

        # Rebuild the multi-index with the parameters included, then
        # transpose so the years end up on the index
        id_cols = set(out.columns) - set(years)
        out = out.melt(value_vars=years, var_name="year", id_vars=id_cols)
        new_index = list(set(out.columns) - {"value"})
        out = out.set_index(new_index)["value"].unstack("year")
        out = out.T

        column_headers = {
            name.lower(): out.columns.get_level_values(name).tolist()
            for name in out.columns.names
        }
        df = out.copy()
        metadata = {}

        return metadata, df, column_headers
示例#12
0
文件: scen.py 项目: openscm/pymagicc
    def _write_datablock(self, output):
        """
        Insert one formatted data block per region into ``output``.

        The text already held by ``output`` is split into lines. Everything
        after the first six lines is treated as notes, and each region block is
        inserted just before those notes. The buffer is then rewritten from its
        start and returned.

        Raises
        ------
        AssertionError
            If, after dropping the ``"todo"`` and ``"region"`` levels, the
            column levels of a region block are not ``["variable", "unit"]``.
        """
        # for SCEN files, the data format is vitally important for the code
        # that reads them (per the original author's note)
        # we have to work out a better way of matching up all these
        # conventions/testing them, tight coupling between pymagicc and MAGICC
        # may solve it for us...
        lines = output.getvalue().split(self._newline_char)
        # notes are everything except the first 6 lines
        number_notes_lines = len(lines) - 6

        def _gip(lines, number_notes_lines):
            """
            Get the point where we should insert the data block.

            This is the index at which the trailing notes lines begin.
            """
            return len(lines) - number_notes_lines

        region_order_db = get_region_order(
            self._get_df_header_row("region"), scen7=self._scen_7
        )
        region_order_magicc = self._ensure_file_region_type_consistency(region_order_db)
        # format is vitally important for SCEN files as far as I can tell
        time_col_length = 11
        # integer format for the first (years) column, ``time_col_length`` wide
        first_col_format_str = ("{" + ":{}d".format(time_col_length) + "}").format
        other_col_format_str = "{:10.4f}".format

        # TODO: doing it this way, out of the loop,  should ensure things
        # explode if your regions don't all have the same number of emissions
        # timeseries or does extra timeseries in there (that probably
        # shouldn't raise an error, another one for the future), although the
        # explosion will be cryptic so should add a test for good error
        # message at some point
        formatters = [other_col_format_str] * (
            int(len(self.data_block.columns) / len(region_order_db))
            + 1  # for the years column
        )
        formatters[0] = first_col_format_str

        variables = convert_magicc7_to_openscm_variables(
            self._get_df_header_row("variable"), inverse=True
        )
        variables = _strip_emis_variables(variables)

        # the last digit of the special scen code selects the variable order
        special_scen_code = get_special_scen_code(
            regions=region_order_magicc, emissions=variables
        )
        if special_scen_code % 10 == 0:
            variable_order = PART_OF_SCENFILE_WITH_EMISSIONS_CODE_0
        else:
            variable_order = PART_OF_SCENFILE_WITH_EMISSIONS_CODE_1

        # ``region_db`` selects the data, ``region_magicc`` is the name written
        # to the file
        for region_db, region_magicc in zip(region_order_db, region_order_magicc):
            region_block_region = convert_magicc_to_openscm_regions(region_db)
            # take just this region's columns, keeping the "region" level for now
            region_block = self.data_block.xs(
                region_block_region, axis=1, level="region", drop_level=False
            )
            region_block.columns = region_block.columns.droplevel("todo")
            region_block.columns = region_block.columns.droplevel("region")

            # relabel the variable level with the converted, EMIS-stripped names
            # (``variables`` is rebound here; the value computed before the
            # loop is not used again)
            variables = region_block.columns.levels[0]
            variables = convert_magicc7_to_openscm_variables(variables, inverse=True)
            region_block.columns = region_block.columns.set_levels(
                levels=_strip_emis_variables(variables), level="variable",
            )

            # put the columns into the order chosen from the special scen code
            region_block = region_block.reindex(
                variable_order, axis=1, level="variable"
            )

            variables = region_block.columns.get_level_values("variable").tolist()
            variables = convert_magicc6_to_magicc7_variables(
                [v.replace("_EMIS", "") for v in variables], inverse=True
            )

            units = convert_pint_to_fortran_safe_units(
                region_block.columns.get_level_values("unit").tolist()
            )
            # column widths don't work with expressive units
            units = [u.replace("_", "").replace("peryr", "") for u in units]

            if not (region_block.columns.names == ["variable", "unit"]):
                raise AssertionError(
                    "Unexpected region block columns: "
                    "{}".format(region_block.columns.names)
                )

            # move the index into the first column and write two header rows:
            # variable names, then units
            region_block = region_block.rename(columns=str).reset_index()
            region_block.columns = [["YEARS"] + variables, ["Yrs"] + units]

            region_block_str = region_magicc + self._newline_char
            region_block_str += region_block.to_string(
                index=False, formatters=formatters, sparsify=False
            )
            region_block_str += self._newline_char * 2

            # each block is inserted as one list entry just before the notes,
            # so blocks appear in loop order
            lines.insert(_gip(lines, number_notes_lines), region_block_str)

        # rewrite the buffer from the start; no truncate call is made
        output.seek(0)
        output.write(self._newline_char.join(lines))
        return output