Exemplo n.º 1
0
    def get_year_month_to_period_map(self, start_year, end_year):

        """
        generate mapping (year, month) --> period
        :param start_year: 
        :param end_year: 
        :return: 
        """
        res = {}

        for y in range(start_year, end_year + 1):
            start = Pendulum(y, self.start_month, 1)
            end = start.add(months=self.nmonths).subtract(microseconds=1)


            if end.year > end_year:
                continue

            print(start, end)
            p = Period(start, end)

            for s in p.range("months"):
                res[(s.year, s.month)] = p

        return res
Exemplo n.º 2
0
def main_crcm5_hl():
    label = "CRCM5_HL"

    period = Period(datetime(1980, 1, 1), datetime(2009, 12, 31))

    pool = Pool(processes=12)

    input_params = []
    for month_start in period.range("months"):

        month_end = month_start.add(months=1).subtract(seconds=1)

        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [
            month_start.month,
        ]

        vname_to_level_erai = {
            T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
            U_WE: VerticalLevel(1, level_kinds.HYBRID),
            V_SN: VerticalLevel(1, level_kinds.HYBRID),
        }

        vname_map = {}
        vname_map.update(vname_map_CRCM5)

        vname_map = {}
        vname_map.update(vname_map_CRCM5)
        vname_map.update({default_varname_mappings.SNOWFALL_RATE: "U3"})

        label_to_config = OrderedDict([(label, {
            DataManager.SP_BASE_FOLDER:
            "/RECH2/huziy/coupling/GL_440x260_0.1deg_GL_with_Hostetler/Samples_selected",
            DataManager.SP_DATASOURCE_TYPE:
            data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME,
            DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING:
            vname_map,
            DataManager.SP_LEVEL_MAPPING:
            vname_to_level_erai,
            DataManager.SP_OFFSET_MAPPING:
            vname_to_offset_CRCM5,
            DataManager.SP_MULTIPLIER_MAPPING:
            vname_to_multiplier_CRCM5,
            "out_folder":
            "lake_effect_analysis_{}_{}-{}_monthly".format(
                label, period.start.year, period.end.year)
        })])

        kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1)

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
Exemplo n.º 3
0
    def test_range_months_overflow(self):
        dt1 = Pendulum(2016, 1, 30, tzinfo='America/Sao_Paulo')
        dt2 = dt1.add(months=4)

        p = Period(dt1, dt2)
        r = p.range('months')
        self.assertPendulum(r[0], 2016, 1, 30, 0, 0, 0)
        self.assertPendulum(r[-1], 2016, 5, 30, 0, 0, 0)
Exemplo n.º 4
0
def main_obs():
    label = "Obs_monthly_icefix_test2_1proc_speedtest_3"

    period = Period(datetime(1980, 1, 1), datetime(2010, 12, 31))

    pool = Pool(processes=20)

    input_params = []
    for month_start in period.range("months"):

        month_end = month_start.add(months=1).subtract(seconds=1)

        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [
            month_start.month,
        ]

        vname_to_level_erai = {
            T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
            U_WE: VerticalLevel(1, level_kinds.HYBRID),
            V_SN: VerticalLevel(1, level_kinds.HYBRID),
        }

        vname_map = {}
        vname_map.update(vname_map_CRCM5)

        label_to_config = OrderedDict([(label, {
            DataManager.SP_BASE_FOLDER:
            "/HOME/huziy/skynet3_rech1/obs_data_for_HLES/interploated_to_the_same_grid/GL_0.1_452x260_icefix",
            DataManager.SP_DATASOURCE_TYPE:
            data_source_types.
            ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY,
            DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING:
            vname_map,
            DataManager.SP_LEVEL_MAPPING:
            vname_to_level_erai,
            DataManager.SP_OFFSET_MAPPING:
            vname_to_offset_CRCM5,
            DataManager.SP_MULTIPLIER_MAPPING:
            vname_to_multiplier_CRCM5,
            DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING:
            vname_to_fname_prefix_CRCM5,
            "out_folder":
            "lake_effect_analysis_daily_{}_{}-{}".format(
                label, period.start.year, period.end.year)
        })])

        kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1)

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
Exemplo n.º 5
0
def main_crcm5_nemo():
    label = "CRCM5_NEMO"

    period = Period(
        datetime(1980, 1, 1), datetime(2015, 12, 31)
    )


    pool = Pool(processes=10)

    input_params = []
    for month_start in period.range("months"):

        month_end = month_start.add(months=1).subtract(seconds=1)

        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]


        vname_to_level_erai = {
            T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
            U_WE: VerticalLevel(1, level_kinds.HYBRID),
            V_SN: VerticalLevel(1, level_kinds.HYBRID),
        }



        vname_map = {}
        vname_map.update(vname_map_CRCM5)

        vname_map = {}
        vname_map.update(vname_map_CRCM5)
        vname_map.update({
            default_varname_mappings.SNOWFALL_RATE: "SN"
        })

        label_to_config = OrderedDict([(
            label, {
                DataManager.SP_BASE_FOLDER: "/snow3/huziy/NEI/GL/erai0.75deg_driven/GL_with_NEMO_dtN_1h_and_30min/Samples",
                DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: default_varname_mappings.vname_to_fname_prefix_CRCM5,
                "out_folder": "lake_effect_analysis_{}_{}-{}_monthly".format(label, period.start.year, period.end.year)
            }
        )])

        kwargs = dict(
            label_to_config=label_to_config, period=current_month_period, months_of_interest=current_month_period.months_of_interest, nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
Exemplo n.º 6
0
def main_crcm5_hl():
    label = "CRCM5_HL"

    period = Period(
        datetime(1980, 1, 1), datetime(2009, 12, 31)
    )


    pool = Pool(processes=12)

    input_params = []
    for month_start in period.range("months"):

        month_end = month_start.add(months=1).subtract(seconds=1)

        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]


        vname_to_level_erai = {
            T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
            U_WE: VerticalLevel(1, level_kinds.HYBRID),
            V_SN: VerticalLevel(1, level_kinds.HYBRID),
        }

        vname_map = {}
        vname_map.update(vname_map_CRCM5)

        vname_map = {}
        vname_map.update(vname_map_CRCM5)
        vname_map.update({
            default_varname_mappings.SNOWFALL_RATE: "U3"
        })

        label_to_config = OrderedDict([(
            label, {
                DataManager.SP_BASE_FOLDER: "/RECH2/huziy/coupling/GL_440x260_0.1deg_GL_with_Hostetler/Samples_selected",
                DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                "out_folder": "lake_effect_analysis_{}_{}-{}_monthly".format(label, period.start.year, period.end.year)
            }
        )])

        kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
Exemplo n.º 7
0
    def test_range_with_dst(self):
        dt1 = Pendulum(2016, 10, 14, tzinfo='America/Sao_Paulo')
        dt2 = dt1.add(weeks=1)

        p = Period(dt1, dt2)
        r = p.range('days')
        self.assertPendulum(r[0], 2016, 10, 14, 0, 0, 0)
        self.assertPendulum(r[2], 2016, 10, 16, 1, 0, 0)
        self.assertPendulum(r[-1], 2016, 10, 21, 0, 0, 0)
Exemplo n.º 8
0
    def test_range(self):
        dt1 = Pendulum(2000, 1, 1, 12, 45, 37)
        dt2 = Pendulum(2000, 1, 31, 12, 45, 37)

        p = Period(dt1, dt2)
        r = p.range('days')
        self.assertEqual(31, len(r))
        self.assertPendulum(r[0], 2000, 1, 1, 12, 45, 37)
        self.assertPendulum(r[-1], 2000, 1, 31, 12, 45, 37)
def main_future(nprocs=20):

    period = Period(
        datetime(2079, 1, 1), datetime(2100, 12, 31)
    )

    label = "CRCM5_NEMO_fix_TT_PR_CanESM2_RCP85_{}-{}_monthly".format(period.start.year, period.end.year)

    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    base_folder = "/scratch/huziy/Output/GL_CC_CanESM2_RCP85/coupled-GL-future_CanESM2/Samples"

    vname_map = {}
    vname_map.update(vname_map_CRCM5)
    # vname_map[default_varname_mappings.SNOWFALL_RATE] = "SN"
    vname_map[default_varname_mappings.SNOWFALL_RATE] = "XXX"



    pool = Pool(processes=nprocs)

    input_params = []
    for month_start in period.range("months"):

        month_end = month_start.add(months=1).subtract(seconds=1)

        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        label_to_config = OrderedDict([(
            label, {
                # "base_folder": "/HOME/huziy/skynet3_rech1/CRCM5_outputs/cc_canesm2_rcp85_gl/coupled-GL-future_CanESM2/Samples",
                DataManager.SP_BASE_FOLDER: base_folder,
                DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
                "out_folder": "lake_effect_analysis_{}_{}-{}".format(label, period.start.year, period.end.year)
            }
        )])

        kwargs = dict(
            label_to_config=label_to_config, period=current_month_period, months_of_interest=current_month_period.months_of_interest, nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
Exemplo n.º 10
0
def test_range_inverted():
    dt1 = pendulum.datetime(2000, 1, 1, 12, 45, 37)
    dt2 = pendulum.datetime(2000, 1, 31, 12, 45, 37)

    p = Period(dt2, dt1)
    r = list(p.range('days'))

    assert len(r) == 31
    assert_datetime(r[-1], 2000, 1, 1, 12, 45, 37)
    assert_datetime(r[0], 2000, 1, 31, 12, 45, 37)
Exemplo n.º 11
0
def test_range():
    dt1 = pendulum.datetime(2000, 1, 1, 12, 45, 37)
    dt2 = pendulum.datetime(2000, 1, 31, 12, 45, 37)

    p = Period(dt1, dt2)
    r = list(p.range("days"))

    assert len(r) == 31
    assert_datetime(r[0], 2000, 1, 1, 12, 45, 37)
    assert_datetime(r[-1], 2000, 1, 31, 12, 45, 37)
Exemplo n.º 12
0
def test_range_no_overflow():
    dt1 = pendulum.datetime(2000, 1, 1, 12, 45, 37)
    dt2 = pendulum.datetime(2000, 1, 31, 11, 45, 37)

    p = Period(dt1, dt2)
    r = list(p.range('days'))

    assert len(r) == 30
    assert_datetime(r[0], 2000, 1, 1, 12, 45, 37)
    assert_datetime(r[-1], 2000, 1, 30, 12, 45, 37)
def main_future(nprocs=20):

    period = Period(
        datetime(2079, 1, 1), datetime(2100, 12, 31)
    )

    label = "CRCM5_HL_CanESM2_RCP85_{}-{}_monthly".format(period.start.year, period.end.year)

    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    base_folder = "/scratch/huziy/NEI/GL_samples_only/GL_CC_CanESM2_RCP85/HL-GL-current_CanESM2/Samples"

    vname_map = {}
    vname_map.update(vname_map_CRCM5)
    vname_map[default_varname_mappings.SNOWFALL_RATE] = "SN"



    pool = Pool(processes=nprocs)

    input_params = []
    for month_start in period.range("months"):

        month_end = month_start.add(months=1).subtract(seconds=1)

        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        label_to_config = OrderedDict([(
            label, {
                # "base_folder": "/HOME/huziy/skynet3_rech1/CRCM5_outputs/cc_canesm2_rcp85_gl/coupled-GL-future_CanESM2/Samples",
                DataManager.SP_BASE_FOLDER: base_folder,
                DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
                "out_folder": "lake_effect_analysis_{}_{}-{}".format(label, period.start.year, period.end.year)
            }
        )])

        kwargs = dict(
            label_to_config=label_to_config, period=current_month_period, months_of_interest=current_month_period.months_of_interest, nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
Exemplo n.º 14
0
    def test_range_amount(self):
        dt1 = Pendulum(2016, 10, 14, tzinfo='America/Sao_Paulo')
        dt2 = dt1.add(weeks=1)

        p = Period(dt1, dt2)
        r = p.range('days', 2)

        self.assertEqual(len(r), 4)
        self.assertPendulum(r[0], 2016, 10, 14, 0, 0, 0)
        self.assertPendulum(r[1], 2016, 10, 16, 1, 0, 0)
        self.assertPendulum(r[2], 2016, 10, 18, 0, 0, 0)
        self.assertPendulum(r[3], 2016, 10, 20, 0, 0, 0)
Exemplo n.º 15
0
def main_obs():
    label = "Obs_monthly_icefix_test2_1proc_speedtest_3"


    period = Period(
        datetime(1980, 1, 1), datetime(2010, 12, 31)
    )


    pool = Pool(processes=20)

    input_params = []
    for month_start in period.range("months"):

        month_end = month_start.add(months=1).subtract(seconds=1)

        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]


        vname_to_level_erai = {
            T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
            U_WE: VerticalLevel(1, level_kinds.HYBRID),
            V_SN: VerticalLevel(1, level_kinds.HYBRID),
        }

        vname_map = {}
        vname_map.update(vname_map_CRCM5)

        label_to_config = OrderedDict([(
            label, {
                DataManager.SP_BASE_FOLDER: "/HOME/huziy/skynet3_rech1/obs_data_for_HLES/interploated_to_the_same_grid/GL_0.1_452x260_icefix",
                DataManager.SP_DATASOURCE_TYPE: data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
                "out_folder": "lake_effect_analysis_daily_{}_{}-{}".format(label, period.start.year, period.end.year)
            }
        )])

        kwargs = dict(
            label_to_config=label_to_config, period=current_month_period, months_of_interest=current_month_period.months_of_interest, nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
Exemplo n.º 16
0
    def get_year_month_to_period_map(self, start_year, end_year):
        """
        generate mapping (year, month) --> period
        :param start_year: 
        :param end_year: 
        :return: 
        """
        res = {}

        for y in range(start_year, end_year + 1):
            start = Pendulum(y, self.start_month, 1)
            end = start.add(months=self.nmonths).subtract(microseconds=1)

            if end.year > end_year:
                continue

            print(start, end)
            p = Period(start, end)

            for s in p.range("months"):
                res[(s.year, s.month)] = p

        return res
Exemplo n.º 17
0
    def read_data_for_period_3d(self, period: Period, varname_internal: str) -> DataArray:
        """
        Read 3D fields
        :param period:
        :param varname_internal:
        """
        data_list = []
        dates = []
        vert_levels = None
        vert_level_units = None

        if self.data_source_type == data_source_types.ALL_VARS_IN_A_FOLDER_OF_RPN_FILES:
            raise NotImplementedError()
        elif self.data_source_type == data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT:
            assert varname_internal in self.varname_to_file_prefix, f"Could not find {varname_internal} in {self.varname_to_file_prefix}"

            filename_prefix = self.varname_to_file_prefix[varname_internal]

            if filename_prefix in ["dp", ]:
                vert_level_units = "mb"

            for month_start in period.range("months"):

                year, m = month_start.year, month_start.month

                # Skip years or months that are not available
                if (year, m) not in self.yearmonth_to_path:
                    print(f"Skipping {year}-{m}")
                    continue

                month_dir = self.yearmonth_to_path[(year, m)]

                for f in sorted(month_dir.iterdir()):
                    # Skip the file for time step 0
                    if f.name[-9:-1] == "0" * 8:
                        continue

                    # read only files with the specified prefix
                    if not f.name.startswith(filename_prefix):
                        continue

                    with RPN(str(f)) as r:

                        print(f"Reading {self.varname_mapping[varname_internal]} from {f}")
                        data_rvar = r.variables[self.varname_mapping[varname_internal]]

                        assert isinstance(data_rvar, rpn.RPNVariable)

                        dates.extend(data_rvar.sorted_dates)
                        if vert_levels is None:
                            vert_levels = data_rvar.sorted_levels

                        data_list.append(data_rvar[:])

                        if self.lons is None:
                            self.__update_bmp_info_from_rpnfile_obj(r)

        else:
            raise NotImplementedError()

        data_list = np.concatenate(data_list, axis=0)
        print(f"data_list.shape={data_list.shape}, var_name={varname_internal}")
        # data_list = np.transpose(data_list, axes=(0, 2, 3, 1))

        # Construct a dictionary for xarray.DataArray ...
        vardict = {
            "coords": {
                "t": {"dims": "t", "data": dates},
                "lon": {"dims": ("x", "y"), "data": self.lons},
                "lat": {"dims": ("x", "y"), "data": self.lats},
                "lev": {"dims": ("z",), "data": vert_levels}
            },
            "dims": ("t", "z", "x", "y"),
            "data": data_list,
            "name": varname_internal
        }

        if vert_level_units is not None:
            vardict["coords"]["lev"].update({"attrs": {"units": vert_level_units}})

        if len(data_list) == 0:
            print("retreived dates: {}".format(dates))
            raise IOError(
                "Could not find any {} data for the period {}..{} in {}".format(self.varname_mapping[varname_internal],
                                                                                period.start, period.end,
                                                                                self.base_folder))
        # Convert units based on supplied mappings
        return self.multipliers[varname_internal] * DataArray.from_dict(vardict) + self.offsets[varname_internal]
Exemplo n.º 18
0
    def read_data_for_period(self, period: Period, varname_internal: str, ndims=3) -> DataArray:

        """
        Read the data for period and varname into memory, and return it as xarray DataArray
        :param ndims: number of dimensions ndims=3 for (t, x, y)[default] and ndims=4 for (t, x, y, z)
        :param period:
        :param varname_internal:

        Note: this method will read everything into memory, please be easy on the period duration for large datasets
        """
        assert isinstance(period, Period)

        level, level_kind = -1, -1

        if varname_internal in self.level_mapping:
            lvl = self.level_mapping[varname_internal]
            assert isinstance(lvl, VerticalLevel)
            level, level_kind = lvl.get_value_and_kind()


        data = {}
        lons, lats = None, None
        data_list = None
        dates = None

        # for each datasource type the following arrays should be defined:
        #       data(t, x, y), dates(t), lons(x, y), lats(x, y)
        if self.data_source_type == data_source_types.ALL_VARS_IN_A_FOLDER_OF_RPN_FILES:

            assert isinstance(period, Period)

            for month_start in period.range("months"):
                f = self.yearmonth_to_path[(month_start.year, month_start.month)]

                with RPN(str(f)) as r:
                    # read the data into memory

                    data1 = r.get_all_time_records_for_name_and_level(varname=self.varname_mapping[varname_internal],
                                                                      level=level, level_kind=level_kind)

                    if self.lons is None:
                        self.__update_bmp_info_from_rpnfile_obj(r)

                    data.update(data1)

            dates = list(sorted(data))[:-1]  # Ignore the last date because it is from the next month
            data_list = [data[d] for d in dates]

        elif self.data_source_type == data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT:

            assert varname_internal in self.varname_to_file_prefix, f"Could not find {varname_internal} in {self.varname_to_file_prefix}"

            filename_prefix = self.varname_to_file_prefix[varname_internal]

            # handle 3d variables
            if ndims == 4:
                return self.read_data_for_period_3d(period, varname_internal=varname_internal)

            for month_start in period.range("months"):

                year, m = month_start.year, month_start.month

                print(year, m)

                # Skip years or months that are not available
                if (year, m) not in self.yearmonth_to_path:
                    print(f"Skipping {year}-{m}")
                    continue

                month_dir = self.yearmonth_to_path[(year, m)]

                for f in month_dir.iterdir():
                    # Skip the file for time step 0
                    if f.name[-9:-1] == "0" * 8:
                        continue

                    # read only files with the specified prefix
                    if not f.name.startswith(filename_prefix):
                        continue

                    r = RPN(str(f))

                    data.update(
                        r.get_all_time_records_for_name_and_level(varname=self.varname_mapping[varname_internal],
                                                                  level=level, level_kind=level_kind))

                    if self.lons is None:
                        self.__update_bmp_info_from_rpnfile_obj(r)

                    r.close()

            dates = list(sorted(data))[:-1]  # Ignore the last date because it is from the next month
            data_list = [data[d] for d in dates]

        elif self.data_source_type == data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME:
            for month_start in period.range("months"):

                year, m = month_start.year, month_start.month

                print(year, m)

                # Skip years or months that are not available
                if (year, m) not in self.yearmonth_to_path:
                    print("Skipping {year}-{m}")
                    continue

                month_dir = self.yearmonth_to_path[(year, m)]

                for f in month_dir.iterdir():
                    # read only files containing the variable name in the name, i.e. *TT*.rpn
                    if not "_" + self.varname_mapping[varname_internal] in f.name:
                        continue

                    r = RPN(str(f))

                    data.update(
                        r.get_all_time_records_for_name_and_level(varname=self.varname_mapping[varname_internal],
                                                                  level=level, level_kind=level_kind))

                    if self.lons is None:
                        self.lons, self.lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()

                    r.close()

            dates = list(sorted(data))[:-1]  # Ignore the last date because it is from the next month
            data_list = [data[d] for d in dates]

        elif self.data_source_type in [data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES,
                                       data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY]:

            if self.varname_to_file_path is None:
                base_folder = Path(self.base_folder)
                ds = xarray.open_mfdataset(str(base_folder / "*.nc*"), data_vars="minimal")
            else:
                ## In the case of very different netcdf files in the folder
                ## i.e. data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY
                ds = xarray.open_dataset(self.varname_to_file_path[varname_internal])
                print("reading {} from {}".format(varname_internal, self.varname_to_file_path[varname_internal]))

            # select the variable by name and time
            # print(period.start, period.end)
            # print(ds[self.varname_mapping[varname_internal]])

            # try both time and t
            try:
                var = ds[self.varname_mapping[varname_internal]].sel(time=slice(period.start, period.end)).squeeze()
            except ValueError:
                var = ds[self.varname_mapping[varname_internal]].sel(t=slice(period.start, period.end)).squeeze()

            for cname, cvals in var.coords.items():
                if "time" in cname.lower() or "t" == cname.lower():
                    dates = cvals

            if self.lons is None:
                need_to_create_meshgrid = False
                for cname, cvals in var.coords.items():

                    if "lon" in cname.lower():
                        lons = cvals.values

                        if lons.ndim == 1:
                            need_to_create_meshgrid = True

                    if "lat" in cname.lower():
                        lats = cvals.values

                if need_to_create_meshgrid:
                    lats, lons = np.meshgrid(lats, lons)

                self.lons, self.lats = lons, lats

            # if still could not find longitudes and latitudes
            if self.lons is None:

                for vname, ncvar in ds.items():
                    if "lon" in vname.lower():
                        self.lons = ncvar.values

                    if "lat" in vname.lower():
                        self.lats = ncvar.values

            # if still could not find => raise an exception
            if self.lons is None:
                raise IOError(f"Could not find lon/lat fields in the\n {ds}")

            if var.ndim > 3:
                var = var[:, self.level_mapping[varname_internal].value, :, :]
            if var.shape[-2:] == self.lons.shape:
                data_list = var.values

            else:
                if var.ndim == 3:
                    data_list = np.transpose(var.values, axes=(0, 2, 1))
                elif var.ndim == 2:
                    data_list = np.transpose(var.values)
                else:
                    raise Exception(f"{var.ndim}-dimensional variables are not supported")

            # close the dataset
            ds.close()

        else:

            raise NotImplementedError(
                "reading of the layout type {} is not implemented yet.".format(self.data_source_type))

        # print(dates[0], dates[1], "...", dates[-1], len(dates))

        # Construct a dictionary for xarray.DataArray ...
        vardict = {
            "coords": {
                "t": {"dims": "t", "data": dates},
                "lon": {"dims": ("x", "y"), "data": self.lons},
                "lat": {"dims": ("x", "y"), "data": self.lats},
            },
            "dims": ("t", "x", "y"),
            "data": data_list,
            "name": varname_internal
        }

        if len(data_list) == 0:
            print("retreived dates: {}".format(dates))
            raise IOError(
                "Could not find any {} data for the period {}..{} in {}".format(self.varname_mapping[varname_internal],
                                                                                period.start, period.end,
                                                                                self.base_folder))
        # Convert units based on supplied mappings
        return self.multipliers[varname_internal] * DataArray.from_dict(vardict) + self.offsets[varname_internal]
Exemplo n.º 19
0
def main_crcm5_nemo():
    label = "CRCM5_NEMO"

    period = Period(datetime(1980, 1, 1), datetime(2015, 12, 31))

    pool = Pool(processes=10)

    input_params = []
    for month_start in period.range("months"):

        month_end = month_start.add(months=1).subtract(seconds=1)

        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [
            month_start.month,
        ]

        vname_to_level_erai = {
            T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
            U_WE: VerticalLevel(1, level_kinds.HYBRID),
            V_SN: VerticalLevel(1, level_kinds.HYBRID),
        }

        vname_map = {}
        vname_map.update(vname_map_CRCM5)

        vname_map = {}
        vname_map.update(vname_map_CRCM5)
        vname_map.update({default_varname_mappings.SNOWFALL_RATE: "SN"})

        label_to_config = OrderedDict([(label, {
            DataManager.SP_BASE_FOLDER:
            "/snow3/huziy/NEI/GL/erai0.75deg_driven/GL_with_NEMO_dtN_1h_and_30min/Samples",
            DataManager.SP_DATASOURCE_TYPE:
            data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
            DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING:
            vname_map,
            DataManager.SP_LEVEL_MAPPING:
            vname_to_level_erai,
            DataManager.SP_OFFSET_MAPPING:
            vname_to_offset_CRCM5,
            DataManager.SP_MULTIPLIER_MAPPING:
            vname_to_multiplier_CRCM5,
            DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING:
            default_varname_mappings.vname_to_fname_prefix_CRCM5,
            "out_folder":
            "lake_effect_analysis_{}_{}-{}_monthly".format(
                label, period.start.year, period.end.year)
        })])

        kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1)

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
Exemplo n.º 20
0
    def read_data_for_period_3d(self, period: Period, varname_internal: str) -> DataArray:
        """
        Read 3D fields
        :param period:
        :param varname_internal:
        """
        data_list = []
        dates = []
        vert_levels = None
        vert_level_units = None

        if self.data_source_type == data_source_types.ALL_VARS_IN_A_FOLDER_OF_RPN_FILES:
            raise NotImplementedError()
        elif self.data_source_type == data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT:
            assert varname_internal in self.varname_to_file_prefix, f"Could not find {varname_internal} in {self.varname_to_file_prefix}"

            filename_prefix = self.varname_to_file_prefix[varname_internal]

            if filename_prefix in ["dp", ]:
                vert_level_units = "mb"

            for month_start in period.range("months"):

                year, m = month_start.year, month_start.month

                # Skip years or months that are not available
                if (year, m) not in self.yearmonth_to_path:
                    print(f"Skipping {year}-{m}")
                    continue

                month_dir = self.yearmonth_to_path[(year, m)]

                for f in sorted(month_dir.iterdir()):
                    # Skip the file for time step 0
                    if f.name[-9:-1] == "0" * 8:
                        continue

                    # read only files with the specified prefix
                    if not f.name.startswith(filename_prefix):
                        continue

                    with RPN(str(f)) as r:

                        print(f"Reading {self.varname_mapping[varname_internal]} from {f}")
                        data_rvar = r.variables[self.varname_mapping[varname_internal]]

                        assert isinstance(data_rvar, rpn.RPNVariable)

                        dates.extend(data_rvar.sorted_dates)
                        if vert_levels is None:
                            vert_levels = data_rvar.sorted_levels

                        data_list.append(data_rvar[:])

                        if self.lons is None:
                            self.__update_bmp_info_from_rpnfile_obj(r)

        else:
            raise NotImplementedError()

        data_list = np.concatenate(data_list, axis=0)
        print(f"data_list.shape={data_list.shape}, var_name={varname_internal}")
        # data_list = np.transpose(data_list, axes=(0, 2, 3, 1))

        # Construct a dictionary for xarray.DataArray ...
        vardict = {
            "coords": {
                "t": {"dims": "t", "data": dates},
                "lon": {"dims": ("x", "y"), "data": self.lons},
                "lat": {"dims": ("x", "y"), "data": self.lats},
                "lev": {"dims": ("z",), "data": vert_levels}
            },
            "dims": ("t", "z", "x", "y"),
            "data": data_list,
            "name": varname_internal
        }

        if vert_level_units is not None:
            vardict["coords"]["lev"].update({"attrs": {"units": vert_level_units}})

        if len(data_list) == 0:
            print("retreived dates: {}".format(dates))
            raise IOError(
                "Could not find any {} data for the period {}..{} in {}".format(self.varname_mapping[varname_internal],
                                                                                period.start, period.end,
                                                                                self.base_folder))
        # Convert units based on supplied mappings
        return self.multipliers[varname_internal] * DataArray.from_dict(vardict) + self.offsets[varname_internal]
Exemplo n.º 21
0
    def read_data_for_period(self, period: Period, varname_internal: str, ndims=3) -> DataArray:

        """
        Read the data for period and varname into memory, and return it as xarray DataArray
        :param ndims: number of dimensions ndims=3 for (t, x, y)[default] and ndims=4 for (t, x, y, z)
        :param period:
        :param varname_internal:

        Note: this method will read everything into memory, please be easy on the period duration for large datasets
        """
        assert isinstance(period, Period)

        level, level_kind = -1, -1

        if varname_internal in self.level_mapping:
            lvl = self.level_mapping[varname_internal]
            assert isinstance(lvl, VerticalLevel)
            level, level_kind = lvl.get_value_and_kind()


        data = {}
        lons, lats = None, None
        data_list = None
        dates = None

        # for each datasource type the following arrays should be defined:
        #       data(t, x, y), dates(t), lons(x, y), lats(x, y)
        if self.data_source_type == data_source_types.ALL_VARS_IN_A_FOLDER_OF_RPN_FILES:

            assert isinstance(period, Period)

            for month_start in period.range("months"):
                f = self.yearmonth_to_path[(month_start.year, month_start.month)]

                with RPN(str(f)) as r:
                    # read the data into memory

                    data1 = r.get_all_time_records_for_name_and_level(varname=self.varname_mapping[varname_internal],
                                                                      level=level, level_kind=level_kind)

                    if self.lons is None:
                        self.__update_bmp_info_from_rpnfile_obj(r)

                    data.update(data1)

            dates = list(sorted(data))[:-1]  # Ignore the last date because it is from the next month
            data_list = [data[d] for d in dates]

        elif self.data_source_type == data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT:

            assert varname_internal in self.varname_to_file_prefix, f"Could not find {varname_internal} in {self.varname_to_file_prefix}"

            filename_prefix = self.varname_to_file_prefix[varname_internal]

            # handle 3d variables
            if ndims == 4:
                return self.read_data_for_period_3d(period, varname_internal=varname_internal)

            for month_start in period.range("months"):

                year, m = month_start.year, month_start.month

                print(year, m)

                # Skip years or months that are not available
                if (year, m) not in self.yearmonth_to_path:
                    print(f"Skipping {year}-{m}")
                    continue

                month_dir = self.yearmonth_to_path[(year, m)]

                for f in month_dir.iterdir():
                    # Skip the file for time step 0
                    if f.name[-9:-1] == "0" * 8:
                        continue

                    # read only files with the specified prefix
                    if not f.name.startswith(filename_prefix):
                        continue

                    r = RPN(str(f))

                    data.update(
                        r.get_all_time_records_for_name_and_level(varname=self.varname_mapping[varname_internal],
                                                                  level=level, level_kind=level_kind))

                    if self.lons is None:
                        self.__update_bmp_info_from_rpnfile_obj(r)

                    r.close()

            dates = list(sorted(data))[:-1]  # Ignore the last date because it is from the next month
            data_list = [data[d] for d in dates]

        elif self.data_source_type == data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME:
            for month_start in period.range("months"):

                year, m = month_start.year, month_start.month

                print(year, m)

                # Skip years or months that are not available
                if (year, m) not in self.yearmonth_to_path:
                    print("Skipping {year}-{m}")
                    continue

                month_dir = self.yearmonth_to_path[(year, m)]

                for f in month_dir.iterdir():
                    # read only files containing the variable name in the name, i.e. *TT*.rpn
                    if not "_" + self.varname_mapping[varname_internal] in f.name:
                        continue

                    r = RPN(str(f))

                    data.update(
                        r.get_all_time_records_for_name_and_level(varname=self.varname_mapping[varname_internal],
                                                                  level=level, level_kind=level_kind))

                    if self.lons is None:
                        self.lons, self.lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()

                    r.close()

            dates = list(sorted(data))[:-1]  # Ignore the last date because it is from the next month
            data_list = [data[d] for d in dates]

        elif self.data_source_type in [data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES,
                                       data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY]:

            if self.varname_to_file_path is None:
                base_folder = Path(self.base_folder)
                ds = xarray.open_mfdataset(str(base_folder / "*.nc*"), data_vars="minimal")
            else:
                ## In the case of very different netcdf files in the folder
                ## i.e. data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY
                ds = xarray.open_dataset(self.varname_to_file_path[varname_internal])
                print("reading {} from {}".format(varname_internal, self.varname_to_file_path[varname_internal]))

            # select the variable by name and time
            # print(period.start, period.end)
            # print(ds[self.varname_mapping[varname_internal]])

            # try both time and t
            try:
                var = ds[self.varname_mapping[varname_internal]].sel(time=slice(period.start, period.end)).squeeze()
            except ValueError:
                var = ds[self.varname_mapping[varname_internal]].sel(t=slice(period.start, period.end)).squeeze()

            for cname, cvals in var.coords.items():
                if "time" in cname.lower() or "t" == cname.lower():
                    dates = cvals

            if self.lons is None:
                need_to_create_meshgrid = False
                for cname, cvals in var.coords.items():

                    if "lon" in cname.lower():
                        lons = cvals.values

                        if lons.ndim == 1:
                            need_to_create_meshgrid = True

                    if "lat" in cname.lower():
                        lats = cvals.values

                if need_to_create_meshgrid:
                    lats, lons = np.meshgrid(lats, lons)

                self.lons, self.lats = lons, lats

            # if still could not find longitudes and latitudes
            if self.lons is None:

                for vname, ncvar in ds.items():
                    if "lon" in vname.lower():
                        self.lons = ncvar.values

                    if "lat" in vname.lower():
                        self.lats = ncvar.values

            # if still could not find => raise an exception
            if self.lons is None:
                raise IOError(f"Could not find lon/lat fields in the\n {ds}")

            if var.ndim > 3:
                var = var[:, self.level_mapping[varname_internal].value, :, :]
            if var.shape[-2:] == self.lons.shape:
                data_list = var.values

            else:
                if var.ndim == 3:
                    data_list = np.transpose(var.values, axes=(0, 2, 1))
                elif var.ndim == 2:
                    data_list = np.transpose(var.values)
                else:
                    raise Exception(f"{var.ndim}-dimensional variables are not supported")

            # close the dataset
            ds.close()

        else:

            raise NotImplementedError(
                "reading of the layout type {} is not implemented yet.".format(self.data_source_type))

        # print(dates[0], dates[1], "...", dates[-1], len(dates))

        # Construct a dictionary for xarray.DataArray ...
        vardict = {
            "coords": {
                "t": {"dims": "t", "data": dates},
                "lon": {"dims": ("x", "y"), "data": self.lons},
                "lat": {"dims": ("x", "y"), "data": self.lats},
            },
            "dims": ("t", "x", "y"),
            "data": data_list,
            "name": varname_internal
        }

        if len(data_list) == 0:
            print("retreived dates: {}".format(dates))
            raise IOError(
                "Could not find any {} data for the period {}..{} in {}".format(self.varname_mapping[varname_internal],
                                                                                period.start, period.end,
                                                                                self.base_folder))
        # Convert units based on supplied mappings
        return self.multipliers[varname_internal] * DataArray.from_dict(vardict) + self.offsets[varname_internal]