Exemplo n.º 1
0
    def get_mean_number_of_hles_days(self, start_year: int, end_year: int, season_to_months: dict, hles_vname: str):
        """
        For each season and year, count the days whose daily mean of ``hles_vname`` is >= 0.1.

        The result is cached as a pickle file in ``<self.base_folder>/cache``; when a cache file
        for the same years, seasons and variable name already exists it is returned as is.

        :param start_year: first year to process (inclusive)
        :param end_year: last year to process (inclusive); a season that would end after
            ``end_year`` is skipped for that year
        :param season_to_months: {season name: list of months}; the season starts on the 1st of
            ``months[0]`` and lasts ``len(months)`` months, so the order matters
            (e.g. [12, 1, 2] for DJF). Season names must be strings (they are joined into the
            cache file name).
        :param hles_vname: name of the variable passed to ``self.read_data_for_period``
        :return: {season: {year: array of day counts}}
        """
        cache_dir = Path(self.base_folder) / "cache"
        cache_dir.mkdir(exist_ok=True)
        seasons_str = "-".join(season_to_months)
        cache_file = cache_dir / f"get_mean_number_of_hles_days_{start_year}-{end_year}_m{seasons_str}_{hles_vname}.bin"

        if cache_file.exists():
            # NOTE: pickle must only be used on trusted files; the cache folder is assumed
            # to be written exclusively by this code.
            with cache_file.open("rb") as cache_in:
                return pickle.load(cache_in)

        result = defaultdict(dict)

        for season, months in season_to_months.items():

            for y in range(start_year, end_year + 1):
                d1 = Pendulum(y, months[0], 1)
                # last second of the last month of the season
                d2 = d1.add(months=len(months)).subtract(seconds=1)

                # the season does not fit into the requested range of years
                if d2.year > end_year:
                    continue

                current_period = Period(d1, d2)
                logger.info("calculating mean for [{}, {}]".format(current_period.start, current_period.end))
                data = self.read_data_for_period(current_period, hles_vname)

                # calculate number of hles days: daily means first, then count the days
                # reaching the 0.1 threshold along the first axis (presumably time -- confirm)
                data_daily = data.resample(t="1D", keep_attrs=True).mean(dim="t")

                result[season][y] = (data_daily.values >= 0.1).sum(axis=0)

        # close the cache file deterministically so the pickle is fully flushed to disk
        with cache_file.open("wb") as cache_out:
            pickle.dump(result, cache_out)
        return result
Exemplo n.º 2
0
    def get_seasonal_maxima(self, start_year: int, end_year: int, season_to_months: dict, varname_internal: str):
        """
        Compute, for each season and year, the field of maxima of ``varname_internal``.

        :param start_year: first year to process (inclusive)
        :param end_year: last year to process (inclusive); a season that would end after
            ``end_year`` is skipped for that year
        :param season_to_months: {season name: list of months}; the season starts on the 1st of
            ``months[0]`` and lasts ``len(months)`` months
            (order of months in the list of months is important, i.e. for DJF the order should be [12, 1, 2])
        :param varname_internal: name of the variable passed to ``self.read_data_for_period``
        :return: a dictionary {season: {year: field of maxima}}
        """
        result = defaultdict(dict)

        for season, months in season_to_months.items():

            for y in range(start_year, end_year + 1):
                d1 = Pendulum(y, months[0], 1)
                # last second of the last month of the season
                d2 = d1.add(months=len(months)).subtract(seconds=1)

                # the season does not fit into the requested range of years
                if d2.year > end_year:
                    continue

                current_period = Period(d1, d2)
                logger.info("calculating maxima for [{}, {}]".format(current_period.start, current_period.end))
                data = self.read_data_for_period(current_period, varname_internal)

                if varname_internal == LAKE_ICE_FRACTION:
                    # values above 1 are masked out before taking the maximum along the first axis
                    # (presumably they are fill/missing values -- confirm)
                    values = data.values
                    result[season][y] = np.ma.masked_where(values > 1, values).max(axis=0)
                else:
                    result[season][y] = data.max(dim="t").values

        return result
Exemplo n.º 3
0
def iter_date(start_date: "pendulum.DateTime",
              end_date: "pendulum.DateTime",
              chunk_size=59):
    """Yield consecutive ``(chunk_start, chunk_end)`` pairs covering ``[start_date, end_date]``.

    Each chunk ends ``chunk_size`` days after its start (clipped to ``end_date``) and the
    next chunk starts one day after the previous chunk's end.

    This is a generator, so the arguments are validated when the first item is requested.

    :param start_date: first date of the range; must provide ``add(days=...)``
    :param end_date: last date of the range (inclusive)
    :param chunk_size: number of days added to a chunk's start to get its end
    :raises ValueError: if ``chunk_size`` is negative or ``end_date`` is before ``start_date``
    """
    # a negative chunk size would never advance past end_date and loop forever
    if chunk_size < 0:
        raise ValueError(f"chunk_size:{chunk_size} should not be negative")
    if end_date < start_date:
        # exceptions do not interpolate printf-style arguments, so format the message here
        raise ValueError(
            f"start_date:{start_date} should not be later than end_date:{end_date}")
    while start_date <= end_date:
        new_end_date = min(start_date.add(days=chunk_size), end_date)
        yield start_date, new_end_date
        start_date = new_end_date.add(days=1)
Exemplo n.º 4
0
def iter_date(
    start_date: "pendulum.DateTime", end_date: "pendulum.DateTime", chunk_size=59
):
    """Split ``[start_date, end_date]`` into consecutive ``(chunk_start, chunk_end)`` pairs.

    A chunk ends ``chunk_size`` days after it starts, clipped to ``end_date``; the following
    chunk starts one day after that end.

    Being a generator, the function validates its arguments on the first iteration.

    :param start_date: first date of the range; must provide ``add(days=...)``
    :param end_date: last date of the range (inclusive)
    :param chunk_size: number of days added to a chunk's start to get its end
    :raises ValueError: if ``chunk_size`` is negative or ``end_date`` is before ``start_date``
    """
    if chunk_size < 0:
        # with a negative step the start never moves past end_date: refuse instead of looping forever
        raise ValueError(f"chunk_size:{chunk_size} should not be negative")
    if end_date < start_date:
        # build the message explicitly: ValueError does not apply "%s" formatting to its args
        raise ValueError(
            f"start_date:{start_date} should not be later than end_date:{end_date}"
        )
    while start_date <= end_date:
        new_end_date = min(start_date.add(days=chunk_size), end_date)
        yield start_date, new_end_date
        start_date = new_end_date.add(days=1)
Exemplo n.º 5
0
    def get_seasonal_means(self, start_year: int, end_year: int, season_to_months: dict, varname_internal: str):
        """
        Compute, for each season and year, the time-mean field of ``varname_internal``.

        The result is cached as a pickle file in ``<self.base_folder>/cache``; when a cache file
        for the same years, seasons and variable name already exists it is returned as is.

        :param start_year: first year to process (inclusive)
        :param end_year: last year to process (inclusive); a season that would end after
            ``end_year`` is skipped for that year
        :param season_to_months: {season name: list of months}; the season starts on the 1st of
            ``months[0]`` and lasts ``len(months)`` months
            (order of months in the list of months is important, i.e. for DJF the order should be [12, 1, 2]).
            Season names must be strings (they are joined into the cache file name).
        :param varname_internal: name of the variable passed to ``self.read_data_for_period``
        :return: a dictionary {season: {year: mean_field}}
        """
        cache_dir = Path(self.base_folder) / "cache"
        cache_dir.mkdir(exist_ok=True)
        seasons_str = "-".join(season_to_months)
        cache_file = cache_dir / f"get_seasonal_means_{start_year}-{end_year}_m{seasons_str}_{varname_internal}.bin"

        if cache_file.exists():
            # NOTE: pickle must only be used on trusted files; the cache folder is assumed
            # to be written exclusively by this code.
            with cache_file.open("rb") as cache_in:
                return pickle.load(cache_in)

        result = defaultdict(dict)

        for season, months in season_to_months.items():

            for y in range(start_year, end_year + 1):
                d1 = Pendulum(y, months[0], 1)
                # last second of the last month of the season
                d2 = d1.add(months=len(months)).subtract(seconds=1)

                # the season does not fit into the requested range of years
                if d2.year > end_year:
                    continue

                current_period = Period(d1, d2)
                logger.info("calculating mean for [{}, {}]".format(current_period.start, current_period.end))
                data = self.read_data_for_period(current_period, varname_internal)

                result[season][y] = data.mean(dim="t").values

        # close the cache file deterministically so the pickle is fully flushed to disk
        with cache_file.open("wb") as cache_out:
            pickle.dump(result, cache_out)
        return result
Exemplo n.º 6
0
    def get_mean_number_of_cao_days(self, start_year: int, end_year: int, season_to_months: dict,
                                    temperature_vname: str, min_cao_width_cells=5):
        """
        calculate number of CAO days for each season and year {season: {year: field}}
        Calculation following Wheeler et al 2011

        A day is counted when its daily mean is at or below the climatological 31-day centred
        rolling mean minus 1.5 climatological standard deviations, and the climatological
        standard deviation is above 2. "Climatological" here means averaged over the processed years.

        The result is always recomputed and then written as a pickle file to
        ``<self.base_folder>/cache``, overwriting any previous cache file.

        :param start_year: first year to process (inclusive)
        :param end_year: last year to process (inclusive); a season that would end after
            ``end_year`` is skipped for that year
        :param season_to_months: {season name: list of months}; the season starts on the 1st of
            ``months[0]`` and lasts ``len(months)`` months. Season names must be strings
            (they are joined into the cache file name).
        :param temperature_vname: name of the variable passed to ``self.read_data_for_period``
        :param min_cao_width_cells: currently unused
        :return: {season: {year: array of day counts}}
        """
        season_to_year_to_std = defaultdict(dict)
        season_to_year_to_data = defaultdict(dict)
        season_to_year_to_rolling_mean = defaultdict(dict)
        season_to_year_to_n_cao_days = defaultdict(dict)

        cache_dir = Path(self.base_folder) / "cache"
        cache_dir.mkdir(exist_ok=True)
        seasons_str = "-".join(season_to_months)
        cache_file = cache_dir / f"get_mean_number_of_cao_days_{start_year}-{end_year}_m{seasons_str}_{temperature_vname}.bin"

        # NOTE: reading from the cache was switched off in the original code
        # (`if cache_file.exists() and False`), so an existing cache file is never loaded.

        for season, months in season_to_months.items():

            for y in range(start_year, end_year + 1):
                d1 = Pendulum(y, months[0], 1)
                # last second of the last month of the season
                d2 = d1.add(months=len(months)).subtract(seconds=1)

                # the season does not fit into the requested range of years
                if d2.year > end_year:
                    continue

                current_period = Period(d1, d2)
                logger.info("calculating mean for [{}, {}]".format(current_period.start, current_period.end))
                data = self.read_data_for_period(current_period, temperature_vname)

                # calculate daily means, dropping days with missing values
                data_daily = data.resample(t="1D", keep_attrs=True).mean(dim="t").dropna(dim="t")

                assert isinstance(data_daily, xarray.DataArray)

                # save the data for reuse below
                season_to_year_to_data[season][y] = data_daily.values
                season_to_year_to_std[season][y] = data_daily.std(dim="t").values
                season_to_year_to_rolling_mean[season][y] = data_daily.rolling(center=True, t=31).mean(dim="t").values

        #  Calculate climatological std and rolling mean
        # NOTE(review): averaging the rolling means assumes every year of a season has the same
        # number of days -- confirm for seasons containing February (leap years).
        season_to_std_clim = {
            s: np.mean(list(y_to_std.values()), axis=0) for s, y_to_std in season_to_year_to_std.items()
        }

        season_to_rolling_clim = {
            s: np.mean(list(y_to_rolling.values()), axis=0) for s, y_to_rolling in season_to_year_to_rolling_mean.items()
        }

        #  calculate number of CAO days
        for season, std_clim in season_to_std_clim.items():
            # both quantities depend only on the season, so compute them once per season
            cao_threshold = season_to_rolling_clim[season] - 1.5 * std_clim
            variable_enough = std_clim > 2

            # iterate only over the years that were actually processed above: years whose season
            # extends past end_year were skipped and have no data (indexing them raised KeyError)
            for y, daily_values in season_to_year_to_data[season].items():
                cao_suspect = (np.array(daily_values) <= cao_threshold) & variable_enough

                season_to_year_to_n_cao_days[season][y] = cao_suspect.sum(axis=0)

        # close the cache file deterministically so the pickle is fully flushed to disk
        with cache_file.open("wb") as cache_out:
            pickle.dump(season_to_year_to_n_cao_days, cache_out)
        return season_to_year_to_n_cao_days
Exemplo n.º 7
0
def _get_period_for_ym(year, month):
    """Return the Period covering one calendar month.

    The period runs from the first instant of day 1 of ``month`` in ``year`` to one
    microsecond before the start of the following month.
    """
    month_start = Pendulum(year, month, 1)
    next_month_start = month_start.add(months=1)
    return Period(month_start, next_month_start.subtract(microseconds=1))