def main_crcm5_hl():
    """
    Run the monthly HLES (lake-effect snow) analysis for the CRCM5_HL
    simulation: one pool task per month of the 1980-2009 period.
    """
    label = "CRCM5_HL"
    period = Period(datetime(1980, 1, 1), datetime(2009, 12, 31))

    pool = Pool(processes=12)

    # Loop-invariant mappings: build them once, outside the monthly loop
    # (the original rebuilt these — and initialized vname_map twice — on
    # every iteration).
    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    vname_map = {}
    vname_map.update(vname_map_CRCM5)
    # snowfall rate is stored under the non-standard name "U3" in this run
    vname_map.update({default_varname_mappings.SNOWFALL_RATE: "U3"})

    input_params = []
    for month_start in period.range("months"):
        month_end = month_start.add(months=1).subtract(seconds=1)
        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        label_to_config = OrderedDict([(label, {
            DataManager.SP_BASE_FOLDER: "/RECH2/huziy/coupling/GL_440x260_0.1deg_GL_with_Hostetler/Samples_selected",
            DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME,
            DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
            DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
            DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
            DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
            "out_folder": "lake_effect_analysis_{}_{}-{}_monthly".format(
                label, period.start.year, period.end.year)
        })])

        kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1)

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
def test_intersect_excluded(self):
    """A period lying entirely before this one yields no intersection."""
    anchor = Pendulum(2016, 8, 7)
    span = Period(anchor, anchor.add(weeks=1))

    disjoint = Period(anchor.add(days=-2), anchor.add(days=-1))

    self.assertIsNone(span.intersect(disjoint))
def test_intersect_same(self):
    """Intersecting a period with an identical copy returns the same span."""
    begin = Pendulum(2016, 8, 7)
    finish = begin.add(weeks=1)
    span = Period(begin, finish)

    overlap = span.intersect(Period(begin.copy(), finish.copy()))

    self.assertPendulum(overlap.start, 2016, 8, 7)
    self.assertPendulum(overlap.end, 2016, 8, 14)
def main_obs():
    """
    Run the monthly HLES analysis for the interpolated observation dataset:
    one pool task per month of the 1980-2010 period.
    """
    label = "Obs_monthly_icefix_test2_1proc_speedtest_3"
    period = Period(datetime(1980, 1, 1), datetime(2010, 12, 31))

    pool = Pool(processes=20)

    # Loop-invariant mappings: build once outside the monthly loop
    # (the original rebuilt them on every iteration).
    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    vname_map = {}
    vname_map.update(vname_map_CRCM5)

    input_params = []
    for month_start in period.range("months"):
        month_end = month_start.add(months=1).subtract(seconds=1)
        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        label_to_config = OrderedDict([(label, {
            DataManager.SP_BASE_FOLDER: "/HOME/huziy/skynet3_rech1/obs_data_for_HLES/interploated_to_the_same_grid/GL_0.1_452x260_icefix",
            DataManager.SP_DATASOURCE_TYPE: data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY,
            DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
            DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
            DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
            DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
            DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
            "out_folder": "lake_effect_analysis_daily_{}_{}-{}".format(
                label, period.start.year, period.end.year)
        })])

        kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1)

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
def test_intersect_included(self):
    """A period fully contained in this one comes back unchanged."""
    anchor = Pendulum(2016, 8, 7)
    outer = Period(anchor, anchor.add(weeks=1))

    inner = Period(anchor.add(days=2), anchor.add(days=4))
    overlap = outer.intersect(inner)

    self.assertPendulum(overlap.start, 2016, 8, 9)
    self.assertPendulum(overlap.end, 2016, 8, 11)
def test_intersect_multiple(self):
    """Intersecting with several periods keeps only the common span."""
    anchor = Pendulum(2016, 8, 7)
    base = Period(anchor, anchor.add(weeks=1))

    overlap = base.intersect(
        Period(anchor.add(days=-2), anchor.add(days=2)),
        Period(anchor.add(days=1), anchor.add(days=2)))

    self.assertPendulum(overlap.start, 2016, 8, 8)
    self.assertPendulum(overlap.end, 2016, 8, 9)
def main_future(nprocs=20):
    """
    Run the monthly HLES analysis for the future-climate (CanESM2 RCP8.5
    driven) CRCM5_HL simulation over 2079-2100.

    :param nprocs: number of worker processes handling months in parallel
    """
    period = Period(datetime(2079, 1, 1), datetime(2100, 12, 31))
    label = "CRCM5_HL_CanESM2_RCP85_{}-{}_monthly".format(period.start.year, period.end.year)

    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    base_folder = "/scratch/huziy/NEI/GL_samples_only/GL_CC_CanESM2_RCP85/HL-GL-current_CanESM2/Samples"

    # internal -> input variable name mapping; snowfall rate is "SN" here
    vname_map = {}
    vname_map.update(vname_map_CRCM5)
    vname_map[default_varname_mappings.SNOWFALL_RATE] = "SN"

    pool = Pool(processes=nprocs)

    input_params = []
    for month_start in period.range("months"):
        current_month_period = Period(month_start, month_start.add(months=1).subtract(seconds=1))
        current_month_period.months_of_interest = [month_start.month, ]

        label_to_config = OrderedDict([(
            label, {
                # "base_folder": "/HOME/huziy/skynet3_rech1/CRCM5_outputs/cc_canesm2_rcp85_gl/coupled-GL-future_CanESM2/Samples",
                DataManager.SP_BASE_FOLDER: base_folder,
                DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
                "out_folder": "lake_effect_analysis_{}_{}-{}".format(label, period.start.year, period.end.year)
            }
        )])

        task_kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(task_kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
def entry_for_cc_canesm2_gl():
    """
    for CanESM2 driven CRCM5_NEMO simulation

    Sets up paths, periods, seasons and masks, then delegates to main().
    """
    data_root = common_params.data_root

    label_to_datapath = OrderedDict([
        (common_params.crcm_nemo_cur_label,
         data_root / "lake_effect_analysis_CRCM5_NEMO_CanESM2_RCP85_1989-2010_1989-2010/merged/"),
        (common_params.crcm_nemo_fut_label,
         data_root / "lake_effect_analysis_CRCM5_NEMO_CanESM2_RCP85_2079-2100_2079-2100/merged/"),
    ])

    # end dates are not inclusive
    cur_period = Period(datetime(1989, 1, 1), datetime(2011, 1, 1))
    fut_period = Period(datetime(2079, 1, 1), datetime(2101, 1, 1))
    periods_info = CcPeriodsInfo(cur_period=cur_period, fut_period=fut_period)

    season_to_months = OrderedDict([
        ("ND", [11, 12]),
        ("JF", [1, 2]),
        ("MA", [3, 4]),
    ])

    # each pair: (HLES variable, covariate to correlate it with)
    var_pairs = [("hles_snow", "TT"), ("hles_snow", "PR"), ("hles_snow", "lake_ice_fraction")]

    var_display_names = {
        "hles_snow": "HLES",
        "lake_ice_fraction": "Mean Lake ice \nfraction",
        "TT": "2m air\n temperature",
        "PR": "total\nprecipitation"
    }

    plot_utils.apply_plot_params(width_cm=25, height_cm=25, font_size=8)

    gl_mask = get_gl_mask(label_to_datapath[common_params.crcm_nemo_cur_label])
    hles_region_mask = get_mask_of_points_near_lakes(gl_mask, npoints_radius=20)

    main(label_to_data_path=label_to_datapath, var_pairs=var_pairs, periods_info=periods_info,
         vname_display_names=var_display_names, season_to_months=season_to_months,
         hles_region_mask=hles_region_mask, lakes_mask=gl_mask)
def test_multiply(self):
    """Multiplying a period scales the resulting interval.

    The original body was duplicated verbatim (same construction and the
    same assertions repeated twice); the dead repeat is removed.
    """
    dt1 = Pendulum(2016, 8, 7, 12, 34, 56)
    dt2 = dt1.add(days=6, seconds=34)
    it = Period(dt1, dt2)
    mul = it * 2
    self.assertIsInstanceOfInterval(mul)
    self.assertInterval(mul, 1, 5, 0, 1, 8)
def test_floor_divide(self):
    """Floor-dividing a period truncates the resulting interval."""
    # halving a period of 2 days and 34 seconds
    begin = Pendulum(2016, 8, 7, 12, 34, 56)
    span = Period(begin, begin.add(days=2, seconds=34))
    halved = span // 2
    self.assertIsInstanceOfInterval(halved)
    self.assertInterval(halved, 0, 1, 0, 0, 17)

    # dividing a period of 2 days and 35 seconds by three
    begin = Pendulum(2016, 8, 7, 12, 34, 56)
    span = Period(begin, begin.add(days=2, seconds=35))
    third = span // 3
    self.assertIsInstanceOfInterval(third)
    self.assertInterval(third, 0, 0, 16, 0, 11)
def generate_hles_obs_variants():
    """
    Compute HLES month-by-month for each configured observation-based
    dataset variant over the Nov 1980 - Feb 2009 window.
    """
    period = Period(datetime(1980, 11, 1), datetime(2009, 2, 1))  # should be continuous??
    period.months_of_interest = list(range(1, 13))

    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    vname_map = {}
    vname_map.update(vname_map_CRCM5)

    # set the paths to the data
    common_dir = Path("/home/data/big1/obs_data_for_HLES/interploated_to_the_same_grid/")
    label_to_base_folder = OrderedDict()
    # label_to_base_folder["obs_anuspmaurer_erai"] = common_dir / "obs_anuspmaurer_erai"
    label_to_base_folder["obs_daymet_erai"] = common_dir / "obs_daymet_erai"
    # label_to_base_folder["obs_anuspmaurer_narr"] = common_dir / "obs_anuspmaurer_narr"
    # label_to_base_folder["obs_daymet_narr"] = common_dir / "obs_daymet_narr"

    # ---
    for label, base_folder in label_to_base_folder.items():
        __obs_case_monthly(period, vname_to_level_erai, vname_map, label, base_folder)
def get_mean_number_of_hles_days(self, start_year: int, end_year: int, season_to_months: dict, hles_vname: str):
    """
    Count HLES days per season and year, with on-disk caching.

    Returns {season: {year: 2d field of HLES-day counts}}, where a grid
    point counts a day when its daily-mean HLES rate is >= 0.1.

    :param start_year: first year considered (inclusive)
    :param end_year: last year considered (inclusive)
    :param season_to_months: ordered mapping season label -> list of months
    :param hles_vname: internal variable name of the HLES field
    """
    result = defaultdict(dict)

    cache_dir = Path(self.base_folder) / "cache"
    cache_dir.mkdir(exist_ok=True)

    seasons_str = "-".join(season_to_months)
    cache_file = cache_dir / f"get_mean_number_of_hles_days_{start_year}-{end_year}_m{seasons_str}_{hles_vname}.bin"

    if cache_file.exists():
        # context manager so the handle is closed promptly
        # (was pickle.load(cache_file.open("rb")), leaking the file object)
        with cache_file.open("rb") as f:
            return pickle.load(f)

    for season, months in season_to_months.items():
        for y in range(start_year, end_year + 1):
            d1 = Pendulum(y, months[0], 1)
            d2 = d1.add(months=len(months)).subtract(seconds=1)

            # skip a season that would spill past the requested range
            if d2.year > end_year:
                continue

            current_period = Period(d1, d2)
            logger.info("calculating mean for [{}, {}]".format(current_period.start, current_period.end))
            data = self.read_data_for_period(current_period, hles_vname)

            # calculate number of hles days
            data_daily = data.resample(t="1D", keep_attrs=True).mean(dim="t")
            result[season][y] = (data_daily.values >= 0.1).sum(axis=0)

    with cache_file.open("wb") as f:
        pickle.dump(result, f)

    return result
def test_contains_with_datetime(self):
    """A plain datetime inside the period is reported as contained."""
    span = Period(Pendulum(2000, 1, 1, 12, 45, 37),
                  Pendulum(2000, 1, 31, 12, 45, 37))

    probe = datetime(2000, 1, 7)
    self.assertTrue(probe in span)
def get_seasonal_maxima(self, start_year: int, end_year: int, season_to_months: dict, varname_internal: str):
    """
    returns a dictionary {season:{year: field of maxima}}
    :param start_year:
    :param end_year:
    :param season_to_months: (order of months in the list of months is important, i.e. for DJF the order should be [12, 1, 2])
    """
    result = defaultdict(dict)

    for season, months in season_to_months.items():
        for y in range(start_year, end_year + 1):
            d1 = Pendulum(y, months[0], 1)
            d2 = d1.add(months=len(months)).subtract(seconds=1)

            # skip a season that would spill past the requested range
            if d2.year > end_year:
                continue

            current_period = Period(d1, d2)
            # was a bare no-op string expression; restore the log call,
            # consistent with the sibling seasonal methods
            logger.info("calculating mean for [{}, {}]".format(current_period.start, current_period.end))
            data = self.read_data_for_period(current_period, varname_internal)

            if varname_internal == LAKE_ICE_FRACTION:
                # ice-fraction values > 1 flag invalid points; mask them out
                result[season][y] = np.ma.masked_where(data.values > 1, data.values).max(axis=0)
            else:
                result[season][y] = data.max(dim="t").values

    return result
def get_min_max_avg_for_short_period(self, start_time: Pendulum, end_time: Pendulum, varname_internal: str):
    """
    The short period means that all the data from the period fits into RAM

    :param start_time:
    :param end_time:
    :param varname_internal:
    :return: dict mapping field name -> DataArray (min/max/avg fields and
        the dates at which the extremes occur)
    """
    data = self.read_data_for_period(Period(start_time, end_time), varname_internal=varname_internal)

    # aggregate along the time axis
    field_min = data.min(dim="t").values
    field_max = data.max(dim="t").values
    field_avg = data.mean(dim="t").values

    # dates at which the extremes occur, for each grid point
    min_dates = _get_dates_for_extremes(field_min, data)
    min_dates.name = "min_dates"
    max_dates = _get_dates_for_extremes(field_max, data)
    max_dates.name = "max_dates"

    # wrap the aggregates into named DataArrays, keeping the original key order
    named_arrays = [
        xarray.DataArray(name="min_{}".format(varname_internal), data=field_min, dims=("x", "y")),
        min_dates,
        xarray.DataArray(name="max_{}".format(varname_internal), data=field_max, dims=("x", "y")),
        max_dates,
        xarray.DataArray(name="avg_{}".format(varname_internal), data=field_avg, dims=("x", "y")),
    ]

    return {arr.name: arr for arr in named_arrays}
def test_not_contains(self):
    """A datetime just before the period start is not contained."""
    span = Period(Pendulum(2000, 1, 1, 12, 45, 37),
                  Pendulum(2000, 1, 31, 12, 45, 37))

    before_start = Pendulum(2000, 1, 1, 11, 45, 37)
    self.assertFalse(before_start in span)
def test_range_months_overflow(self):
    """Ranging by months from the 30th does not overflow into the next month."""
    first = Pendulum(2016, 1, 30, tzinfo='America/Sao_Paulo')
    span = Period(first, first.add(months=4))

    steps = span.range('months')

    self.assertPendulum(steps[0], 2016, 1, 30, 0, 0, 0)
    self.assertPendulum(steps[-1], 2016, 5, 30, 0, 0, 0)
def __obs_case_monthly(period, vname_to_level: dict, vname_map: dict, label: str, base_dir: str, nprocs: int = 4):
    """
    find hles for each month of interest

    :param period:
    :param vname_to_level:
    :param vname_map:
    :param label:
    :param base_dir:
    :param nprocs: number of worker processes handling months in parallel
    """
    worker_pool = multiprocessing.Pool(processes=nprocs)

    tasks = []
    for month_start in period.range("months"):
        one_month = Period(month_start, month_start.add(months=1).subtract(seconds=1))
        one_month.months_of_interest = [month_start.month, ]

        label_to_config = OrderedDict([(label, {
            DataManager.SP_BASE_FOLDER: base_dir,
            DataManager.SP_DATASOURCE_TYPE: data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY,
            DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
            DataManager.SP_LEVEL_MAPPING: vname_to_level,
            DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
            DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
            DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
            "out_folder": "lake_effect_analysis_{}_{}-{}".format(label, period.start.year, period.end.year)
        })])

        print(one_month.months_of_interest)
        tasks.append(dict(
            label_to_config=label_to_config,
            period=one_month,
            months_of_interest=one_month.months_of_interest,
            nprocs_to_use=1))

    # execute in parallel
    worker_pool.map(monthly_func, tasks)
def test_timedelta_behavior(self):
    """A Period mirrors the timedelta it converts to."""
    a = Pendulum(2000, 11, 20, 1)
    b = Pendulum(2000, 11, 25, 2)
    c = Pendulum(2016, 11, 5, 3)

    for p in (Period(a, c), Period(b, c)):
        td = p.as_timedelta()
        self.assertEqual(p.total_seconds(), td.total_seconds())
        self.assertEqual(p.days, td.days)
        self.assertEqual(p.seconds, td.seconds)
        self.assertEqual(p.microseconds, td.microseconds)
def test_with_pendulum(self):
    """Endpoints supplied as Pendulum instances stay Pendulum instances."""
    lo = Pendulum(2000, 1, 1)
    hi = Pendulum(2000, 1, 31)
    span = Period(lo, hi)

    self.assertIsInstanceOfPendulum(span.start)
    self.assertIsInstanceOfPendulum(span.end)
    self.assertPendulum(span.start, 2000, 1, 1)
    self.assertPendulum(span.end, 2000, 1, 31)
def test_inverted_and_absolute(self):
    """With absolute=True, inverted endpoints are normalized chronologically."""
    lo = Pendulum(2000, 1, 1)
    hi = Pendulum(2000, 1, 31)
    span = Period(hi, lo, True)

    self.assertIsInstanceOfPendulum(span.start)
    self.assertIsInstanceOfPendulum(span.end)
    self.assertPendulum(span.start, 2000, 1, 1)
    self.assertPendulum(span.end, 2000, 1, 31)
def test_range(self):
    """Daily range over a 31-day period yields 31 points, endpoints inclusive."""
    first = Pendulum(2000, 1, 1, 12, 45, 37)
    last = Pendulum(2000, 1, 31, 12, 45, 37)

    steps = Period(first, last).range('days')

    self.assertEqual(31, len(steps))
    self.assertPendulum(steps[0], 2000, 1, 1, 12, 45, 37)
    self.assertPendulum(steps[-1], 2000, 1, 31, 12, 45, 37)
def test_range_with_dst(self):
    """Daily range crossing a DST transition shifts the wall-clock hour."""
    first = Pendulum(2016, 10, 14, tzinfo='America/Sao_Paulo')
    steps = Period(first, first.add(weeks=1)).range('days')

    self.assertPendulum(steps[0], 2016, 10, 14, 0, 0, 0)
    self.assertPendulum(steps[2], 2016, 10, 16, 1, 0, 0)
    self.assertPendulum(steps[-1], 2016, 10, 21, 0, 0, 0)
def test_range():
    """Daily range over 31 days has 31 entries, both endpoints included."""
    first = pendulum.datetime(2000, 1, 1, 12, 45, 37)
    last = pendulum.datetime(2000, 1, 31, 12, 45, 37)

    steps = list(Period(first, last).range("days"))

    assert len(steps) == 31
    assert_datetime(steps[0], 2000, 1, 1, 12, 45, 37)
    assert_datetime(steps[-1], 2000, 1, 31, 12, 45, 37)
def test_range_no_overflow():
    """When the end falls short of a full day, the last step is clipped."""
    first = pendulum.datetime(2000, 1, 1, 12, 45, 37)
    last = pendulum.datetime(2000, 1, 31, 11, 45, 37)

    steps = list(Period(first, last).range('days'))

    assert len(steps) == 30
    assert_datetime(steps[0], 2000, 1, 1, 12, 45, 37)
    assert_datetime(steps[-1], 2000, 1, 30, 12, 45, 37)
def test_range_inverted():
    """An inverted period ranges backwards, from end towards start."""
    early = pendulum.datetime(2000, 1, 1, 12, 45, 37)
    late = pendulum.datetime(2000, 1, 31, 12, 45, 37)

    steps = list(Period(late, early).range('days'))

    assert len(steps) == 31
    assert_datetime(steps[-1], 2000, 1, 1, 12, 45, 37)
    assert_datetime(steps[0], 2000, 1, 31, 12, 45, 37)
def test_iter():
    """Iterating a period yields one DateTime per day, inclusive of both ends."""
    first = pendulum.datetime(2000, 1, 1, 12, 45, 37)
    last = pendulum.datetime(2000, 1, 31, 12, 45, 37)
    span = Period(first, last)

    count = 0
    for step in span:
        assert isinstance(step, pendulum.DateTime)
        count += 1

    assert count == 31
def test_iter(self):
    """Iterating a period yields one Pendulum per day, inclusive of both ends."""
    first = Pendulum(2000, 1, 1, 12, 45, 37)
    last = Pendulum(2000, 1, 31, 12, 45, 37)
    span = Period(first, last)

    count = 0
    for step in span:
        self.assertIsInstanceOfPendulum(step)
        count += 1

    self.assertEqual(31, count)
def test_range_amount(self):
    """range(unit, amount) steps by the given number of units."""
    first = Pendulum(2016, 10, 14, tzinfo='America/Sao_Paulo')
    steps = Period(first, first.add(weeks=1)).range('days', 2)

    self.assertEqual(len(steps), 4)
    self.assertPendulum(steps[0], 2016, 10, 14, 0, 0, 0)
    self.assertPendulum(steps[1], 2016, 10, 16, 1, 0, 0)
    self.assertPendulum(steps[2], 2016, 10, 18, 0, 0, 0)
    self.assertPendulum(steps[3], 2016, 10, 20, 0, 0, 0)
def test_accuracy(self):
    """Year/month components agree for both periods; day totals differ."""
    near_start = Pendulum(2000, 11, 20)
    later_start = Pendulum(2000, 11, 25)
    common_end = Pendulum(2016, 11, 5)

    # (period, expected total days, expected remaining days)
    cases = [
        (Period(near_start, common_end), 5829, 2),
        (Period(later_start, common_end), 5824, 4),
    ]

    for span, total_days, rem_days in cases:
        self.assertEqual(15, span.years)
        self.assertEqual(15, span.in_years())
        self.assertEqual(11, span.months)
        self.assertEqual(191, span.in_months())
        self.assertEqual(total_days, span.days)
        self.assertEqual(rem_days, span.remaining_days)
        self.assertEqual(total_days, span.in_days())