def get_year_month_to_period_map(self, start_year, end_year):
    """
    Generate the mapping (year, month) --> season Period.

    For each year in [start_year, end_year], build the season period starting
    at (year, self.start_month) and spanning self.nmonths months; every
    (year, month) falling inside that season maps to the season's Period.
    Seasons that would extend past end_year are skipped.

    Fix: removed a leftover debug print of each (start, end) pair.

    :param start_year: first year of interest (inclusive)
    :param end_year: last year of interest (inclusive)
    :return: dict {(year, month): Period}
    """
    res = {}
    for y in range(start_year, end_year + 1):
        start = Pendulum(y, self.start_month, 1)
        # subtract 1 microsecond so the period ends just before the next season begins
        end = start.add(months=self.nmonths).subtract(microseconds=1)

        # ignore incomplete seasons extending beyond the period of interest
        if end.year > end_year:
            continue

        p = Period(start, end)
        for s in p.range("months"):
            res[(s.year, s.month)] = p

    return res
def generate_hles_obs_variants(): period = Period( datetime(1980, 11, 1), datetime(2009, 2, 1) ) # should be continuous?? months_of_interest = list(range(1, 13)) period.months_of_interest = months_of_interest vname_to_level_erai = { T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID), U_WE: VerticalLevel(1, level_kinds.HYBRID), V_SN: VerticalLevel(1, level_kinds.HYBRID), } vname_map = {} vname_map.update(vname_map_CRCM5) # set the paths to the data label_to_base_folder = OrderedDict() common_dir = Path("/HOME/huziy/skynet3_rech1/obs_data_for_HLES/interploated_to_the_same_grid/") label_to_base_folder["obs_anuspmaurer_erai"] = common_dir / "obs_anuspmaurer_erai" label_to_base_folder["obs_daymet_erai"] = common_dir / "obs_daymet_erai" label_to_base_folder["obs_anuspmaurer_narr"] = common_dir / "obs_anuspmaurer_narr" label_to_base_folder["obs_daymet_narr"] = common_dir / "obs_daymet_narr" # --- for label, base_folder in label_to_base_folder.items(): __obs_case(period, vname_to_level_erai, vname_map, label, base_folder)
def main_crcm5_nemo():
    """
    Launch the monthly HLES calculations for the CRCM5_NEMO (coupled) run:
    one single-month job per month of 1980--2015, fanned out over a process
    pool (each worker runs monthly_func with nprocs_to_use=1).

    Fix: the month-invariant mappings (vname_to_level_erai, vname_map) were
    rebuilt on every loop iteration, and vname_map was built twice (the first
    construction was immediately discarded) -- both are now built once.
    """
    label = "CRCM5_NEMO"

    period = Period(
        datetime(1980, 1, 1),
        datetime(2015, 12, 31)
    )

    pool = Pool(processes=10)

    # These mappings do not depend on the month -- build them once.
    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    vname_map = {}
    vname_map.update(vname_map_CRCM5)
    vname_map.update({
        default_varname_mappings.SNOWFALL_RATE: "SN"
    })

    input_params = []
    for month_start in period.range("months"):
        # last instant of the current month
        month_end = month_start.add(months=1).subtract(seconds=1)
        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        label_to_config = OrderedDict([(
            label, {
                DataManager.SP_BASE_FOLDER: "/snow3/huziy/NEI/GL/erai0.75deg_driven/GL_with_NEMO_dtN_1h_and_30min/Samples",
                DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: default_varname_mappings.vname_to_fname_prefix_CRCM5,
                "out_folder": "lake_effect_analysis_{}_{}-{}_monthly".format(label, period.start.year, period.end.year)
            }
        )])

        kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
def main_crcm5_hl():
    """
    Launch the monthly HLES calculations for the CRCM5_HL (Hostetler lake
    model) run: one single-month job per month of 1980--2009, fanned out
    over a process pool (each worker runs monthly_func with nprocs_to_use=1).

    Fix: the month-invariant mappings (vname_to_level_erai, vname_map) were
    rebuilt on every loop iteration, and vname_map was built twice (the first
    construction was immediately discarded) -- both are now built once.
    """
    label = "CRCM5_HL"

    period = Period(
        datetime(1980, 1, 1),
        datetime(2009, 12, 31)
    )

    pool = Pool(processes=12)

    # These mappings do not depend on the month -- build them once.
    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    vname_map = {}
    vname_map.update(vname_map_CRCM5)
    vname_map.update({
        default_varname_mappings.SNOWFALL_RATE: "U3"
    })

    input_params = []
    for month_start in period.range("months"):
        # last instant of the current month
        month_end = month_start.add(months=1).subtract(seconds=1)
        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        label_to_config = OrderedDict([(
            label, {
                DataManager.SP_BASE_FOLDER: "/RECH2/huziy/coupling/GL_440x260_0.1deg_GL_with_Hostetler/Samples_selected",
                DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                "out_folder": "lake_effect_analysis_{}_{}-{}_monthly".format(label, period.start.year, period.end.year)
            }
        )])

        kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
def main_future(nprocs=20):
    """
    Launch the monthly HLES calculations for the future-climate
    (CanESM2-driven, RCP8.5, coupled GL) simulation over 2079--2100,
    one single-month job per month fanned out over a process pool.

    :param nprocs: number of worker processes in the pool
    """
    period = Period(
        datetime(2079, 1, 1),
        datetime(2100, 12, 31)
    )

    label = "CRCM5_NEMO_fix_TT_PR_CanESM2_RCP85_{}-{}_monthly".format(period.start.year, period.end.year)

    # wind/temperature inputs are taken at the first hybrid model level
    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    base_folder = "/scratch/huziy/Output/GL_CC_CanESM2_RCP85/coupled-GL-future_CanESM2/Samples"

    vname_map = {}
    vname_map.update(vname_map_CRCM5)
    # vname_map[default_varname_mappings.SNOWFALL_RATE] = "SN"
    # NOTE(review): "XXX" looks like a placeholder for the snowfall-rate
    # input variable name (cf. "SN"/"U3" used elsewhere in this file) --
    # confirm before running.
    vname_map[default_varname_mappings.SNOWFALL_RATE] = "XXX"

    pool = Pool(processes=nprocs)

    input_params = []
    for month_start in period.range("months"):
        # last instant of the current month
        month_end = month_start.add(months=1).subtract(seconds=1)
        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        label_to_config = OrderedDict([(
            label, {
                # "base_folder": "/HOME/huziy/skynet3_rech1/CRCM5_outputs/cc_canesm2_rcp85_gl/coupled-GL-future_CanESM2/Samples",
                DataManager.SP_BASE_FOLDER: base_folder,
                DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
                "out_folder": "lake_effect_analysis_{}_{}-{}".format(label, period.start.year, period.end.year)
            }
        )])

        kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
def test_range():
    """A 30-day span iterated by days yields 31 datetimes, both endpoints included."""
    start = pendulum.datetime(2000, 1, 1, 12, 45, 37)
    end = pendulum.datetime(2000, 1, 31, 12, 45, 37)

    days = list(Period(start, end).range("days"))

    assert len(days) == 31
    assert_datetime(days[0], 2000, 1, 1, 12, 45, 37)
    assert_datetime(days[-1], 2000, 1, 31, 12, 45, 37)
def main_obs():
    """
    Launch the monthly lake-effect snowfall calculations for the gridded
    observations (icefix variant): one single-month job per month of
    1980--2010, fanned out over a process pool.

    Fix: the month-invariant mappings (vname_to_level_erai, vname_map) were
    rebuilt on every loop iteration; they are now built once before the loop.
    """
    label = "Obs_monthly_icefix_test2_1proc_speedtest_3"

    period = Period(
        datetime(1980, 1, 1),
        datetime(2010, 12, 31)
    )

    pool = Pool(processes=20)

    # These mappings do not depend on the month -- build them once.
    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    vname_map = {}
    vname_map.update(vname_map_CRCM5)

    input_params = []
    for month_start in period.range("months"):
        # last instant of the current month
        month_end = month_start.add(months=1).subtract(seconds=1)
        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        label_to_config = OrderedDict([(
            label, {
                DataManager.SP_BASE_FOLDER: "/HOME/huziy/skynet3_rech1/obs_data_for_HLES/interploated_to_the_same_grid/GL_0.1_452x260_icefix",
                DataManager.SP_DATASOURCE_TYPE: data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
                "out_folder": "lake_effect_analysis_daily_{}_{}-{}".format(label, period.start.year, period.end.year)
            }
        )])

        kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
def enh_lakeffect_snfall_calculator_proc(args):
    """
    Worker entry point for multiprocessing.

    :param args: tuple (data_manager, label, period, out_folder); period may
        be either a Period or a [start, end, months_of_interest] sequence,
        in which case it is rebuilt here.
    """
    data_manager, label, period, out_folder = args

    # Rebuild the Period when it arrived as a plain [start, end, months] sequence.
    if not isinstance(period, Period):
        rebuilt = Period(start=period[0], end=period[1])
        rebuilt.months_of_interest = period[2]
        period = rebuilt

    print("Start calculations for {} ... {}".format(period.start, period.end))

    calculate_enh_lakeffect_snowfall_for_a_datasource(data_mngr=data_manager, label=label,
                                                      period=period, out_folder=out_folder)

    print("Finish calculations for {} ... {}".format(period.start, period.end))
def main():
    """
    Run the per-year parallel lake-effect snowfall calculation for the
    gridded observations ("Obs") over a single Nov 1980 -- Jan 1981 season,
    using up to 20 processes.
    """
    label = "Obs"

    period = Period(
        datetime(1980, 11, 1),
        datetime(1981, 2, 1)
    )  # should be continuous??

    months_of_interest = [11, 12, 1]
    period.months_of_interest = months_of_interest

    # wind/temperature inputs are taken at the first hybrid model level
    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    vname_map = {}
    vname_map.update(vname_map_CRCM5)

    label_to_config = OrderedDict([(
        label, {
            DataManager.SP_BASE_FOLDER: "/HOME/huziy/skynet3_rech1/obs_data_for_HLES/interploated_to_the_same_grid/GL_0.1_452x260",
            DataManager.SP_DATASOURCE_TYPE: data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY,
            DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
            DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
            DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
            DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
            DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
            "out_folder": "lake_effect_analysis_icefix_{}_{}-{}_test1".format(label, period.start.year, period.end.year)
        }
    )])

    calculate_lake_effect_snowfall_each_year_in_parallel(label_to_config=label_to_config,
                                                         period=period,
                                                         nprocs_to_use=20)
def calculate_lake_effect_snowfall_each_year_in_parallel(label_to_config, period=None):
    """
    Run the lake-effect snowfall calculation for each data source, one
    worker process per year of the period.

    :param label_to_config: mapping {label: DataManager store config};
        the config may carry an "out_folder" entry (defaults to ".")
    :param period: The period of interest defined by the start and the end year of the period (inclusive);
        must carry a months_of_interest attribute
    """
    assert hasattr(period, "months_of_interest")

    for label, the_config in label_to_config.items():
        data_manager = DataManager(store_config=the_config)
        print(the_config)

        # output goes to the current directory unless configured otherwise
        if "out_folder" in the_config:
            out_folder = the_config["out_folder"]
        else:
            out_folder = "."

        # Use a fraction of the available processes
        nprocs_to_use = max(int(multiprocessing.cpu_count() * 0.75), 1)
        nprocs_to_use = min(nprocs_to_use, period.in_years())  # No need for more processes than there is of years
        print("Using {} processes for parallelization".format(nprocs_to_use))

        pool = Pool(processes=nprocs_to_use)

        # Construct the input params for each process: one season per year,
        # starting at the period start and spanning len(months_of_interest)
        # months, end-inclusive to the second.
        in_data = []

        for start in period.range("years"):
            p = Period(start=start, end=start.add(months=len(period.months_of_interest)).subtract(seconds=1))
            p.months_of_interest = period.months_of_interest
            # NOTE(review): the period is shipped as a plain
            # [start, end, months] list and rebuilt in the worker
            # (enh_lakeffect_snfall_calculator_proc) -- presumably for
            # pickling across processes; verify.
            in_data.append([data_manager, label, [p.start, p.end, period.months_of_interest], out_folder])

        print(in_data)
        pool.map(enh_lakeffect_snfall_calculator_proc, in_data)
def calculate_lake_effect_snowfall_each_year_in_parallel(label_to_config, period=None,
                                                         months_of_interest=None,
                                                         nprocs_to_use=None):
    """
    Run the lake-effect snowfall calculation for each data source, one
    worker process per year of the period (or serially when only one
    process is requested).

    :param label_to_config: mapping {label: DataManager store config};
        the config may carry an "out_folder" entry (defaults to ".")
    :param period: The period of interest defined by the start and the end year of the period (inclusive)
    :param months_of_interest: if given, stored on the period; otherwise the
        period must already carry a months_of_interest attribute
    :param nprocs_to_use: number of worker processes; derived from the CPU
        count and the number of years when None
    """
    if months_of_interest is not None:
        period.months_of_interest = months_of_interest

    assert hasattr(period, "months_of_interest")

    for label, the_config in label_to_config.items():
        data_manager = DataManager(store_config=the_config)
        print(the_config)

        # output goes to the current directory unless configured otherwise
        if "out_folder" in the_config:
            out_folder = the_config["out_folder"]
        else:
            out_folder = "."

        out_folder = Path(out_folder)

        try:
            # Try to create the output folder if it does not exist
            if not out_folder.exists():
                out_folder.mkdir()
                print("{}: {} created".format(multiprocessing.current_process().name, out_folder))
        except FileExistsError:
            # another process created it between the check and mkdir
            print("{}: {} already exists".format(multiprocessing.current_process().name, out_folder))

        if nprocs_to_use is None:
            # Use a fraction of the available processes
            nprocs_to_use = max(int(multiprocessing.cpu_count() * 0.75), 1)
            nprocs_to_use = min(nprocs_to_use, period.in_years())  # No need for more processes than there is of years
            nprocs_to_use = max(1, nprocs_to_use)  # make sure that nprocs_to_use is not 0

        print("Using {} processes for parallelization".format(nprocs_to_use))

        # Construct the input params for each process: one season per year,
        # clamped so the last season does not overrun the whole period.
        # NOTE(review): the period is shipped as a plain [start, end, months]
        # list and rebuilt in the worker -- presumably for pickling; verify.
        in_data = []

        for start in period.range("years"):
            end_date = start.add(months=len(period.months_of_interest)).subtract(seconds=1)
            end_date = min(end_date, period.end)

            p = Period(start=start, end=end_date)
            p.months_of_interest = period.months_of_interest
            in_data.append([data_manager, label, [p.start, p.end, period.months_of_interest], out_folder])

        print(in_data)

        if nprocs_to_use > 1:
            pool = Pool(processes=nprocs_to_use)
            pool.map(enh_lakeffect_snfall_calculator_proc, in_data)
        else:
            # serial path: easier to debug, no pool overhead
            for current_in_data in in_data:
                enh_lakeffect_snfall_calculator_proc(current_in_data)

        # release the per-label data before the next iteration
        del in_data
        del data_manager
def main():
    """
    First approximation of the lake-effect snow, by looking at the daily
    snowfall of more than 1 cm/day, for the DJF 1994--1995 season.

    Builds data-source configurations for ERA-Interim, a CRCM5/FLake
    ensemble member and an ECMWF GCM ensemble member, then runs the
    calculation.
    """
    period = Period(
        datetime(1994, 12, 1),
        datetime(1995, 3, 1)
    )

    # should be consequent
    months_of_interest = [12, 1, 2]
    period.months_of_interest = months_of_interest

    ERAI_label = "ERA-Interim"

    # wind/temperature inputs are taken at the first hybrid model level
    vname_to_level_erai = {
        default_varname_mappings.T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        default_varname_mappings.U_WE: VerticalLevel(1, level_kinds.HYBRID),
        default_varname_mappings.V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    label_to_config = OrderedDict(
        [
            # ERA-Interim
            (ERAI_label, {
                "base_folder": "/RECH/data/Driving_data/Offline/ERA-Interim_0.75/6h_Analysis",
                "data_source_type": data_source_types.ALL_VARS_IN_A_FOLDER_OF_RPN_FILES,
                "min_dt": timedelta(hours=6),
                "varname_mapping": default_varname_mappings.vname_map_CRCM5,
                "level_mapping": vname_to_level_erai,
                "offset_mapping": default_varname_mappings.vname_to_offset_CRCM5,
                "multiplier_mapping": default_varname_mappings.vname_to_multiplier_CRCM5
            }),
            # Add additional sources below
        ]
    )

    label = "ECMWF_CRCM5_FLake_0"
    label_to_config_CRCM5 = OrderedDict([(
        label, {
            "base_folder": "/HOME/huziy/skynet3_rech1/ens_simulations_links_diro/ENSSEASF_NorthAmerica_0.22deg_B1_0",
            "data_source_type": data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
            "min_dt": timedelta(hours=3),
            "varname_mapping": default_varname_mappings.vname_map_CRCM5,
            "level_mapping": vname_to_level_erai,
            "offset_mapping": default_varname_mappings.vname_to_offset_CRCM5,
            "multiplier_mapping": default_varname_mappings.vname_to_multiplier_CRCM5,
            "filename_prefix_mapping": default_varname_mappings.vname_to_fname_prefix_CRCM5,
            "out_folder": "lake_effect_analysis_{}".format(label)
        }
    )])

    # for i in range(1, 9):
    #     label = "ECMWF_CRCM5_FLake_{}".format(i)
    #     label_to_config_CRCM5[label] = label_to_config_CRCM5[label0]
    #     label_to_config_CRCM5[label]["out_folder"] = "lake_effect_analysis_{}".format(label)

    # ECMWF GCM ensemble member outputs
    label_ECMWF_GCM = "ECMWF_GCM_1"
    multiplier_map_ECMWF_GCM = defaultdict(lambda: 1)
    # convert precipitation to m/s
    multiplier_map_ECMWF_GCM[default_varname_mappings.TOTAL_PREC] = 1.0e-3 / (24.0 * 3600.0)

    label_to_config_ECMWF_GCM = OrderedDict(
        [
            (label_ECMWF_GCM, {
                "base_folder": "/RESCUE/skynet3_rech1/huziy/ens_simulations_links_diro/ECMWF_GCM/ensm_1",
                "data_source_type": data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES,
                "out_folder": "lake_effect_analysis_{}".format(label_ECMWF_GCM),
                "varname_mapping": {
                    default_varname_mappings.T_AIR_2M: "tas",
                    default_varname_mappings.TOTAL_PREC: "prlr",
                    default_varname_mappings.U_WE: "uas",
                    default_varname_mappings.V_SN: "vas",
                },
                "multiplier_mapping": multiplier_map_ECMWF_GCM,
                "offset_mapping": defaultdict(lambda: 0),
                "level_mapping": defaultdict(lambda: 0),
            }),
        ]
    )

    # NOTE(review): only the CRCM5/FLake configuration is passed below;
    # label_to_config and label_to_config_ECMWF_GCM are built but unused.
    calculate_lake_effect_snowfall(label_to_config=label_to_config_CRCM5, period=period)
def main():
    """
    Run the per-year parallel lake-effect snowfall calculation for the
    DJF seasons of 1979--1988 using the CRCM5 Hostetler-lake simulation.
    Configurations for ERA-Interim and the NEMO-coupled run are assembled
    as well, but their calculation calls are commented out.
    """
    period = Period(
        datetime(1979, 12, 1),
        datetime(1988, 3, 1)
    )

    # should be consequent
    months_of_interest = [12, 1, 2]
    period.months_of_interest = months_of_interest

    # wind/temperature inputs are taken at the first hybrid model level
    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    ERAI_label = "ERA-Interim"
    label = ERAI_label

    label_to_config = OrderedDict(
        [
            # ERA-Interim
            (label, {
                "base_folder": "/RECH/data/Driving_data/Offline/ERA-Interim_0.75/6h_Analysis",
                "data_source_type": data_source_types.ALL_VARS_IN_A_FOLDER_OF_RPN_FILES,
                "min_dt": timedelta(hours=6),
                "varname_mapping": vname_map_CRCM5,
                "level_mapping": vname_to_level_erai,
                "offset_mapping": vname_to_offset_CRCM5,
                "multiplier_mapping": vname_to_multiplier_CRCM5,
                "out_folder": "lake_effect_analysis_{}_{}-{}".format(label, period.start.year, period.end.year)
            }),
            # Add additional sources below
        ]
    )

    # calculate_lake_effect_snowfall_each_year_in_parallel(label_to_config=label_to_config, period=period)
    # calculate_lake_effect_snowfall(label_to_config=label_to_config, period=period)

    label = "CRCM5_NEMO"
    label_to_config_CRCM5 = OrderedDict([(
        label, {
            "base_folder": "/RECH2/huziy/coupling/coupled-GL-NEMO1h_30min/Samples",
            "data_source_type": data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
            "min_dt": timedelta(hours=3),
            "varname_mapping": vname_map_CRCM5,
            "level_mapping": vname_to_level_erai,
            "offset_mapping": vname_to_offset_CRCM5,
            "multiplier_mapping": vname_to_multiplier_CRCM5,
            "filename_prefix_mapping": vname_to_fname_prefix_CRCM5,
            "out_folder": "lake_effect_analysis_{}_{}-{}".format(label, period.start.year, period.end.year)
        }
    )])

    # calculate_lake_effect_snowfall(label_to_config=label_to_config_CRCM5, period=period)
    # calculate_lake_effect_snowfall_each_year_in_parallel(label_to_config=label_to_config_CRCM5, period=period)

    label = "CRCM5_Hostetler"
    label_to_config_CRCM5 = OrderedDict([(
        label, {
            "base_folder": "/RECH2/huziy/coupling/GL_440x260_0.1deg_GL_with_Hostetler/Samples_selected",
            "data_source_type": data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME,
            "min_dt": timedelta(hours=3),
            "varname_mapping": vname_map_CRCM5,
            "level_mapping": vname_to_level_erai,
            "offset_mapping": vname_to_offset_CRCM5,
            "multiplier_mapping": vname_to_multiplier_CRCM5,
            "filename_prefix_mapping": vname_to_fname_prefix_CRCM5,
            "out_folder": "lake_effect_analysis_{}_{}-{}".format(label, period.start.year, period.end.year)
        }
    )])

    calculate_lake_effect_snowfall_each_year_in_parallel(label_to_config=label_to_config_CRCM5, period=period)
def main():
    """
    Run (and time) the per-year parallel lake-effect snowfall calculation
    for ERA-Interim over the 1980--1985 seasons.  CRCM5 NEMO/Hostetler
    configurations are assembled below but their calculation calls are
    commented out or absent.
    """
    period = Period(
        datetime(1980, 12, 1),
        datetime(1985, 3, 1)
    )

    # should be consequent
    months_of_interest = [11, 12, 1]
    period.months_of_interest = months_of_interest

    # wind/temperature inputs are taken at the first hybrid model level
    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    ERAI_label = "ERA-Interim"
    label = ERAI_label

    label_to_config = OrderedDict(
        [
            # ERA-Interim
            (label, {
                "base_folder": "/RECH/data/Driving_data/Offline/ERA-Interim_0.75/6h_Analysis",
                "data_source_type": data_source_types.ALL_VARS_IN_A_FOLDER_OF_RPN_FILES,
                "min_dt": timedelta(hours=6),
                "varname_mapping": vname_map_CRCM5,
                "level_mapping": vname_to_level_erai,
                "offset_mapping": vname_to_offset_CRCM5,
                "multiplier_mapping": vname_to_multiplier_CRCM5,
                "out_folder": "lake_effect_analysis_{}_{}-{}".format(label, period.start.year, period.end.year)
            }),
            # Add additional sources below
        ]
    )

    import time

    # time the ERA-Interim run
    t0 = time.time()
    calculate_lake_effect_snowfall_each_year_in_parallel(label_to_config=label_to_config, period=period,
                                                         nprocs_to_use=5)
    print("Execution time: {} s".format(time.time() - t0))

    # calculate_lake_effect_snowfall(label_to_config=label_to_config, period=period)

    label = "CRCM5_NEMO"

    vname_map = {}
    vname_map.update(vname_map_CRCM5)
    vname_map.update({
        default_varname_mappings.SNOWFALL_RATE: "U3"
    })

    label_to_config_CRCM5 = OrderedDict([(
        label, {
            "base_folder": "/HOME/huziy/skynet3_rech1/CRCM5_outputs/coupled-GL-NEMO1h/selected_fields",
            "data_source_type": data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME,
            "min_dt": timedelta(hours=3),
            "varname_mapping": vname_map,
            "level_mapping": vname_to_level_erai,
            "offset_mapping": vname_to_offset_CRCM5,
            "multiplier_mapping": vname_to_multiplier_CRCM5,
            "out_folder": "lake_effect_analysis_{}_{}-{}".format(label, period.start.year, period.end.year)
        }
    )])

    # calculate_lake_effect_snowfall_each_year_in_parallel(label_to_config=label_to_config_CRCM5,
    #                                                      period=period,
    #                                                      nprocs_to_use=16)

    label = "CRCM5_Hostetler"

    vname_map = {}
    vname_map.update(vname_map_CRCM5)
    vname_map.update({
        default_varname_mappings.SNOWFALL_RATE: "U3"
    })

    # NOTE(review): this configuration is built but never used in this
    # function -- its calculation call appears to have been removed.
    label_to_config_CRCM5 = OrderedDict([(
        label, {
            "base_folder": "/RECH2/huziy/coupling/GL_440x260_0.1deg_GL_with_Hostetler/Samples_selected",
            "data_source_type": data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME,
            "min_dt": timedelta(hours=3),
            "varname_mapping": vname_map,
            "level_mapping": vname_to_level_erai,
            "offset_mapping": vname_to_offset_CRCM5,
            "multiplier_mapping": vname_to_multiplier_CRCM5,
            "filename_prefix_mapping": vname_to_fname_prefix_CRCM5,
            "out_folder": "lake_effect_analysis_{}_{}-{}".format(label, period.start.year, period.end.year)
        }
    )])
def _get_period_for_year(y):
    """Return a Period spanning calendar year *y*, from Jan 1 to the last microsecond of Dec 31."""
    first_instant = Pendulum(y, 1, 1)
    last_instant = Pendulum(y + 1, 1, 1).subtract(microseconds=1)
    return Period(first_instant, last_instant)
def _get_period_for_ym(year, month):
    """Return a Period covering the given calendar month, end-inclusive to the microsecond."""
    month_start = Pendulum(year, month, 1)
    month_end = month_start.add(months=1).subtract(microseconds=1)
    return Period(month_start, month_end)
def read_data_for_period(self, period: Period, varname_internal: str, ndims=3) -> DataArray:
    """
    Read the data for period and varname into memory, and return it as xarray DataArray

    :param ndims: number of dimensions ndims=3 for (t, x, y)[default] and ndims=4 for (t, x, y, z)
    :param period:
    :param varname_internal:

    Note: this method will read everything into memory, please be easy on the period duration for large datasets

    Fix: the "Skipping {year}-{m}" message in the VNAME_IN_FNAME branch was
    a plain string missing its f-prefix; it now interpolates like the
    equivalent message in the SAMPLES_FOLDER_FROM_CRCM_OUTPUT branch.
    """
    assert isinstance(period, Period)

    level, level_kind = -1, -1

    if varname_internal in self.level_mapping:
        lvl = self.level_mapping[varname_internal]
        assert isinstance(lvl, VerticalLevel)
        level, level_kind = lvl.get_value_and_kind()

    data = {}
    lons, lats = None, None
    data_list = None
    dates = None

    # for each datasource type the following arrays should be defined:
    #     data(t, x, y), dates(t), lons(x, y), lats(x, y)
    if self.data_source_type == data_source_types.ALL_VARS_IN_A_FOLDER_OF_RPN_FILES:
        assert isinstance(period, Period)

        for month_start in period.range("months"):
            f = self.yearmonth_to_path[(month_start.year, month_start.month)]

            with RPN(str(f)) as r:
                # read the data into memory
                data1 = r.get_all_time_records_for_name_and_level(varname=self.varname_mapping[varname_internal],
                                                                  level=level, level_kind=level_kind)

                if self.lons is None:
                    self.__update_bmp_info_from_rpnfile_obj(r)

                data.update(data1)

        dates = list(sorted(data))[:-1]  # Ignore the last date because it is from the next month
        data_list = [data[d] for d in dates]

    elif self.data_source_type == data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT:

        assert varname_internal in self.varname_to_file_prefix, f"Could not find {varname_internal} in {self.varname_to_file_prefix}"

        filename_prefix = self.varname_to_file_prefix[varname_internal]

        # handle 3d variables
        if ndims == 4:
            return self.read_data_for_period_3d(period, varname_internal=varname_internal)

        for month_start in period.range("months"):
            year, m = month_start.year, month_start.month
            print(year, m)

            # Skip years or months that are not available
            if (year, m) not in self.yearmonth_to_path:
                print(f"Skipping {year}-{m}")
                continue

            month_dir = self.yearmonth_to_path[(year, m)]

            for f in month_dir.iterdir():
                # Skip the file for time step 0
                if f.name[-9:-1] == "0" * 8:
                    continue

                # read only files with the specified prefix
                if not f.name.startswith(filename_prefix):
                    continue

                r = RPN(str(f))
                data.update(
                    r.get_all_time_records_for_name_and_level(varname=self.varname_mapping[varname_internal],
                                                              level=level, level_kind=level_kind))

                if self.lons is None:
                    self.__update_bmp_info_from_rpnfile_obj(r)

                r.close()

        dates = list(sorted(data))[:-1]  # Ignore the last date because it is from the next month
        data_list = [data[d] for d in dates]

    elif self.data_source_type == data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME:

        for month_start in period.range("months"):
            year, m = month_start.year, month_start.month
            print(year, m)

            # Skip years or months that are not available
            if (year, m) not in self.yearmonth_to_path:
                # fixed: was a plain string literal missing the f-prefix
                print(f"Skipping {year}-{m}")
                continue

            month_dir = self.yearmonth_to_path[(year, m)]

            for f in month_dir.iterdir():
                # read only files containing the variable name in the name, i.e. *TT*.rpn
                if not "_" + self.varname_mapping[varname_internal] in f.name:
                    continue

                r = RPN(str(f))
                data.update(
                    r.get_all_time_records_for_name_and_level(varname=self.varname_mapping[varname_internal],
                                                              level=level, level_kind=level_kind))

                if self.lons is None:
                    self.lons, self.lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()

                r.close()

        dates = list(sorted(data))[:-1]  # Ignore the last date because it is from the next month
        data_list = [data[d] for d in dates]

    elif self.data_source_type in [data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES,
                                   data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY]:

        if self.varname_to_file_path is None:
            base_folder = Path(self.base_folder)
            ds = xarray.open_mfdataset(str(base_folder / "*.nc*"), data_vars="minimal")
        else:
            ## In the case of very different netcdf files in the folder
            ## i.e. data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY
            ds = xarray.open_dataset(self.varname_to_file_path[varname_internal])
            print("reading {} from {}".format(varname_internal, self.varname_to_file_path[varname_internal]))

        # select the variable by name and time
        # print(period.start, period.end)
        # print(ds[self.varname_mapping[varname_internal]])

        # try both time and t as the name of the time dimension
        try:
            var = ds[self.varname_mapping[varname_internal]].sel(time=slice(period.start, period.end)).squeeze()
        except ValueError:
            var = ds[self.varname_mapping[varname_internal]].sel(t=slice(period.start, period.end)).squeeze()

        for cname, cvals in var.coords.items():
            if "time" in cname.lower() or "t" == cname.lower():
                dates = cvals

        # first look for lon/lat among the variable's own coordinates
        if self.lons is None:
            need_to_create_meshgrid = False
            for cname, cvals in var.coords.items():

                if "lon" in cname.lower():
                    lons = cvals.values

                    if lons.ndim == 1:
                        need_to_create_meshgrid = True

                if "lat" in cname.lower():
                    lats = cvals.values

            if need_to_create_meshgrid:
                lats, lons = np.meshgrid(lats, lons)

            self.lons, self.lats = lons, lats

        # if still could not find longitudes and latitudes
        if self.lons is None:
            for vname, ncvar in ds.items():
                if "lon" in vname.lower():
                    self.lons = ncvar.values

                if "lat" in vname.lower():
                    self.lats = ncvar.values

        # if still could not find => raise an exception
        if self.lons is None:
            raise IOError(f"Could not find lon/lat fields in the\n {ds}")

        # select the requested vertical level for 4D variables
        if var.ndim > 3:
            var = var[:, self.level_mapping[varname_internal].value, :, :]

        # transpose when the data layout does not match the (x, y) grid
        if var.shape[-2:] == self.lons.shape:
            data_list = var.values
        else:
            if var.ndim == 3:
                data_list = np.transpose(var.values, axes=(0, 2, 1))
            elif var.ndim == 2:
                data_list = np.transpose(var.values)
            else:
                raise Exception(f"{var.ndim}-dimensional variables are not supported")

        # close the dataset
        ds.close()

    else:
        raise NotImplementedError(
            "reading of the layout type {} is not implemented yet.".format(self.data_source_type))

    # print(dates[0], dates[1], "...", dates[-1], len(dates))

    # Construct a dictionary for xarray.DataArray ...
    vardict = {
        "coords": {
            "t": {"dims": "t", "data": dates},
            "lon": {"dims": ("x", "y"), "data": self.lons},
            "lat": {"dims": ("x", "y"), "data": self.lats},
        },
        "dims": ("t", "x", "y"),
        "data": data_list,
        "name": varname_internal
    }

    if len(data_list) == 0:
        print("retreived dates: {}".format(dates))
        raise IOError(
            "Could not find any {} data for the period {}..{} in {}".format(self.varname_mapping[varname_internal],
                                                                            period.start, period.end,
                                                                            self.base_folder))

    # Convert units based on supplied mappings
    return self.multipliers[varname_internal] * DataArray.from_dict(vardict) + self.offsets[varname_internal]
def get_min_max_avg_for_period(self, start_year: int, end_year: int, varname_internal: str):
    """
    Compute per-gridcell min, max and mean fields of a variable over
    [start_year, end_year], reading one calendar year at a time.

    Fix: the incremental mean never updated avg_n after the first year, so
    every year from the third one onwards was weighted as if only the first
    year had been accumulated; avg_n is now advanced by each year's number
    of time steps.

    :param start_year: first year (inclusive)
    :param end_year: last year (inclusive)
    :param varname_internal: internal variable name understood by read_data_for_period
    :return: dict mapping output names ("min_<v>", "min_dates", "max_<v>",
        "max_dates", "avg_<v>") to xarray.DataArray objects
    """
    min_vals = None
    max_vals = None
    avg_vals = None

    min_dates = None
    max_dates = None

    avg_n = 0  # number of time steps accumulated into avg_vals so far

    for y in range(start_year, end_year + 1):
        p_start = Pendulum(y, 1, 1)
        p_end = Pendulum(y + 1, 1, 1).subtract(microseconds=1)
        p = Period(p_start, p_end)
        data = self.read_data_for_period(p, varname_internal=varname_internal)

        min_current = data.min(dim="t").values
        max_current = data.max(dim="t").values
        avg_current = data.mean(dim="t").values

        # Find extremes and dates when they are occurring
        if min_vals is None:
            min_vals = min_current
        else:
            min_vals = np.where(min_vals <= min_current, min_vals, min_current)

        if max_vals is None:
            max_vals = max_current
        else:
            max_vals = np.where(max_vals >= max_current, max_vals, max_current)

        # NOTE(review): helper defined elsewhere -- presumably refreshes the
        # per-cell dates where this year's data set a new extreme; verify.
        min_dates = _get_dates_for_extremes(min_vals, data, min_dates)
        assert min_dates is not None
        max_dates = _get_dates_for_extremes(max_vals, data, max_dates)

        # calculate the mean incrementally to avoid overflow
        if avg_vals is None:
            avg_vals = avg_current
            avg_n = data.shape[0]
        else:
            incr = data.shape[0]
            avg_vals = avg_vals * (avg_n / (avg_n + incr)) + (incr / (avg_n + incr)) * avg_current
            avg_n += incr  # fixed: was never updated, mis-weighting later years

    # assign names
    min_vals = xarray.DataArray(name="min_{}".format(varname_internal), data=min_vals, dims=("x", "y"))
    min_dates.name = "min_dates"

    max_vals = xarray.DataArray(name="max_{}".format(varname_internal), data=max_vals, dims=("x", "y"))
    max_dates.name = "max_dates"

    avg_vals = xarray.DataArray(name="avg_{}".format(varname_internal), data=avg_vals, dims=("x", "y"))

    result = {
        min_vals.name: min_vals,
        min_dates.name: min_dates,
        max_vals.name: max_vals,
        max_dates.name: max_dates,
        avg_vals.name: avg_vals
    }

    return result
def main():
    """
    Plot HLES vs total snowfall diagnostics for the CRCM5_NEMO monthly
    analysis output: average snowfall maps, area-averaged time series and
    correlation maps with streamflow upstream of Cornwall.
    """
    hles_data_dir = "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_NEMO_1980-2015_monthly"
    print(f"HLES data source: {hles_data_dir}")

    # make sure the output directory for images exists
    img_dir.mkdir(parents=True, exist_ok=True)

    # NDJ season: 3 months starting in November
    season = MonthPeriod(start_month=11, nmonths=3)

    hles_vname = "hles_snow"
    total_snfall_vname = "total_snowfall"

    start_year = 1980
    end_year = 2009

    stfl_data_source_base_dir = "/snow3/huziy/NEI/GL/erai0.75deg_driven/GL_with_NEMO_dtN_1h_and_30min/Samples/"
    stfl_period = Period(Pendulum(start_year, 1, 1), Pendulum(end_year + 1, 1, 1))

    hles_snfall = []
    total_snfall = []
    period_list = []

    lons = None
    lats = None

    for p in season.get_season_periods(start_year=start_year, end_year=end_year):
        print(p.start, p.end)

        # collect one daily file per month of the season
        flist = []
        for month_start in p.range("months"):
            y, m = month_start.year, month_start.month
            flist.append(glob.glob(f"{hles_data_dir}/*{y}-{y}_m{m:02d}-{m:02d}_daily.nc")[0])

        ds = xarray.open_mfdataset(flist, data_vars="minimal")

        # grid coordinates are the same for all files; read them once
        if lons is None:
            lons = ds["lon"][:].values
            lats = ds["lat"][:].values

        hles1, total1 = get_acc_hles_and_total_snfl(ds,
                                                    hles_vname=hles_vname,
                                                    total_snfall_vname=total_snfall_vname)

        hles_snfall.append(hles1)
        total_snfall.append(total1)
        period_list.append(p)

    # stack the per-season fields and convert m -> cm
    hles_snfall = np.array(hles_snfall) * 100
    total_snfall = np.array(total_snfall) * 100

    # streamflow at a point upstream of Cornwall
    stfl_lon = 284.64685
    stfl_lat = 44.873371
    stfl_data = get_streamflow_at(stfl_lon, stfl_lat,
                                  data_source_base_dir=stfl_data_source_base_dir,
                                  period=stfl_period,
                                  varname=default_varname_mappings.STREAMFLOW)

    # plotting setup
    plot_utils.apply_plot_params(font_size=10)

    snow_clevs = np.array(common_params.clevs_lkeff_snowfall) * 1.25
    cmap = LinearSegmentedColormap.from_list("mycmap",
                                             common_params.lkeff_snowfall_colors,
                                             N=len(common_params.lkeff_snowfall_colors))
    bn = BoundaryNorm(snow_clevs, len(snow_clevs) - 1)

    b = Basemap(llcrnrlon=lons[0, 0], llcrnrlat=lats[0, 0],
                urcrnrlon=lons[-1, -1], urcrnrlat=lats[-1, -1],
                resolution="i", area_thresh=1000)
    xx, yy = b(lons, lats)

    plot_label = Path(hles_data_dir).name

    # average snowfall maps
    plot_avg_snfall_maps(b, xx, yy, hles_snfall, total_snfall,
                         cmap=cmap, bnorm=bn, label=plot_label)

    # area-averaged time series
    plot_area_avg_snfall(hles_snfall, total_snfall,
                         hles_period_list=period_list, label=plot_label)

    # correlation maps with streamflow (Jan-Apr)
    plot_correlation_maps_with_stfl(b, xx, yy, hles_snfall, total_snfall,
                                    period_list=period_list,
                                    stfl_series=stfl_data,
                                    label=plot_label,
                                    stfl_months_of_interest=tuple(range(1, 5)))
def format_fn_period_words(raw_value: pendulum.Period, _format_str: str, now_dt: pendulum.DateTime) -> str:
    """Render a period as a human-readable phrase (e.g. "2 weeks 3 days").

    The format string and the current time are accepted for interface
    compatibility with other formatter functions but are ignored.
    """
    words = raw_value.in_words()
    return words
def main_current(nprocs=20):
    """
    Run the monthly lake-effect snow analysis for the current-climate
    CanESM2 RCP8.5 driven CRCM5_NEMO simulation, farming out one month
    per task to a process pool.

    :param nprocs: number of worker processes in the pool
    """
    period = Period(datetime(1989, 1, 1), datetime(2010, 12, 31))

    label = "CRCM5_NEMO_fix_TT_PR_CanESM2_RCP85_{}-{}_monthly".format(
        period.start.year, period.end.year)

    # vertical levels to read for the 3D input fields
    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    vname_map = dict(vname_map_CRCM5)
    # vname_map[default_varname_mappings.SNOWFALL_RATE] = "SN"
    # NOTE(review): "XXX" looks like a placeholder that effectively disables
    # the snowfall mapping; sibling scripts map SNOWFALL_RATE to "SN" --
    # confirm this is intentional.
    vname_map[default_varname_mappings.SNOWFALL_RATE] = "XXX"

    base_folder = "/scratch/huziy/Output/GL_CC_CanESM2_RCP85/coupled-GL-current_CanESM2/Samples"

    pool = Pool(processes=nprocs)

    input_params = []
    for month_start in period.range("months"):
        # one task per month, period spanning the whole month
        month_end = month_start.add(months=1).subtract(seconds=1)

        month_period = Period(month_start, month_end)
        month_period.months_of_interest = [month_start.month, ]

        label_to_config = OrderedDict([(label, {
            DataManager.SP_BASE_FOLDER: base_folder,
            DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
            DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
            DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
            DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
            DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
            DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
            "out_folder": "lake_effect_analysis_{}_{}-{}".format(label, period.start.year, period.end.year)
        })])

        print(month_period.months_of_interest)
        input_params.append(dict(
            label_to_config=label_to_config,
            period=month_period,
            months_of_interest=month_period.months_of_interest,
            nprocs_to_use=1))

    # execute in parallel
    pool.map(monthly_func, input_params)
def read_data_for_period_3d(self, period: Period, varname_internal: str) -> DataArray:
    """
    Read a 3D (time, level, x, y) field for the given period.

    :param period: time period to read (iterated month by month)
    :param varname_internal: internal variable name; mapped to the on-disk
        variable name via self.varname_mapping
    :return: DataArray with dims ("t", "z", "x", "y"); units converted using
        the configured multiplier/offset mappings
    :raises IOError: when no data is found for the period
    :raises NotImplementedError: for unsupported data source layouts
    """
    data_list = []
    dates = []

    vert_levels = None
    vert_level_units = None

    if self.data_source_type == data_source_types.ALL_VARS_IN_A_FOLDER_OF_RPN_FILES:
        raise NotImplementedError()
    elif self.data_source_type == data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT:

        assert varname_internal in self.varname_to_file_prefix, f"Could not find {varname_internal} in {self.varname_to_file_prefix}"

        filename_prefix = self.varname_to_file_prefix[varname_internal]

        # pressure-thickness ("dp") files carry levels in millibars
        if filename_prefix in ["dp", ]:
            vert_level_units = "mb"

        for month_start in period.range("months"):
            year, m = month_start.year, month_start.month

            # Skip years or months that are not available
            if (year, m) not in self.yearmonth_to_path:
                print(f"Skipping {year}-{m}")
                continue

            month_dir = self.yearmonth_to_path[(year, m)]

            for f in sorted(month_dir.iterdir()):
                # Skip the file for time step 0 (name ends in 8 zeros + suffix char)
                if f.name[-9:-1] == "0" * 8:
                    continue

                # read only files with the specified prefix
                if not f.name.startswith(filename_prefix):
                    continue

                with RPN(str(f)) as r:
                    print(f"Reading {self.varname_mapping[varname_internal]} from {f}")

                    data_rvar = r.variables[self.varname_mapping[varname_internal]]

                    assert isinstance(data_rvar, rpn.RPNVariable)

                    dates.extend(data_rvar.sorted_dates)

                    # vertical levels are assumed identical across files;
                    # take them from the first file read
                    if vert_levels is None:
                        vert_levels = data_rvar.sorted_levels

                    data_list.append(data_rvar[:])

                    # lazily pick up the grid/projection info once
                    if self.lons is None:
                        self.__update_bmp_info_from_rpnfile_obj(r)
    else:
        raise NotImplementedError()

    # BUGFIX: check for missing data *before* np.concatenate. Previously the
    # concatenate call came first and raised a cryptic ValueError on an empty
    # list, making the informative IOError below unreachable.
    if len(data_list) == 0:
        print("retreived dates: {}".format(dates))
        raise IOError(
            "Could not find any {} data for the period {}..{} in {}".format(self.varname_mapping[varname_internal],
                                                                            period.start, period.end,
                                                                            self.base_folder))

    data_list = np.concatenate(data_list, axis=0)

    print(f"data_list.shape={data_list.shape}, var_name={varname_internal}")

    # Construct a dictionary for xarray.DataArray ...
    vardict = {
        "coords": {
            "t": {"dims": "t", "data": dates},
            "lon": {"dims": ("x", "y"), "data": self.lons},
            "lat": {"dims": ("x", "y"), "data": self.lats},
            "lev": {"dims": ("z",), "data": vert_levels}
        },
        "dims": ("t", "z", "x", "y"),
        "data": data_list,
        "name": varname_internal
    }

    if vert_level_units is not None:
        vardict["coords"]["lev"].update({"attrs": {"units": vert_level_units}})

    # Convert units based on supplied mappings
    return self.multipliers[varname_internal] * DataArray.from_dict(vardict) + self.offsets[varname_internal]
def entry_for_cc_canesm2_gl():
    """
    Entry point for the CanESM2-driven CRCM5_NEMO simulation: plot
    current-vs-future (RCP8.5) seasonal changes over the Great Lakes.
    """
    data_root = common_params.data_root

    label_to_datapath = OrderedDict([
        (common_params.crcm_nemo_cur_label,
         data_root / "lake_effect_analysis_CRCM5_NEMO_CanESM2_RCP85_1989-2010_1989-2010/merged/"),
        (common_params.crcm_nemo_fut_label,
         data_root / "lake_effect_analysis_CRCM5_NEMO_CanESM2_RCP85_2079-2100_2079-2100/merged/"),
    ])

    # end dates are not inclusive
    cur_period = Period(datetime(1989, 1, 1), datetime(2011, 1, 1))
    fut_period = Period(datetime(2079, 1, 1), datetime(2101, 1, 1))

    periods_info = CcPeriodsInfo(cur_period=cur_period, fut_period=fut_period)

    season_to_months = OrderedDict([
        ("ND", [11, 12]),
        ("JF", [1, 2]),
        ("MA", [3, 4]),
    ])

    varnames = ["hles_snow", "lake_ice_fraction", "TT", "PR"]

    var_display_names = {
        "hles_snow": "HLES",
        "hles_snow_days": "HLES freq",
        "lake_ice_fraction": "Lake ice fraction",
        "TT": "2m air\n temperature",
        "PR": "total\nprecipitation",
        "cao_days": "CAO freq"
    }

    plot_utils.apply_plot_params(width_cm=25, height_cm=25, font_size=8)

    gl_mask = get_gl_mask(label_to_datapath[common_params.crcm_nemo_cur_label])

    # per-variable plotting configuration: unit conversion, color limits,
    # and (optionally) a mask restricting where the field is shown
    vars_info = {
        "hles_snow": {
            "multiplier": 10,  # convert to cm
            "display_units": "cm",
            "offset": 0,
            "vmin": -2,
            "vmax": 2,
            "accumulation": True,
            "mask": ~gl_mask  # land only
        },
        "hles_snow_days": {
            "multiplier": 1,
            "display_units": "days",
            "offset": 0,
            "vmin": -1,
            "vmax": 1,
            "mask": ~gl_mask  # land only
        },
        "cao_days": {
            "multiplier": 1,
            "display_units": "days",
            "offset": 0,
            "vmin": -1,
            "vmax": 1,
        },
        "lake_ice_fraction": {
            "multiplier": 1,
            "offset": 0,
            "vmin": -0.5,
            "vmax": 0.5,
            "mask": gl_mask  # lakes only
        },
        "TT": {
            "multiplier": 1,
            "display_units": r"${\rm ^\circ C}$",
            "offset": 0,
            "vmin": 0,
            "vmax": 8,
            "cmap": cm.get_cmap("Reds", 16)
        },
        "PR": {
            "multiplier": 1,
            "display_units": "mm/day",
            "offset": 0,
            "vmin": 0,
            "vmax": 3,
            "cmap": cm.get_cmap("Reds", 12)
        }
    }

    main(label_to_datapath,
         varnames=varnames,
         cur_label=common_params.crcm_nemo_cur_label,
         fut_label=common_params.crcm_nemo_fut_label,
         season_to_months=season_to_months,
         vname_display_names=var_display_names,
         periods_info=periods_info,
         vars_info=vars_info)