def get_year_month_to_period_map(self, start_year, end_year):
    """
    Build a mapping from (year, month) tuples to the Period that covers them.

    For every year in [start_year, end_year] a period of ``self.nmonths``
    months starting at ``self.start_month`` is created; periods that spill
    past ``end_year`` are skipped entirely.

    :param start_year: first year to consider (inclusive)
    :param end_year: last year to consider (inclusive)
    :return: dict {(year, month): Period}
    """
    mapping = {}
    for year in range(start_year, end_year + 1):
        period_start = Pendulum(year, self.start_month, 1)
        # last microsecond before the period would roll into the next one
        period_end = period_start.add(months=self.nmonths).subtract(microseconds=1)

        # ignore periods extending beyond the requested range of years
        if period_end.year > end_year:
            continue

        print(period_start, period_end)

        period = Period(period_start, period_end)
        mapping.update({(d.year, d.month): period for d in period.range("months")})

    return mapping
def main_crcm5_hl():
    """
    Queue one HLES (lake-effect snow) analysis task per month for the CRCM5
    Hostetler-lake simulation and run them in parallel via a process pool.
    """
    label = "CRCM5_HL"

    period = Period(datetime(1980, 1, 1), datetime(2009, 12, 31))

    pool = Pool(processes=12)

    input_params = []
    for month_start in period.range("months"):

        # each task covers exactly one month (end = last second of the month)
        month_end = month_start.add(months=1).subtract(seconds=1)
        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        vname_to_level_erai = {
            T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
            U_WE: VerticalLevel(1, level_kinds.HYBRID),
            V_SN: VerticalLevel(1, level_kinds.HYBRID),
        }

        # FIX: the original built vname_map twice, discarding the first copy;
        # it is constructed only once here (resulting mapping unchanged).
        vname_map = {}
        vname_map.update(vname_map_CRCM5)
        vname_map.update({default_varname_mappings.SNOWFALL_RATE: "U3"})

        label_to_config = OrderedDict([(label, {
            DataManager.SP_BASE_FOLDER: "/RECH2/huziy/coupling/GL_440x260_0.1deg_GL_with_Hostetler/Samples_selected",
            DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME,
            DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
            DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
            DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
            DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
            "out_folder": "lake_effect_analysis_{}_{}-{}_monthly".format(
                label, period.start.year, period.end.year)
        })])

        kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1)

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
def test_range_months_overflow(self):
    # a 4-month period anchored on Jan 30 must land on the 30th of each month
    begin = Pendulum(2016, 1, 30, tzinfo='America/Sao_Paulo')
    finish = begin.add(months=4)

    points = Period(begin, finish).range('months')

    self.assertPendulum(points[0], 2016, 1, 30, 0, 0, 0)
    self.assertPendulum(points[-1], 2016, 5, 30, 0, 0, 0)
def main_obs():
    """
    Queue one HLES analysis task per month for the gridded observations
    dataset and run the tasks in parallel via a process pool.
    """
    label = "Obs_monthly_icefix_test2_1proc_speedtest_3"

    period = Period(datetime(1980, 1, 1), datetime(2010, 12, 31))

    pool = Pool(processes=20)

    input_params = []
    for month_start in period.range("months"):
        # one-month analysis window (end = last second of the month)
        month_end = month_start.add(months=1).subtract(seconds=1)
        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        vname_to_level_erai = {
            T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
            U_WE: VerticalLevel(1, level_kinds.HYBRID),
            V_SN: VerticalLevel(1, level_kinds.HYBRID),
        }

        vname_map = dict(vname_map_CRCM5)

        config = {
            DataManager.SP_BASE_FOLDER: "/HOME/huziy/skynet3_rech1/obs_data_for_HLES/interploated_to_the_same_grid/GL_0.1_452x260_icefix",
            DataManager.SP_DATASOURCE_TYPE: data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY,
            DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
            DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
            DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
            DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
            DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
            "out_folder": "lake_effect_analysis_daily_{}_{}-{}".format(
                label, period.start.year, period.end.year)
        }
        label_to_config = OrderedDict([(label, config)])

        print(current_month_period.months_of_interest)
        input_params.append(dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1))

    # execute in parallel
    pool.map(monthly_func, input_params)
def main_crcm5_nemo():
    """
    Queue one HLES (lake-effect snow) analysis task per month for the
    CRCM5-NEMO coupled simulation and run them in parallel via a process pool.
    """
    label = "CRCM5_NEMO"

    period = Period(
        datetime(1980, 1, 1),
        datetime(2015, 12, 31)
    )

    pool = Pool(processes=10)

    input_params = []
    for month_start in period.range("months"):

        # each task covers exactly one month (end = last second of the month)
        month_end = month_start.add(months=1).subtract(seconds=1)
        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        vname_to_level_erai = {
            T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
            U_WE: VerticalLevel(1, level_kinds.HYBRID),
            V_SN: VerticalLevel(1, level_kinds.HYBRID),
        }

        # FIX: the original built vname_map twice, discarding the first copy;
        # it is constructed only once here (resulting mapping unchanged).
        vname_map = {}
        vname_map.update(vname_map_CRCM5)
        vname_map.update({
            default_varname_mappings.SNOWFALL_RATE: "SN"
        })

        label_to_config = OrderedDict([(
            label, {
                DataManager.SP_BASE_FOLDER: "/snow3/huziy/NEI/GL/erai0.75deg_driven/GL_with_NEMO_dtN_1h_and_30min/Samples",
                DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: default_varname_mappings.vname_to_fname_prefix_CRCM5,
                "out_folder": "lake_effect_analysis_{}_{}-{}_monthly".format(label, period.start.year, period.end.year)
            }
        )])

        kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
def main_crcm5_hl():
    """
    Queue one HLES (lake-effect snow) analysis task per month for the CRCM5
    Hostetler-lake simulation and run them in parallel via a process pool.
    """
    label = "CRCM5_HL"

    period = Period(
        datetime(1980, 1, 1),
        datetime(2009, 12, 31)
    )

    pool = Pool(processes=12)

    input_params = []
    for month_start in period.range("months"):

        # each task covers exactly one month (end = last second of the month)
        month_end = month_start.add(months=1).subtract(seconds=1)
        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        vname_to_level_erai = {
            T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
            U_WE: VerticalLevel(1, level_kinds.HYBRID),
            V_SN: VerticalLevel(1, level_kinds.HYBRID),
        }

        # FIX: the original built vname_map twice, discarding the first copy;
        # it is constructed only once here (resulting mapping unchanged).
        vname_map = {}
        vname_map.update(vname_map_CRCM5)
        vname_map.update({
            default_varname_mappings.SNOWFALL_RATE: "U3"
        })

        label_to_config = OrderedDict([(
            label, {
                DataManager.SP_BASE_FOLDER: "/RECH2/huziy/coupling/GL_440x260_0.1deg_GL_with_Hostetler/Samples_selected",
                DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                "out_folder": "lake_effect_analysis_{}_{}-{}_monthly".format(label, period.start.year, period.end.year)
            }
        )])

        kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
def test_range_with_dst(self):
    # daily range across a week containing the Sao Paulo DST transition
    begin = Pendulum(2016, 10, 14, tzinfo='America/Sao_Paulo')
    finish = begin.add(weeks=1)

    days = Period(begin, finish).range('days')

    self.assertPendulum(days[0], 2016, 10, 14, 0, 0, 0)
    # the day after the transition starts at 01:00 local time
    self.assertPendulum(days[2], 2016, 10, 16, 1, 0, 0)
    self.assertPendulum(days[-1], 2016, 10, 21, 0, 0, 0)
def test_range(self):
    begin = Pendulum(2000, 1, 1, 12, 45, 37)
    finish = Pendulum(2000, 1, 31, 12, 45, 37)

    days = Period(begin, finish).range('days')

    # 31 daily points, both endpoints included
    self.assertEqual(31, len(days))
    self.assertPendulum(days[0], 2000, 1, 1, 12, 45, 37)
    self.assertPendulum(days[-1], 2000, 1, 31, 12, 45, 37)
def main_future(nprocs=20):
    """
    Queue one HLES analysis task per month for the CRCM5-NEMO CanESM2 RCP8.5
    future run and execute them in parallel via a process pool.

    :param nprocs: number of worker processes in the pool
    """
    period = Period(
        datetime(2079, 1, 1),
        datetime(2100, 12, 31)
    )

    label = "CRCM5_NEMO_fix_TT_PR_CanESM2_RCP85_{}-{}_monthly".format(period.start.year, period.end.year)

    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    base_folder = "/scratch/huziy/Output/GL_CC_CanESM2_RCP85/coupled-GL-future_CanESM2/Samples"

    vname_map = dict(vname_map_CRCM5)
    # vname_map[default_varname_mappings.SNOWFALL_RATE] = "SN"
    # NOTE(review): snowfall rate is mapped to "XXX" instead of "SN" —
    # presumably a deliberate way to disable that input; confirm intent.
    vname_map[default_varname_mappings.SNOWFALL_RATE] = "XXX"

    pool = Pool(processes=nprocs)

    input_params = []
    for month_start in period.range("months"):
        # one-month window per task (end = last second of the month)
        month_end = month_start.add(months=1).subtract(seconds=1)
        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        config = {
            # "base_folder": "/HOME/huziy/skynet3_rech1/CRCM5_outputs/cc_canesm2_rcp85_gl/coupled-GL-future_CanESM2/Samples",
            DataManager.SP_BASE_FOLDER: base_folder,
            DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
            DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
            DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
            DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
            DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
            DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
            "out_folder": "lake_effect_analysis_{}_{}-{}".format(label, period.start.year, period.end.year)
        }
        label_to_config = OrderedDict([(label, config)])

        print(current_month_period.months_of_interest)
        input_params.append(dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1
        ))

    # execute in parallel
    pool.map(monthly_func, input_params)
def test_range_inverted():
    earlier = pendulum.datetime(2000, 1, 1, 12, 45, 37)
    later = pendulum.datetime(2000, 1, 31, 12, 45, 37)

    # period constructed end-first: the range runs backwards in time
    steps = list(Period(later, earlier).range('days'))

    assert len(steps) == 31
    assert_datetime(steps[0], 2000, 1, 31, 12, 45, 37)
    assert_datetime(steps[-1], 2000, 1, 1, 12, 45, 37)
def test_range():
    begin = pendulum.datetime(2000, 1, 1, 12, 45, 37)
    finish = pendulum.datetime(2000, 1, 31, 12, 45, 37)

    steps = list(Period(begin, finish).range("days"))

    # 31 daily points, both endpoints included
    assert len(steps) == 31
    assert_datetime(steps[0], 2000, 1, 1, 12, 45, 37)
    assert_datetime(steps[-1], 2000, 1, 31, 12, 45, 37)
def test_range_no_overflow():
    # the end falls one hour short of a full day, so Jan 31 is excluded
    begin = pendulum.datetime(2000, 1, 1, 12, 45, 37)
    finish = pendulum.datetime(2000, 1, 31, 11, 45, 37)

    steps = list(Period(begin, finish).range('days'))

    assert len(steps) == 30
    assert_datetime(steps[0], 2000, 1, 1, 12, 45, 37)
    assert_datetime(steps[-1], 2000, 1, 30, 12, 45, 37)
def main_future(nprocs=20):
    """
    Queue one HLES analysis task per month for the CRCM5 Hostetler CanESM2
    RCP8.5 run and execute them in parallel via a process pool.

    :param nprocs: number of worker processes in the pool
    """
    period = Period(
        datetime(2079, 1, 1),
        datetime(2100, 12, 31)
    )

    label = "CRCM5_HL_CanESM2_RCP85_{}-{}_monthly".format(period.start.year, period.end.year)

    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    base_folder = "/scratch/huziy/NEI/GL_samples_only/GL_CC_CanESM2_RCP85/HL-GL-current_CanESM2/Samples"

    vname_map = dict(vname_map_CRCM5)
    vname_map[default_varname_mappings.SNOWFALL_RATE] = "SN"

    pool = Pool(processes=nprocs)

    input_params = []
    for month_start in period.range("months"):
        # one-month window per task (end = last second of the month)
        month_end = month_start.add(months=1).subtract(seconds=1)
        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        config = {
            # "base_folder": "/HOME/huziy/skynet3_rech1/CRCM5_outputs/cc_canesm2_rcp85_gl/coupled-GL-future_CanESM2/Samples",
            DataManager.SP_BASE_FOLDER: base_folder,
            DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
            DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
            DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
            DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
            DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
            DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
            "out_folder": "lake_effect_analysis_{}_{}-{}".format(label, period.start.year, period.end.year)
        }
        label_to_config = OrderedDict([(label, config)])

        print(current_month_period.months_of_interest)
        input_params.append(dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1
        ))

    # execute in parallel
    pool.map(monthly_func, input_params)
def test_range_amount(self):
    begin = Pendulum(2016, 10, 14, tzinfo='America/Sao_Paulo')
    finish = begin.add(weeks=1)

    # step of 2 days, crossing the Sao Paulo DST transition
    every_other_day = Period(begin, finish).range('days', 2)

    self.assertEqual(len(every_other_day), 4)
    expected = [
        (2016, 10, 14, 0, 0, 0),
        (2016, 10, 16, 1, 0, 0),
        (2016, 10, 18, 0, 0, 0),
        (2016, 10, 20, 0, 0, 0),
    ]
    for dt, args in zip(every_other_day, expected):
        self.assertPendulum(dt, *args)
def main_obs():
    """
    Queue one HLES analysis task per month for the gridded observations
    dataset and run the tasks in parallel via a process pool.
    """
    label = "Obs_monthly_icefix_test2_1proc_speedtest_3"

    period = Period(
        datetime(1980, 1, 1),
        datetime(2010, 12, 31)
    )

    pool = Pool(processes=20)

    input_params = []
    for month_start in period.range("months"):
        # one-month window per task (end = last second of the month)
        month_end = month_start.add(months=1).subtract(seconds=1)
        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        vname_to_level_erai = {
            T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
            U_WE: VerticalLevel(1, level_kinds.HYBRID),
            V_SN: VerticalLevel(1, level_kinds.HYBRID),
        }

        vname_map = dict(vname_map_CRCM5)

        config = {
            DataManager.SP_BASE_FOLDER: "/HOME/huziy/skynet3_rech1/obs_data_for_HLES/interploated_to_the_same_grid/GL_0.1_452x260_icefix",
            DataManager.SP_DATASOURCE_TYPE: data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY,
            DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
            DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
            DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
            DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
            DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
            "out_folder": "lake_effect_analysis_daily_{}_{}-{}".format(label, period.start.year, period.end.year)
        }
        label_to_config = OrderedDict([(label, config)])

        print(current_month_period.months_of_interest)
        input_params.append(dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1
        ))

    # execute in parallel
    pool.map(monthly_func, input_params)
def read_data_for_period_3d(self, period: Period, varname_internal: str) -> DataArray:
    """
    Read a 3D (t, z, x, y) field for the given period into memory.

    Currently only implemented for the SAMPLES_FOLDER_FROM_CRCM_OUTPUT
    layout; other layouts raise NotImplementedError.

    :param period: time period to read (iterated month by month)
    :param varname_internal: internal variable name; resolved to a file
        prefix via self.varname_to_file_prefix and to the on-disk variable
        name via self.varname_mapping
    :return: DataArray with dims (t, z, x, y), scaled by self.multipliers
        and shifted by self.offsets for the variable
    :raises IOError: if no data was found for the period
    """
    data_list = []
    dates = []
    vert_levels = None
    vert_level_units = None
    if self.data_source_type == data_source_types.ALL_VARS_IN_A_FOLDER_OF_RPN_FILES:
        raise NotImplementedError()
    elif self.data_source_type == data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT:
        assert varname_internal in self.varname_to_file_prefix, f"Could not find {varname_internal} in {self.varname_to_file_prefix}"
        filename_prefix = self.varname_to_file_prefix[varname_internal]
        # "dp" files carry pressure levels; record the unit for the lev coord
        if filename_prefix in ["dp", ]:
            vert_level_units = "mb"
        for month_start in period.range("months"):
            year, m = month_start.year, month_start.month
            # Skip years or months that are not available
            if (year, m) not in self.yearmonth_to_path:
                print(f"Skipping {year}-{m}")
                continue
            month_dir = self.yearmonth_to_path[(year, m)]
            for f in sorted(month_dir.iterdir()):
                # Skip the file for time step 0
                if f.name[-9:-1] == "0" * 8:
                    continue
                # read only files with the specified prefix
                if not f.name.startswith(filename_prefix):
                    continue
                with RPN(str(f)) as r:
                    print(f"Reading {self.varname_mapping[varname_internal]} from {f}")
                    data_rvar = r.variables[self.varname_mapping[varname_internal]]
                    assert isinstance(data_rvar, rpn.RPNVariable)
                    dates.extend(data_rvar.sorted_dates)
                    # vertical levels assumed identical across files; take the first
                    if vert_levels is None:
                        vert_levels = data_rvar.sorted_levels
                    data_list.append(data_rvar[:])
                    # lazily capture grid/projection info while the file is
                    # still open (done once, on the first file read)
                    if self.lons is None:
                        self.__update_bmp_info_from_rpnfile_obj(r)
    else:
        raise NotImplementedError()

    # stack the per-file arrays along the time axis
    data_list = np.concatenate(data_list, axis=0)
    print(f"data_list.shape={data_list.shape}, var_name={varname_internal}")
    # data_list = np.transpose(data_list, axes=(0, 2, 3, 1))

    # Construct a dictionary for xarray.DataArray ...
    vardict = {
        "coords": {
            "t": {"dims": "t", "data": dates},
            "lon": {"dims": ("x", "y"), "data": self.lons},
            "lat": {"dims": ("x", "y"), "data": self.lats},
            "lev": {"dims": ("z",), "data": vert_levels}
        },
        "dims": ("t", "z", "x", "y"),
        "data": data_list,
        "name": varname_internal
    }
    # attach units to the vertical coordinate when known (pressure levels)
    if vert_level_units is not None:
        vardict["coords"]["lev"].update({"attrs": {"units": vert_level_units}})
    if len(data_list) == 0:
        print("retreived dates: {}".format(dates))
        raise IOError(
            "Could not find any {} data for the period {}..{} in {}".format(self.varname_mapping[varname_internal],
                                                                            period.start, period.end,
                                                                            self.base_folder))
    # Convert units based on supplied mappings
    return self.multipliers[varname_internal] * DataArray.from_dict(vardict) + self.offsets[varname_internal]
def read_data_for_period(self, period: Period, varname_internal: str, ndims=3) -> DataArray:
    """
    Read the data for period and varname into memory, and return it as xarray DataArray

    :param ndims: number of dimensions ndims=3 for (t, x, y)[default] and ndims=4 for (t, x, y, z)
    :param period: time period to read (iterated month by month)
    :param varname_internal: internal variable name, resolved through
        self.varname_mapping / self.level_mapping
    :raises IOError: if no data was found for the period
    Note: this method will read everything into memory, please be easy on the period duration for large datasets
    """
    assert isinstance(period, Period)

    level, level_kind = -1, -1

    if varname_internal in self.level_mapping:
        lvl = self.level_mapping[varname_internal]
        assert isinstance(lvl, VerticalLevel)
        level, level_kind = lvl.get_value_and_kind()

    data = {}
    lons, lats = None, None
    data_list = None
    dates = None

    # for each datasource type the following arrays should be defined:
    # data(t, x, y), dates(t), lons(x, y), lats(x, y)
    if self.data_source_type == data_source_types.ALL_VARS_IN_A_FOLDER_OF_RPN_FILES:
        assert isinstance(period, Period)
        for month_start in period.range("months"):
            f = self.yearmonth_to_path[(month_start.year, month_start.month)]
            with RPN(str(f)) as r:
                # read the data into memory
                data1 = r.get_all_time_records_for_name_and_level(varname=self.varname_mapping[varname_internal],
                                                                  level=level,
                                                                  level_kind=level_kind)
                # capture grid info once, while the file is open
                if self.lons is None:
                    self.__update_bmp_info_from_rpnfile_obj(r)
                data.update(data1)

        dates = list(sorted(data))[:-1]  # Ignore the last date because it is from the next month
        data_list = [data[d] for d in dates]

    elif self.data_source_type == data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT:
        assert varname_internal in self.varname_to_file_prefix, f"Could not find {varname_internal} in {self.varname_to_file_prefix}"
        filename_prefix = self.varname_to_file_prefix[varname_internal]

        # handle 3d variables
        if ndims == 4:
            return self.read_data_for_period_3d(period, varname_internal=varname_internal)

        for month_start in period.range("months"):
            year, m = month_start.year, month_start.month
            print(year, m)
            # Skip years or months that are not available
            if (year, m) not in self.yearmonth_to_path:
                print(f"Skipping {year}-{m}")
                continue
            month_dir = self.yearmonth_to_path[(year, m)]
            for f in month_dir.iterdir():
                # Skip the file for time step 0
                if f.name[-9:-1] == "0" * 8:
                    continue
                # read only files with the specified prefix
                if not f.name.startswith(filename_prefix):
                    continue
                r = RPN(str(f))
                data.update(
                    r.get_all_time_records_for_name_and_level(varname=self.varname_mapping[varname_internal],
                                                              level=level, level_kind=level_kind))
                if self.lons is None:
                    self.__update_bmp_info_from_rpnfile_obj(r)
                r.close()

        dates = list(sorted(data))[:-1]  # Ignore the last date because it is from the next month
        data_list = [data[d] for d in dates]

    elif self.data_source_type == data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME:
        for month_start in period.range("months"):
            year, m = month_start.year, month_start.month
            print(year, m)
            # Skip years or months that are not available
            # FIX: this print was missing the f-prefix and emitted the
            # literal text "{year}-{m}" (the sibling branch above uses the
            # f-string correctly)
            if (year, m) not in self.yearmonth_to_path:
                print(f"Skipping {year}-{m}")
                continue
            month_dir = self.yearmonth_to_path[(year, m)]
            for f in month_dir.iterdir():
                # read only files containing the variable name in the name, i.e. *TT*.rpn
                if not "_" + self.varname_mapping[varname_internal] in f.name:
                    continue
                r = RPN(str(f))
                data.update(
                    r.get_all_time_records_for_name_and_level(varname=self.varname_mapping[varname_internal],
                                                              level=level, level_kind=level_kind))
                if self.lons is None:
                    self.lons, self.lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()
                r.close()

        dates = list(sorted(data))[:-1]  # Ignore the last date because it is from the next month
        data_list = [data[d] for d in dates]

    elif self.data_source_type in [data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES,
                                   data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY]:

        if self.varname_to_file_path is None:
            base_folder = Path(self.base_folder)
            ds = xarray.open_mfdataset(str(base_folder / "*.nc*"), data_vars="minimal")
        else:
            ## In the case of very different netcdf files in the folder
            ## i.e. data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY
            ds = xarray.open_dataset(self.varname_to_file_path[varname_internal])
            print("reading {} from {}".format(varname_internal, self.varname_to_file_path[varname_internal]))

        # select the variable by name and time; try both "time" and "t"
        # as the name of the time coordinate
        try:
            var = ds[self.varname_mapping[varname_internal]].sel(time=slice(period.start, period.end)).squeeze()
        except ValueError:
            var = ds[self.varname_mapping[varname_internal]].sel(t=slice(period.start, period.end)).squeeze()

        for cname, cvals in var.coords.items():
            if "time" in cname.lower() or "t" == cname.lower():
                dates = cvals

        if self.lons is None:
            need_to_create_meshgrid = False
            for cname, cvals in var.coords.items():
                if "lon" in cname.lower():
                    lons = cvals.values
                    # 1D lon/lat coordinates => need a 2D grid
                    if lons.ndim == 1:
                        need_to_create_meshgrid = True
                if "lat" in cname.lower():
                    lats = cvals.values
            if need_to_create_meshgrid:
                lats, lons = np.meshgrid(lats, lons)
            self.lons, self.lats = lons, lats

        # if still could not find longitudes and latitudes, look among
        # the dataset's data variables
        if self.lons is None:
            for vname, ncvar in ds.items():
                if "lon" in vname.lower():
                    self.lons = ncvar.values
                if "lat" in vname.lower():
                    self.lats = ncvar.values

        # if still could not find => raise an exception
        if self.lons is None:
            raise IOError(f"Could not find lon/lat fields in the\n {ds}")

        # 4D variable: pick the requested vertical level
        if var.ndim > 3:
            var = var[:, self.level_mapping[varname_internal].value, :, :]

        # reorder spatial axes to match the (x, y) layout of self.lons
        if var.shape[-2:] == self.lons.shape:
            data_list = var.values
        else:
            if var.ndim == 3:
                data_list = np.transpose(var.values, axes=(0, 2, 1))
            elif var.ndim == 2:
                data_list = np.transpose(var.values)
            else:
                raise Exception(f"{var.ndim}-dimensional variables are not supported")

        # close the dataset
        ds.close()

    else:
        raise NotImplementedError(
            "reading of the layout type {} is not implemented yet.".format(self.data_source_type))

    # Construct a dictionary for xarray.DataArray ...
    vardict = {
        "coords": {
            "t": {"dims": "t", "data": dates},
            "lon": {"dims": ("x", "y"), "data": self.lons},
            "lat": {"dims": ("x", "y"), "data": self.lats},
        },
        "dims": ("t", "x", "y"),
        "data": data_list,
        "name": varname_internal
    }

    if len(data_list) == 0:
        print("retreived dates: {}".format(dates))
        raise IOError(
            "Could not find any {} data for the period {}..{} in {}".format(self.varname_mapping[varname_internal],
                                                                            period.start, period.end,
                                                                            self.base_folder))
    # Convert units based on supplied mappings
    return self.multipliers[varname_internal] * DataArray.from_dict(vardict) + self.offsets[varname_internal]
def main_crcm5_nemo():
    """
    Queue one HLES (lake-effect snow) analysis task per month for the
    CRCM5-NEMO coupled simulation and run them in parallel via a process pool.
    """
    label = "CRCM5_NEMO"

    period = Period(datetime(1980, 1, 1), datetime(2015, 12, 31))

    pool = Pool(processes=10)

    input_params = []
    for month_start in period.range("months"):

        # each task covers exactly one month (end = last second of the month)
        month_end = month_start.add(months=1).subtract(seconds=1)
        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        vname_to_level_erai = {
            T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
            U_WE: VerticalLevel(1, level_kinds.HYBRID),
            V_SN: VerticalLevel(1, level_kinds.HYBRID),
        }

        # FIX: the original built vname_map twice, discarding the first copy;
        # it is constructed only once here (resulting mapping unchanged).
        vname_map = {}
        vname_map.update(vname_map_CRCM5)
        vname_map.update({default_varname_mappings.SNOWFALL_RATE: "SN"})

        label_to_config = OrderedDict([(label, {
            DataManager.SP_BASE_FOLDER: "/snow3/huziy/NEI/GL/erai0.75deg_driven/GL_with_NEMO_dtN_1h_and_30min/Samples",
            DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
            DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
            DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
            DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
            DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
            DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: default_varname_mappings.vname_to_fname_prefix_CRCM5,
            "out_folder": "lake_effect_analysis_{}_{}-{}_monthly".format(
                label, period.start.year, period.end.year)
        })])

        kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1)

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)