def read_data(settings, suffix, name, year, grid_lats, grid_lons, period, N_OBS_PER_DAY):
    """
    Read one year of 3-hourly data for a single variable from the monthly netCDF files.

    One file per calendar month is opened, the requested variable is copied
    into the matching time slice of a year-long masked array, and the file is
    closed again before the next month is read.

    :param Settings settings: object to hold all filepaths etc.
        (DATA_LOCATION, OUTROOT and mdi are read here)
    :param str suffix: strict or relaxed criteria; only used to build the input filename
    :param str name: variable name to read from each file
    :param int year: year to read
    :param array grid_lats: latitudes (only the length is used)
    :param array grid_lons: longitudes (only the length is used)
    :param str period: which period (day/night/all); used to build the input filename
    :param int N_OBS_PER_DAY: number of observation times per day

    :returns: var_3hrlys - masked array of 3hrly data for single variable,
        shape (days_in_year * N_OBS_PER_DAY, len(grid_lats), len(grid_lons))
    """
    # set up empty data array (nothing masked, fill value from the settings)
    shape = [utils.days_in_year(year) * N_OBS_PER_DAY, len(grid_lats), len(grid_lons)]
    var_3hrlys = np.ma.zeros(shape)
    var_3hrlys.mask = np.zeros(shape)
    var_3hrlys.fill_value = settings.mdi

    for month in np.arange(12) + 1:
        # single pre-formatted argument prints identically on Python 2 and 3
        print("{} {}".format(year, month))

        # NOTE(review): month_start is used directly as a slice index, so
        # utils.day_of_year presumably returns the zero-based index of the
        # first day of the month - confirm against utils.
        month_start = utils.day_of_year(year, month)
        month_end = month_start + calendar.monthrange(year, month)[1]

        filename = "{}/{}_1x1_3hr_{}{:02d}_{}_{}.nc".format(settings.DATA_LOCATION, settings.OUTROOT, year, month, period, suffix)

        ncdf_file = ncdf.Dataset(filename, 'r', format='NETCDF4')
        try:
            # [:] pulls the values into memory, so the file can be closed straight away
            month_data = ncdf_file.variables[name][:]
        finally:
            # always release the file handle, even if the variable is missing
            ncdf_file.close()

        if month == 12:
            # run to end of year if december
            var_3hrlys[month_start*N_OBS_PER_DAY:, :, :] = month_data
        else:
            var_3hrlys[month_start*N_OBS_PER_DAY:month_end*N_OBS_PER_DAY, :, :] = month_data

    return var_3hrlys # read_data
def do_conversion(start_year=defaults.START_YEAR, end_year=defaults.END_YEAR, period="all", doBC=False, doQC=True):
    """
    Convert dailies to pentads 1x1

    For every year the twelve monthly daily files are read into one array,
    the days are grouped into blocks of five and averaged (mean or median,
    depending on settings.doMedian), and the result is written to a single
    pentad netCDF file per year.  In leap years Feb 29th is removed before
    the grouping and the pentad that contains it is recomputed from six days.

    :param int start_year: start year to process
    :param int end_year: end year to process (inclusive)
    :param str period: which period to do day/night/all?
    :param bool doBC: work on the bias corrected data
    :param bool doQC: incorporate the QC flags or not

    :returns: None; output is written via utils.netcdf_write
    """
    # NOTE(review): grid_lats, grid_lons and N_OBS are not defined inside this
    # function; they are presumably module-level names - confirm they exist.
    settings = set_paths_and_vars.set(doBC=doBC, doQC=doQC)

    OBS_ORDER = utils.make_MetVars(settings.mdi, multiplier=False)

    for year in np.arange(start_year, end_year + 1):

        # set up empty data arrays: (variable, day, lat, lon) and (day, lat, lon)
        daily_shape = [len(OBS_ORDER), utils.days_in_year(year), len(grid_lats), len(grid_lons)]
        all_dailies = np.ma.zeros(daily_shape)
        all_dailies.mask = np.zeros(daily_shape)
        all_dailies.fill_value = settings.mdi

        all_n_obs = np.zeros(daily_shape[1:])

        for month in np.arange(12) + 1:
            # single pre-formatted argument prints identically on Python 2 and 3
            print("{} {}".format(year, month))

            # NOTE(review): month_start is used directly as a slice index, so
            # utils.day_of_year presumably returns a zero-based day index - confirm.
            month_start = utils.day_of_year(year, month)
            month_end = month_start + calendar.monthrange(year, month)[1]

            filename = "{}/{}_1x1_daily_{}{:02d}_{}.nc".format(
                settings.DATA_LOCATION, settings.OUTROOT, year, month, period
            )

            ncdf_file = ncdf.Dataset(filename, "r", format="NETCDF4")
            try:
                for v, var in enumerate(OBS_ORDER):

                    if month == 12:
                        # run to end of year if december
                        all_dailies[v, month_start:, :, :] = ncdf_file.variables[var.name][:]
                    else:
                        all_dailies[v, month_start:month_end, :, :] = ncdf_file.variables[var.name][:]

                # now get number of observations
                if month == 12:
                    all_n_obs[month_start:, :, :] = ncdf_file.variables["n_obs"][:]
                else:
                    all_n_obs[month_start:month_end, :, :] = ncdf_file.variables["n_obs"][:]
            finally:
                # always release the file handle before opening the next month
                ncdf_file.close()

        if calendar.isleap(year):
            assert all_dailies.shape[1] == 366

            # extract 6-day pentad (day indices 55..60, which include index 59)
            incl_feb29th = all_dailies[:, 55:61, :, :]

            # remove the data of Feb 29th (day index 59) from array
            # np.ma.delete doesn't exist, so have to copy mask separately
            mask = all_dailies.mask
            all_dailies = np.delete(all_dailies, 59, 1)
            mask = np.delete(mask, 59, 1)
            all_dailies = np.ma.array(all_dailies, mask=mask)
            del mask

            # number of observations
            incl_feb29th_n_obs = all_n_obs[55:61, :, :]
            all_n_obs = np.delete(all_n_obs, 59, 0)

        else:
            assert all_dailies.shape[1] == 365

        # group the 365 days into blocks of 5: (variable, pentad, day-in-pentad, lat, lon)
        shape = all_dailies.shape
        all_dailies = all_dailies.reshape(shape[0], -1, 5, shape[-2], shape[-1])

        # number of unmasked days contributing to each pentad
        n_days_per_pentad = np.ma.count(all_dailies, axis=2)

        if settings.doMedian:
            pentad_grid = utils.bn_median(all_dailies, axis=2)
        else:
            pentad_grid = np.ma.mean(all_dailies, axis=2)

        # clear up memory
        del all_dailies
        gc.collect()

        # total number of observations per pentad
        all_n_obs = all_n_obs.reshape(-1, 5, shape[-2], shape[-1])
        all_n_obs = np.sum(all_n_obs, axis=1)

        # mask where fewer than N_OBS days have values
        # KW THIS IS ACTUALLY 2 - WHICH I THINK IS GOOD
        pentad_grid.mask[n_days_per_pentad < N_OBS] = True

        # the pentad containing feb 29th has (zero-based) index 11 in the year
        if calendar.isleap(year):
            # overwrite this with the me(di)an of a 6-day pentad
            if settings.doMedian:
                pentad_grid[:, 11, :, :] = utils.bn_median(incl_feb29th, axis=1)
            else:
                pentad_grid[:, 11, :, :] = np.ma.mean(incl_feb29th, axis=1)

            feb_n_days_per_pentad = np.ma.count(incl_feb29th, axis=1)
            pentad_grid.mask[:, 11, :, :][feb_n_days_per_pentad < N_OBS] = True
            n_days_per_pentad[:, 11, :, :] = feb_n_days_per_pentad
            all_n_obs[11, :, :] = np.sum(incl_feb29th_n_obs, axis=0)

            print("processed Feb 29th")

        # time axis: start of each pentad in hours (pentad index * 5 days * 24 hours)
        times = utils.TimeVar("time", "time since 1/1/{} in hours".format(year), "hours", "time")
        times.data = np.arange(0, pentad_grid.shape[1]) * 5 * 24

        # NOTE(review): unlike the input filename, no "/" is inserted between
        # DATA_LOCATION and OUTROOT here - confirm DATA_LOCATION ends with a separator.
        out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_pentad_{}_{}.nc".format(year, period)

        # only the day counts of the first variable in OBS_ORDER are written out
        utils.netcdf_write(
            out_filename,
            pentad_grid,
            n_days_per_pentad[0],
            all_n_obs,
            OBS_ORDER,
            grid_lats,
            grid_lons,
            times,
            frequency="P",
        )

        # free the per-year arrays before the next iteration allocates new ones
        del pentad_grid
        del all_n_obs
        del n_days_per_pentad
        gc.collect()

    return # do_conversion