def __init__(self, filename, varname, t_dim=None, var_attrs=None, glob_attrs=None):
    """Record which file / variable this object refers to, plus its metadata.

    filename   : path of the file holding the data
    varname    : name of the variable to read from that file
    t_dim      : time dimension; read from the file when None
    var_attrs  : variable attributes; read from the file when None
    glob_attrs : global attributes; read from the file when None

    When any of the three optional arguments is None the file is loaded once,
    the missing items are taken from it and the data is quality checked: the
    validity flag stays true only if at least one value is finite and the
    minimum / maximum lie strictly between -2e20 and 2e20.
    """
    self.__filename = filename
    self.__varname = varname
    # Always define the validity flag.  When all metadata is supplied by the
    # caller no file is read, so no quality check can be made.
    # NOTE(review): assumes caller-supplied metadata belongs to data that was
    # already checked - confirm that True is the right default here.
    self.__valid = True

    # allowing t_dim, var_attrs and glob_attrs to be None allows overloading
    # and creating from pickle
    temp_box = None
    if t_dim is None or var_attrs is None or glob_attrs is None:
        temp_box = cpdn_box()
        temp_box.load(self.__filename, self.__varname)
        # quality check
        temp_vals = temp_box.get_values()
        self.__valid &= numpy.isfinite(temp_vals).any()
        self.__valid &= (numpy.max(temp_vals) < 2e20)
        self.__valid &= (numpy.min(temp_vals) > -2e20)

    # temp_box is guaranteed to be loaded whenever one of these is None
    if t_dim is None:
        self.__t_dim = temp_box.get_dimension("T")
    else:
        self.__t_dim = t_dim

    if var_attrs is None:
        self.__var_attrs = temp_box.get_attributes()
    else:
        self.__var_attrs = var_attrs

    if glob_attrs is None:
        self.__glob_attrs = temp_box.get_global_attributes()
    else:
        self.__glob_attrs = glob_attrs
def cpdn_zonal_mean(box, mask=None):
    """Create a zonal mean from a cpdn_box.

    The meaning will always work on the X dimension, operating over the other
    dimensions.  For example, if the box contains a time dimension, then a
    time-series will be created.  If the box contains a Z dimension then a
    multi-level time-series will be created.

    box  : the source cpdn_box
    mask : currently unused by this function

    Returns : a box containing the zonal means"""
    # No need to calculate weights as the Y axis is not meaned
    # get the axis number used for the "X" dimension
    X_axis = box.get_dimension_axes().index("X")
    src_data = box.__getitem__().get_values()
    tgt_data = numpy.average(src_data, axis=X_axis)

    # create target dimensions
    tgt_dims = create_tgt_dims(box, ["X"])

    # amend the attributes to record the operation
    method_str = "longitude: zonal mean"
    history_str = datetime.now().isoformat() + " altered by CPDN: zonal mean."
    tgt_attrs = amend_attributes(box.get_attributes(), method_str, history_str)
    # carry the global attributes over, as the other mean / sum functions do
    tgt_glob_attrs = box.get_global_attributes()

    # overwrite tgt_off and tgt_sf as get_values scales by these
    tgt_off = 0.0
    tgt_sf = 1.0
    tgt_box = cpdn_box(dims=tgt_dims, var_attrs=tgt_attrs, name=box.get_name(),
                       glob_attrs=tgt_glob_attrs, off=tgt_off, sf=tgt_sf,
                       data=tgt_data)
    return tgt_box
def cpdn_global_mean(box, mask=None):
    """Create an area-weighted global mean from a cpdn_box.

    The weights come from calc_aa_weights, fed with the X and Y bounds of the
    box and the optional mask.  The mean is taken over the X & Y dimensions
    for every position along the remaining dimensions (e.g. a box with a time
    dimension gives a time-series).

    Returns : a box containing the area-average means"""
    # area weights from the cell bounds, squeezed like the data slices below
    lon_bounds = box.get_dimension("X").get_bounds()
    lat_bounds = box.get_dimension("Y").get_bounds()
    weights = calc_aa_weights(lon_bounds, lat_bounds, mask).squeeze()

    # positions of the X and Y axes (looked up, but not used further below)
    axis_names = box.get_dimension_axes()
    X_axis = axis_names.index("X")
    Y_axis = axis_names.index("Y")

    # every dimension other than X and Y is iterated over and kept
    kept_dims = [dim for dim in box.get_dimensions()
                 if dim.get_axis() not in ("X", "Y")]
    it_dims = [dim.get_axis() for dim in kept_dims]
    dim_lens = [dim.get_len() for dim in kept_dims]

    # one output value per position along the kept dimensions
    tgt_data = numpy.zeros(dim_lens, 'f')

    walker = cpdn_box_iterator(box, it_dims)
    src_idx, tgt_idx = walker.begin(True)
    while not walker.end():
        field = box[src_idx].get_values().squeeze()
        tgt_data[tgt_idx] = numpy.average(field, weights=weights)
        src_idx, tgt_idx = walker.next(True)

    tgt_dims = create_tgt_dims(box, ["X", "Y"])

    # record the operation in the variable attributes
    stamp = datetime.now().isoformat() + " altered by CPDN: area-weighted mean."
    tgt_attrs = amend_attributes(box.get_attributes(),
                                 "longitude: latitude: area-weighted mean",
                                 stamp)

    # offset 0 / scale factor 1 because get_values already applied the scaling
    return cpdn_box(dims=tgt_dims, var_attrs=tgt_attrs, name=box.get_name(),
                    glob_attrs=box.get_global_attributes(), off=0.0, sf=1.0,
                    data=tgt_data)
def read_cpdnbox(var_ncfile, varname):
    """Load one variable from a file into a cpdn_box and return its values.

    var_ncfile : path of the file to read
    varname    : name of the variable to load

    Returns : whatever cpdn_box.get_values() returns for that variable; no
              particular grid shape is assumed or pre-allocated."""
    var = cpdn_box()
    var.load(var_ncfile, varname)
    return var.get_values()
def read_cpdnbox(var_ncfile, varname):
    """Load one variable from a file into a cpdn_box and return its values.

    var_ncfile : path of the file to read
    varname    : name of the variable to load

    Returns : whatever cpdn_box.get_values() returns for that variable; no
              particular grid shape is assumed or pre-allocated."""
    var = cpdn_box()
    var.load(var_ncfile, varname)
    return var.get_values()
def cpdn_reshape(box, new_shape, new_dim_names=None, new_dim_axes=None):
    """Reshape the box to take on the new shape - will create dummy dimensions
    if necessary.

    box           : the source cpdn_box
    new_shape     : sequence of lengths for the reshaped data
    new_dim_names : optional names for the dummy dimensions, in the order the
                    dummies are created; defaults to "null_dim_<n>"
    new_dim_axes  : optional axis labels for the dummy dimensions, same order;
                    defaults to "N"

    Each entry of new_shape is matched, by length, to the first source
    dimension of that length that has not been matched already.  Entries
    without a match get a dummy dimension holding the single value 0.0.

    Returns : a new box with the reshaped data
    Raises  : Exception if the total number of elements differs"""
    # get the values
    src_data = box.get_values("true_value")
    # check that the size of the arrays match
    if numpy.prod(new_shape) != numpy.prod(src_data.shape):
        raise Exception("Box cannot be refactored into new shape - size differs.")

    # match dimensions up by length, using each source dimension only once
    dls = list(box.get_dimension_lengths())
    box_dims = box.get_dimensions()
    dimension_map = []
    used = set()
    for length in new_shape:
        match = -1
        for i, dim_len in enumerate(dls):
            if dim_len == length and i not in used:
                match = i
                used.add(i)
                break
        dimension_map.append(match)

    have_names = new_dim_names is not None and len(new_dim_names) > 0
    have_axes = new_dim_axes is not None and len(new_dim_axes) > 0

    # now create the new dimensions
    new_dimensions = []
    n_new_dims = 0
    for d in dimension_map:
        if d == -1:
            # create a new (dummy) dimension
            if have_names:
                nd_name = new_dim_names[n_new_dims]
            else:
                nd_name = "null_dim_" + str(n_new_dims)
            if have_axes:
                nd_axis = new_dim_axes[n_new_dims]
            else:
                nd_axis = "N"
            new_dim = cpdn_boxdim(name=nd_name, vals=[0.0], attrs={}, axis=nd_axis)
            new_dimensions.append(new_dim)
            n_new_dims += 1
        else:
            new_dimensions.append(box_dims[d])

    # reshape the data
    tgt_data = numpy.reshape(src_data, new_shape)
    # create the box
    # NOTE(review): the source offset / scale factor are passed on unchanged;
    # confirm that get_values("true_value") returns data they still apply to.
    tgt_box = cpdn_box(name=box.get_name(), dims=new_dimensions,
                       var_attrs=box.get_attributes(),
                       glob_attrs=box.get_global_attributes(),
                       off=box.get_off(), sf=box.get_sf(), data=tgt_data,
                       rotated_grid=box.get_rotated_grid())
    return tgt_box
def cpdn_period_mean(box, period):
    """Average a box over consecutive blocks of `period` time steps.

    box    : the source cpdn_box; must have a "T" dimension
    period : number of consecutive time steps in each mean

    The number of output steps is the source length floor-divided by period,
    so any trailing steps that do not fill a whole period are ignored.

    Returns : a box with the period means and a matching new T dimension"""
    # get the axis number used for the "T" dimension
    T_axis = box.get_dimension_axes().index("T")
    # NOTE(review): squeeze() drops length-1 axes, so T_axis (taken from the
    # unsqueezed box) presumably only lines up when no such axis precedes T.
    src_data = box.__getitem__().get_values().squeeze()
    src_T_dim = box.get_dimension("T")
    src_times = src_T_dim.get_values("true_values")

    # create target dimension sizes - integer division keeps the size an int
    tgt_data_size = list(src_data.shape)
    n_periods = tgt_data_size[T_axis] // period
    tgt_data_size[T_axis] = n_periods

    # create target data
    tgt_data = numpy.zeros(tgt_data_size, 'f')
    tgt_times = numpy.zeros([n_periods], 'f')

    # full-slice indices; only the T entry changes inside the loop
    src_idx = [slice(None)] * src_data.ndim
    tgt_idx = [slice(None)] * src_data.ndim

    # amend the T dimension
    sd, tu, nd = src_T_dim.get_time_details()

    # now loop over the data
    for t in range(n_periods):
        t0 = t * period
        t1 = t0 + period
        src_idx[T_axis] = slice(t0, t1)
        tgt_idx[T_axis] = t
        # a multidimensional index has to be a tuple, not a list
        tgt_data[tuple(tgt_idx)] = numpy.mean(src_data[tuple(src_idx)], axis=T_axis)
        tgt_times[t] = src_times[t0] * tu + (src_times[t1-1] - src_times[t0]) * tu / 2.0 - sd

    # create the target dimensions
    tgt_dims = create_tgt_dims(box, ["T"])
    tgt_T_dim = cpdn_boxdim(name=src_T_dim.get_name(), vals=tgt_times,
                            attrs=src_T_dim.get_attributes(), axis="T",
                            start_date=sd, time_units=tu, n_days_per_year=nd)
    tgt_dims[T_axis] = tgt_T_dim

    # amend the attributes to add the mean to the cell methods and to update the history
    method_str = "time: period mean"
    history_str = datetime.now().isoformat() + " altered by CPDN: temporal period mean."
    tgt_attrs = amend_attributes(box.get_attributes(), method_str, history_str)
    tgt_glob_attrs = box.get_global_attributes()

    # overwrite tgt_off and tgt_sf as get_values scales by these
    tgt_off = 0.0
    tgt_sf = 1.0
    tgt_box = cpdn_box(dims=tgt_dims, var_attrs=tgt_attrs, name=box.get_name(),
                       glob_attrs=tgt_glob_attrs, off=tgt_off, sf=tgt_sf,
                       data=tgt_data)
    return tgt_box
def cpdn_remap_latitude(box):
    """Return a copy of the box with the Y (latitude) direction reversed.

    Both the data rows and the values of the Y dimension are flipped; every
    other dimension, the attributes and the rotated grid are passed through.
    The output uses offset 0.0 and scale factor 1.0."""
    # all axes apart from X and Y are stepped through one slice at a time
    axes = [dim.get_axis() for dim in box.get_dimensions()]
    it_dims = [name for name in axes if name not in ("X", "Y")]

    walker = cpdn_box_iterator(box, it_dims)
    src_idx, tgt_idx = walker.begin(True)

    # output array has the same shape as the source box
    flipped = numpy.zeros(box.get_dimension_lengths(), 'f')

    while not walker.end():
        plane = box[src_idx].get_values().squeeze()
        # reverse the first (Y) axis of the 2D slice
        flipped[tgt_idx] = plane[::-1, :]
        src_idx, tgt_idx = walker.next(True)

    # reversed latitude values for the replacement Y dimension
    lat_src = box.get_dimension("Y")
    lat_vals = lat_src.get_values()[::-1]

    def _flipped_lat_dim():
        # build the replacement Y dimension
        return cpdn_boxdim(lat_src.get_name(), lat_vals,
                           lat_src.get_attributes(), "Y")

    out_dims = [_flipped_lat_dim() if dim.get_axis() == "Y" else dim
                for dim in box.get_dimensions()]

    return cpdn_box(name=box.get_name(), dims=out_dims,
                    var_attrs=box.get_attributes(),
                    glob_attrs=box.get_global_attributes(),
                    off=0.0, sf=1.0, data=flipped,
                    rotated_grid=box.get_rotated_grid())
def cpdn_meridional_mean(box, mask=None):
    """Create a meridional mean from a cpdn_box.

    The mean is taken along the Y dimension, weighted by the values that
    calc_aa_weights returns for the first X bound and all Y bounds (plus the
    optional mask).  All other dimensions are kept.

    Returns : a box containing the meridional means"""
    # only one longitude cell is needed: the mean runs along a longitude line
    lon_cell = box.get_dimension("X").get_bounds()[0:1]
    lat_cells = box.get_dimension("Y").get_bounds()

    # squeezed weights - no transpose needed in this case
    weights = calc_aa_weights(lon_cell, lat_cells, mask).squeeze()

    values = box.__getitem__().get_values()
    lat_axis = box.get_dimension_axes().index("Y")
    averaged = numpy.average(values, axis=lat_axis, weights=weights)

    # dimensions of the result
    out_dims = create_tgt_dims(box, ["Y"])

    # note the operation in the variable attributes
    stamp = datetime.now().isoformat() + " altered by CPDN: meridional mean."
    out_attrs = amend_attributes(box.get_attributes(),
                                 "latitude: meridional mean", stamp)

    # get_values has applied any scaling already, hence off=0.0 and sf=1.0
    return cpdn_box(dims=out_dims, var_attrs=out_attrs, name=box.get_name(),
                    glob_attrs=box.get_global_attributes(), off=0.0, sf=1.0,
                    data=averaged)
def create_tgt_box(src_box, tgt_data, tgt_X, tgt_Y, method_str, history_str, mv=numpy.inf):
    """Helper that wraps tgt_data in a new box based on src_box.

    src_box     : box supplying the name, attributes and non-X/Y dimensions
    tgt_data    : data for the new box
    tgt_X       : values for the new X dimension
    tgt_Y       : values for the new Y dimension
    method_str  : passed to amend_attributes together with history_str
    history_str : see method_str
    mv          : new missing value; infinity (the default) leaves the
                  "missing_value" attribute as amend_attributes returned it

    Returns : the new box, with offset 0.0 and scale factor 1.0"""
    # create the output attributes
    tgt_attrs = amend_attributes(src_box.get_attributes(), method_str, history_str)

    # copy the dimensions, swapping in the new X and Y values
    tgt_dims = []
    for d in src_box.get_dimensions():
        axis = d.get_axis()
        if axis == "X":
            tgt_dims.append(cpdn_boxdim(name=d.get_name(), vals=tgt_X,
                                        attrs=d.get_attributes(), axis="X"))
        elif axis == "Y":
            tgt_dims.append(cpdn_boxdim(name=d.get_name(), vals=tgt_Y,
                                        attrs=d.get_attributes(), axis="Y"))
        else:
            tgt_dims.append(d)

    # amend the missing value
    if not numpy.isinf(mv):
        tgt_attrs["missing_value"] = mv

    # overwrite tgt_off and tgt_sf as get_values scales by these
    tgt_off = 0.0
    tgt_sf = 1.0
    # global attributes are carried over, like the other box-producing helpers
    tgt_box = cpdn_box(dims=tgt_dims, var_attrs=tgt_attrs,
                       name=src_box.get_name(),
                       glob_attrs=src_box.get_global_attributes(),
                       off=tgt_off, sf=tgt_sf, data=tgt_data)
    return tgt_box
def cpdn_temporal_sum(box):
    """Create a temporal sum from a cpdn_box.

    The values are summed along the "T" dimension.

    Returns : a box containing the sums, with the dimensions given by
              create_tgt_dims(box, ["T"])"""
    # sum along whichever axis carries time
    time_axis = box.get_dimension_axes().index("T")
    summed = numpy.sum(box.__getitem__().get_values(), axis=time_axis)

    # dimensions for the result
    out_dims = create_tgt_dims(box, ["T"])

    # record the operation in the variable attributes
    stamp = datetime.now().isoformat() + " altered by CPDN: temporal sum."
    out_attrs = amend_attributes(box.get_attributes(), "time: sum", stamp)
    out_glob_attrs = box.get_global_attributes()

    # get_values has applied any scaling already, hence off=0.0 and sf=1.0
    return cpdn_box(dims=out_dims, var_attrs=out_attrs,
                    glob_attrs=out_glob_attrs, name=box.get_name(),
                    off=0.0, sf=1.0, data=summed)
def cpdn_replace_mv(box, new_mv):
    """Return a new box in which the missing value is replaced by new_mv.

    This is useful if the missing value isn't massively massive (e.g. -54
    instead of 2e20).

    box    : the source cpdn_box
    new_mv : the value to use in place of the current missing value

    Data equal to box.get_missing_value() become new_mv, and the
    "missing_value" / "_FillValue" attributes are updated where present.
    Returns : the new box (offset 0.0, scale factor 1.0)"""
    # Work on copies of the data and attributes.
    # NOTE(review): the getters may hand back the box's own array / dict, in
    # which case writing into them would also change the source box.
    src_data = box.get_values().copy()

    # replace missing values
    mv = box.get_missing_value()
    src_data[src_data == mv] = new_mv

    # update the attributes that describe the missing value
    var_attrs = box.get_attributes()
    if var_attrs is not None:
        var_attrs = dict(var_attrs)
        for key in ("missing_value", "_FillValue"):
            if key in var_attrs:
                var_attrs[key] = new_mv

    # create new box
    out_box = cpdn_box(name=box.get_name(), dims=box.get_dimensions(),
                       var_attrs=var_attrs,
                       glob_attrs=box.get_global_attributes(),
                       off=0.0, sf=1.0, data=src_data,
                       rotated_grid=box.get_rotated_grid())
    return out_box
def get_values(self):
    """Load this object's variable from its file and return the values."""
    # nothing is cached: a fresh box is read on every call
    loaded = cpdn_box()
    loaded.load(self.__filename, self.__varname)
    values = loaded.get_values()
    return values
def __getitem__(self, idx):
    """Index the underlying box with idx and return the result."""
    # the box is read from file first, then indexed
    loaded = cpdn_box()
    loaded.load(self.__filename, self.__varname)
    return loaded[idx]
def cpdn_remap_longitude(box):
    """Rotate the X (longitude) axis of a box to the other convention.

    If the first longitude is negative, the negative longitudes have 360
    added and their columns are moved to the end of the axis.  Otherwise the
    longitudes above 180 have 360 subtracted and their columns are moved to
    the start.  If no longitude exceeds 180 in the second case, the data and
    longitudes are copied unchanged.

    The target index from the iterator is extended with two slices, so Y and
    X are taken to be the last two axes of the data, in that order.

    Returns : a new box (offset 0.0, scale factor 1.0) with the remapped data
              and X dimension"""
    # copy the longitudes so the source dimension's values are left untouched
    LON_dim = box.get_dimension("X")
    LON = numpy.array(LON_dim.get_values())
    n_lon = len(LON)

    # create an iterator over the non-X, non-Y dimensions of the source box
    it_dims = []
    for d in box.get_dimensions():
        axis = d.get_axis()
        if axis != "X" and axis != "Y":
            it_dims.append(axis)
    box_it = cpdn_box_iterator(box, it_dims)
    src_idx, tgt_idx = box_it.begin(True)

    # create the target data - same shape as the source box
    tgt_data = numpy.zeros(box.get_dimension_lengths(), 'f')

    # s:e is the run of columns that wraps; p is where the axis is split
    under_0 = bool(LON[0] < 0.0)
    if under_0:
        wrap = numpy.where(LON < 0)[0]
        s = wrap[0]
        e = wrap[-1] + 1
        LON[wrap] += 360
        LON.sort()
        p = n_lon - e
    else:
        wrap = numpy.where(LON > 180)[0]
        if wrap.size == 0:
            # nothing lies beyond 180: an empty run makes the copy below a no-op remap
            s = e = n_lon
        else:
            s = wrap[0]
            e = wrap[-1] + 1
            LON[wrap] -= 360
            LON.sort()
        p = n_lon - s

    all_rows = slice(None)
    # loop through
    while not box_it.end():
        data = box[src_idx].get_values().squeeze()
        # a multidimensional index has to be a tuple, not a list
        lead = tuple(tgt_idx)
        if under_0:
            # remap to 0 to 360
            tgt_data[lead + (all_rows, slice(p, None))] = data[:, s:e]
            tgt_data[lead + (all_rows, slice(0, p))] = data[:, e:]
        else:
            # remap to -180 to 180
            tgt_data[lead + (all_rows, slice(0, p))] = data[:, s:e]
            tgt_data[lead + (all_rows, slice(p, None))] = data[:, 0:s]
        # move onto next
        src_idx, tgt_idx = box_it.next(True)

    # create the output dimensions
    out_dims = []
    for d in box.get_dimensions():
        if d.get_axis() == "X":
            lon_dim = cpdn_boxdim(LON_dim.get_name(), LON,
                                  LON_dim.get_attributes(), "X")
            out_dims.append(lon_dim)
        else:
            out_dims.append(d)

    # return a box
    out_box = cpdn_box(name=box.get_name(), dims=out_dims,
                       var_attrs=box.get_attributes(),
                       glob_attrs=box.get_global_attributes(),
                       off=0.0, sf=1.0, data=tgt_data,
                       rotated_grid=box.get_rotated_grid())
    return out_box
def _read_member_or_substitute(ens_mems, first_idx, t_val):
    """Return the values of member first_idx at t_val, substituting other
    randomly chosen members (each tried at most once) if reading fails."""
    untried = list(range(len(ens_mems)))
    c_idx = first_idx
    while True:
        try:
            return ens_mems[c_idx][t_val].get_values()
        except Exception:
            # this member could not supply a value, so it is treated as
            # invalid and never retried
            untried.remove(c_idx)
            if not untried:
                raise Exception("No ensemble member could supply values "
                                "for time value " + str(t_val))
            c_idx = random.choice(untried)


def cpdn_ensemble_mean(ensemble):
    """Create an ensemble mean from a cpdn_ensemble.

    Each ensemble member is equally weighted.  For every time step between
    the ensemble's start and end date the members that subset_by_date returns
    are averaged.  A member that cannot be read is replaced by another,
    randomly chosen, member.

    Returns : a box containing the ensemble mean time-series, built on the
              first member's box as a template
    Raises  : Exception if, for some time step, no member can be read"""
    # get the start date, end date and time period from the ensemble
    sd = ensemble.get_start_date(t_mode="value")
    ed = ensemble.get_end_date(t_mode="value")
    pd = ensemble.get_time_period()

    out_data = None
    time_data = []
    # iterate over the days
    for d in numpy.arange(sd, ed+pd, pd):
        sub_ens = ensemble.subset_by_date(d)
        ens_mems = sub_ens.get_members()
        n_mems = len(ens_mems)
        # need the start date of the ensemble member
        sde, tue, n_days_py = ens_mems[0].get_t_dim().get_time_details()
        if debug == True:
            # debug output - single-argument print works as statement or function
            print("Date " + float_to_daytime(d, n_days_py).isoformat(" ") +
                  " ensemble members " + str(n_mems))
        # get the weight - 1.0 / number of ensemble members
        ew = 1.0 / n_mems
        # get the time dimension axis number
        T_axis = ens_mems[0].get_box().get_dimension_axes().index("T")

        # build the weighted sum over the members for this time step
        c_data = None
        for e in range(0, n_mems):
            val = ew * _read_member_or_substitute(ens_mems, e, d-sde)
            if c_data is None:
                c_data = val
            else:
                c_data += val

        # do we have to create the data (in the timeseries) or just append to it?
        if out_data is None:
            out_data = c_data
        else:
            out_data = numpy.concatenate([out_data, c_data], axis=T_axis)
        time_data.append(d)

    # now create a box containing the data and the new T axis
    # use first ensemble members box as the template
    src_box = ensemble.subset_by_date(sd).get_members()[0].get_box()
    # get the dimensions - and find the time axis dimension
    tgt_dims = src_box.get_dimensions()
    T_axis = src_box.get_dimension_axes().index("T")

    ##### steps to create a new time dimension
    # get the time units, start date and number of days per year - start date is 0, though
    sdate, timeu, n_days_py = ens_mems[0].get_t_dim().get_time_details()
    # get the old attributes and modify the source date
    time_attrs = tgt_dims[T_axis].get_attributes()
    # get the start_date as a daytime
    sd_dt = float_to_daytime(sd, n_days_py)
    time_attrs['units'] = "days since " + sd_dt.isoformat(" ")
    # minus sd from time_data after converting to numpy array
    time_data = numpy.array(time_data)-sd
    # create a new time dimension and overwrite the existing dimension
    tgt_t_dim = cpdn_boxdim(name=tgt_dims[T_axis].get_name(), vals=time_data,
                            attrs=time_attrs, axis="T", start_date=sd,
                            time_units=timeu, n_days_per_year=n_days_py)
    tgt_dims[T_axis] = tgt_t_dim
    #####

    # modify the variables attributes - add a cell method and history
    method_str = "ensemble mean"
    history_str = datetime.now().isoformat() + " altered by CPDN: ensemble mean."
    tgt_attrs = amend_attributes(src_box.get_attributes(), method_str, history_str)
    tgt_glob_attrs = src_box.get_global_attributes()

    # overwrite tgt_off and tgt_sf as get_values scales by these
    tgt_off = 0.0
    tgt_sf = 1.0
    # create the box and return
    tgt_box = cpdn_box(dims=tgt_dims, var_attrs=tgt_attrs,
                       name=src_box.get_name(), glob_attrs=tgt_glob_attrs,
                       off=tgt_off, sf=tgt_sf, data=out_data)
    return tgt_box
def cpdn_climatological_seasonal_mean(box):
    """Create a climatological seasonal mean from a cpdn_box.

    All time steps falling in DJF are meaned together, likewise MAM, JJA and
    SON, giving a T dimension of length 4 (in that order).  Values equal to
    the box's missing value are left out of the mean; where a point has no
    valid values at all it is set to the missing value, unless the missing
    value is infinite.

    Returns : a box containing the four seasonal means"""
    T_axis = box.get_dimension_axes().index("T")
    T_vals = box.get_dimension("T").get_values("daytime")

    # calendar month -> season slot: 0 = DJF, 1 = MAM, 2 = JJA, 3 = SON
    season_of_month = {12: 0, 1: 0, 2: 0,
                       3: 1, 4: 1, 5: 1,
                       6: 2, 7: 2, 8: 2,
                       9: 3, 10: 3, 11: 3}
    time_vals_list = [[], [], [], []]
    for t_val in T_vals:
        slot = season_of_month.get(t_val.month)
        if slot is not None:
            time_vals_list[slot].append(t_val)

    # output shape: as the source, but with 4 entries along T
    z_dim = "Z" in box.get_dimension_axes()
    tgt_shape = [4 if dim.get_axis() == "T" else dim.get_len()
                 for dim in box.get_dimensions()]
    tgt_data = numpy.zeros(tgt_shape, 'f')   # running totals, then the means
    tgt_sum = numpy.zeros(tgt_shape, 'f')    # number of valid values per point
    mv = box.get_missing_value()

    # index into the box: everything, at one time value
    src_idx = [slice(None, None, None) for n in range(0, box.get_ndims())]
    for m, season_times in enumerate(time_vals_list):
        for t_val in season_times:
            src_idx[T_axis] = t_val
            data = box[src_idx].get_values().squeeze()
            # only points that are not missing contribute
            valid = numpy.where(data != mv)
            if z_dim:
                # with a Z axis the totals are kept in the first Z entry
                tgt_data[m][0][valid] += data[valid]
                tgt_sum[m][0][valid] += 1
            else:
                tgt_data[m][valid] += data[valid]
                tgt_sum[m][valid] += 1
        # totals -> means
        tgt_data[m] = tgt_data[m] / tgt_sum[m]
        # points that never had a valid value
        if not numpy.isinf(mv):
            tgt_data[m][tgt_sum[m] == 0] = mv

    # build the output dimensions, replacing T by a four-entry time dimension
    start_year = T_vals[0].year
    end_year = T_vals[-1].year
    tgt_dims = []
    for dim in box.get_dimensions():
        if dim.get_axis() != "T":
            tgt_dims.append(dim)
            continue
        # start date, time units and number of days in the year
        sd, un, nd = dim.get_time_details()
        vals = [(x*90*un)+15 for x in range(0, 4)]
        bnds = numpy.array([[v, v+(end_year-start_year)*nd+90*un] for v in vals])
        tgt_dims.append(cpdn_boxdim(name=dim.get_name(), vals=vals,
                                    attrs=dim.get_attributes(), axis="T",
                                    bounds=bnds, start_date=sd,
                                    time_units=un, n_days_per_year=nd))

    # record the operation in the variable attributes
    stamp = datetime.now().isoformat() + " altered by CPDN: climatological seasonal mean."
    tgt_attrs = amend_attributes(box.get_attributes(),
                                 "time: climatological seasonal mean", stamp)

    # get_values has applied any scaling already, hence off=0.0 and sf=1.0
    return cpdn_box(dims=tgt_dims, var_attrs=tgt_attrs,
                    glob_attrs=box.get_global_attributes(),
                    name=box.get_name(), off=0.0, sf=1.0, data=tgt_data)
def cpdn_climatological_month_mean(box):
    """Create a climatological month mean from a cpdn_box.

    i.e. all Jans meaned, all Febs meaned etc., giving a T dimension of
    length 12.  The positions of missing values are taken from the first
    entry of the box and assumed to be the same for all time steps.

    A calendar month with no time steps at all is filled with the missing
    value (or NaN when the missing value is infinite) instead of failing
    with a division by zero.

    Returns : a box containing the twelve monthly means"""
    # get the time dimension and its values
    T_axis = box.get_dimension_axes().index("T")
    T_vals = box.get_dimension("T").get_values("daytime")
    time_vals_list = [[] for m in range(0, 12)]

    # get missing value
    mv = box.get_missing_value()
    # get missing value indices - assume same across all time periods
    mv_idx = numpy.where(box[0].get_values().squeeze() == mv)

    # loop through the T_vals and build 12 lists (one for each month) of time values
    for t_val in T_vals:
        time_vals_list[t_val.month-1].append(t_val)

    # now produce the month means, need an array to store them in - first
    # determine shape
    tgt_shape = []
    for d in box.get_dimensions():
        if d.get_axis() != "T":
            tgt_shape.append(d.get_len())
        else:
            tgt_shape.append(12)
    tgt_data = numpy.zeros(tgt_shape, 'f')

    # now loop through and add to the target data, weighted by 1.0/number of jans etc.
    src_idx = [slice(None, None, None) for d in range(0, box.get_ndims())]
    for m in range(0, 12):
        month_times = time_vals_list[m]
        if not month_times:
            # no data for this month; an infinite mv is treated, as in the
            # seasonal mean, as "no usable missing value"
            if numpy.isinf(mv):
                tgt_data[m] = numpy.nan
            else:
                tgt_data[m] = mv
            continue
        mw = 1.0 / len(month_times)
        for t_val in month_times:
            src_idx[T_axis] = t_val
            v = numpy.mean(box.__getitem__(src_idx).get_values(), axis=T_axis)
            tgt_data[m] += mw * v
        # reinstate missing values
        tgt_data[m][mv_idx] = mv

    # create the target dimensions
    tgt_dims = []
    start_year = T_vals[0].year
    end_year = T_vals[-1].year
    for d in box.get_dimensions():
        if d.get_axis() == "T":
            # get the start date, units and number of days in the year
            sd, un, nd = d.get_time_details()
            # create the values and the bounds
            vals = []
            for m in range(0, 12):
                if nd == 360:
                    vals.append(m*30*un)
                else:
                    vals.append(days_elapsed[m]*un)
            bnds = numpy.array([[vals[x], vals[x]+(end_year-start_year)*nd+30]
                                for x in range(0, 12)])
            # create the time dimension and append
            td = cpdn_boxdim(name=d.get_name(), vals=vals,
                             attrs=d.get_attributes(), axis="T", bounds=bnds,
                             start_date=sd, time_units=un, n_days_per_year=nd)
            tgt_dims.append(td)
        else:
            tgt_dims.append(d)

    # create the return box
    method_str = "time: climatological month mean"
    history_str = datetime.now().isoformat() + " altered by CPDN: climatological month mean."
    tgt_attrs = amend_attributes(box.get_attributes(), method_str, history_str)
    tgt_glob_attrs = box.get_global_attributes()

    # overwrite tgt_off and tgt_sf as get_values scales by these
    tgt_off = 0.0
    tgt_sf = 1.0
    tgt_box = cpdn_box(dims=tgt_dims, var_attrs=tgt_attrs,
                       glob_attrs=tgt_glob_attrs, name=box.get_name(),
                       off=tgt_off, sf=tgt_sf, data=tgt_data)
    return tgt_box
def cpdn_spatial_smooth(src_box, w_x=3, w_y=None, type="flat"):
    """Spatially smooth (in X and Y dimensions) data contained within a box.

    src_box : the source cpdn_box
    w_x     : either a ready-made 2D window (numpy array, normalised here to
              sum to 1) or the integer half-width of the window
    w_y     : integer half-width in the other window direction; defaults to w_x
    type    : "flat" or "gauss" - shape of the window built from w_x / w_y

    Slices without missing data are convolved with the window (wrapping at
    the edges); other slices are averaged point by point, skipping values
    whose magnitude is 1000 or more.  The first and last rows along Y are
    copied from the source afterwards.

    Returns : a new box (offset 0.0, scale factor 1.0) with the smoothed data
    Raises  : Exception for an unknown window type or unsupported w_x"""
    # create or copy the window
    if isinstance(w_x, numpy.ndarray):
        # float() keeps an integer window from being normalised to all zeros
        # by integer division
        win = w_x / float(numpy.sum(w_x))
        # NOTE(review): in this case w_x (an array) and w_y are still passed
        # to get_smoothing_window in the slow path below - confirm that works.
    elif isinstance(w_x, (int, numpy.integer)):
        if w_y is None:
            w_y = w_x
        if type == "flat":
            win = numpy.ones([w_x*2+1, w_y*2+1], 'f') / (w_x*w_y*4)
            win = win / numpy.sum(win)
        elif type == "gauss":
            gx, gy = numpy.mgrid[-w_x:w_x+1, -w_y:w_y+1]
            win = numpy.exp(-(gx**2/float(w_x)+gy**2/float(w_y)))
            win = win / numpy.sum(win)
        else:
            raise Exception("Unknown window type")
    else:
        raise Exception("Window must be an integer half-width or a numpy array")
    win_f = win.flatten()

    # create the target data - same shape as the source box
    tgt_data = numpy.zeros(src_box.get_dimension_lengths(), 'f')

    # create an iterator over the non-X, non-Y dimensions of the source box
    it_dims = []
    for d in src_box.get_dimensions():
        axis = d.get_axis()
        if axis != "X" and axis != "Y":
            it_dims.append(axis)
    box_it = cpdn_box_iterator(src_box, it_dims)
    src_idx, tgt_idx = box_it.begin(True)

    # get missing value - don't want to convolve this
    mv = src_box.get_missing_value()
    mv_is_inf = numpy.isinf(mv)

    # loop through
    while not box_it.end():
        # get the source data
        src_data = src_box[src_idx].get_values().squeeze()
        # check whether the data can be smoothed quickly using convolve
        # NOTE(review): an infinite mv always takes the slow path - confirm
        # that this is intended.
        mv_in_src = (not mv_is_inf) and bool(numpy.any(numpy.abs(src_data) > mv*0.9))
        if not mv_is_inf and not mv_in_src:
            # convolve the data with the window - the data comes first so
            # that mode="same" returns an array of the data's size - and wrap
            # around the date line (and the poles, unfortunately)
            c_data = convolve2d(src_data, win, mode="same", boundary="wrap")
            tgt_data[tgt_idx] = numpy.reshape(c_data, src_data.shape)
        else:
            # do the slow dance with missing data
            n_rows = src_data.shape[0]
            n_cols = src_data.shape[1]
            for j in range(0, n_rows):
                for i in range(0, n_cols):
                    if abs(src_data[j, i]) > 1000:  # abs(mv*0.9):
                        tgt_data[tgt_idx][j, i] = mv
                        continue
                    # get the list of indices covered by the window
                    idx_list = get_smoothing_window(j, i, n_rows, n_cols, w_x, w_y)
                    # weighted average over the valid points in the window
                    total = 0.0
                    weight = 0
                    for k in range(0, len(idx_list)):
                        wy_idx = idx_list[k][0]
                        wx_idx = idx_list[k][1]
                        v = src_data[wy_idx, wx_idx]
                        if abs(v) < 1000:  # abs(mv*0.9):
                            total += v * win_f[k]
                            weight += win_f[k]
                    if weight == 0:
                        tgt_data[tgt_idx][j, i] = mv
                    else:
                        tgt_data[tgt_idx][j, i] = total/weight
        # iterate to next time / z / etc.
        src_idx, tgt_idx = box_it.next(True)

    # smoothing does weird things to the poles (due to wrap around boundary conditions)
    # so reinstate the poles
    Y_axis = src_box.get_dimension_axes().index("Y")
    src_vals = src_box.get_values()
    idxs = [slice(None)] * src_box.get_ndims()
    for pole_row in (0, -1):
        # first row (north pole / top), then last row (south pole / bottom)
        idxs[Y_axis] = pole_row
        # a multidimensional index has to be a tuple, not a list
        tgt_data[tuple(idxs)] = src_vals[tuple(idxs)]

    # create return box, all the same except for attributes and history
    method_str = "longitude: latitude: kernel smoothing"
    history_str = datetime.now().isoformat() + " altered by CPDN: kernel smoothing."
    tgt_attrs = amend_attributes(src_box.get_attributes(), method_str, history_str)
    tgt_box = cpdn_box(dims=src_box.get_dimensions(), var_attrs=tgt_attrs,
                       name=src_box.get_name(),
                       glob_attrs=src_box.get_global_attributes(),
                       off=0.0, sf=1.0, data=tgt_data)
    return tgt_box
def get_box(self):
    """Load and return the box associated with the ensemble member."""
    # a new box is read from file on every call
    member_box = cpdn_box()
    member_box.load(self.__filename, self.__varname)
    return member_box
from cpdn_smooth import * from map_plot import map_plot from os.path import expanduser as EU import matplotlib.cm as cm ############################################################################### ### tests ### if __name__ == "__main__": test_number = 16 if test_number == 0: path = EU("/Volumes/Macintosh HD2/shared/sas_test2/") file = "nhqhma.pcg7oct.nc" box = cpdn_box() box.load(path+file, "field16") print box.get_dimension("Y").get_values() # # file with no bounds data so will guess bounds # path = "/home/orwell/cpdn/massey/HadAM3P_Output/1960/hadam3p_m001_1960_2_006101835_0/" # file = "m001ma.pag0dec.nc" # box = cpdn_box() # box.load(path+file, "field16") # x = box["1960-12-01":"1961-01-02", 0, :, :] # print x.get_values() elif test_number == 1: # file with bounds data so will read them in path = "/home/ares/mad/rye/data/cmip5/CMIP5/output/MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110329/tas/"