def test_stack_fails():
    # Should use concatenate instead, because the axis is not new!
    a = DimArray([1,2,3], dims=['x0'])
    b = DimArray([11,22,33], dims=['x0'])
    with pytest.raises(ValueError):
        c_got = stack([a, b], axis='x0')
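# For joining along an *existing* axis, concatenate is the right call; a
# minimal sketch of the passing case (assuming dimarray's concatenate):
from dimarray import DimArray, concatenate

a = DimArray([1, 2, 3], dims=['x0'])
b = DimArray([11, 22, 33], dims=['x0'])
c = concatenate((a, b), axis='x0')  # 6 elements along the existing 'x0' axis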
def test_stack_align():
    a = DimArray([1,2,3], axes=[[0,1,2]], dims=['x0'])
    b = DimArray([33,11], axes=[[2,0]], dims=['x0'])
    c_got = stack([b, a], axis='stackdim', align=True, sort=True, keys=['a','b'])
    c_got_ds = stack_ds(_make_datasets(b, a), axis='stackdim', align=True, sort=True, keys=['a','b'])
    c = DimArray([[11., np.nan, 33.],
                  [ 1.,     2.,  3.]],
                 axes=[['a', 'b'], [0, 1, 2]],
                 dims=['stackdim', 'x0'])
    assert_equal_dimarrays(c_got, c)
    assert_equal_dimarrays(c_got_ds['a'], c)
def test_stack():
    a = DimArray([1,2,3], dims=['x0'])
    b = DimArray([11,22,33], dims=['x0'])
    c = DimArray([[ 1,  2,  3],
                  [11, 22, 33]],
                 axes=[['a', 'b'], [0, 1, 2]],
                 dims=['stackdim', 'x0'])
    c_got = stack([a, b], axis='stackdim', keys=['a','b'])
    c_got_ds = stack_ds(_make_datasets(a, b), axis='stackdim', keys=['a','b'])
    assert_equal_dimarrays(c_got, c)
    assert_equal_dimarrays(c_got_ds['a'], c)
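# _make_datasets is not shown in these tests; a hypothetical reconstruction,
# assuming it wraps each array into a Dataset under the variable names the
# assertions index ('a' here), so stack_ds has something to stack per key:
def _make_datasets(*arrays):
    from dimarray import Dataset
    return [Dataset({'a': arr, 'b': arr}) for arr in arrays]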
def percentile(a, pct, axis=0, newaxis=None, out=None, overwrite_input=False):
    """ Calculate percentile along an axis

    Parameters
    ----------
    pct: float, percentile or sequence of percentiles (0 < pct < 100)
    axis, optional, default 0: axis along which to compute percentiles
    newaxis, optional: name of the new percentile axis, if more than one pct.
        By default, append "_percentile" to the name of the axis on which
        the transformation is applied.
    out, overwrite_input: passed to numpy's percentile method (see documentation)

    Returns
    -------
    pctiles: DimArray or scalar whose required axis has been reduced or
        replaced by percentiles

    Examples
    --------
    >>> from dimarray import DimArray
    >>> np.random.seed(0) # for reproducibility of results
    >>> a = DimArray(np.random.randn(1000), dims=['sample'])
    >>> percentile(a, 50)
    -0.058028034799627745
    >>> percentile(a, [50, 95])
    dimarray: 2 non-null elements (0 null)
    0 / sample_percentile (2): 50 to 95
    array([-0.05802803,  1.66012041])
    """
    if not isinstance(a, da.DimArray):
        raise TypeError("Expected DimArray instance, got {} of type {}".format(a, type(a)))

    pos, nm = a._get_axis_info(axis)
    results = np.percentile(a.values, pct, axis=pos, out=out, overwrite_input=overwrite_input)

    # If the result is scalar (pct is scalar and the input is 1-D), just return it
    if np.isscalar(results):
        return results

    # For scalar pct, results is a numpy array: just drop the reduced axis.
    subaxes = [ax for ax in a.axes if ax.name != nm]
    if np.isscalar(pct):
        results = da.DimArray(results, axes=subaxes)

    # pct is array-like: recreate a DimArray per percentile and stack them
    else:
        if newaxis is None:
            newaxis = nm + '_percentile'
        results = [da.DimArray(res, axes=subaxes) for res in results]  # list of DimArrays
        results = da.stack(results, keys=pct, axis=newaxis)  # stack into a larger DimArray

    return results
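# Usage sketch for the newaxis parameter on a 2-D input; 'time' and
# 'station' are made-up dimension names, not from the source:
import numpy as np
import dimarray as da

arr = da.DimArray(np.random.randn(100, 3), dims=['time', 'station'])
pcts = percentile(arr, [5, 50, 95], axis='time', newaxis='pct')
# pcts has dimensions ('pct', 'station'), one row per requested percentile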
def cflon(f, altmin, latbounds):

    # altmin is an array

    v = vcm.VCM(f, verbose=False)
    out = da.Dataset()

    lon_axis = da.Axis(lonbins[:-1], 'lon')

    # number of profiles per lon bin
    h, xx = np.histogram(v.lon, bins=lonbins)
    out['nprof'] = da.DimArray(h, [lon_axis])
    out['nprof'].longname = 'Number of measured profiles'

    for n in names:

        cv = v.get_vcm(n)
        assert cv is not None

        # clip latitudes (boolean masking; np.take would misread the
        # boolean index as integer indices 0 and 1)
        latidx = (v.lat >= latbounds[0]) & (v.lat < latbounds[1])
        cv = cv[latidx]
        lon = v.lon[latidx]

        outdict = dict()

        for a in altmin:

            idx = np.where(v.altitude >= a)[0]
            cloudy = np.take(cv, idx, axis=1)
            cloudy = np.sum(cloudy, axis=1)
            np.clip(cloudy, 0, 1, out=cloudy)

            # histogram over the clipped longitudes so the weights
            # (one per clipped profile) line up with the samples
            h, xx = np.histogram(lon, bins=lonbins, weights=cloudy)
            outdict[a] = da.DimArray(h, [lon_axis])

        outname = n + '_cprof'
        out[outname] = da.stack(outdict, axis='altmin')
        out[outname].longname = 'Number of cloudy profiles from cloud mask = ' + n

    return out
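# cflon relies on module-level lonbins and names; a sketch of plausible
# definitions (assumed values, not from the source project):
import numpy as np

lonbins = np.arange(-180., 181., 2.)   # 2-degree longitude bin edges
names = ['cal333', 'csat']             # hypothetical cloud-mask names

# out = cflon('vcm_granule.nc', altmin=[0, 5, 10], latbounds=(-30, 30))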
print(model)
tmp_1 = {}
for state, style in state_dict.items():
    print(state)
    tmp_2 = {}
    for corWith_name in ['EKE', 'SPI3']:
        all_files = glob.glob(working_path + 'reg_cor/' + model + '/cor_' + corWith_name
                              + '*' + scenario + '*_' + state + '.nc')
        tmp_3 = {}
        for file_name in all_files:
            region = file_name.split('_')[-2]
            if region != 'NHml':
                # stack all variables of the file along a new 'statistic' axis
                tmp = da.stack(da.read_nc(file_name), axis='statistic', align=True)
                tmp_3[region] = tmp.mean(axis=(-2, -1))
                # overwrite with nanmean so missing grid cells are ignored
                tmp_3[region].values = np.nanmean(tmp, axis=(-2, -1))

        all_files = glob.glob(working_path + 'reg_stats/' + model + '/stats_' + corWith_name
                              + '*' + scenario + '*_' + state + '.nc')
        tmp_4 = {}
        for file_name in all_files:
            region = file_name.split('_')[-2]
            if region != 'NHml':
                tmp = da.stack(da.read_nc(file_name), axis='statistic', align=True)
                tmp_4[region] = tmp.mean(axis=(-2, -1))
tmp_1 = {}
for state, style in state_dict.items():
    print(state)
    tmp_2 = {}
    for corWith_name in ['EKE', 'SPI3']:
        print(corWith_name)
        hist_files = glob.glob(working_path + model + '/cor_' + corWith_name + '_'
                               + '_'.join([model, 'All-Hist', '*', state]) + '.nc')
        hist = {}
        for hist_file in hist_files:
            region = hist_file.split('_')[-2]
            hist[region] = da.stack(da.read_nc(hist_file), axis='statistic', align=True)
            # round coordinates so the regions align when stacked below
            hist[region].lat = np.round(hist[region].lat, 2)
            hist[region].lon = np.round(hist[region].lon, 2)
        hist = da.stack(hist, align=True, axis='region')

        fut_files = glob.glob(working_path + model + '/cor_' + corWith_name + '_'
                              + '_'.join([model, 'Plus20-Future', '*', state]) + '.nc')
        fut = {}
        for fut_file in fut_files:
            region = fut_file.split('_')[-2]
            fut[region] = da.stack(da.read_nc(fut_file), axis='statistic', align=True)
            fut[region].lat = np.round(fut[region].lat, 2)
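# The recurring pattern da.stack(da.read_nc(f), axis='statistic', align=True)
# stacks every variable of a netCDF file along a new 'statistic' axis keyed
# by variable name; a self-contained sketch (variable names are invented):
import numpy as np
import dimarray as da

ds = da.Dataset(slope=da.DimArray(np.zeros((2, 2)), dims=['lat', 'lon']),
                r2=da.DimArray(np.ones((2, 2)), dims=['lat', 'lon']))
stacked = da.stack(ds, axis='statistic', align=True)
# stacked has dimensions ('statistic', 'lat', 'lon') with keys 'slope', 'r2'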
def transform_vectors(u, v, to_crs, from_crs=None, xt=None, yt=None, **kwargs):
    """ Transform a vector field array into a new coordinate system and
    interpolate values onto a new regular grid

    Assume the vector field is represented by an array of shape (2, Ny, Nx)

    Parameters
    ----------
    u, v : GeoArray or other DimArray instances
        x- and y- vector components
    to_crs : str or dict or cartopy.crs.CRS instance
        grid mapping onto which the transformation should be done
        str : PROJ.4 str or cartopy.crs.CRS class name
        dict : CF parameters
    from_crs : idem, optional
        original grid mapping. Can be omitted if the grid_mapping attribute
        already contains the appropriate information, or if the horizontal
        coordinates are longitude and latitude.
    xt, yt : array-like (1-D), optional
        new coordinates to interpolate the array on;
        deduced as min and max of the new coordinates if not provided
    **kwargs : passed to scipy.interpolate.RegularGridInterpolator
        bounds_error (True), method (linear), fill_value (np.nan)

    Returns
    -------
    transformed : GeoArray
        new 3-D GeoArray transformed and interpolated
    """
    # back-compat
    _masked = kwargs.pop('masked', None)
    if _masked is not None:
        warnings.warn('masked is deprecated.', DeprecationWarning)

    if not isinstance(u, DimArray) or not isinstance(v, DimArray):
        raise TypeError("u and v must be DimArray instances")
    if not isinstance(u, GeoArray):
        u = GeoArray(u)
    if not isinstance(v, GeoArray):
        v = GeoArray(v)

    # consistency check between u and v
    assert u.axes == v.axes, "u and v must have the same axes"
    if from_crs is None and hasattr(u, 'grid_mapping'):
        assert hasattr(v, 'grid_mapping') and u.grid_mapping == v.grid_mapping, \
            'u and v must have the same grid mapping'

    # get grid mapping instances
    from_crs = _get_crs(from_crs, u)
    to_crs = _get_crs(to_crs)

    # find horizontal coordinates
    x0, y0 = _check_horizontal_coordinates(u)

    # transform coordinates and prepare regular grid for interpolation
    x0_interp, y0_interp, xt, yt = _inverse_transform_coords(from_crs, to_crs, xt, yt, x0, y0)

    # transform vector components
    x0_2d, y0_2d = np.meshgrid(x0, y0)

    _new_points = np.array([y0_interp.flatten(), x0_interp.flatten()]).T

    def _interp_map(x, y, z):
        f = RegularGridInterpolator((y, x), z, **kwargs)
        return f(_new_points).reshape(x0_interp.shape)

    _constructor = u._constructor

    if u.ndim == 2:
        # First transform vector components onto the new coordinate system
        _ut, _vt = to_crs.transform_vectors(from_crs, x0_2d, y0_2d, u.values, v.values)

        # Then interpolate onto regular grid
        #_ui = interp(_ut, x0.values, y0.values, x0_interp, y0_interp, masked=masked)
        _ui = _interp_map(x0.values, y0.values, _ut)
        ut = _constructor(_ui, [yt, xt])
        #_vi = interp(_vt, x0.values, y0.values, x0_interp, y0_interp, masked=masked)
        _vi = _interp_map(x0.values, y0.values, _vt)
        vt = _constructor(_vi, [yt, xt])

    else:
        # first reshape to 3-D components, flattening everything except
        # the horizontal coordinates
        # TODO: optimize by computing and re-using weights?
        obj = stack([u, v], axis='vector_components', keys=['u', 'v'])
        obj = obj.flatten(('vector_components', x0.name, y0.name), reverse=True, insert=0)

        newvalues = []
        for k, suba in obj.iter(axis=0):  # iterate over the first dimension
            # First transform vector components onto the new coordinate system
            _ut, _vt = to_crs.transform_vectors(from_crs, x0_2d, y0_2d,
                                                suba.values[0], suba.values[1])
            # Then interpolate onto regular grid
            #_ui = interp(_ut, x0.values, y0.values, x0_interp, y0_interp, masked=masked)
            _ui = _interp_map(x0.values, y0.values, _ut)
            #_vi = interp(_vt, x0.values, y0.values, x0_interp, y0_interp, masked=masked)
            _vi = _interp_map(x0.values, y0.values, _vt)
            newvalues.append(np.array([_ui, _vi]))

        # stack the arrays together
        newvalues = np.array(newvalues)  # 4-D: flattened, vector_components, y, x
        flattened_obj = _constructor(newvalues, [obj.axes[0], obj.axes[1], yt, xt])
        ut, vt = flattened_obj.unflatten(axis=0).swapaxes('vector_components', 0)

    # add metadata
    ut.attrs.update(u.attrs)
    vt.attrs.update(v.attrs)
    _add_grid_mapping_metadata(ut, to_crs)
    _add_grid_mapping_metadata(vt, to_crs)

    return ut, vt
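# Usage sketch for transform_vectors, assuming cartopy is available and that
# the function is importable from dimarray's geo subpackage (import path and
# wind values are assumptions):
import numpy as np
import cartopy.crs as ccrs
from dimarray.geo import GeoArray, transform_vectors

lon = np.linspace(-60., 60., 25)
lat = np.linspace(30., 80., 20)
u = GeoArray(np.ones((20, 25)), axes=[lat, lon], dims=['lat', 'lon'])   # eastward
v = GeoArray(np.zeros((20, 25)), axes=[lat, lon], dims=['lat', 'lon'])  # northward

# re-express the wind field on a North Polar Stereographic grid
ut, vt = transform_vectors(u, v, to_crs=ccrs.NorthPolarStereo())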
def transform_vectors(u, v, to_crs, from_crs=None, xt=None, yt=None, masked=np.nan):
    """ Transform a vector field array into a new coordinate system and
    interpolate values onto a new regular grid

    Assume the vector field is represented by an array of shape (2, Ny, Nx)

    Parameters
    ----------
    u, v : GeoArray or other DimArray instances
        x- and y- vector components
    to_crs : str or dict or cartopy.crs.CRS instance
        grid mapping onto which the transformation should be done
        str : PROJ.4 str or cartopy.crs.CRS class name
        dict : CF parameters
    from_crs : idem, optional
        original grid mapping. Can be omitted if the grid_mapping attribute
        already contains the appropriate information, or if the horizontal
        coordinates are longitude and latitude.
    xt, yt : array-like (1-D), optional
        new coordinates to interpolate the array on;
        deduced as min and max of the new coordinates if not provided
    masked : bool or number, optional
        If False, interpolated values outside the range of the input grid
        will be clipped to values on the boundary of the input grid.
        If True, points outside the range of the input grid are masked
        (set to NaN).
        If masked is set to a number, points outside the range of xin and
        yin will be set to that number. Default is nan.

    Returns
    -------
    transformed : GeoArray
        new 3-D GeoArray transformed and interpolated
    """
    if not isinstance(u, DimArray) or not isinstance(v, DimArray):
        raise TypeError("u and v must be DimArray instances")
    if not isinstance(u, GeoArray):
        u = GeoArray(u)
    if not isinstance(v, GeoArray):
        v = GeoArray(v)

    # consistency check between u and v
    assert u.axes == v.axes, "u and v must have the same axes"
    if from_crs is None and hasattr(u, 'grid_mapping'):
        assert hasattr(v, 'grid_mapping') and u.grid_mapping == v.grid_mapping, \
            'u and v must have the same grid mapping'

    # get grid mapping instances
    from_crs = _get_crs(from_crs, u)
    to_crs = _get_crs(to_crs)

    # find horizontal coordinates
    x0, y0 = _check_horizontal_coordinates(u)

    # transform coordinates and prepare regular grid for interpolation
    x0_interp, y0_interp, xt, yt = _inverse_transform_coords(from_crs, to_crs, xt, yt, x0, y0)

    # transform vector components
    x0_2d, y0_2d = np.meshgrid(x0, y0)

    if masked is True:
        masked = np.nan  # use NaN instead of MaskedArray

    _constructor = u._constructor

    if u.ndim == 2:
        # First transform vector components onto the new coordinate system
        _ut, _vt = to_crs.transform_vectors(from_crs, x0_2d, y0_2d, u.values, v.values)

        # Then interpolate onto regular grid
        _ui = interp(_ut, x0.values, y0.values, x0_interp, y0_interp, masked=masked)
        ut = _constructor(_ui, [yt, xt])
        _vi = interp(_vt, x0.values, y0.values, x0_interp, y0_interp, masked=masked)
        vt = _constructor(_vi, [yt, xt])

    else:
        # first reshape to 3-D components, flattening everything except
        # the horizontal coordinates
        # TODO: optimize by computing and re-using weights?
        obj = stack([u, v], axis='vector_components', keys=['u', 'v'])
        obj = obj.group(('vector_components', x0.name, y0.name), reverse=True, insert=0)

        newvalues = []
        for k, suba in obj.iter(axis=0):  # iterate over the first dimension
            # First transform vector components onto the new coordinate system
            _ut, _vt = to_crs.transform_vectors(from_crs, x0_2d, y0_2d,
                                                suba.values[0], suba.values[1])
            # Then interpolate onto regular grid
            _ui = interp(_ut, x0.values, y0.values, x0_interp, y0_interp, masked=masked)
            _vi = interp(_vt, x0.values, y0.values, x0_interp, y0_interp, masked=masked)
            newvalues.append(np.array([_ui, _vi]))

        # stack the arrays together
        newvalues = np.array(newvalues)  # 4-D: grouped, vector_components, y, x
        grouped_obj = _constructor(newvalues, [obj.axes[0], obj.axes[1], yt, xt])
        ut, vt = grouped_obj.ungroup(axis=0).swapaxes('vector_components', 0)

    # add metadata
    ut._metadata(u._metadata())
    vt._metadata(v._metadata())
    _add_grid_mapping_metadata(ut, to_crs)
    _add_grid_mapping_metadata(vt, to_crs)

    return ut, vt