'mapas/BR_Municipios_2019.shp'))

# centroids
lon = brasil.geometry.centroid.geometry.x.values
lat = brasil.geometry.centroid.geometry.y.values

# set correct path of the variables
path_var = '/home/alexandre/Dropbox/ParaUbuntu/netcdfgrid3/'

# getting NetCDF files
prec = xr.open_mfdataset(path_var + 'prec_daily_UT_Brazil_v2*.nc')['prec']
Tmax = xr.open_mfdataset(path_var + 'Tmax_daily_UT_Brazil_v2*.nc')['Tmax']
Tmin = xr.open_mfdataset(path_var + 'Tmin_daily_UT_Brazil_v2*.nc')['Tmin']

# exporting
pd.DataFrame(prec.sel(longitude=xr.DataArray(lon, dims='z'),
                      latitude=xr.DataArray(lat, dims='z'),
                      method='nearest').values,
             columns=brasil.CD_MUN,
             index=prec.time.values).to_csv('prec_mun.csv', float_format='%.1f')
pd.DataFrame(Tmax.sel(longitude=xr.DataArray(lon, dims='z'),
                      latitude=xr.DataArray(lat, dims='z'),
                      method='nearest').values,
             columns=brasil.CD_MUN,
             index=Tmax.time.values).to_csv('Tmax_mun.csv', float_format='%.1f')
pd.DataFrame(Tmin.sel(longitude=xr.DataArray(lon, dims='z'),
                      latitude=xr.DataArray(lat, dims='z'),
def reproject( self, dst_crs, resolution=None, shape=None, transform=None, resampling=Resampling.nearest, nodata=None, **kwargs, ): """ Reproject :obj:`xarray.DataArray` objects Powered by :func:`rasterio.warp.reproject` .. note:: Only 2D/3D arrays with dimensions 'x'/'y' are currently supported. Requires either a grid mapping variable with 'spatial_ref' or a 'crs' attribute to be set containing a valid CRS. If using a WKT (e.g. from spatiareference.org), make sure it is an OGC WKT. .. versionadded:: 0.0.27 shape .. versionadded:: 0.0.28 transform .. versionadded:: 0.5.0 nodata, kwargs Parameters ---------- dst_crs: str OGC WKT string or Proj.4 string. resolution: float or tuple(float, float), optional Size of a destination pixel in destination projection units (e.g. degrees or metres). shape: tuple(int, int), optional Shape of the destination in pixels (dst_height, dst_width). Cannot be used together with resolution. transform: Affine, optional The destination transform. resampling: rasterio.enums.Resampling, optional See :func:`rasterio.warp.reproject` for more details. nodata: float, optional The nodata value used to initialize the destination; it will remain in all areas not covered by the reprojected source. Defaults to the nodata value of the source image if none provided and exists or attempts to find an appropriate value by dtype. **kwargs: dict Additional keyword arguments to pass into :func:`rasterio.warp.reproject`. To override: - src_transform: `rio.write_transform` - src_crs: `rio.write_crs` - src_nodata: `rio.write_nodata` Returns ------- :obj:`xarray.DataArray`: The reprojected DataArray. """ if resolution is not None and (shape is not None or transform is not None): raise RioXarrayError( "resolution cannot be used with shape or transform.") if self.crs is None: raise MissingCRS( "CRS not found. Please set the CRS with 'rio.write_crs()'." f"{_get_data_var_message(self._obj)}") gcps = self.get_gcps() if gcps: kwargs.setdefault("gcps", gcps) src_affine = None if "gcps" in kwargs else self.transform(recalc=True) if transform is None: dst_affine, dst_width, dst_height = _make_dst_affine( self._obj, self.crs, dst_crs, resolution, shape, **kwargs) else: dst_affine = transform if shape is not None: dst_height, dst_width = shape else: dst_height, dst_width = self.shape dst_data = self._create_dst_data(dst_height, dst_width) dst_nodata = self._get_dst_nodata(nodata) rasterio.warp.reproject( source=self._obj.values, destination=dst_data, src_transform=src_affine, src_crs=self.crs, src_nodata=self.nodata, dst_transform=dst_affine, dst_crs=dst_crs, dst_nodata=dst_nodata, resampling=resampling, **kwargs, ) # add necessary attributes new_attrs = _generate_attrs(self._obj, dst_nodata) # make sure dimensions with coordinates renamed to x,y dst_dims = [] for dim in self._obj.dims: if dim == self.x_dim: dst_dims.append("x") elif dim == self.y_dim: dst_dims.append("y") else: dst_dims.append(dim) xda = xarray.DataArray( name=self._obj.name, data=dst_data, coords=_make_coords(self._obj, dst_affine, dst_width, dst_height), dims=tuple(dst_dims), attrs=new_attrs, ) xda.encoding = self._obj.encoding xda.rio.write_transform(dst_affine, inplace=True) xda.rio.write_crs(dst_crs, inplace=True) xda.rio.write_coordinate_system(inplace=True) return xda
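A minimal usage sketch for the accessor method above, assuming rioxarray is installed and exposes it through the `.rio` accessor; the input file, target CRS, and output path are placeholders rather than anything taken from the original source.

import rioxarray  # registers the .rio accessor on xarray objects
from rasterio.enums import Resampling

# open a georeferenced raster as a DataArray with 'x'/'y' dims and a CRS
xda = rioxarray.open_rasterio("input.tif")

# reproject to geographic coordinates with bilinear resampling
xda_wgs84 = xda.rio.reproject("EPSG:4326", resampling=Resampling.bilinear)

# write the reprojected array back to disk
xda_wgs84.rio.to_raster("output_wgs84.tif")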
def generate_terrain(agg: xr.DataArray,
                     x_range: tuple = (0, 500),
                     y_range: tuple = (0, 500),
                     seed: int = 10,
                     zfactor: int = 4000,
                     full_extent: Optional[Union[Tuple, List]] = None,
                     name: str = 'terrain') -> xr.DataArray:
    """
    Generates a pseudo-random terrain which can be helpful for testing
    raster functions.

    Parameters
    ----------
    x_range : tuple, default=(0, 500)
        Range of x values.
    y_range : tuple, default=(0, 500)
        Range of y values.
    seed : int, default=10
        Seed for random number generator.
    zfactor : int, default=4000
        Multiplier for z values.
    full_extent : tuple or list, default=None
        bbox<xmin, ymin, xmax, ymax>. Full extent of coordinate system.

    Returns
    -------
    terrain : xr.DataArray
        2D array of generated terrain values.

    References
    ----------
        - Michael McHugh: https://www.youtube.com/watch?v=O33YV4ooHSo
        - Red Blob Games: https://www.redblobgames.com/maps/terrain-from-noise/

    Examples
    --------
    .. plot::
       :include-source:

        >>> import numpy as np
        >>> import xarray as xr
        >>> from xrspatial import generate_terrain

        >>> W = 400
        >>> H = 300
        >>> data = np.zeros((H, W), dtype=np.float32)
        >>> raster = xr.DataArray(data, dims=['y', 'x'])

        >>> xrange = (-20e6, 20e6)
        >>> yrange = (-20e6, 20e6)
        >>> seed = 2
        >>> zfactor = 10

        >>> terrain = generate_terrain(raster, xrange, yrange, seed, zfactor)
        >>> terrain.plot.imshow()
    """
    height, width = agg.shape

    if full_extent is None:
        full_extent = (x_range[0], y_range[0], x_range[1], y_range[1])
    elif not isinstance(full_extent, (list, tuple)) or len(full_extent) != 4:
        raise TypeError('full_extent must be tuple(4)')

    full_xrange = (full_extent[0], full_extent[2])
    full_yrange = (full_extent[1], full_extent[3])

    x_range_scaled = (_scale(x_range[0], full_xrange, (0.0, 1.0)),
                      _scale(x_range[1], full_xrange, (0.0, 1.0)))
    y_range_scaled = (_scale(y_range[0], full_yrange, (0.0, 1.0)),
                      _scale(y_range[1], full_yrange, (0.0, 1.0)))

    mapper = ArrayTypeFunctionMapping(
        numpy_func=_terrain_numpy,
        cupy_func=_terrain_cupy,
        dask_func=_terrain_dask_numpy,
        dask_cupy_func=lambda *args: not_implemented_func(
            *args, messages='generate_terrain() does not support dask with cupy backed DataArray'  # noqa
        )
    )
    out = mapper(agg)(agg.data, seed, x_range_scaled, y_range_scaled, zfactor)

    canvas = ds.Canvas(
        plot_width=width, plot_height=height, x_range=x_range, y_range=y_range
    )

    # DataArray coords were coming back different from cvs.points...
    hack_agg = canvas.points(pd.DataFrame({'x': [], 'y': []}), 'x', 'y')
    res = get_dataarray_resolution(hack_agg)
    result = xr.DataArray(out,
                          name=name,
                          coords=hack_agg.coords,
                          dims=hack_agg.dims,
                          attrs={'res': res})

    return result
def read_ndbc(filename, dirs=np.arange(0, 360, 10)):
    """Read spectra from NDBC wave buoy ASCII files.

    Both the history and realtime formats are supported. Realtime formats are
    described at https://www.ndbc.noaa.gov/measdes.shtml.

    Args:
        - filename (str) or filenames (list): filename of 1D spectral density
          file or list of the five component files for directional spectra as
          [`spec`, `swdir`, `swdir2`, `swr1`, `swr2`]. There is no way to
          verify the component files for the historical directional spectra,
          so the order entered in the list is what is used. The history and
          realtime formats are automatically detected.
        - dirs (array): vector of directional bins for spectral reconstruction.
        - attrs (dict): additional global attributes.

    Returns:
        - dset (SpecDataset): spectra dataset object read from NDBC buoy file(s).

    """
    if isinstance(filename, str):
        filename = [filename]
    elif isinstance(filename, list):
        if not len(filename) == 5:
            raise ValueError(
                "filename argument for NDBC directional spectra must be list with 5 files [spden,swdir,swdir2,swr1,swr2]"
            )
    else:
        raise TypeError("filename argument must be string or list")

    # Get the spectral density
    df_spden = read_file(filename[0])
    if "Sep_Freq" in df_spden.columns:
        sep_freq = df_spden["Sep_Freq"].values
        df_spden.drop(columns=["Sep_Freq"], inplace=True)
    else:
        sep_freq = None

    times = df_spden.index
    freqs = df_spden.columns.astype("f")
    spshape = (len(times), len(freqs), 1)
    specdens = df_spden.values.reshape(spshape)

    if len(filename) == 1:
        dirs = [0.0]
    else:
        df_swdir = read_file(filename[1])
        df_swdir2 = read_file(filename[2])
        df_swr1 = read_file(filename[3])
        df_swr2 = read_file(filename[4])
        dirs = np.array(dirs)
        specdens = construct_spectra(
            specdens,
            df_swdir.values.reshape(spshape),
            df_swdir2.values.reshape(spshape),
            0.01 * df_swr1.values.reshape(spshape),  # these values are stored with a factor of 100
            0.01 * df_swr2.values.reshape(spshape),
            dirs,
        )

    coords = OrderedDict(
        ((attrs.TIMENAME, times), (attrs.FREQNAME, freqs), (attrs.DIRNAME, dirs))
    )
    dims = (attrs.TIMENAME, attrs.FREQNAME, attrs.DIRNAME)
    dset = xr.DataArray(
        data=specdens, coords=coords, dims=dims, name=attrs.SPECNAME
    ).to_dataset()

    if sep_freq is not None:
        # Add the NDBC defined separation frequency for realtime diagnostics
        sfreq = xr.DataArray(
            data=sep_freq,
            coords={attrs.TIMENAME: times},
            dims=(attrs.TIMENAME),
            name=attrs.SPECNAME,
        )
        dset["Sep_Freq"] = sfreq

    # Realtime data is in reversed time order
    dset = dset.sortby("time", ascending=True)

    return dset
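A hedged call sketch for read_ndbc (not part of the original module); the station file names are placeholders. A single file is read as 1D frequency spectra, while a list of the five component files is combined into directional spectra.

# 1D spectral density only
dset_1d = read_ndbc("41010_data_spec.txt")

# directional spectra from the five component files, on 15-degree bins
dset_2d = read_ndbc(
    [
        "41010_data_spec.txt",  # spec
        "41010_swdir.txt",      # swdir
        "41010_swdir2.txt",     # swdir2
        "41010_swr1.txt",       # swr1
        "41010_swr2.txt",       # swr2
    ],
    dirs=np.arange(0, 360, 15),
)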
def plot_xco2_diff(name_month, xco2_mod_slice, xco2_oco_slice, diff_before, diff_after, lat_slice, lon_slice, imonth, mod_dir): '''Produce plots of XCO2 differences inputs (1d arrays): diff in XCO2, before and after lat_slice, lon_slice, lat and lon for each data point imonth, month number, for plot title outputs: plots ''' nobs = len(diff_before) # Creat target dataarray and dataset lat_res = 2. # horizontal resolution of lat and lon you would want lon_res = 2.5 lat = np.linspace(-90, 90, int(180 / lat_res + 1)) # grid lon = np.linspace(-180, 177.5, int(360 / lon_res)) diff_1 = xr.DataArray(data=np.nan, dims=('lat', 'lon'), coords={ 'lat': lat, 'lon': lon }, name='diff') diff_2 = xr.DataArray(data=np.nan, dims=('lat', 'lon'), coords={ 'lat': lat, 'lon': lon }, name='diff') # map 1d data onto dataarray for i in range(nobs): lat = int((lat_slice[i].values + 90) / 2) # lat index lon = int((lon_slice[i].values + 180) / 2.5) diff_1[lat, lon] = -diff_before[i] diff_2[lat, lon] = -diff_after[i] print('y diff before:', "{:.2f}".format(diff_1.mean().values)) print('y diff after:', "{:.2f}".format(diff_2.mean().values)) # figure 1, distribution fig, axes = plt.subplots(1, 2, figsize=[14, 6], subplot_kw={'projection': ccrs.PlateCarree()}, gridspec_kw={ 'hspace': 0.2, 'wspace': 0 }) # before diff_1.plot(ax=axes[0], vmax=4, add_labels=False, cbar_kwargs={'shrink': 0.5}) axes[0].set_title(short_name_of_month(imonth) + ' XCO2: a prior - OCO2', loc='left') axes[0].set_title('ppm', loc='right') axes[0].coastlines() axes[0].gridlines(linestyle='--') # after diff_2.plot(ax=axes[1], vmax=4, add_labels=False, cbar_kwargs={'shrink': 0.5}) axes[1].set_title(short_name_of_month(imonth) + ' XCO2: a posterior - OCO2', loc='left') axes[1].set_title('ppm', loc='right') axes[1].coastlines() axes[1].gridlines(linestyle='--') fig.savefig(mod_dir + 'bio_results_map_diff_' + str(imonth) + '.png', dpi=300) obs_series = xco2_oco_slice.values model_series = xco2_mod_slice.values format_R1, format_rmse1 = r_rmse(obs_series, model_series) #R = ma.corrcoef(ma.masked_invalid(obs_series), ma.masked_invalid(model_series)) #x = obs_series[~np.isnan(obs_series)] #y = model_series[~np.isnan(model_series)] #rmse = np.sqrt(((y - x) ** 2).mean()) #format_R1 = float("{0:.2f}".format(R[0,1])) #format_rmse1 = float("{0:.2f}".format(rmse)) print('R1 is:', format_R1, ' RMSE1 is: ', format_rmse1) obs_series = xco2_oco_slice.values model_series = xco2_oco_slice.values - diff_after format_R2, format_rmse2 = r_rmse(obs_series, model_series) print('R2 is:', format_R2, ' RMSE2 is: ', format_rmse2) # figure 2, scatter plot fig = plt.figure(figsize=[5, 5]) plt.plot([300, 450], [300, 450], c='black') plt.scatter(xco2_oco_slice, xco2_mod_slice, s=0.7, label='A prior') plt.scatter(xco2_oco_slice, xco2_oco_slice - diff_after, s=0.7, label='A posterior') plt.ylim(top=420, bottom=395) plt.xlim(right=420, left=395) plt.text(x=405, y=397.5, s='R1: ' + str(format_R1) + ' RMSE1: ' + str(format_rmse1), size=12) plt.text(x=405, y=396, s='R2: ' + str(format_R2) + ' RMSE2: ' + str(format_rmse2), size=12) plt.title(name_month + ' XCO2 (ppm)') plt.ylabel('GEOS-Chem') plt.xlabel('OCO2') plt.legend(markerscale=4) fig.savefig(mod_dir + 'bio_results_scatter_diff_' + str(imonth) + '.png', dpi=300)
def _read_from_band_structure(self, band_structure, eigenstate_map): """ Uses a sisl's `BandStructure` object to calculate the bands. """ import xarray as xr if band_structure is None: raise ValueError("No band structure (k points path) was provided") if not isinstance(getattr(band_structure, "parent", None), sisl.Hamiltonian): self.setup_hamiltonian() band_structure.set_parent(self.H) else: self.H = band_structure.parent # Define the spin class of this calculation. self.spin = self.H.spin self.ticks = band_structure.lineartick() self.kPath = band_structure._k # We define a wrapper to get the values out of the eigenstates # to give the possibility to the user to do something inbetween # NOTE THAT THIS IS USED BY FAT BANDS TO GET THE WEIGHTS SIMULTANEOUSLY eig_map = eigenstate_map # Also, in this wrapper we will get the spin moments in case it is a non_colinear # calculation if self.spin.is_noncolinear: self.spin_moments = [] elif hasattr(self, "spin_moments"): del self.spin_moments def bands_wrapper(eigenstate, spin_index): if callable(eig_map): eig_map(eigenstate, self, spin_index) if hasattr(self, "spin_moments"): self.spin_moments.append(eigenstate.spin_moment()) return eigenstate.eig # Define the available spins spin_indices = [0] if self.spin.is_polarized: spin_indices = [0, 1] # Get the eigenstates for all the available spin components bands_arrays = [] for spin_index in spin_indices: spin_bands = band_structure.apply.dataarray.eigenstate( wrap=partial(bands_wrapper, spin_index=spin_index), spin=spin_index, coords=('band', ), ) bands_arrays.append(spin_bands) # Merge everything into a single dataarray with a spin dimension self.bands = xr.concat(bands_arrays, "spin").assign_coords({ "spin": spin_indices }).transpose("k", "spin", "band") self.bands['k'] = band_structure.lineark() self.bands.attrs = { "ticks": self.ticks[0], "ticklabels": self.ticks[1], **bands_arrays[0].attrs } if hasattr(self, "spin_moments"): self.spin_moments = xr.DataArray(self.spin_moments, coords={ "k": self.bands.k, "band": self.bands.band, "axis": ["x", "y", "z"] }, dims=("k", "band", "axis"))
def test_tapper_1d(window):
    dummy_array = xr.DataArray(da.ones((10), chunks=(3, )), dims='x')
    win = dummy_array.window
    win.set(window=window, dim='x')
    assert np.array_equal(win.tapper(), sig.get_window(window, 10))
def xr_calcule_stat_climat(l_ds):
    """Computes the climatological statistics for each dataset in the list.

    Input:
    l_ds : list of datasets for which to compute the climatological statistics

    #TODO switch the datasets to dataarrays
    """
    # loop over the periods in l_ds
    for n_d, ds in enumerate(l_ds):
        tdeb = ds.time[0]
        tfin = ds.time[-1]

        def _mon_strftime(time):
            """converts a ds.time value into a yyyy-mm-dd string"""
            assert (time.size == 1)
            yyyy = time.dt.year.item()
            mm = time.dt.month.item()
            dd = time.dt.day.item()
            return f"{yyyy}-{mm}-{dd}"

        sdeb = _mon_strftime(tdeb)
        sfin = _mon_strftime(tfin)
        msg = (f"computing climatological statistics for period "
               f"{n_d+1:03n}/{len(l_ds):03n} [ {sdeb} @ {sfin} ]")
        print(msg)

        # compute the statistics
        moy_per = ds.mean('time')
        std_per = ds.std('time')
        min_per = ds.min('time')
        max_per = ds.max('time')

        # add the time dimension and compute climatology_bnds
        time0 = ds.time[:1]
        time1 = ds.time[0]
        time2 = ds.time[-1]
        # define the weight as the number of years in the climatological period
        time1.coords['poids'] = ds.time.size

        # adjust moy_per
        moy_per = moy_per.expand_dims('time')
        moy_per['time'] = ('time', time0)
        moy_per.attrs = ds.attrs
        cm = moy_per.attrs.get("cell_methods") or ""
        moy_per.attrs.update(cell_methods=(cm + ' time: mean over years').strip())

        # adjust std_per
        std_per = std_per.expand_dims('time')
        std_per['time'] = ('time', time0)
        std_per.attrs = ds.attrs
        cm = std_per.attrs.get("cell_methods") or ""
        std_per.attrs.update(cell_methods=(cm + ' time: std over years').strip())

        # accumulate the periods
        if n_d == 0:
            moy_clim = moy_per.copy()
            std_clim = std_per.copy()
            time_bnds1 = time1.copy()
            time_bnds2 = time2.copy()
            poids_clim = time1.poids.copy()
        else:
            moy_clim = xr.concat((moy_clim, moy_per), dim='time')
            std_clim = xr.concat((std_clim, std_per), dim='time')
            time_bnds1 = xr.concat((time_bnds1, time1), dim='time')
            time_bnds2 = xr.concat((time_bnds2, time2), dim='time')
            poids_clim = xr.concat((poids_clim, time1.coords['poids']), dim='time')

    # compute the time_bnds
    time_bnds = np.vstack((time_bnds1.values, time_bnds2.values)).transpose()

    # put everything together
    nom_var = ds.name
    dso = xr.Dataset()
    dso[nom_var + '_moy_clim'] = moy_clim
    dso[nom_var + '_std_clim'] = std_clim
    dso['poids'] = poids_clim

    # add the climatology_bounds to the dataset
    tampon = xr.DataArray(time_bnds, dims=['time', 'bnds'])
    dso['climatology_bounds'] = tampon

    # add the climatology attribute to time
    dso.time.attrs['climatology'] = 'climatology_bounds'

    # sort everything in chronological order
    dso = dso.sortby('time')

    # remove the etiquette (label) variable
    if 'etiquette' in dso:
        dso = dso.drop('etiquette')

    # return the output
    return dso
def xr_genere_series(ds, freq_entree, freq, poids=None):
    """
    Generates a series for a given type of period from data at a given
    frequency.

    ds : xarray DataArray
    freq_entree : string
        frequency of the input data, one of 'jour' (day), 'mois' (month)
        and 'annee' (year)
    """
    dates = ds.time
    #
    # all weights are equal by default
    #
    if poids is None:
        poids = xr.DataArray(np.ones(ds.time.size), coords=[ds.time])
    #
    # assign to each date the label of the period it belongs to,
    # according to the value of freq
    #
    if freq == 'annee':
        # etiquette = [str(d.year) for d in dates]
        etiquette = dates.dt.year.values
    elif freq == 'saison':
        etiquette = trouve_saison_annee(dates)
    else:
        msg = f"frequency {freq} not supported"
        raise RuntimeError(msg)

    # add the labels to poids
    poids['etiquette'] = ('time', etiquette)
    #
    # compute the mean weighted by poids
    #
    num = (poids * ds).groupby('etiquette').sum('time', skipna=False)
    denom = poids.groupby('etiquette').sum('time')
    ds_sortie = num / denom

    # copy the attributes
    for k in ds.attrs:
        ds_sortie.attrs[k] = ds.attrs[k]

    # update the cell_methods
    if 'cell_methods' in ds_sortie.attrs:
        ds_sortie.attrs['cell_methods'] = ds_sortie.attrs['cell_methods'] + \
            ' time: mean over days'
    else:
        ds_sortie.attrs['cell_methods'] = 'time: mean over days'

    # copy back the variable name
    ds_sortie = ds_sortie.rename(ds.name)

    # use the time value of the start of each period
    tampon = poids.groupby('etiquette').groups
    l_ind = [tampon[k][0] for k in ds_sortie.etiquette.values]
    ds_sortie = ds_sortie.rename({'etiquette': 'time'})
    ds_sortie['time'] = poids.time[l_ind]

    # keep the sum of the weights for each period
    ds_sortie['poids'] = ('time', denom)

    # reorder chronologically
    ds_sortie = ds_sortie.sortby('time')

    return ds_sortie
boc = BrainObservatoryCache()

# Download a list of all targeted areas
targeted_structures = boc.get_all_targeted_structures()

# Download cells for a set of experiments and convert to DataFrame
cells = boc.get_cell_specimens()
cells = pd.DataFrame.from_records(cells)

dsi_cells = cells.query('area == "VISp" & g_dsi_dg >= 1 & p_dg < 0.05')

# find experiment containers for those cells
dsi_ec_ids = dsi_cells['experiment_container_id'].unique()

# Download the ophys experiments containing the drifting gratings stimulus
# for the VISp experiment containers
dsi_exps = boc.get_ophys_experiments(experiment_container_ids=dsi_ec_ids,
                                     stimuli=[stim_info.DRIFTING_GRATINGS])

exp_id = dsi_exps[0]['id']
data_set = boc.get_ophys_experiment_data(exp_id)

dg = DriftingGratings(data_set)
mean_sweeps = dg.mean_sweep_response.values
d = xr.DataArray(mean_sweeps,
                 dims=("stim", "cell"),
                 coords={'cell': [str(x) for x in dg.cell_id] + ['dx']})

d.to_dataframe(name='value').reset_index().to_feather('cells_dg.feather')
dg.stim_table.to_feather('stim_table_dg.feather')
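As a hypothetical follow-up (not in the original script), the exported feather file can be turned back into a 2D xarray object by rebuilding the stim/cell index; pd.read_feather requires pyarrow.

# reload the exported table and restore the ("stim", "cell") layout
cells_df = pd.read_feather('cells_dg.feather')
cells_da = cells_df.set_index(['stim', 'cell'])['value'].to_xarray()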
def select_coordinates(self, source_coordinates, eval_coordinates, index_type="numpy"): """ Select a subset or coordinates if interpolator can downselect. At this point in the execution process, podpac has selected a subset of source_coordinates that intersects with the requested coordinates, dropped extra dimensions from requested coordinates, and confirmed source coordinates are not missing any dimensions. Parameters ---------- source_coordinates : :class:`podpac.Coordinates` Intersected source coordinates eval_coordinates : :class:`podpac.Coordinates` Requested coordinates to evaluate Returns ------- (:class:`podpac.Coordinates`, list) Returns tuple with the first element subset of selected coordinates and the second element the indicies of the selected coordinates """ # TODO: short circuit if source_coordinates contains eval_coordinates # short circuit if source and eval coordinates are the same if source_coordinates == eval_coordinates: return source_coordinates, tuple([slice(0, None)] * len(source_coordinates.shape)) interpolator_queue = self._select_interpolator_queue(source_coordinates, eval_coordinates, "can_select") self._last_select_queue = interpolator_queue # For heterogeneous selections, we need to select and then recontruct each set of dimensions selected_coords = {} selected_coords_idx = {k: np.arange(source_coordinates[k].size) for k in source_coordinates.dims} for udims in interpolator_queue: interpolator = interpolator_queue[udims] extra_dims = [d for d in source_coordinates.udims if d not in udims] sc = source_coordinates.udrop(extra_dims) # run interpolation. mutates selected coordinates and selected coordinates index sel_coords, sel_coords_idx = interpolator.select_coordinates( udims, sc, eval_coordinates, index_type=index_type ) # Save individual 1-D coordinates for later reconstruction for i, k in enumerate(sel_coords.dims): selected_coords[k] = sel_coords[k] selected_coords_idx[k] = sel_coords_idx[i] # Reconstruct dimensions for d in source_coordinates.dims: if d not in selected_coords: # Some coordinates may not have a selector when heterogeneous selected_coords[d] = source_coordinates[d] # np.ix_ call doesn't work with slices, and fancy numpy indexing does not work well with mixed slice/index if isinstance(selected_coords_idx[d], slice) and index_type != "slice": selected_coords_idx[d] = np.arange(source_coordinates[d].size)[selected_coords_idx[d]] selected_coords = Coordinates( [selected_coords[k] for k in source_coordinates.dims], source_coordinates.dims, crs=source_coordinates.crs, validate_crs=False, ) if index_type == "numpy": npcoords = [] has_stacked = False for k in source_coordinates.dims: # Deal with nD stacked source coords (marked by coords being in tuple) if isinstance(selected_coords_idx[k], tuple): has_stacked = True npcoords.extend([sci for sci in selected_coords_idx[k]]) else: npcoords.append(selected_coords_idx[k]) if has_stacked: # When stacked coordinates are nD we cannot use the catchall of the next branch selected_coords_idx2 = npcoords else: # This would not be needed if everything went as planned in # interpolator.select_coordinates, but this is a catchall that works # for 90% of the cases selected_coords_idx2 = np.ix_(*[np.ravel(npc) for npc in npcoords]) elif index_type == "xarray": selected_coords_idx2 = [] for i in selected_coords.dims: # Deal with nD stacked source coords (marked by coords being in tuple) if isinstance(selected_coords_idx[i], tuple): selected_coords_idx2.extend([xr.DataArray(sci, dims=[i]) for sci in 
selected_coords_idx[i]]) else: selected_coords_idx2.append(selected_coords_idx[i]) selected_coords_idx2 = tuple(selected_coords_idx2) elif index_type == "slice": selected_coords_idx2 = [] for i in selected_coords.dims: # Deal with nD stacked source coords (marked by coords being in tuple) if isinstance(selected_coords_idx[i], tuple): selected_coords_idx2.extend(selected_coords_idx[i]) else: selected_coords_idx2.append(selected_coords_idx[i]) selected_coords_idx2 = tuple(selected_coords_idx2) else: raise ValueError("Unknown index_type '%s'" % index_type) return selected_coords, tuple(selected_coords_idx2)
                                                  end_date='20141231',
                                                  min_lat=-10, max_lat=10,
                                                  pressure=1)

# get values at 10 hPa
nox_10_hpa = np.zeros(len(nox.time))
for i in range(len(nox.time)):
    pressure_i = pres_nox[i, :]
    nox_i = nox[i, :]
    n, l = helper_functions.interpolate_to_mls_pressure(pressure_i, nox_i)
    # only keep vmr at 10 hPa
    nox_10_hpa[i] = n[np.where(l == 10)]

nox_10_hpa_dataset = xr.DataArray(nox_10_hpa, coords=[nox.time], dims=["time"])
monthlymeans = nox_10_hpa_dataset.groupby('time.month').mean('time')
anomalies_nox = nox_10_hpa_dataset.groupby('time.month') - monthlymeans

# Load O3
o3, pres_o3 = open_data.load_osiris_ozone_monthly(start_date='20050101',
                                                  end_date='20141231',
                                                  min_lat=-10, max_lat=10,
                                                  pressure=1)

# get values at 10 hPa
o3_10_hpa = np.zeros(len(o3.time))
for i in range(len(o3.time)):
    pressure_i = pres_o3[i, :]
axes[2].set_xlabel('# days')
#fig.suptitle('Cloud Radiative Forcing & Surface Fluxes')
plt.subplots_adjust(hspace=.3)
#plt.show()
plt.savefig('../Figures/10_' + PCT + 'CTR_TOPO_SWCF_LWCF_SHFLX_LHFLX_time_series.png')
#plt.savefig('../Figures/10_'+PCT+'CTR_TOPO_SWCF_LWCF_SHFLX_LHFLX_time_series.pdf')

# output the day 3-7 averages of SWCF, LWCF, CF, SHFLX, LHFLX, FLX for
# CTR-TOPOX for further usage
# Note that (1) indexing starts from 0 and (2) the slice 2:7 does not include
# index 7
SWCF_avg = xr.DataArray(np.mean(Vars[0, 2, 2:7]))
SWCF_avg.name = 'SWCF'
SWCF_avg.attrs['units'] = 'W/m2'

LWCF_avg = xr.DataArray(np.mean(Vars[1, 2, 2:7]))
LWCF_avg.name = 'LWCF'
LWCF_avg.attrs['units'] = 'W/m2'

CF_avg = xr.DataArray(np.mean(Vars[2, 2, 2:7]))
CF_avg.name = 'CF'
CF_avg.attrs['units'] = 'W/m2'

SHFLX_avg = xr.DataArray(np.mean(Vars[3, 2, 2:7]))
SHFLX_avg.name = 'SHFLX'
SHFLX_avg.attrs['units'] = 'W/m2'
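A possible continuation, not taken from the original script: the scalar day 3-7 diagnostics could be bundled into a single Dataset and written to NetCDF for later use; the output file name is a placeholder.

# merge the named scalar DataArrays into one Dataset and save it
avg_ds = xr.merge([SWCF_avg, LWCF_avg, CF_avg, SHFLX_avg])
avg_ds.to_netcdf('../Figures/10_' + PCT + 'CTR_TOPO_avg_day3-7.nc')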
def create_nc_variable_files_on_regular_grid_from_mds(mds_var_dir, mds_files_to_load, mds_grid_dir, output_dir, output_freq_code, vars_to_load = 'all', tiles_to_load = [0,1,2,3,4,5,6,7,8,9,10,11,12], time_steps_to_load = [], meta_variable_specific = dict(), meta_common = dict(), mds_datatype = '>f4', dlon=0.5, dlat=0.5, radius_of_influence = 120000, express=1, kvarnmidx = 2, # coordinate idx for vertical axis # method now is only a place holder. # This can be expanded. For example, # the global interpolated fields can # split to tiles, similarly to # the tiled native fields, to # reduce the size of each file. verbose=True, method = ''): #%% # force mds_files_to_load to be a list (if str is passed) if isinstance(mds_files_to_load, str): mds_files_to_load = [mds_files_to_load] # force time_steps_to_load to be a list (if int is passed) if isinstance(time_steps_to_load, int): time_steps_to_load = [time_steps_to_load] # for ce tiles_to_load to be a list (if int is passed) if isinstance(tiles_to_load, int): tiles_to_load = [tiles_to_load] # if no specific file data passed, read default metadata from json file # -- variable specific meta data script_dir = os.path.dirname(__file__) # <-- absolute dir the script is in if not meta_variable_specific: meta_variable_rel_path = '../meta_json/ecco_meta_variable.json' abs_meta_variable_path = os.path.join(script_dir, meta_variable_rel_path) with open(abs_meta_variable_path, 'r') as fp: meta_variable_specific = json.load(fp) # --- common meta data if not meta_common: meta_common_rel_path = '../meta_json/ecco_meta_common.json' abs_meta_common_path = os.path.join(script_dir, meta_common_rel_path) with open(abs_meta_common_path, 'r') as fp: meta_common = json.load(fp) # info for the regular grid new_grid_min_lat = -90+dlat/2. new_grid_max_lat = 90-dlat/2. new_grid_min_lon = -180+dlon/2. new_grid_max_lon = 180-dlon/2. new_grid_ny = np.int((new_grid_max_lat-new_grid_min_lat)/dlat + 1 + 1e-4*dlat) new_grid_nx = np.int((new_grid_max_lon-new_grid_min_lon)/dlon + 1 + 1e-4*dlon) j_reg = new_grid_min_lat + np.asarray(range(new_grid_ny))*dlat i_reg = new_grid_min_lon + np.asarray(range(new_grid_nx))*dlon j_reg_idx = np.asarray(range(new_grid_ny)) i_reg_idx = np.asarray(range(new_grid_nx)) if (new_grid_ny < 1) or (new_grid_nx < 1): raise ValueError('You need to have at least one grid point for the new grid.') # loop through each mds file in mds_files_to_load for mds_file in mds_files_to_load: # if time steps to load is empty, load all time steps if len(time_steps_to_load ) == 0: # go through each file, pull out the time step, add the time step to a list, # and determine the start and end time of each record. time_steps_to_load = \ get_time_steps_from_mds_files(mds_var_dir, mds_file) first_meta_fname = mds_file + '.' 
+ \ str(time_steps_to_load[0]).zfill(10) + '.meta' # get metadata for the first file and determine which variables # are present meta = xm.utils.parse_meta_file(mds_var_dir + '/' + first_meta_fname) vars_here = meta['fldList'] if not isinstance(vars_to_load, list): vars_to_load = [vars_to_load] if 'all' not in vars_to_load: num_vars_matching = len(np.intersect1d(vars_to_load, vars_here)) print ('num vars matching ', num_vars_matching) # only proceed if we are sure that the variable we want is in this # mds file if num_vars_matching == 0: print ('none of the variables you want are in ', mds_file) print (vars_to_load) print (vars_here) break #%% # load the MDS fields ecco_dataset_all = \ load_ecco_vars_from_mds(mds_var_dir, \ mds_grid_dir, mds_file, vars_to_load = vars_to_load, tiles_to_load=tiles_to_load, model_time_steps_to_load=time_steps_to_load, output_freq_code = \ output_freq_code, meta_variable_specific = \ meta_variable_specific, meta_common=meta_common, mds_datatype=mds_datatype, llc_method = 'bigchunks') # do the actual loading. Otherwise, the code may be slow. ecco_dataset_all.load() # print(ecco_dataset_all.keys()) # loop through each variable in this dataset, for var in ecco_dataset_all.keys(): print (' ' + var) # obtain the grid information (use fields from time=0) # Note that nrtmp would always equal to one, # since each outfile will include only one time-record (e.g. daily, monthly avgs.). ecco_dataset = ecco_dataset_all.isel(time=[0]) var_ds = ecco_dataset[var] shapetmp = var_ds.shape lenshapetmp = len(shapetmp) nttmp = 0 nrtmp = 0 if(lenshapetmp==4): nttmp = shapetmp[0] nrtmp = 0 elif(lenshapetmp==5): nttmp = shapetmp[0] nrtmp = shapetmp[1] else: print('Error! ', var_ds.shape) sys.exit() # Get X,Y of the original grid. They could be XC/YC, XG/YC, XC/YG, etc. # Similar for mask. # default is XC, YC if 'i' in var_ds.coords.keys(): XX = ecco_dataset['XC'] XXname = 'XC' if 'j' in var_ds.coords.keys(): YY = ecco_dataset['YC'] YYname = 'YC' varmask = 'maskC' iname = 'i' jname = 'j' if 'i_g' in var_ds.coords.keys(): XX = ecco_dataset['XG'] XXname = 'XG' varmask = 'maskW' iname = 'i_g' if 'j_g' in var_ds.coords.keys(): YY = ecco_dataset['YG'] YYname = 'YG' varmask = 'maskS' jname = 'j_g' # interpolation # To do it fast, set express==1 (default) if(express==1): orig_lons_1d = XX.values.ravel() orig_lats_1d = YY.values.ravel() orig_grid = pr.geometry.SwathDefinition(lons=orig_lons_1d, lats=orig_lats_1d) if (new_grid_ny > 0) and (new_grid_nx > 0): # 1D grid values new_grid_lon, new_grid_lat = np.meshgrid(i_reg, j_reg) # define the lat lon points of the two parts. new_grid = pr.geometry.GridDefinition(lons=new_grid_lon, lats=new_grid_lat) # Get the neighbor info once. # It will be used repeatedly late to resample data # fast for each of the datasets that is based on # the same swath, e.g. for a model variable at different times. valid_input_index, valid_output_index, index_array, distance_array = \ pr.kd_tree.get_neighbour_info(orig_grid, new_grid, radius_of_influence, neighbours=1) # loop through time steps, one at a time. 
for time_step in time_steps_to_load: i, = np.where(ecco_dataset_all.timestep == time_step) if(verbose): print (ecco_dataset_all.timestep.values) print ('time step ', time_step, i) # load the dataset ecco_dataset = ecco_dataset_all.isel(time=i) # pull out the year, month day, hour, min, sec associated with # this time step if type(ecco_dataset.time.values) == np.ndarray: cur_time = ecco_dataset.time.values[0] else: cur_time = ecco_dataset.time.values #print (type(cur_time)) year, mon, day, hh, mm, ss = \ extract_yyyy_mm_dd_hh_mm_ss_from_datetime64(cur_time) print(year, mon, day) # if the field comes from an average, # extract the time bounds -- we'll use it before we save # the variable if 'AVG' in output_freq_code: tb = ecco_dataset.time_bnds tb.name = 'tb' var_ds = ecco_dataset[var] # 3d fields (with Z-axis) for each time record if(nttmp != 0 and nrtmp != 0): tmpall = np.zeros((nttmp, nrtmp,new_grid_ny,new_grid_nx)) for ir in range(nrtmp): # Z-loop # mask maskloc = ecco_dataset[varmask].values[ir,:] for it in range(nttmp): # time loop # one 2d field at a time var_ds_onechunk = var_ds[it,ir,:] # apply mask var_ds_onechunk.values[maskloc==0]=np.nan orig_field = var_ds_onechunk.values if(express==1): tmp = pr.kd_tree.get_sample_from_neighbour_info( 'nn', new_grid.shape, orig_field, valid_input_index, valid_output_index, index_array) else: new_grid_lon, new_grid_lat, tmp = resample_to_latlon(XX, YY, orig_field, new_grid_min_lat, new_grid_max_lat, dlat, new_grid_min_lon, new_grid_max_lon, dlon, nprocs_user=1, mapping_method = 'nearest_neighbor', radius_of_influence=radius_of_influence) tmpall[it,ir,:] = tmp # 2d fields (without Z-axis) for each time record elif(nttmp != 0): tmpall = np.zeros((nttmp, new_grid_ny,new_grid_nx)) # mask maskloc = ecco_dataset[varmask].values[0,:] for it in range(nttmp): # time loop var_ds_onechunk = var_ds[it,:] var_ds_onechunk.values[maskloc==0]=np.nan orig_field = var_ds_onechunk.values if(express==1): tmp = pr.kd_tree.get_sample_from_neighbour_info( 'nn', new_grid.shape, orig_field, valid_input_index, valid_output_index, index_array) else: new_grid_lon, new_grid_lat, tmp = resample_to_latlon(XX, YY, orig_field, new_grid_min_lat, new_grid_max_lat, dlat, new_grid_min_lon, new_grid_max_lon, dlon, nprocs_user=1, mapping_method = 'nearest_neighbor', radius_of_influence=radius_of_influence) tmpall[it,:] = tmp else: print('Error! both nttmp and nrtmp are zeros.') sys.exit() # set the coordinates for the new (regular) grid # 2d fields if(lenshapetmp==4): var_ds_reg = xr.DataArray(tmpall, coords = {'time': var_ds.coords['time'].values, 'j': j_reg_idx, 'i': i_reg_idx},\ dims = ('time', 'j', 'i')) # 3d fields elif(lenshapetmp==5): # Get the variable name (kvarnm) for Z-axis: k, k_l kvarnm = var_ds.coords.keys()[kvarnmidx] if(kvarnm[0]!='k'): kvarnmidxnew = kvarnmidx for iktmp, ktmp in enumerate(var_ds.coords.keys()): if(ktmp[0]=='k'): kvarnmidxnew = iktmp if(kvarnmidxnew==kvarnmidx): print('Error! 
Seems ', kvarnm, ' is not the vertical axis.') print(var_ds) sys.exit() else: kvarnmidx = kvarnmidxnew kvarnm = var_ds.coords.keys()[kvarnmidx] var_ds_reg = xr.DataArray(tmpall, coords = {'time': var_ds.coords['time'].values, kvarnm: var_ds.coords[kvarnm].values, 'j': j_reg_idx, 'i': i_reg_idx},\ dims = ('time', kvarnm,'j', 'i')) # set the attrs for the new (regular) grid var_ds_reg['j'].attrs = var_ds[jname].attrs var_ds_reg['i'].attrs = var_ds[iname].attrs var_ds_reg['j'].attrs['long_name'] = 'y-dimension' var_ds_reg['i'].attrs['long_name'] = 'x-dimension' var_ds_reg['j'].attrs['swap_dim'] = 'latitude' var_ds_reg['i'].attrs['swap_dim'] = 'longitude' var_ds_reg['latitude'] = (('j'), j_reg) var_ds_reg['longitude'] = (('i'), i_reg) var_ds_reg['latitude'].attrs = ecco_dataset[YYname].attrs var_ds_reg['longitude'].attrs = ecco_dataset[XXname].attrs var_ds_reg['latitude'].attrs['long_name'] = "latitude at center of grid cell" var_ds_reg['longitude'].attrs['long_name'] = "longitude at center of grid cell" var_ds_reg.name = var_ds.name #keys_to_drop = ['tile','j','i','XC','YC','XG','YG'] # drop these ancillary fields -- they are in grid anyway keys_to_drop = ['CS','SN','Depth','rA','PHrefC','hFacC',\ 'maskC','drF', 'dxC', 'dyG', 'rAw', 'hFacW',\ 'rAs','hFacS','maskS','dxG','dyC', 'maskW', \ 'tile','XC','YC','XG','YG'] for key_to_drop in keys_to_drop: #print (key_to_drop) if key_to_drop in var_ds.coords.keys(): var_ds = var_ds.drop(key_to_drop) # any remaining fields, e.g. time, would be included in the interpolated fields. for key_to_add in var_ds.coords.keys(): if(key_to_add not in var_ds_reg.coords.keys()): if(key_to_add != 'i_g' and key_to_add != 'j_g'): var_ds_reg[key_to_add] = var_ds[key_to_add] # use the same global attributs var_ds_reg.attrs = var_ds.attrs #print(var_ds.coords.keys()) #%% # create the new file path name if 'MON' in output_freq_code: fname = var + '_' + str(year) + '_' + str(mon).zfill(2) + '.nc' newpath = output_dir + '/' + var + '/' + \ str(year) + '/' elif ('WEEK' in output_freq_code) or \ ('DAY' in output_freq_code): fname = var + '_' + \ str(year) + '_' + \ str(mon).zfill(2) + '_' + \ str(day).zfill(2) + '.nc' d0 = datetime.datetime(year, 1,1) d1 = datetime.datetime(year, mon, day) doy = (d1-d0).days + 1 newpath = output_dir + '/' + var + '/' + \ str(year) + '/' + str(doy).zfill(3) elif 'YEAR' in output_freq_code: fname = var + '_' + str(year) + '.nc' newpath = output_dir + '/' + var + '/' + \ str(year) else: print ('no valid output frequency code specified') print ('saving to year/mon/day/tile') fname = var + '_' + \ str(year) + '_' + \ str(mon).zfill(2) + '_' + \ str(day).zfill(2) + '.nc' d0 = datetime.datetime(year, 1,1) d1 = datetime.datetime(year, mon, day) doy = (d1-d0).days + 1 newpath = output_dir + '/' + var + '/' + \ str(year) + '/' + str(doy).zfill(3) # create the path if it does not exist/ if not os.path.exists(newpath): os.makedirs(newpath) # convert the data array to a dataset. tmp = var_ds_reg.to_dataset() # add the time bounds field back in if we have an # average field if 'AVG' in output_freq_code: tmp = xr.merge((tmp, tb)) tmp = tmp.drop('tb') # put the metadata back in tmp.attrs = ecco_dataset.attrs # update the temporal and geospatial metadata tmp = update_ecco_dataset_geospatial_metadata(tmp) tmp = update_ecco_dataset_temporal_coverage_metadata(tmp) # save to netcdf. it's that simple. 
if(verbose): print ('saving to %s' % newpath + '/' + fname) # do not include _FillValue encoding = {i: {'_FillValue': False} for i in tmp.variables.keys()} tmp.to_netcdf(newpath + '/' + fname, engine='netcdf4',encoding=encoding) #%% ecco_dataset_all.close() return ecco_dataset, tmp
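A hypothetical call sketch for the function defined above; every path is a placeholder, and 'AVG_MON' is used because the function checks output_freq_code for both the 'AVG' and 'MON' substrings.

# interpolate monthly-mean SSH diagnostics onto the default 0.5-degree grid
create_nc_variable_files_on_regular_grid_from_mds(
    mds_var_dir='/path/to/diags_monthly',
    mds_files_to_load='SSH_mon_mean',
    mds_grid_dir='/path/to/mds_grid',
    output_dir='/path/to/interp_output',
    output_freq_code='AVG_MON',
    vars_to_load=['SSH'],
    dlon=0.5,
    dlat=0.5)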
def kmeans_reduce_ensemble( data: xarray.DataArray, *, method: dict = None, make_graph: bool = MPL_INSTALLED, max_clusters: Optional[int] = None, variable_weights: Optional[np.ndarray] = None, model_weights: Optional[np.ndarray] = None, sample_weights: Optional[np.ndarray] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> Tuple[list, np.ndarray, dict]: """Return a sample of ensemble members using k-means clustering. The algorithm attempts to reduce the total number of ensemble members while maintaining adequate coverage of the ensemble uncertainty in a N-dimensional data space. K-Means clustering is carried out on the input selection criteria data-array in order to group individual ensemble members into a reduced number of similar groups. Subsequently a single representative simulation is retained from each group. Parameters ---------- data : xr.DataArray Selecton criteria data : 2-D xr.DataArray with dimensions 'realization' (N) and 'criteria' (P). These are the values used for clustering. Realizations represent the individual original ensemble members and criteria the variables/indicators used in the grouping algorithm. method : dict Dictionary defining selection method and associated value when required. See Notes. max_clusters : Optional[int] Maximum number of members to include in the output ensemble selection. When using 'rsq_optimize' or 'rsq_cutoff' methods, limit the final selection to a maximum number even if method results indicate a higher value. Defaults to N. variable_weights: Optional[np.ndarray] An array of size P. This weighting can be used to influence of weight of the climate indices (criteria dimension) on the clustering itself. model_weights: Optional[np.ndarray] An array of size N. This weighting can be used to influence which realization is selected from within each cluster. This parameter has no influence on the clustering itself. sample_weights: Optional[np.ndarray] An array of size N. sklearn.cluster.KMeans() sample_weights parameter. This weighting can be used to influence of weight of simulations on the clustering itself. See: https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html random_state: Optional[Union[int, np.random.RandomState]] sklearn.cluster.KMeans() random_state parameter. Determines random number generation for centroid initialization. Use an int to make the randomness deterministic. See: https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html make_graph: bool output a dictionary of input for displays a plot of R² vs. the number of clusters. Defaults to True if matplotlib is installed in runtime environment. Notes ----- Parameters for method in call must follow these conventions: rsq_optimize Calculate coefficient of variation (R²) of cluster results for n = 1 to N clusters and determine an optimal number of clusters that balances cost / benefit tradeoffs. This is the default setting. See supporting information S2 text in https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0152495 method={'rsq_optimize':None} rsq_cutoff Calculate Coefficient of variation (R²) of cluster results for n = 1 to N clusters and determine the minimum numbers of clusters needed for R² > val. val : float between 0 and 1. R² value that must be exceeded by clustering results. method={'rsq_cutoff': val} n_clusters Create a user determined number of clusters. 
val : integer between 1 and N method={'n_clusters': val} Returns ------- list Selected model indexes (positions) np.ndarray KMeans clustering results dict Dictionary of input data for creating R² profile plot. 'None' when make_graph=False References ---------- Casajus et al. 2016. https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0152495 Examples -------- >>> import xclim >>> from xclim.ensembles import create_ensemble, kmeans_reduce_ensemble >>> from xclim.indices import hot_spell_frequency Start with ensemble datasets for temperature: >>> ensTas = create_ensemble(temperature_datasets) Calculate selection criteria -- Use annual climate change Δ fields between 2071-2100 and 1981-2010 normals. First, average annual temperature: >>> tg = xclim.atmos.tg_mean(tas=ensTas.tas) >>> his_tg = tg.sel(time=slice('1990','2019')).mean(dim='time') >>> fut_tg = tg.sel(time=slice('2020','2050')).mean(dim='time') >>> dtg = fut_tg - his_tg Then, Hotspell frequency as second indicator: >>> hs = hot_spell_frequency(tasmax=ensTas.tas, window=2, thresh_tasmax='10 degC') >>> his_hs = hs.sel(time=slice('1990','2019')).mean(dim='time') >>> fut_hs = hs.sel(time=slice('2020','2050')).mean(dim='time') >>> dhs = fut_hs - his_hs Create a selection criteria xr.DataArray: >>> from xarray import concat >>> crit = concat((dtg, dhs), dim='criteria') Finally, create clusters and select realization ids of reduced ensemble: >>> ids, cluster, fig_data = kmeans_reduce_ensemble(data=crit, method={'rsq_cutoff':0.9}, random_state=42, make_graph=False) >>> ids, cluster, fig_data = kmeans_reduce_ensemble(data=crit, method={'rsq_optimize':None}, random_state=42, make_graph=True) """ if make_graph: fig_data = {} if max_clusters is not None: fig_data["max_clusters"] = max_clusters else: fig_data = None data = data.transpose("realization", "criteria") # initialize the variables n_sim = np.shape(data)[0] # number of simulations n_idx = np.shape(data)[1] # number of indicators # normalize the data matrix z = xarray.DataArray( scipy.stats.zscore(data, axis=0, ddof=1), coords=data.coords ) # ddof=1 to be the same as Matlab's zscore if sample_weights is None: sample_weights = np.ones(n_sim) else: # KMeans sample weights of zero cause errors occasionally - set to 1e-15 for now sample_weights[sample_weights == 0] = 1e-15 if model_weights is None: model_weights = np.ones(n_sim) if variable_weights is None: variable_weights = np.ones(shape=(1, n_idx)) if max_clusters is None: max_clusters = n_sim if method is None: method = {"rsq_optimize": None} # normalize the weights (note: I don't know if this is really useful... 
this was in the MATLAB code) sample_weights = sample_weights / np.sum(sample_weights) model_weights = model_weights / np.sum(model_weights) variable_weights = variable_weights / np.sum(variable_weights) z = z * variable_weights rsq = _calc_rsq(z, method, make_graph, n_sim, random_state, sample_weights) n_clusters = _get_nclust(method, n_sim, rsq, max_clusters) if make_graph: fig_data["method"] = method fig_data["rsq"] = rsq fig_data["n_clusters"] = n_clusters fig_data["realizations"] = n_sim # Final k-means clustering with 1000 iterations to avoid instabilities in the choice of final scenarios kmeans = KMeans( n_clusters=n_clusters, n_init=1000, max_iter=600, random_state=random_state ) # we use 'fit_' only once, otherwise it computes everything again clusters = kmeans.fit_predict(z, sample_weight=sample_weights) # squared distance to centroids d = np.square( kmeans.transform(z) ) # squared distance between each point and each centroid out = np.empty( shape=n_clusters ) # prepare an empty array in which to store the results r = np.arange(n_sim) # in each cluster, find the closest (weighted) simulation and select it for i in range(n_clusters): d_i = d[ clusters == i, i ] # distance to the centroid for all simulations within the cluster 'i' if d_i.shape[0] >= 2: if d_i.shape[0] == 2: sig = 1 else: sig = np.std( d_i, ddof=1 ) # standard deviation of those distances (ddof = 1 gives the same as Matlab's std function) like = ( scipy.stats.norm.pdf(d_i, 0, sig) * model_weights[clusters == i] ) # weighted likelihood argmax = np.argmax(like) # index of the maximum likelihood else: argmax = 0 r_clust = r[ clusters == i ] # index of the cluster simulations within the full ensemble out[i] = r_clust[argmax] out = sorted(out.astype(int)) # display graph - don't block code execution return out, clusters, fig_data
def run(self, hotstart=False): '''Performs an AWARE simulation and returns the results (time series). Returns ---- pandas data frame including the results ''' if not self.is_initialized and not self.is_ready: print( 'Error: Model has not been initialized or prepared with initial states.' ) return if hotstart: if self.is_ready: print('AWARE hotstart: Resuming last run ...') else: print( 'Waring: cannot resume run in hotstart mode. Using default initialisation!' ) else: self.reset_state_vars() start_date = pd.Timestamp( self.config.start_date).to_period('M').to_timestamp('M') end_date = pd.Timestamp( self.config.end_date).to_period('M').to_timestamp('M') assert start_date in self.meteo.dates assert end_date in self.meteo.dates dates = pd.date_range(start=start_date, end=end_date, freq='M') rts_catchments = collections.OrderedDict( ) # results including upstream areas rts_catchments_sub_mean = collections.OrderedDict( ) # results sub-catchment only rts = pd.DataFrame(index=dates, columns=self._results_time_series_columns, dtype=float) for cid in self.catchment_ids: rts_catchments[cid] = rts.copy() rts_catchments_sub_mean[cid] = rts.copy() for date in dates: print(date) temp, precip = self.meteo.get_meteo(date) glaciers = self.state_glacierarea.get_state( ) # couple glacier model here! for cid in self.computation_order: params = self.config.params.catchments[cid] catchment = self.catchments[cid] cpx = self.catchments[cid].pixels cswe, snowmelt, snow_outflow, snowfall, rainfall, melt_avail = catchment.melt.melt( self.state_swe.get_state(cpx), precip[cpx], temp[cpx], params.ddf_snow, glacier_fraction=None) self.state_swe.set_state(cswe, cpx) snow_outflow_unglacierized = snow_outflow * (1. - glaciers[cpx]) snow_outflow_glacierized = np.zeros(self.input_grids.dtm.shape) snow_outflow_glacierized = snow_outflow * glaciers[cpx] ice_melt_factor = np.minimum( glaciers[cpx], glaciers[cpx] * melt_avail * params.ddf_ice / params.ddf_snow) cicewe, icemelt, ice_outflow, _, _, _ = catchment.melt.melt( self.state_icewe.get_state(cpx), precip[cpx] * 0.0, temp[cpx], params.ddf_ice, glacier_fraction=ice_melt_factor) self.state_icewe.set_state(cicewe, cpx) glacier_outflow = ice_outflow + snow_outflow_glacierized # get groundwater state gw_storage = self.state_groundwater.get_state(cpx) if self.config.enable_soil_model: et_pot = catchment.evapotranspiration.monthly_evapotranspiration( date, temp[cpx], n_etp_summer=params.factor_etp_summer) et_pot *= (1. 
- glaciers[cpx]) csms, perc, runoff_d, et_act = catchment.soil.soil_water_balance( snow_outflow_unglacierized, et_pot, self.state_soilmoisture.get_state(cpx)) # sms[cpx] = csms self.state_soilmoisture.set_state(csms, cpx) #percolation = perc.mean() direct_runoff = runoff_d.mean() baseflow, gw_storage = catchment.groundwater.groundwater_model( gw_storage, perc) runoff = (baseflow + direct_runoff) + glacier_outflow else: melt_only = snow_outflow_glacierized + glacier_outflow baseflow, gw_storage = catchment.groundwater.groundwater_model( gw_storage, params.gw_n * melt_only) runoff = baseflow + (1 - params.gw_n) * melt_only et_pot = np.zeros(cswe.shape) * np.nan et_act = np.zeros(cswe.shape) * np.nan direct_runoff = np.nan self.state_groundwater.set_state(gw_storage, cpx) rts_cur = rts_catchments_sub_mean[cid].loc[date] # calculate averages for sub-catchment without tributaries rts_cur.temp = temp[cpx].mean() rts_cur.precip = precip[cpx].mean() rts_cur.snowfall = snowfall.mean() rts_cur.rainfall = rainfall.mean() rts_cur.swe = self.state_swe.get_state(cpx).mean() rts_cur.snowmelt = snowmelt.mean() rts_cur.icemelt = icemelt.mean() rts_cur.melt = rts_cur.snowmelt + rts_cur.icemelt rts_cur.snow_outflow = snow_outflow.mean() rts_cur.ice_outflow = ice_outflow.mean() rts_cur.glacier_outflow = glacier_outflow.mean() rts_cur.runoff = runoff.mean() rts_cur.sms = self.state_soilmoisture.get_state(cpx).mean() rts_cur.et_pot = et_pot.mean() rts_cur.et = et_act.mean() rts_cur.baseflow = baseflow.mean() rts_cur.direct_runoff = direct_runoff # rts_cur.icewe = self.state_icewe.get_state(cpx).mean() # activate if required # prepare averages rts_catchments[cid].loc[date] = self.catchments[ cid].area * rts_catchments_sub_mean[cid].loc[date] # add results of tributaries for ii in range(0, len(self.catchments[cid].upstream_ids)): sub_id = self.catchments[cid].upstream_ids[ii] sub_n = self.catchments[cid].upstream_areas[ii] # tributaries rts_cur_sub = rts_catchments_sub_mean[sub_id].loc[date] # set results to total upstream area rts_cur = rts_catchments[cid].loc[date] rts_cur.temp += sub_n * rts_cur_sub.temp rts_cur.precip += sub_n * rts_cur_sub.precip rts_cur.snowfall += sub_n * rts_cur_sub.snowfall rts_cur.rainfall += sub_n * rts_cur_sub.rainfall rts_cur.swe += sub_n * rts_cur_sub.swe rts_cur.snowmelt += sub_n * rts_cur_sub.snowmelt rts_cur.icemelt += sub_n * rts_cur_sub.icemelt rts_cur.melt += sub_n * rts_cur_sub.melt rts_cur.snow_outflow += sub_n * rts_cur_sub.snow_outflow rts_cur.ice_outflow += sub_n * rts_cur_sub.ice_outflow rts_cur.glacier_outflow += sub_n * rts_cur_sub.glacier_outflow rts_cur.runoff += sub_n * rts_cur_sub.runoff rts_cur.sms += sub_n * rts_cur_sub.sms rts_cur.et_pot += sub_n * rts_cur_sub.et_pot rts_cur.et += sub_n * rts_cur_sub.et rts_cur.baseflow += sub_n * rts_cur_sub.baseflow rts_cur.direct_runoff += sub_n * rts_cur_sub.direct_runoff # remember timestamp self.timestamp = date # write stats if required if date in self.write_dates: self.write_states(add_timestamp=True, verbose=True) results = munch.Munch() variables = { k: xr.DataArray(v, dims=['time', 'var']) for k, v in rts_catchments.items() } results.ts = xr.Dataset(variables).to_array( dim='catchment').to_dataset('var') return results
def norm_topo(self, data, elev, solar_za, solar_az, slope=None, aspect=None, method='empirical-rotation', slope_thresh=2, nodata=0, elev_nodata=-32768, scale_factor=1, angle_scale=0.01, n_jobs=1, robust=False, min_samples=100, slope_kwargs=None, aspect_kwargs=None, band_coeffs=None): """ Applies topographic normalization Args: data (2d or 3d DataArray): The data to normalize, in the range 0-1. elev (2d DataArray): The elevation data. solar_za (2d DataArray): The solar zenith angles (degrees). solar_az (2d DataArray): The solar azimuth angles (degrees). slope (2d DataArray): The slope data. If not given, slope is calculated from ``elev``. aspect (2d DataArray): The aspect data. If not given, aspect is calculated from ``elev``. method (Optional[str]): The method to apply. Choices are ['c', 'empirical-rotation']. slope_thresh (Optional[float or int]): The slope threshold. Any samples with values < ``slope_thresh`` are not adjusted. nodata (Optional[int or float]): The 'no data' value for ``data``. elev_nodata (Optional[float or int]): The 'no data' value for ``elev``. scale_factor (Optional[float]): A scale factor to apply to the input data. angle_scale (Optional[float]): The angle scale factor. n_jobs (Optional[int]): The number of parallel workers for ``LinearRegression.fit``. robust (Optional[bool]): Whether to fit a robust regression. min_samples (Optional[int]): The minimum number of samples required to fit a regression. slope_kwargs (Optional[dict]): Keyword arguments passed to ``gdal.DEMProcessingOptions`` to calculate the slope. aspect_kwargs (Optional[dict]): Keyword arguments passed to ``gdal.DEMProcessingOptions`` to calculate the aspect. band_coeffs (Optional[dict]): Slope and intercept coefficients for each band. References: See :cite:`teillet_etal_1982` for the C-correction method. See :cite:`tan_etal_2010` for the Empirical Rotation method. Returns: ``xarray.DataArray`` Examples: >>> import geowombat as gw >>> from geowombat.radiometry import Topo >>> >>> topo = Topo() >>> >>> # Example where pixel angles are stored in separate GeoTiff files >>> with gw.config.update(sensor='l7', scale_factor=0.0001, nodata=0): >>> >>> with gw.open('landsat.tif') as src, >>> gw.open('srtm') as elev, >>> gw.open('solarz.tif') as solarz, >>> gw.open('solara.tif') as solara: >>> >>> src_norm = topo.norm_topo(src, elev, solarz, solara, n_jobs=-1) """ method = method.strip().lower() if method not in ['c', 'empirical-rotation']: logger.exception( " Currently, the only supported methods are 'c' and 'empirical-rotation'." 
) raise NameError attrs = data.attrs.copy() if not nodata: nodata = data.gw.nodata if scale_factor == 1.0: scale_factor = data.gw.scale_factor # Scale the reflectance data if scale_factor != 1: data = data * scale_factor if not slope_kwargs: slope_kwargs = dict(format='MEM', computeEdges=True, alg='ZevenbergenThorne', slopeFormat='degree') if not aspect_kwargs: aspect_kwargs = dict(format='MEM', computeEdges=True, alg='ZevenbergenThorne', trigonometric=False, zeroForFlat=True) slope_kwargs['format'] = 'MEM' slope_kwargs['slopeFormat'] = 'degree' aspect_kwargs['format'] = 'MEM' # Force to SRTM resolution proc_dims = (int((data.gw.ncols * data.gw.cellx) / 30.0), int((data.gw.nrows * data.gw.celly) / 30.0)) w = int((5 * 30.0) / data.gw.celly) if w % 2 == 0: w += 1 if isinstance(slope, xr.DataArray): slope_deg_fd = slope.squeeze().data else: slope_deg = calc_slope_delayed(elev.squeeze().data, proc_dims=proc_dims, w=w, **slope_kwargs) slope_deg_fd = da.from_delayed(slope_deg, (data.gw.nrows, data.gw.ncols), dtype='float64') if isinstance(aspect, xr.DataArray): aspect_deg_fd = aspect.squeeze().data else: aspect_deg = calc_aspect_delayed(elev.squeeze().data, proc_dims=proc_dims, w=w, **aspect_kwargs) aspect_deg_fd = da.from_delayed(aspect_deg, (data.gw.nrows, data.gw.ncols), dtype='float64') nodata_samps = da.where( (elev.data == elev_nodata) | (data.max(dim='band').data == nodata) | (slope_deg_fd < slope_thresh), 1, 0) slope_rad = da.deg2rad(slope_deg_fd) aspect_rad = da.deg2rad(aspect_deg_fd) # Convert degrees to radians solar_za = da.deg2rad(solar_za.squeeze().data * angle_scale) solar_az = da.deg2rad(solar_az.squeeze().data * angle_scale) cos_z = da.cos(solar_za) # Calculate the illumination angle il = da.cos(slope_rad) * cos_z + da.sin(slope_rad) * da.sin( solar_za) * da.cos(solar_az - aspect_rad) sr_adj = list() for band in data.band.values.tolist(): if method == 'c': sr_adj.append( self._method_c( data.sel(band=band).data, il, cos_z, nodata_samps, min_samples, n_jobs, robust, band_coeffs, band)) else: sr_adj.append( self._method_empirical_rotation( data.sel(band=band).data, il, cos_z, nodata_samps, min_samples, n_jobs, robust, band_coeffs, band)) adj_data = xr.DataArray(data=da.concatenate(sr_adj).reshape( (data.gw.nbands, data.gw.nrows, data.gw.ncols)), coords={ 'band': data.band.values.tolist(), 'y': data.y.values, 'x': data.x.values }, dims=('band', 'y', 'x'), attrs=data.attrs) attrs['calibration'] = 'Topographic-adjusted' attrs['nodata'] = nodata attrs['drange'] = (0, 1) adj_data.attrs = attrs return adj_data
def da1():
    """Small plain two-dimensional xr.DataArray."""
    return xr.DataArray([[0, 1], [3, 4], [6, 7]], dims=("x", "y"))
] #'gaussian', #'general_gaussian', 'chebwin', #'slepian', 'cosine', 'hann'] sig_xyt = example_xyt() sig_xyt_wth_coast = example_xyt(boundaries=True) # Testing array shape = (48, 30, 40) dims = ('time', 'y', 'x') ctime = pd.date_range('2000-01-01', periods=48, freq='M') cy = np.linspace(0.01, 0.5, 30) cx = np.pi * np.linspace(0, 2, 40) coords = {'time': ctime, 'y': cy, 'x': cx} dummy_array = xr.DataArray(np.random.random(shape), dims=dims, coords=coords) def test_set_nyquist(): w = dummy_array.window w.set(dim=['y', 'x']) assert w.fnyq == { 'x': 1. / (2. * (cx[1] - cx[0])), 'y': 1. / (2. * (cy[1] - cy[0])) } def test_init_window(): sig_xyt.window
def da2():
    """Small plain two-dimensional xr.DataArray with different values compared to da1."""
    return xr.DataArray([[0, 1], [5, 6], [6, 7]], dims=("x", "y"))
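# da1 and da2 above look like pytest fixtures with the @pytest.fixture decorator
# stripped. A hedged sketch of how a test could consume them: arithmetic on two
# DataArrays with identical dims aligns element-wise, and xr.testing supplies the
# assertions typically used in such suites. The test name below is hypothetical.
import xarray as xr

def test_da1_da2_difference():
    a = da1()
    b = da2()
    diff = b - a
    expected = xr.DataArray([[0, 0], [2, 2], [0, 0]], dims=("x", "y"))
    xr.testing.assert_equal(diff, expected)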
def spinup_fn(run, field, months_list, filenames=['atmos_pentad'], plevs=[0., 2000., 'all']):
    # Function to open files for a specified month range and filename.
    # Takes annual means.

    def open_files(run, months, filename):
        name_temp = '/scratch/rg419/Data_moist/' + run + '/run%03d/' + filename + '.nc'
        names = [name_temp % m for m in range(months[0], months[1])]
        # read data into xarray
        data = xr.open_mfdataset(names, decode_times=False, chunks={'time': 30})
        data.coords['year'] = data.time // 360 + 1
        field_yr = data[field].groupby('year').mean(('time'))
        return field_yr, data

    # Combine data from files with different names (e.g. atmos_monthly and atmos_pentad) into one time series
    arrays = []
    i = 0
    for filename in filenames:
        field_yr, data = open_files(run, months_list[i], filename)
        arrays.append(field_yr)
        i = i + 1
    field_yr = xr.concat(arrays, dim='year')

    # Check if data is 3D and if so integrate over specified levels
    try:
        p_levs = data.pfull[(data.pfull >= plevs[0]) & (data.pfull <= plevs[1])]
        dp = xr.DataArray(np.diff(data.phalf), [('pfull', field_yr.pfull)]) * 100.
        field_yr = (field_yr * dp).sel(pfull=p_levs).sum('pfull') / 9.8
        print('3D field, vertical integral taken')
        three_d = True
    except Exception:
        # Fields without a pfull dimension fall through here
        print('2D field')
        three_d = False

    # Calculate cell areas and take area mean
    area = cell_area(42, '/scratch/rg419/GFDL_model/GFDLmoistModel/')
    area_xr = xr.DataArray(area, [('lat', data.lat), ('lon', data.lon)])
    field_av = (field_yr * area_xr).sum(('lat', 'lon')) / area_xr.sum(('lat', 'lon'))

    # Plot up result and save
    field_av.plot()
    plt.xlabel('Year')
    plt.ylabel(field)
    if three_d:
        plotname = '/scratch/rg419/plots/spinup/' + field + '_' + str(plevs[2]) + '_spinup_' + run + '.png'
    else:
        plotname = '/scratch/rg419/plots/spinup/' + field + '_spinup_' + run + '.png'
    plt.savefig(plotname)
    plt.close()

    return field_av
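# The try-block in spinup_fn performs a mass-weighted vertical integral: multiply each
# level by its pressure thickness dp (converted from hPa to Pa), sum over pfull and
# divide by g = 9.8 m s-2. A standalone sketch of that step with synthetic data
# (variable names here are illustrative only):
import numpy as np
import xarray as xr

phalf = np.array([0., 100., 300., 600., 1000.])                     # hPa, layer interfaces
pfull = 0.5 * (phalf[:-1] + phalf[1:])                              # hPa, layer midpoints
q = xr.DataArray(np.full(4, 0.002), dims="pfull", coords={"pfull": pfull})  # e.g. kg/kg

dp = xr.DataArray(np.diff(phalf), dims="pfull", coords={"pfull": pfull}) * 100.0  # hPa -> Pa
q_column = (q * dp).sum("pfull") / 9.8                              # column integral, kg m-2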
def small_verif_da():
    """Very small simulation of a verification product."""
    time = [1990, 1991, 1992, 1993, 1994]
    return xr.DataArray(np.random.rand(len(time)), dims=["time"], coords=[time])
def compare_XCO2(mod_dir, sat_dir, year, month, str1_mod, str1_sat, mode): mod_file = str1_mod + str(year) + month_string(month) + '.nc' sat_file = str1_sat + str(year) + month_string(month) + '.nc' ds_mod = xr.open_dataset(mod_dir + mod_file) ds_sat = xr.open_dataset(sat_dir + sat_file) # mask data: predefined region, land vs. ocean, latitudinal bands mask_directory = '/geos/u73/msadiq/GEOS-Chem/analysis/inversion/data/' mask_name = 'flatten_mask.nc' ds_mask = xr.open_dataset(mask_directory + mask_name) avg_kern = ds_sat['xco2_averaging_kernel'] co2_pr = ds_sat['co2_profile_apriori'] pres_wgt = ds_sat['pressure_weight'] op_mode = ds_sat['operation_mode'] # 0=Nadir, 1=Glint mode_mask = (op_mode - 1) * -1 # mask to mask out glint, 1=Nadir, 0=Glint # model simulated CO2 concentration co2_model = ds_mod['SpeciesConc_CO2'] * 1e-3 # unit: ppbv to ppmv co2_profile = pres_wgt * ( 1 - avg_kern) * co2_pr + pres_wgt * avg_kern * co2_model # sum up to get column CO2 xco2_model = co2_profile.sum( dim='levels') # sum along vertical axis, unit: ppbv to ppm xco2_model_mode = xco2_model * mode_mask # extract desired mode of observation: Nadir xco2_oco2_mode = ds_sat['xco2'] * mode_mask obs_series = xco2_oco2_mode.values model_series = xco2_model_mode.values obs_series[obs_series == 0] = 'nan' model_series[model_series == 0] = 'nan' format_R, format_rmse = r_rmse(obs_series, model_series) #R = ma.corrcoef(ma.masked_invalid(obs_series), ma.masked_invalid(model_series)) #x = obs_series[~np.isnan(obs_series)] #y = model_series[~np.isnan(model_series)] #rmse = np.sqrt(((y - x) ** 2).mean()) #format_R = float("{0:.2f}".format(R[0,1])) #format_rmse = float("{0:.2f}".format(rmse)) print('R is:', format_R, ' RMSE is: ', format_rmse) xco2_model_land = xco2_model_mode * ds_mask['land'][0:len(xco2_model)] xco2_model_land[xco2_model_land == 0] = 'nan' xco2_oco2_land = xco2_oco2_mode * ds_mask['land'][0:len(xco2_model)] xco2_oco2_land[xco2_oco2_land == 0] = 'nan' fig = plt.figure(figsize=[5, 5]) name_month = short_name_of_month(month) for region in ['high_lat', 'mid_lat', 'low_lat']: xco2_model_mask = xco2_model_land * ds_mask[region][ 0:len(xco2_model_land)] xco2_model_mask[xco2_model_mask == 0] = 'nan' xco2_oco2_mask = xco2_oco2_land * ds_mask[region][0:len(xco2_model_land )] xco2_oco2_mask[xco2_oco2_mask == 0] = 'nan' plt.scatter(xco2_oco2_mask, xco2_model_mask, s=0.7, label=region) plt.plot([300, 450], [300, 450], c='r') plt.ylim(top=420, bottom=395) plt.xlim(right=420, left=395) plt.title(name_month + ' XCO2 (ppm)') plt.ylabel('GEOS-Chem') plt.xlabel('OCO2') plt.legend(markerscale=4) plt.text(x=410, y=399, s='R: ' + str(format_R), size=12) plt.text(x=410, y=398, s='RMSE: ' + str(format_rmse), size=12) fig.savefig(mod_dir + '/mod_vs_obs_XCO2_latitudinal_' + mode + '_' + name_month + '.png', dpi=300) diff = xco2_oco2_land - xco2_model_land # diff to calculate a posteriori new_data = diff.to_dataset(name='diff') new_data['xco2_oco2'] = xco2_oco2_land new_data['xco2_model'] = xco2_model_land new_data['xco2_error'] = ds_sat['xco2_uncertainty'] new_data['lat'] = ds_mod['lat'] new_data['lon'] = ds_mod['lon'] new_data['date'] = ds_mod['date'] new_data.to_netcdf(mod_dir + 'XCO2_mod_and_oco2_' + mode + '_' + name_month + '.nc') # Creat target dataarray and dataset lat_res = 2 # horizontal resolution of lat and lon you would want lon_res = 2.5 lat = np.linspace(-90, 90, int(180 / lat_res + 1)) # grid lon = np.linspace(-180, 177.5, int(360 / lon_res)) days = len(diff) / (len(lat) * len(lon)) var_3d = xr.DataArray(data=np.nan, 
dims=('days', 'lat', 'lon'), coords={ 'days': range(int(days)), 'lat': lat, 'lon': lon }, name='diff') var_3d = var_3d.astype(dtype='float32') diff2 = xco2_model_land - xco2_oco2_land # diff to map onto global map var_3d.values = diff2.values.reshape((int(days), len(lat), len(lon))) var_2d = var_3d.mean(dim='days') # plot after mapping fig = plt.figure(figsize=[8, 8]) proj = ccrs.PlateCarree() ax = plt.subplot(111, projection=proj) # var_2d.plot(ax=ax, vmax=3, add_labels=False, cbar_kwargs={'shrink': 0.4}) ax.set_title(name_month + ' XCO2: a posterior - OCO2', loc='left') ax.set_title('ppm', loc='right') ax.coastlines() ax.gridlines(linestyle='--') fig.savefig(mod_dir + 'diff_map_' + name_month + '.png', dpi=300) ds_output = var_3d.to_dataset() var_3d.values = xco2_model_land.values.reshape( (int(days), len(lat), len(lon))) ds_output['mod'] = var_3d.copy() var_3d.values = xco2_oco2_land.values.reshape( (int(days), len(lat), len(lon))) ds_output['obs'] = var_3d.copy() ds_output.to_netcdf(mod_dir + 'XCO2_diff_' + str(month) + '.nc')
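# The model-satellite comparison above hinges on applying the OCO-2 averaging kernel to
# the model profile before summing to a column: XCO2 = sum_l( pw_l * ((1 - ak_l) * prior_l
# + ak_l * model_l) ). A minimal sketch of that step with synthetic profiles (names mirror
# the function above, but the numbers are made up):
import numpy as np
import xarray as xr

pres_wgt = xr.DataArray(np.full(20, 1.0 / 20), dims="levels")           # pressure weights, sum to 1
avg_kern = xr.DataArray(np.linspace(0.3, 1.0, 20), dims="levels")       # averaging kernel
co2_prior = xr.DataArray(np.full(20, 400.0), dims="levels")             # a priori profile (ppm)
co2_model = xr.DataArray(np.linspace(395.0, 410.0, 20), dims="levels")  # model profile (ppm)

co2_profile = pres_wgt * (1 - avg_kern) * co2_prior + pres_wgt * avg_kern * co2_model
xco2_model = co2_profile.sum(dim="levels")  # single column-average value in ppm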
def xr_fix(dtset, model_name='NorESM'): """ :param dtset: :param model_name: :return: """ # print('xr_fix: Doing various fixes for %s' % model_name) log.ger.debug('xr_fix: Doing various fixes for %s' % model_name) # Rename stuff: # if (model_name != 'NorESM'): # for key in dtset.variables: # print(key) # if (not sizedistribution or key not in constants.list_sized_vars_noresm): # var_name_noresm = translate_var_names.model2NorESM(key, model_name) # # if 'orig_name' not in dtset[key].attrs: # dtset[key].attrs['orig_name'] = key # if (len(var_name_noresm) > 0): # print('Translate %s to %s ' % (key, var_name_noresm)) # dtset = dtset.rename({key: var_name_noresm}) ############################ # NorESM: ############################ if model_name == 'NorESM': # print('So far not much to do') # time = dtset['time'].values # do not cast to numpy array yet # # if isinstance(time[0], float): # time_unit = dtset['time'].attrs['units'] # time_convert = num2date(time[:] - 15, time_unit, dtset.time.attrs['calendar']) # dtset.coords['time'] = time_convert NCONC_noresm = constants.sized_varListNorESM['NCONC'] for nconc in NCONC_noresm: typ = 'numberconc' if nconc in dtset: # if (dtset[nconc].attrs['units'] = '#/m3'): _ch_unit(dtset, typ, nconc) NMR_noresm = constants.sized_varListNorESM['NMR'] for nmr in NMR_noresm: typ = 'NMR' if nmr in dtset: if dtset[nmr].attrs['units'] == 'm': _ch_unit(dtset, typ, nmr) if 'NNAT_0' in dtset.data_vars: dtset['SIGMA00'] = dtset['NNAT_0'] * 0 + 1.6 # Kirkevag et al 2018 dtset['SIGMA00'].attrs['units'] = '-' # Kirkevag et al 2018 dtset[ 'NMR00'] = dtset['NNAT_0'] * 0 + 62.6 # nm Kirkevag et al 2018 dtset['NMR00'].attrs['units'] = 'nm' # nm Kirkevag et al 2018 dtset['NCONC00'] = dtset['NNAT_0'] for cvar in ['AWNC']: if cvar in dtset: if dtset[cvar].units == 'm-3': dtset[cvar].values = 1.e-6 * dtset[cvar].values dtset[cvar].attrs['units'] = '#/cm^3' for cvar in ['ACTNI', 'ACTNL']: if cvar in dtset: if dtset[cvar].units != '#/cm^3': dtset[cvar].values = 1.e-6 * dtset[cvar].values dtset[cvar].attrs['units'] = '#/cm^3' #while cont: for i in range(10): typ = 'numberconc' varSEC = 'nrSO4_SEC%02.0f' % i if varSEC in dtset.data_vars: _ch_unit(dtset, typ, varSEC) for i in range(10): varSEC = 'nrSOA_SEC%02.0f' % i typ = 'numberconc' if varSEC in dtset.data_vars: _ch_unit(dtset, typ, varSEC) #for mm_var in ['SOA_NA','SO4_NA','SOA_A1','SO4_A1']: # typ='mixingratio' # if mm_var in dtset.data_vars: # _ch_unit(dtset,typ,mm_var) for sec_var in [ 'N_secmod', 'nrSO4_SEC_tot', 'nrSOA_SEC_tot', 'nrSEC_tot' ] + ['nrSEC%02.0f' % ii for ii in range(1, 6)]: typ = 'numberconc' if sec_var in dtset: if dtset[sec_var].attrs['units'] == 'unit': _ch_unit(dtset, typ, sec_var) for ii in np.arange(1, 6): typ = 'numberconc' sec_nr = 'nrSOA_SEC%02.0f' % ii if sec_nr in dtset: if dtset[sec_nr].attrs['units'] == 'unit': _ch_unit(dtset, typ, sec_nr) sec_nr = 'nrSO4_SEC%02.0f' % ii if sec_nr in dtset: typ = 'numberconc' if dtset[sec_nr].attrs['units'] == 'unit': _ch_unit(dtset, typ, sec_nr) # dtset[sec_nr].values = dtset[sec_nr].values * 1e-6 # dtset[sec_nr].attrs['units'] = 'cm-3' # get weights: if 'lat' in dtset: if 'gw' in dtset.data_vars: dtset['lat_wg'] = dtset['gw'] else: wgts_ = area_mod.get_wghts_v2(dtset) dtset['lat_wg'] = xr.DataArray(wgts_, coords=[dtset.coords['lat']], dims=['lat'], name='lat_wg') if 'lon' in dtset: if np.min(dtset['lon'].values) >= 0: log.ger.debug('xr_fix: shifting lon to -180-->180') dtset.coords['lon'] = (dtset['lon'] + 180) % 360 - 180 dtset = dtset.sortby('lon') # index = ['lev 
is dimension', 'orig_name', 'units'] # for var in dtset.data_vars: # keys = [] # var_entery = [] # if 'orig_name' in dtset[var].attrs: # keys.append('original_var_name') # var_entery.append(dtset[var].attrs['orig_name']) # if 'units' in dtset[var].attrs: # keys.append('units') # var_entery.append(dtset[var].attrs['units']) # keys.append('lev_is_dim') # var_entery.append(int('lev' in dtset[var].coords)) # var_overview_sql.open_and_create_var_entery(model_name, # dtset.attrs['case_name'], # var, var_entery, keys) # dtset.attrs['startyear'] = int(dtset['time.year'].min()) # dtset.attrs['endyear'] = int(dtset['time.year'].max()) return dtset
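# xr_fix above recentres 0-360 longitudes to -180..180 with the modular shift
# (lon + 180) % 360 - 180 followed by a sortby. A minimal standalone sketch of that
# idiom on a toy dataset (synthetic data, hypothetical variable name):
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"tas": (("lat", "lon"), np.random.rand(3, 8))},
    coords={"lat": [-30, 0, 30], "lon": np.arange(0, 360, 45)},
)
ds = ds.assign_coords(lon=(ds["lon"] + 180) % 360 - 180).sortby("lon")
print(ds.lon.values)  # [-180 -135  -90  -45    0   45   90  135]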
yrst = 1958
yrend = 2016
ventsec = 7
lastinit = 4217

# Ariane input
ds_initial = xr.open_mfdataset(filepath_initial, combine='nested', concat_dim='ntraj')
ds_initial.init_volume.name = 'init_volume'

# Ariane output
ds = xr.open_mfdataset(filepath, combine='nested', concat_dim='ntraj')
ds = xr.merge([ds, ds_initial.init_volume])
ds['final_age'] = ds.final_age.astype('timedelta64[s]').astype('float64') / spy
ds['final_dens'] = calc_sigmantr(ds.final_temp, ds.final_salt)

# Model times
time_vals = np.append(np.array([0]), sio.loadmat(filepath_time)['time'].squeeze())
time = xr.DataArray(time_vals, dims=['nfile'], coords={'nfile': np.arange(time_vals.size)})

# Region limits
region_limits = np.loadtxt(filepath_region)

# Bins
years = np.arange(yrst, yrend + 1)
ages = np.arange(-3 / 12, yrend - yrst + 9 / 12)
densities = np.arange(27.7, 28, 0.01)
init_t_unique = np.unique(ds.init_t)
inits = np.append(init_t_unique - 0.5, init_t_unique[-1] + 0.5)
xs = np.arange(region_limits[0, 0], region_limits[0, 1])
ys = np.arange(region_limits[1, 0], region_limits[1, 1])

var1 = ds.final_x
var2 = ds.final_y
var3 = ds.final_age
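# The bin edges prepared above (years, ages, densities, xs, ys) suggest the Ariane
# trajectories are subsequently histogrammed, typically weighted by the initial volume
# of each particle. A hedged sketch of one such binning, assuming that intent and
# reusing the ds, xs and ys defined above (this is not the original analysis, just an
# illustration of the pattern):
import numpy as np
import xarray as xr

final_x = np.asarray(ds.final_x)
final_y = np.asarray(ds.final_y)
weights = np.asarray(ds.init_volume)

counts, xedges, yedges = np.histogram2d(final_x, final_y, bins=[xs, ys], weights=weights)
vol_map = xr.DataArray(
    counts,
    dims=("x", "y"),
    coords={"x": 0.5 * (xedges[:-1] + xedges[1:]), "y": 0.5 * (yedges[:-1] + yedges[1:])},
    name="transported_volume",
)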
def construct_tripolar_grid( point_type="t", add_attrs=False, attr_fmt="gfdl", retain_coords=False ): """Generate a tripolar grid based on a real 5-degree MOM6 configuration Parameters ---------- point_type : str, optional Grid type. Options are t, u, v, c. By default "t" (tracer) add_attrs : bool, optional Include lat and lon variable attributes, by default False attr_fmt : str, optional Modeling center attribute format, by default "gfdl" retain_coords : bool, optional Keep geolon, geolat, and wet in the dataset, by default False Returns ------- xarray.Dataset Shell dataset with masked variable and ocean depth field """ ds_in = pkgr.resource_filename("mdtf_test_data", "resources/ocean_static_5deg.nc") ds_in = xr.open_dataset(ds_in) # -- if CMIP format is requested, use CESM version as output attr_fmt = "ncar" if attr_fmt == "cmip" else attr_fmt if point_type == "t": lat = ds_in["geolat"] lon = ds_in["geolon"] wet = ds_in["wet"] elif point_type == "u": lat = ds_in["geolat_u"] lon = ds_in["geolon_u"] wet = ds_in["wet_u"] elif point_type == "v": lat = ds_in["geolat_v"] lon = ds_in["geolon_v"] wet = ds_in["wet_v"] elif point_type == "c": lat = ds_in["geolat_c"] lon = ds_in["geolon_c"] wet = ds_in["wet_c"] dset = xr.Dataset() dset["mask"] = xr.where(wet == 0.0, np.nan, 1.0) if point_type == "t": dset["depth"] = xr.DataArray(ds_in["depth_ocean"]) if retain_coords is True: dset[lat.name] = lat dset[lon.name] = lon dset[wet.name] = wet if add_attrs is True: if attr_fmt == "gfdl": dset[lat.name].attrs = {} dset[lon.name].attrs = {} dset[wet.name].attrs = {} elif attr_fmt == "ncar": dset[lat.name].attrs = { "axis": "Y", "standard_name": "latitude", "title": "Latitude", "type": "double", "units": "degrees_north", "valid_max": 90.0, "valid_min": -90.0, } dset[lon.name].attrs = { "axis": "X", "standard_name": "longitude", "title": "Longitude", "type": "double", "units": "degrees_east", "valid_max": 360.0, "valid_min": 0.0, } dset[wet.name].attrs = {} else: raise ValueError("Unknown attribute format") else: dset[lat.name].attrs = {} dset[lon.name].attrs = {} dset[wet.name].attrs = {} if attr_fmt == "ncar": dset = dset.rename({"xh": "nlon", "yh": "nlat"}) lat_range = np.array(np.arange(1, len(dset["nlat"]) + 1, dtype=np.intc)) dset["nlat"] = xr.DataArray(lat_range, dims=("nlat")) dset["nlat"].attrs = { "long_name": "cell index along second dimension", "units": "1", } lon_range = np.array(np.arange(1, len(dset["nlon"]) + 1, dtype=np.intc)) dset["nlon"] = xr.DataArray(lon_range, dims=("nlon")) dset["nlon"].attrs = { "long_name": "cell index along first dimension", "units": "1", } dset = dset.rename({lat.name: "lat", lon.name: "lon"}) return dset
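# A hedged usage sketch for construct_tripolar_grid defined above: build the NCAR/CMIP
# flavoured tracer-point grid and inspect the mask and coordinate attributes. It assumes
# the mdtf_test_data resource file ocean_static_5deg.nc is installed alongside the package.
ds_t = construct_tripolar_grid(point_type="t", add_attrs=True, attr_fmt="ncar", retain_coords=True)
print(ds_t["mask"].dims)           # expected ('nlat', 'nlon') after the NCAR-style rename
print(ds_t["lat"].attrs["units"])  # expected 'degrees_north'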
def read( self, name, tracdir, tracfile, varnames, dates, interpol_flx=False, tracer=None, model=None, **kwargs ): """Get fluxes from pre-computed fluxes and load them into a pyCIF variables Args: self: the fluxes Plugin name: the name of the component tracdir, tracfile: flux directory and file format dates: list of dates to extract interpol_flx (bool): if True, interpolates fluxes at time t from values of surrounding available files """ # Replace tracfile by available information from model if tracfile == "": tracfile = model.fluxes.file # Available files in the directory list_files = os.listdir(tracdir) list_available = [] for flx_file in list_files: try: list_available.append( datetime.datetime.strptime(flx_file, tracfile) ) except BaseException: continue list_available = np.array(list_available) # Reading required fluxes files trcr_flx = [] for dd in dates: delta = dd - list_available mask = delta >= datetime.timedelta(0) imin = np.argmin(delta[mask]) fdates = list_available[mask][imin] filein = fdates.strftime("{}/{}".format(tracdir, tracfile)) data, times = readnc(filein, [name, "Times"]) # Get the correct hour in the file times = [ datetime.datetime.strptime( str(b"".join(s), "utf-8"), "%Y-%m-%d_%H:%M:%S" ) for s in times ] hour = int((dd - times[0]).total_seconds() // 3600) trcr_flx.append(data[hour, ...]) # Building a xarray xmod = xr.DataArray( trcr_flx, coords={"time": dates}, dims=("time", "lev", "lat", "lon") ) return xmod
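# The read() method above recovers file dates by round-tripping each flux file name
# through the same template used to write it (tracfile). A minimal sketch of that trick,
# assuming a template such as "fluxes_%Y%m%d%H.nc" (the template string here is
# hypothetical; in pyCIF the real one comes from model.fluxes.file):
import datetime

tracfile = "fluxes_%Y%m%d%H.nc"
available = ["fluxes_2019010100.nc", "fluxes_2019010200.nc", "README.txt"]

dates = []
for name in available:
    try:
        dates.append(datetime.datetime.strptime(name, tracfile))
    except ValueError:
        continue  # skip files that do not match the template
print(dates)  # the two flux files, parsed back to datetimes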
print('get alpha for init_time of ',cit) alp=alpha.sel(init_time=DOY[cit],nregions=cR) # add 1 for fore_time = 0 alp_zero = alp[0] # begin by stealing part of another dataarray alp_zero = alp_zero.expand_dims('fore_time') alp_zero.fore_time[0] = 0 alp_zero.load() alp_zero[0] = 1.0 alp = xr.concat([alp_zero,alp],dim='fore_time') damped = anom.isel(time=cit).values*alp damped.name = 'DampedAnom' tmp = anom['time'].isel(time=cit).values # +np.timedelta64(5,'D') damptimes = pd.to_datetime( damped.fore_time.values, unit='D', origin = tmp) damped.coords['time'] = xr.DataArray(damptimes, dims='fore_time', coords={'fore_time':damped.fore_time}) damped = damped.swap_dims({'fore_time':'time'}) #print('damped ',damped) damped = damped + ds_climo_reg damped = damped.where(damped>0, other=0) damped.plot(ax=ax1, color=cc, linewidth=2) damped.plot(ax=ax1, label=str(cdate.year)+' DampedAnom', color=cc, linewidth=2) ds_obs_reg.plot(ax=ax1, label=str(cdate.year)+' Observed', color='m', linewidth=8) ax1.set_ylabel('Sea Ice Extent\n [Millions of square km]') # # 1980-2010 Historical Interquartile Range # plt.fill_between(ds_per_mean.time.values, ds_per_mean + ds_per_std, # ds_per_mean - ds_per_std, alpha=0.35, label='1980-2010\nInterquartile Range', color='m') ax1.set_xlim(cxlims) # fix x limits cylims = ax1.get_ylim()
def get_lutevents(dataset, scale_factor=28e-6, event_dim='number_of_events', x_range=(-0.31, 0.31), y_range=(-0.31, 0.31)): """ Build an event lookup table. Assign each event location a "sort of" pixel ID based on its fixed grid coordinates, discretized to some step interval that is less than the minimum pixel spacing of 224 microrad=8 km at nadir. A new location is assigned to each discretized location (mean of the locations of the constituent events). The time is assigned, uniformly, to be the dataset's product time attribute. The event lookup table is accompanied by pre-accumulated data at each discretized location: the flash, group and event counts; total flash and group areas; total event energy. Returns a new dataset with dimension "lutevent_id", having an index of the same name. The dataset is a (shallow) copy, but a new xarray object. If needed, returned dataset lutevents can be added to the original dataset with dataset.update(lutevents). If the pixel ID were stored as a 32 bit unsigned integer, (0 to 4294967295) that is 65536 unique values for a square (x,y) grid, the minimum safe scale factor for the span of the full disk is (0.62e6 microradians)/65536 = 9.46 microradians which is a bit large. Therefore, the implementation uses 64 bit unsigned integers to be safe. Arguments: dataset: GLM dataset in xarray format Keyword arguments: scale_factor: discretization interval, radians (default 28e-6) x_range, y_range: range of possible fixed grid coordinate values (default -/+.31 radians, which is larger than the full disk at geo. Ref: GOES-R PUG Vol. 3, L1b data.) """ # Make a copy of the dataset so we can update it and return a copy. # xarray copys are shallow/cheap, and the xarray docs promote returning new # datasets http://xarray.pydata.org/en/stable/combining.html dataset = dataset.copy() event_x, event_y = dataset.event_x.data, dataset.event_y.data event_energy = dataset.event_energy.data product_time = dataset.product_time.data ev_flash_id = dataset.event_parent_flash_id.data ev_group_id = dataset.event_parent_group_id.data flash_area = dataset.flash_area.data group_area = dataset.group_area.data xy_id = discretize_2d_location(event_x, event_y, scale_factor, x_range, y_range) dataset['event_parent_lutevent_id'] = xr.DataArray(xy_id, dims=[ event_dim, ]) eventlut_groups = dataset.groupby('event_parent_lutevent_id') n_lutevents = len(eventlut_groups.groups) # Create a new dimension for the reduced set of events, with their # properties aggregated. 
# - Sum: event_energy, flash_area, group_area # - Mean: event_x, event_y # - Count: event_id; unique flash_id, group_id eventlut_dtype = [('lutevent_id', 'u8'), ('lutevent_x', 'f8'), ('lutevent_y', 'f8'), ('lutevent_energy', 'f8'), ('lutevent_count', 'f4'), ('lutevent_flash_count', 'f4'), ('lutevent_group_count', 'f4'), ('lutevent_total_flash_area', 'f8'), ('lutevent_total_group_area', 'f8'), ('lutevent_time_offset', '<M8[ns]')] def event_lut_iter(event_lut_groupby, flash_groupby, group_groupby): flash_groups = flash_groupby.groups group_groups = group_groupby.groups for xy_id, evids in event_lut_groupby.groups.items(): flash_ids = np.unique(ev_flash_id[evids]) group_ids = np.unique(ev_group_id[evids]) flash_count, group_count = len(flash_ids), len(group_ids) total_flash_area = sum( (flash_area[flash_groups[fid]].sum() for fid in flash_ids)) total_group_area = sum( (group_area[group_groups[gid]].sum() for gid in group_ids)) yield (xy_id, event_x[evids].mean(), event_y[evids].mean(), event_energy[evids].sum(), len(evids), flash_count, group_count, total_flash_area, total_group_area, product_time) lut_iter = event_lut_iter(eventlut_groups, dataset.groupby('flash_id'), dataset.groupby('group_id')) event_lut = np.fromiter(lut_iter, dtype=eventlut_dtype, count=n_lutevents) lutevents = xr.Dataset.from_dataframe( pd.DataFrame(event_lut).set_index('lutevent_id')) dataset.update(lutevents) return dataset
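# get_lutevents above relies on discretize_2d_location() to turn (x, y) fixed-grid
# coordinates into a single unsigned-integer cell ID. That helper is not shown here; the
# sketch below is one plausible implementation consistent with the docstring (uniform
# quantisation at scale_factor, then combining the two indices into one 64-bit integer),
# not necessarily the library's actual code.
import numpy as np

def discretize_2d_location_sketch(x, y, scale_factor, x_range, y_range):
    """Map coordinates to a 64-bit cell ID on a regular grid of spacing scale_factor."""
    nx = np.uint64(np.ceil((x_range[1] - x_range[0]) / scale_factor))
    xi = np.floor((np.asarray(x) - x_range[0]) / scale_factor).astype(np.uint64)
    yi = np.floor((np.asarray(y) - y_range[0]) / scale_factor).astype(np.uint64)
    return yi * nx + xi

xy_id = discretize_2d_location_sketch(
    np.array([-0.01, 0.0, 0.01]), np.array([0.0, 0.0, 0.0]),
    scale_factor=28e-6, x_range=(-0.31, 0.31), y_range=(-0.31, 0.31))
# events closer together than scale_factor collapse onto the same ID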
def numpy_to_xarray(array, geobox, name=None):
    """Utility to convert ndarray to DataArray, using a datacube.model.GeoBox"""
    coords = [
        xarray.IndexVariable(
            x, geobox.coords[x].values, attrs=dict(units=geobox.coords[x].units)
        )
        for x in geobox.dims
    ]
    return xarray.DataArray(array, coords=coords, attrs=dict(crs=geobox.crs), name=name)