Example #1
                 'mapas/BR_Municipios_2019.shp'))

# centroids
lon = brasil.geometry.centroid.x.values
lat = brasil.geometry.centroid.y.values

# set the correct path to the NetCDF variables
path_var = '/home/alexandre/Dropbox/ParaUbuntu/netcdfgrid3/'

# getting NetCDF files
prec = xr.open_mfdataset(path_var + 'prec_daily_UT_Brazil_v2*.nc')['prec']
Tmax = xr.open_mfdataset(path_var + 'Tmax_daily_UT_Brazil_v2*.nc')['Tmax']
Tmin = xr.open_mfdataset(path_var + 'Tmin_daily_UT_Brazil_v2*.nc')['Tmin']

# export to CSV
pd.DataFrame(prec.sel(longitude=xr.DataArray(lon, dims='z'),
                      latitude=xr.DataArray(lat, dims='z'),
                      method='nearest').values,
             columns=brasil.CD_MUN,
             index=prec.time.values).to_csv('prec_mun.csv',
                                            float_format='%.1f')

pd.DataFrame(Tmax.sel(longitude=xr.DataArray(lon, dims='z'),
                      latitude=xr.DataArray(lat, dims='z'),
                      method='nearest').values,
             columns=brasil.CD_MUN,
             index=Tmax.time.values).to_csv('Tmax_mun.csv',
                                            float_format='%.1f')

pd.DataFrame(Tmin.sel(longitude=xr.DataArray(lon, dims='z'),
                      latitude=xr.DataArray(lat, dims='z'),
                      method='nearest').values,
             columns=brasil.CD_MUN,
             index=Tmin.time.values).to_csv('Tmin_mun.csv',
                                            float_format='%.1f')
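
# --- Standalone sketch (not part of the original snippet): the vectorized
# point-selection pattern used above, with small synthetic data so it runs on its own.
import numpy as np
import pandas as pd
import xarray as xr

da = xr.DataArray(np.random.rand(3, 5, 5),
                  dims=('time', 'latitude', 'longitude'),
                  coords={'time': pd.date_range('2000-01-01', periods=3),
                          'latitude': np.linspace(-10, 10, 5),
                          'longitude': np.linspace(-60, -40, 5)})
lon_pts = np.array([-55.2, -47.9])   # hypothetical point longitudes
lat_pts = np.array([-3.1, 7.4])      # hypothetical point latitudes
# indexers that share the same dim ('z') select one value per point instead of a grid
points = da.sel(longitude=xr.DataArray(lon_pts, dims='z'),
                latitude=xr.DataArray(lat_pts, dims='z'),
                method='nearest')
print(points.shape)   # (3, 2): time x points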
Example #2
    def reproject(
        self,
        dst_crs,
        resolution=None,
        shape=None,
        transform=None,
        resampling=Resampling.nearest,
        nodata=None,
        **kwargs,
    ):
        """
        Reproject :obj:`xarray.DataArray` objects

        Powered by :func:`rasterio.warp.reproject`

        .. note:: Only 2D/3D arrays with dimensions 'x'/'y' are currently supported.
            Requires either a grid mapping variable with 'spatial_ref' or
            a 'crs' attribute to be set containing a valid CRS.
            If using a WKT (e.g. from spatialreference.org), make sure it is an OGC WKT.

        .. versionadded:: 0.0.27 shape
        .. versionadded:: 0.0.28 transform
        .. versionadded:: 0.5.0 nodata, kwargs

        Parameters
        ----------
        dst_crs: str
            OGC WKT string or Proj.4 string.
        resolution: float or tuple(float, float), optional
            Size of a destination pixel in destination projection units
            (e.g. degrees or metres).
        shape: tuple(int, int), optional
            Shape of the destination in pixels (dst_height, dst_width). Cannot be used
            together with resolution.
        transform: Affine, optional
            The destination transform.
        resampling: rasterio.enums.Resampling, optional
            See :func:`rasterio.warp.reproject` for more details.
        nodata: float, optional
            The nodata value used to initialize the destination;
            it will remain in all areas not covered by the reprojected source.
            If not provided, defaults to the nodata value of the source image
            if it exists; otherwise, attempts to find an appropriate value by dtype.
        **kwargs: dict
            Additional keyword arguments to pass into :func:`rasterio.warp.reproject`.
            To override:
            - src_transform: `rio.write_transform`
            - src_crs: `rio.write_crs`
            - src_nodata: `rio.write_nodata`


        Returns
        -------
        :obj:`xarray.DataArray`:
            The reprojected DataArray.
        """
        if resolution is not None and (shape is not None
                                       or transform is not None):
            raise RioXarrayError(
                "resolution cannot be used with shape or transform.")
        if self.crs is None:
            raise MissingCRS(
                "CRS not found. Please set the CRS with 'rio.write_crs()'."
                f"{_get_data_var_message(self._obj)}")
        gcps = self.get_gcps()
        if gcps:
            kwargs.setdefault("gcps", gcps)

        src_affine = None if "gcps" in kwargs else self.transform(recalc=True)
        if transform is None:
            dst_affine, dst_width, dst_height = _make_dst_affine(
                self._obj, self.crs, dst_crs, resolution, shape, **kwargs)
        else:
            dst_affine = transform
            if shape is not None:
                dst_height, dst_width = shape
            else:
                dst_height, dst_width = self.shape

        dst_data = self._create_dst_data(dst_height, dst_width)

        dst_nodata = self._get_dst_nodata(nodata)

        rasterio.warp.reproject(
            source=self._obj.values,
            destination=dst_data,
            src_transform=src_affine,
            src_crs=self.crs,
            src_nodata=self.nodata,
            dst_transform=dst_affine,
            dst_crs=dst_crs,
            dst_nodata=dst_nodata,
            resampling=resampling,
            **kwargs,
        )
        # add necessary attributes
        new_attrs = _generate_attrs(self._obj, dst_nodata)
        # make sure dimensions with coordinates renamed to x,y
        dst_dims = []
        for dim in self._obj.dims:
            if dim == self.x_dim:
                dst_dims.append("x")
            elif dim == self.y_dim:
                dst_dims.append("y")
            else:
                dst_dims.append(dim)
        xda = xarray.DataArray(
            name=self._obj.name,
            data=dst_data,
            coords=_make_coords(self._obj, dst_affine, dst_width, dst_height),
            dims=tuple(dst_dims),
            attrs=new_attrs,
        )
        xda.encoding = self._obj.encoding
        xda.rio.write_transform(dst_affine, inplace=True)
        xda.rio.write_crs(dst_crs, inplace=True)
        xda.rio.write_coordinate_system(inplace=True)
        return xda
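
# --- Standalone sketch (not part of the original source): a minimal call to this
# accessor method, assuming rioxarray is installed and "input.tif" is a hypothetical
# georeferenced raster.
import rioxarray  # noqa: F401  (registers the .rio accessor)
from rasterio.enums import Resampling

da = rioxarray.open_rasterio("input.tif")   # CRS and transform are read from the file
da_3857 = da.rio.reproject("EPSG:3857", resampling=Resampling.bilinear)
print(da_3857.rio.crs)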
Example #3
def generate_terrain(agg: xr.DataArray,
                     x_range: tuple = (0, 500),
                     y_range: tuple = (0, 500),
                     seed: int = 10,
                     zfactor: int = 4000,
                     full_extent: Optional[Union[Tuple, List]] = None,
                     name: str = 'terrain'
                     ) -> xr.DataArray:
    """
    Generates a pseudo-random terrain which can be helpful for testing
    raster functions.

    Parameters
    ----------
    agg : xr.DataArray
        2D array whose shape sets the height and width of the generated terrain.
    x_range : tuple, default=(0, 500)
        Range of x values.
    y_range : tuple, default=(0, 500)
        Range of y values.
    seed : int, default=10
        Seed for random number generator.
    zfactor : int, default=4000
        Multiplier for z values.
    full_extent : tuple or list, default=None
        bbox<xmin, ymin, xmax, ymax>. Full extent of coordinate system.
    name : str, default='terrain'
        Name of the generated terrain DataArray.

    Returns
    -------
    terrain : xr.DataArray
        2D array of generated terrain values.

    References
    ----------
        - Michael McHugh: https://www.youtube.com/watch?v=O33YV4ooHSo
        - Red Blob Games: https://www.redblobgames.com/maps/terrain-from-noise/

    Examples
    --------
    .. plot::
       :include-source:

        >>> import numpy as np
        >>> import xarray as xr
        >>> from xrspatial import generate_terrain

        >>> W = 400
        >>> H = 300
        >>> data = np.zeros((H, W), dtype=np.float32)
        >>> raster = xr.DataArray(data, dims=['y', 'x'])
        >>> xrange = (-20e6, 20e6)
        >>> yrange = (-20e6, 20e6)
        >>> seed = 2
        >>> zfactor = 10

        >>> terrain = generate_terrain(raster, xrange, yrange, seed, zfactor)
        >>> terrain.plot.imshow()
    """

    height, width = agg.shape

    if full_extent is None:
        full_extent = (x_range[0], y_range[0],
                       x_range[1], y_range[1])

    elif not isinstance(full_extent, (list, tuple)) or len(full_extent) != 4:
        raise TypeError('full_extent must be tuple(4)')

    full_xrange = (full_extent[0], full_extent[2])
    full_yrange = (full_extent[1], full_extent[3])

    x_range_scaled = (_scale(x_range[0], full_xrange, (0.0, 1.0)),
                      _scale(x_range[1], full_xrange, (0.0, 1.0)))

    y_range_scaled = (_scale(y_range[0], full_yrange, (0.0, 1.0)),
                      _scale(y_range[1], full_yrange, (0.0, 1.0)))

    mapper = ArrayTypeFunctionMapping(
        numpy_func=_terrain_numpy,
        cupy_func=_terrain_cupy,
        dask_func=_terrain_dask_numpy,
        dask_cupy_func=lambda *args: not_implemented_func(
            *args, messages='generate_terrain() does not support dask with cupy backed DataArray'  # noqa
        )
    )
    out = mapper(agg)(agg.data, seed, x_range_scaled, y_range_scaled, zfactor)
    canvas = ds.Canvas(
        plot_width=width, plot_height=height, x_range=x_range, y_range=y_range
    )

    # DataArray coords were coming back different from cvs.points...
    hack_agg = canvas.points(pd.DataFrame({'x': [], 'y': []}), 'x', 'y')
    res = get_dataarray_resolution(hack_agg)
    result = xr.DataArray(out,
                          name=name,
                          coords=hack_agg.coords,
                          dims=hack_agg.dims,
                          attrs={'res': res})

    return result
Example #4
def read_ndbc(filename, dirs=np.arange(0, 360, 10)):
    """Read spectra from NDBC wave buoy ASCII files.

    Both the history and realtime formats are supported. Realtime formats are described
    at https://www.ndbc.noaa.gov/measdes.shtml.

    Args:
        - filename (str) or filenames (list): filename of 1D spectral density file or
          list of the five component files for directional spectra as
          [`spec`, `swdir`, `swdir2`, `swr1`, `swr2`].  There is no way to verify the
          component files for the historical directional spectra, so the order entered
          in the list is what is used. The history and realtime formats are
          automatically detected.
        - dirs (array): vector of directional bins for spectral reconstruction.
        - attrs (dict): additional global attributes.

    Returns:
        - dset (SpecDataset): spectra dataset object read from NDBC buoy file(s).

    """

    if isinstance(filename, str):
        filename = [filename]
    elif isinstance(filename, list):
        if not len(filename) == 5:
            raise ValueError(
                "filename argument for NDBC directional spectra must be list with 5 files [spden,swdir,swdir2,swr1,swr2]"
            )
    else:
        raise TypeError("filename argument must be string or list")

    # Get the spectra density
    df_spden = read_file(filename[0])

    if "Sep_Freq" in df_spden.columns:
        sep_freq = df_spden["Sep_Freq"].values
        df_spden.drop(columns=["Sep_Freq"], inplace=True)
    else:
        sep_freq = None

    times = df_spden.index
    freqs = df_spden.columns.astype("f")
    spshape = (len(times), len(freqs), 1)
    specdens = df_spden.values.reshape(spshape)

    if len(filename) == 1:
        dirs = [0.0]
    else:
        df_swdir = read_file(filename[1])
        df_swdir2 = read_file(filename[2])
        df_swr1 = read_file(filename[3])
        df_swr2 = read_file(filename[4])
        dirs = np.array(dirs)
        specdens = construct_spectra(
            specdens,
            df_swdir.values.reshape(spshape),
            df_swdir2.values.reshape(spshape),
            0.01 * df_swr1.values.reshape(
                spshape),  # these values are stored with a factor of 100
            0.01 * df_swr2.values.reshape(spshape),
            dirs,
        )
    coords = OrderedDict(((attrs.TIMENAME, times), (attrs.FREQNAME, freqs),
                          (attrs.DIRNAME, dirs)))
    dims = (attrs.TIMENAME, attrs.FREQNAME, attrs.DIRNAME)
    dset = xr.DataArray(data=specdens,
                        coords=coords,
                        dims=dims,
                        name=attrs.SPECNAME).to_dataset()
    if sep_freq is not None:
        sfreq = xr.DataArray(
            data=sep_freq,
            coords={attrs.TIMENAME: times},
            dims=(attrs.TIMENAME),
            name=attrs.SPECNAME,
        )
        # Add the NDBC-defined separation frequency for realtime diagnostics
        dset["Sep_Freq"] = sfreq
    # Realtime data is in reversed time order
    dset = dset.sortby("time", ascending=True)
    return dset
Example #5
def plot_xco2_diff(name_month, xco2_mod_slice, xco2_oco_slice, diff_before,
                   diff_after, lat_slice, lon_slice, imonth, mod_dir):
    '''Produce plots of XCO2 differences.

    inputs (1d arrays):
        diff_before, diff_after: differences in XCO2, before and after
        lat_slice, lon_slice: lat and lon for each data point
        imonth: month number, for the plot title
    outputs: plots
    '''
    nobs = len(diff_before)
    # Create the target DataArrays on a regular grid
    lat_res = 2.  # horizontal resolution of lat and lon you would want
    lon_res = 2.5
    lat = np.linspace(-90, 90, int(180 / lat_res + 1))  # grid
    lon = np.linspace(-180, 177.5, int(360 / lon_res))
    diff_1 = xr.DataArray(data=np.nan,
                          dims=('lat', 'lon'),
                          coords={
                              'lat': lat,
                              'lon': lon
                          },
                          name='diff')
    diff_2 = xr.DataArray(data=np.nan,
                          dims=('lat', 'lon'),
                          coords={
                              'lat': lat,
                              'lon': lon
                          },
                          name='diff')
    # map the 1d data onto the gridded DataArrays
    for i in range(nobs):
        ilat = int((lat_slice[i].values + 90) / 2)  # lat index on the grid
        ilon = int((lon_slice[i].values + 180) / 2.5)  # lon index on the grid
        diff_1[ilat, ilon] = -diff_before[i]
        diff_2[ilat, ilon] = -diff_after[i]
    print('y diff before:', "{:.2f}".format(diff_1.mean().values))
    print('y diff after:', "{:.2f}".format(diff_2.mean().values))

    # figure 1, distribution
    fig, axes = plt.subplots(1,
                             2,
                             figsize=[14, 6],
                             subplot_kw={'projection': ccrs.PlateCarree()},
                             gridspec_kw={
                                 'hspace': 0.2,
                                 'wspace': 0
                             })
    # before
    diff_1.plot(ax=axes[0],
                vmax=4,
                add_labels=False,
                cbar_kwargs={'shrink': 0.5})
    axes[0].set_title(short_name_of_month(imonth) + ' XCO2: a priori - OCO2',
                      loc='left')
    axes[0].set_title('ppm', loc='right')
    axes[0].coastlines()
    axes[0].gridlines(linestyle='--')
    # after
    diff_2.plot(ax=axes[1],
                vmax=4,
                add_labels=False,
                cbar_kwargs={'shrink': 0.5})
    axes[1].set_title(short_name_of_month(imonth) +
                      ' XCO2: a posteriori - OCO2',
                      loc='left')
    axes[1].set_title('ppm', loc='right')
    axes[1].coastlines()
    axes[1].gridlines(linestyle='--')

    fig.savefig(mod_dir + 'bio_results_map_diff_' + str(imonth) + '.png',
                dpi=300)

    obs_series = xco2_oco_slice.values
    model_series = xco2_mod_slice.values
    format_R1, format_rmse1 = r_rmse(obs_series, model_series)
    #R = ma.corrcoef(ma.masked_invalid(obs_series), ma.masked_invalid(model_series))
    #x = obs_series[~np.isnan(obs_series)]
    #y = model_series[~np.isnan(model_series)]
    #rmse = np.sqrt(((y - x) ** 2).mean())
    #format_R1 = float("{0:.2f}".format(R[0,1]))
    #format_rmse1 = float("{0:.2f}".format(rmse))
    print('R1 is:', format_R1, ' RMSE1 is: ', format_rmse1)

    obs_series = xco2_oco_slice.values
    model_series = xco2_oco_slice.values - diff_after
    format_R2, format_rmse2 = r_rmse(obs_series, model_series)
    print('R2 is:', format_R2, ' RMSE2 is: ', format_rmse2)

    # figure 2, scatter plot
    fig = plt.figure(figsize=[5, 5])
    plt.plot([300, 450], [300, 450], c='black')
    plt.scatter(xco2_oco_slice, xco2_mod_slice, s=0.7, label='A priori')
    plt.scatter(xco2_oco_slice,
                xco2_oco_slice - diff_after,
                s=0.7,
                label='A posteriori')
    plt.ylim(top=420, bottom=395)
    plt.xlim(right=420, left=395)
    plt.text(x=405,
             y=397.5,
             s='R1: ' + str(format_R1) + ' RMSE1: ' + str(format_rmse1),
             size=12)
    plt.text(x=405,
             y=396,
             s='R2: ' + str(format_R2) + ' RMSE2: ' + str(format_rmse2),
             size=12)
    plt.title(name_month + ' XCO2 (ppm)')
    plt.ylabel('GEOS-Chem')
    plt.xlabel('OCO2')
    plt.legend(markerscale=4)

    fig.savefig(mod_dir + 'bio_results_scatter_diff_' + str(imonth) + '.png',
                dpi=300)
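
# --- Standalone sketch (not part of the original script): mapping scattered 1-D
# observations onto a regular 2 x 2.5 degree grid, as done in plot_xco2_diff above.
import numpy as np
import xarray as xr

lat = np.linspace(-90, 90, 91)        # 2-degree latitude centres
lon = np.linspace(-180, 177.5, 144)   # 2.5-degree longitude centres
grid = xr.DataArray(np.full((lat.size, lon.size), np.nan),
                    dims=('lat', 'lon'), coords={'lat': lat, 'lon': lon})
obs_lat = np.array([10.3, -33.8])     # hypothetical observation locations and values
obs_lon = np.array([-45.7, 151.2])
obs_val = np.array([1.2, -0.4])
ilat = ((obs_lat + 90) / 2.0).astype(int)    # nearest grid-cell indices
ilon = ((obs_lon + 180) / 2.5).astype(int)
grid.values[ilat, ilon] = obs_val
print(float(grid.mean()))             # DataArray.mean skips NaN by default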
Example #6
    def _read_from_band_structure(self, band_structure, eigenstate_map):
        """
        Uses a sisl's `BandStructure` object to calculate the bands.
        """
        import xarray as xr

        if band_structure is None:
            raise ValueError("No band structure (k points path) was provided")

        if not isinstance(getattr(band_structure, "parent", None),
                          sisl.Hamiltonian):
            self.setup_hamiltonian()
            band_structure.set_parent(self.H)
        else:
            self.H = band_structure.parent

        # Define the spin class of this calculation.
        self.spin = self.H.spin

        self.ticks = band_structure.lineartick()
        self.kPath = band_structure._k

        # We define a wrapper to get the values out of the eigenstates
        # to give the user the possibility to do something in between
        # NOTE THAT THIS IS USED BY FAT BANDS TO GET THE WEIGHTS SIMULTANEOUSLY
        eig_map = eigenstate_map

        # Also, in this wrapper we will get the spin moments in case it is a non-collinear
        # calculation
        if self.spin.is_noncolinear:
            self.spin_moments = []
        elif hasattr(self, "spin_moments"):
            del self.spin_moments

        def bands_wrapper(eigenstate, spin_index):
            if callable(eig_map):
                eig_map(eigenstate, self, spin_index)
            if hasattr(self, "spin_moments"):
                self.spin_moments.append(eigenstate.spin_moment())
            return eigenstate.eig

        # Define the available spins
        spin_indices = [0]
        if self.spin.is_polarized:
            spin_indices = [0, 1]

        # Get the eigenstates for all the available spin components
        bands_arrays = []
        for spin_index in spin_indices:

            spin_bands = band_structure.apply.dataarray.eigenstate(
                wrap=partial(bands_wrapper, spin_index=spin_index),
                spin=spin_index,
                coords=('band', ),
            )

            bands_arrays.append(spin_bands)

        # Merge everything into a single dataarray with a spin dimension
        self.bands = (xr.concat(bands_arrays, "spin")
                      .assign_coords(spin=spin_indices)
                      .transpose("k", "spin", "band"))

        self.bands['k'] = band_structure.lineark()
        self.bands.attrs = {
            "ticks": self.ticks[0],
            "ticklabels": self.ticks[1],
            **bands_arrays[0].attrs
        }

        if hasattr(self, "spin_moments"):
            self.spin_moments = xr.DataArray(self.spin_moments,
                                             coords={
                                                 "k": self.bands.k,
                                                 "band": self.bands.band,
                                                 "axis": ["x", "y", "z"]
                                             },
                                             dims=("k", "band", "axis"))
Example #7
import dask.array as da
import numpy as np
import scipy.signal as sig
import xarray as xr

def test_tapper_1d(window):
    # the .window accessor is registered by the library under test
    dummy_array = xr.DataArray(da.ones((10), chunks=(3, )), dims='x')
    win = dummy_array.window
    win.set(window=window, dim='x')
    assert np.array_equal(win.tapper(), sig.get_window(window, 10))
Example #8
def xr_calcule_stat_climat(l_ds):
    """fonction qui fait les stat climato pour chaque
    dataset dans la liste

    Entree :

    l_ds : liste de dataset dont on veut calculer les statistiques
           climatologiques

    #TODO Changer les dataset pour des dataarray

    """

    # loop over the periods in l_ds
    for n_d, ds in enumerate(l_ds):

        tdeb = ds.time[0]
        tfin = ds.time[-1]

        def _mon_strftime(time):
            """fonction qui transforme une valeur de ds.time en string
            yyyy-mm-dd
            """
            assert (time.size == 1)
            yyyy = time.dt.year.item()
            mm = time.dt.month.item()
            dd = time.dt.day.item()
            return f"{yyyy}-{mm}-{dd}"

        sdeb = _mon_strftime(tdeb)
        sfin = _mon_strftime(tfin)
        msg = (f"calcul des statistiques climatiques pour periode "
               f"{n_d+1:03n}/{len(l_ds):03n} [ {sdeb} @ {sfin} ])'")
        print(msg)

        # compute the statistics
        moy_per = ds.mean('time')
        std_per = ds.std('time')
        min_per = ds.min('time')
        max_per = ds.max('time')

        # add the time dimension and compute climatology_bnds
        time0 = ds.time[:1]
        time1 = ds.time[0]
        time2 = ds.time[-1]

        # define the weight as the number of years in the climatological period
        time1.coords['poids'] = ds.time.size

        # adjust moy_per
        moy_per = moy_per.expand_dims('time')
        moy_per['time'] = ('time', time0)
        moy_per.attrs = ds.attrs
        cm = moy_per.attrs.get("cell_methods") or ""
        moy_per.attrs.update(cell_methods=(cm +
                                           ' time: mean over years').strip())

        # adjust std_per
        std_per = std_per.expand_dims('time')
        std_per['time'] = ('time', time0)
        std_per.attrs = ds.attrs
        cm = std_per.attrs.get("cell_methods") or ""
        std_per.attrs.update(cell_methods=(cm +
                                           ' time: std over years').strip())

        # accumulate the months
        if n_d == 0:
            moy_clim = moy_per.copy()
            std_clim = std_per.copy()
            time_bnds1 = time1.copy()
            time_bnds2 = time2.copy()
            poids_clim = time1.poids.copy()
        else:
            moy_clim = xr.concat((moy_clim, moy_per), dim='time')
            std_clim = xr.concat((std_clim, std_per), dim='time')
            time_bnds1 = xr.concat((time_bnds1, time1), dim='time')
            time_bnds2 = xr.concat((time_bnds2, time2), dim='time')
            poids_clim = xr.concat((poids_clim, time1.coords['poids']),
                                   dim='time')

    # compute time_bnds
    time_bnds = np.vstack((time_bnds1.values, time_bnds2.values)).transpose()

    # put everything together
    nom_var = ds.name
    dso = xr.Dataset()
    dso[nom_var + '_moy_clim'] = moy_clim
    dso[nom_var + '_std_clim'] = std_clim
    dso['poids'] = poids_clim

    # add climatology_bounds to the dataset
    tampon = xr.DataArray(time_bnds, dims=['time', 'bnds'])
    dso['climatology_bounds'] = tampon

    # add the 'climatology' attribute to time
    dso.time.attrs['climatology'] = 'climatology_bounds'

    # sort everything in chronological order
    dso = dso.sortby('time')

    # remove the label ('etiquette') variable
    if 'etiquette' in dso:
        dso = dso.drop('etiquette')

    # return the output
    return dso
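
# --- Standalone sketch (not part of the original source): building CF-style climatology
# bounds, as done in xr_calcule_stat_climat above.
import numpy as np
import xarray as xr

starts = np.array(['1981-01-01', '1991-01-01'], dtype='datetime64[ns]')  # hypothetical periods
ends = np.array(['1990-12-31', '2000-12-31'], dtype='datetime64[ns]')
dso = xr.Dataset(coords={'time': starts})
dso['climatology_bounds'] = xr.DataArray(np.vstack((starts, ends)).T,
                                         dims=('time', 'bnds'))
dso.time.attrs['climatology'] = 'climatology_bounds'
print(dso)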
Example #9
def xr_genere_series(ds, freq_entree, freq, poids=None):
    """
    fonction qui genere une serie pour un certain type de periode
    a partir de donnees a une certaine frequence.

    ds : DataFrame xarray

    freq_entree : string
        frequence des donnees d'entree parmi 'jour', 'mois' et
                                             'annee'
    
    """

    dates = ds.time
    #
    # all weights are equal by default
    #
    if poids is None:
        poids = xr.DataArray(np.ones(ds.time.size), coords=[ds.time])
    #
    # assign to each date the label of the period it belongs to,
    # according to the value of freq
    #
    if freq == 'annee':
        # etiquette = [str(d.year) for d in dates]
        etiquette = dates.dt.year.values
    elif freq == 'saison':
        etiquette = trouve_saison_annee(dates)
    else:
        msg = f"frequence {freq} non prévue"
        raise RuntimeError(msg)

    # add the labels to the weights
    poids['etiquette'] = ('time', etiquette)

    #
    # compute the mean weighted by poids
    #
    num = (poids * ds).groupby('etiquette').sum('time', skipna=False)
    denom = poids.groupby('etiquette').sum('time')
    ds_sortie = num / denom

    # copy the attributes
    for k in ds.attrs:
        ds_sortie.attrs[k] = ds.attrs[k]

    # update cell_methods
    if 'cell_methods' in ds_sortie.attrs:
        ds_sortie.attrs['cell_methods'] = ds_sortie.attrs['cell_methods'] + \
                                          ' time: mean over days'
    else:
        ds_sortie.attrs['cell_methods'] = 'time: mean over days'

    # copy the variable name
    ds_sortie = ds_sortie.rename(ds.name)

    # add the time value of the start of each period
    tampon = poids.groupby('etiquette').groups
    l_ind = [tampon[k][0] for k in ds_sortie.etiquette.values]
    ds_sortie = ds_sortie.rename({'etiquette': 'time'})
    ds_sortie['time'] = poids.time[l_ind]

    # keep the sum of the weights for each period
    ds_sortie['poids'] = ('time', denom)

    # reorder chronologically
    ds_sortie = ds_sortie.sortby('time')

    return ds_sortie
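
# --- Standalone sketch (not part of the original source): the weighted group-mean
# pattern used in xr_genere_series above, with tiny synthetic data.
import numpy as np
import xarray as xr

da = xr.DataArray(np.array([1.0, 2.0, 3.0, 4.0]), dims='time', name='var')
poids = xr.DataArray(np.array([1.0, 1.0, 2.0, 2.0]), dims='time')   # weights
labels = ('time', np.array(['p1', 'p1', 'p2', 'p2']))               # period labels
da.coords['etiquette'] = labels
poids.coords['etiquette'] = labels
num = (poids * da).groupby('etiquette').sum('time', skipna=False)
denom = poids.groupby('etiquette').sum('time')
print((num / denom).values)   # [1.5 3.5]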
Example #10
# imports assumed for this snippet (Allen SDK import paths may vary by version)
import pandas as pd
import xarray as xr
import allensdk.brain_observatory.stimulus_info as stim_info
from allensdk.core.brain_observatory_cache import BrainObservatoryCache
from allensdk.brain_observatory.drifting_gratings import DriftingGratings

boc = BrainObservatoryCache()

# Download a list of all targeted areas
targeted_structures = boc.get_all_targeted_structures()

# Download cells for a set of experiments and convert to DataFrame
cells = boc.get_cell_specimens()
cells = pd.DataFrame.from_records(cells)

dsi_cells = cells.query('area == "VISp" & g_dsi_dg >= 1 & p_dg < 0.05')

# find experiment containers for those cells
dsi_ec_ids = dsi_cells['experiment_container_id'].unique()

# Download the ophys experiments containing the drifting gratings stimulus for VISp experiment containers
dsi_exps = boc.get_ophys_experiments(experiment_container_ids=dsi_ec_ids,
                                     stimuli=[stim_info.DRIFTING_GRATINGS])

exp_id = dsi_exps[0]['id']
data_set = boc.get_ophys_experiment_data(exp_id)

dg = DriftingGratings(data_set)
mean_sweeps = dg.mean_sweep_response.values

d = xr.DataArray(mean_sweeps,
                 dims=("stim", "cell"),
                 coords={'cell': [str(x) for x in dg.cell_id] + ['dx']})
d.to_dataframe(name='value').reset_index().to_feather('cells_dg.feather')

dg.stim_table.to_feather('stim_table_dg.feather')
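
# --- Standalone sketch (not part of the original script): converting a 2-D DataArray
# to a long-form DataFrame as above (writing feather additionally requires pyarrow).
import numpy as np
import xarray as xr

d = xr.DataArray(np.random.rand(4, 3),
                 dims=('stim', 'cell'),
                 coords={'cell': ['c1', 'c2', 'dx']})   # hypothetical cell ids plus 'dx'
df = d.to_dataframe(name='value').reset_index()
print(df.head())
# df.to_feather('cells_dg.feather')   # uncomment if pyarrow is available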
Example #11
    def select_coordinates(self, source_coordinates, eval_coordinates, index_type="numpy"):
        """
        Select a subset of coordinates if the interpolator can downselect.

        At this point in the execution process, podpac has selected a subset of source_coordinates that intersects
        with the requested coordinates, dropped extra dimensions from requested coordinates, and confirmed
        source coordinates are not missing any dimensions.

        Parameters
        ----------
        source_coordinates : :class:`podpac.Coordinates`
            Intersected source coordinates
        eval_coordinates : :class:`podpac.Coordinates`
            Requested coordinates to evaluate
        index_type : str, optional
            Type of index to return: "numpy" (default), "xarray", or "slice"

        Returns
        -------
        (:class:`podpac.Coordinates`, list)
            Returns a tuple whose first element is the subset of selected coordinates and whose second element is
            the indices of the selected coordinates
        """

        # TODO: short circuit if source_coordinates contains eval_coordinates
        # short circuit if source and eval coordinates are the same
        if source_coordinates == eval_coordinates:
            return source_coordinates, tuple([slice(0, None)] * len(source_coordinates.shape))

        interpolator_queue = self._select_interpolator_queue(source_coordinates, eval_coordinates, "can_select")

        self._last_select_queue = interpolator_queue

        # For heterogeneous selections, we need to select and then reconstruct each set of dimensions
        selected_coords = {}
        selected_coords_idx = {k: np.arange(source_coordinates[k].size) for k in source_coordinates.dims}
        for udims in interpolator_queue:
            interpolator = interpolator_queue[udims]
            extra_dims = [d for d in source_coordinates.udims if d not in udims]
            sc = source_coordinates.udrop(extra_dims)
            # run interpolation. mutates selected coordinates and selected coordinates index
            sel_coords, sel_coords_idx = interpolator.select_coordinates(
                udims, sc, eval_coordinates, index_type=index_type
            )
            # Save individual 1-D coordinates for later reconstruction
            for i, k in enumerate(sel_coords.dims):
                selected_coords[k] = sel_coords[k]
                selected_coords_idx[k] = sel_coords_idx[i]

        # Reconstruct dimensions
        for d in source_coordinates.dims:
            if d not in selected_coords:  # Some coordinates may not have a selector when heterogeneous
                selected_coords[d] = source_coordinates[d]
            # np.ix_ call doesn't work with slices, and fancy numpy indexing does not work well with mixed slice/index
            if isinstance(selected_coords_idx[d], slice) and index_type != "slice":
                selected_coords_idx[d] = np.arange(source_coordinates[d].size)[selected_coords_idx[d]]

        selected_coords = Coordinates(
            [selected_coords[k] for k in source_coordinates.dims],
            source_coordinates.dims,
            crs=source_coordinates.crs,
            validate_crs=False,
        )
        if index_type == "numpy":
            npcoords = []
            has_stacked = False
            for k in source_coordinates.dims:
                # Deal with nD stacked source coords (marked by coords being in tuple)
                if isinstance(selected_coords_idx[k], tuple):
                    has_stacked = True
                    npcoords.extend([sci for sci in selected_coords_idx[k]])
                else:
                    npcoords.append(selected_coords_idx[k])
            if has_stacked:
                # When stacked coordinates are nD we cannot use the catchall of the next branch
                selected_coords_idx2 = npcoords
            else:
                # This would not be needed if everything went as planned in
                # interpolator.select_coordinates, but this is a catchall that works
                # for 90% of the cases
                selected_coords_idx2 = np.ix_(*[np.ravel(npc) for npc in npcoords])
        elif index_type == "xarray":
            selected_coords_idx2 = []
            for i in selected_coords.dims:
                # Deal with nD stacked source coords (marked by coords being in tuple)
                if isinstance(selected_coords_idx[i], tuple):
                    selected_coords_idx2.extend([xr.DataArray(sci, dims=[i]) for sci in selected_coords_idx[i]])
                else:
                    selected_coords_idx2.append(selected_coords_idx[i])
            selected_coords_idx2 = tuple(selected_coords_idx2)
        elif index_type == "slice":
            selected_coords_idx2 = []
            for i in selected_coords.dims:
                # Deal with nD stacked source coords (marked by coords being in tuple)
                if isinstance(selected_coords_idx[i], tuple):
                    selected_coords_idx2.extend(selected_coords_idx[i])
                else:
                    selected_coords_idx2.append(selected_coords_idx[i])

            selected_coords_idx2 = tuple(selected_coords_idx2)
        else:
            raise ValueError("Unknown index_type '%s'" % index_type)
        return selected_coords, tuple(selected_coords_idx2)
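
# --- Standalone sketch (not part of the original source): the three index styles handled
# above ("numpy" via np.ix_, "xarray" via DataArray indexers, plain slices) pick the same cells.
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(12).reshape(3, 4), dims=('lat', 'lon'))
rows, cols = np.array([0, 2]), np.array([1, 3])
sub_np = da.values[np.ix_(rows, cols)]                       # outer (grid) selection, numpy style
sub_xr = da.isel(lat=xr.DataArray(rows, dims='lat'),         # same selection, xarray indexers
                 lon=xr.DataArray(cols, dims='lon'))
assert (sub_xr.values == sub_np).all()
sub_slice = da.isel(lat=slice(0, 3, 2), lon=slice(1, 4, 2))  # same cells expressed as slices
assert (sub_slice.values == sub_np).all()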
Example #12
                                                      end_date='20141231',
                                                      min_lat=-10,
                                                      max_lat=10,
                                                      pressure=1)

    # get values at 10 hPa
    nox_10_hpa = np.zeros(len(nox.time))
    for i in range(len(nox.time)):
        pressure_i = pres_nox[i, :]
        nox_i = nox[i, :]
        n, l = helper_functions.interpolate_to_mls_pressure(pressure_i, nox_i)
        # only keep vmr at 10 hPa
        nox_10_hpa[i] = n[np.where(l == 10)]

    nox_10_hpa_dataset = xr.DataArray(nox_10_hpa,
                                      coords=[nox.time],
                                      dims=["time"])
    monthlymeans = nox_10_hpa_dataset.groupby('time.month').mean('time')
    anomalies_nox = nox_10_hpa_dataset.groupby('time.month') - monthlymeans

    # Load O3
    o3, pres_o3 = open_data.load_osiris_ozone_monthly(start_date='20050101',
                                                      end_date='20141231',
                                                      min_lat=-10,
                                                      max_lat=10,
                                                      pressure=1)

    # get values at 10 hPa
    o3_10_hpa = np.zeros(len(o3.time))
    for i in range(len(o3.time)):
        pressure_i = pres_o3[i, :]
Example #13
axes[2].set_xlabel('# days')

#fig.suptitle('Cloud Radiative Forcing & Surface Fluxes')

plt.subplots_adjust(hspace=.3)
#plt.show()

plt.savefig('../Figures/10_' + PCT +
            'CTR_TOPO_SWCF_LWCF_SHFLX_LHFLX_time_series.png')
#plt.savefig('../Figures/10_'+PCT+'CTR_TOPO_SWCF_LWCF_SHFLX_LHFLX_time_series.pdf')

# output SWCF, LWCF, CF, SHFLX, LHFLX, FLX averaged over days 3-7 for CTR-TOPOX for
# further usage
# Note that (1) indexing starts from 0 and (2) slice(2, 7) does not include index 7
SWCF_avg = xr.DataArray(np.mean(Vars[0, 2, 2:7]))
SWCF_avg.name = 'SWCF'
SWCF_avg.attrs['units'] = 'W/m2'

LWCF_avg = xr.DataArray(np.mean(Vars[1, 2, 2:7]))
LWCF_avg.name = 'LWCF'
LWCF_avg.attrs['units'] = 'W/m2'

CF_avg = xr.DataArray(np.mean(Vars[2, 2, 2:7]))
CF_avg.name = 'CF'
CF_avg.attrs['units'] = 'W/m2'

SHFLX_avg = xr.DataArray(np.mean(Vars[3, 2, 2:7]))
SHFLX_avg.name = 'SHFLX'
SHFLX_avg.attrs['units'] = 'W/m2'
Example #14
def create_nc_variable_files_on_regular_grid_from_mds(mds_var_dir,
                                                     mds_files_to_load,
                                                     mds_grid_dir,
                                                     output_dir,
                                                     output_freq_code,
                                                     vars_to_load = 'all',
                                                     tiles_to_load = [0,1,2,3,4,5,6,7,8,9,10,11,12],
                                                     time_steps_to_load = [],
                                                     meta_variable_specific = dict(),
                                                     meta_common = dict(),
                                                     mds_datatype = '>f4',
                                                     dlon=0.5, dlat=0.5,
                                                     radius_of_influence = 120000,
                                                     express=1,
                                                     kvarnmidx = 2, # coordinate idx for vertical axis
                                                     # method now is only a place holder.
                                                     # This can be expanded. For example,
                                                     # the global interpolated fields can
                                                     # split to tiles, similarly to
                                                     # the tiled native fields, to
                                                     # reduce the size of each file.
                                                     verbose=True,
                                                     method = ''):
    #%%
    # force mds_files_to_load to be a list (if str is passed)
    if isinstance(mds_files_to_load, str):
        mds_files_to_load = [mds_files_to_load]

    # force time_steps_to_load to be a list (if int is passed)
    if isinstance(time_steps_to_load, int):
        time_steps_to_load = [time_steps_to_load]

    # force tiles_to_load to be a list (if int is passed)
    if isinstance(tiles_to_load, int):
        tiles_to_load = [tiles_to_load]

    # if no specific file data passed, read default metadata from json file
    # -- variable specific meta data
    script_dir = os.path.dirname(__file__)  # <-- absolute dir the script is in
    if not meta_variable_specific:
        meta_variable_rel_path = '../meta_json/ecco_meta_variable.json'
        abs_meta_variable_path = os.path.join(script_dir, meta_variable_rel_path)
        with open(abs_meta_variable_path, 'r') as fp:
            meta_variable_specific = json.load(fp)

    # --- common meta data
    if not meta_common:
        meta_common_rel_path = '../meta_json/ecco_meta_common.json'
        abs_meta_common_path = os.path.join(script_dir, meta_common_rel_path)
        with open(abs_meta_common_path, 'r') as fp:
            meta_common = json.load(fp)

    # info for the regular grid
    new_grid_min_lat = -90+dlat/2.
    new_grid_max_lat = 90-dlat/2.
    new_grid_min_lon = -180+dlon/2.
    new_grid_max_lon = 180-dlon/2.
    new_grid_ny = int((new_grid_max_lat-new_grid_min_lat)/dlat + 1 + 1e-4*dlat)
    new_grid_nx = int((new_grid_max_lon-new_grid_min_lon)/dlon + 1 + 1e-4*dlon)
    j_reg = new_grid_min_lat + np.asarray(range(new_grid_ny))*dlat
    i_reg = new_grid_min_lon + np.asarray(range(new_grid_nx))*dlon
    j_reg_idx = np.asarray(range(new_grid_ny))
    i_reg_idx = np.asarray(range(new_grid_nx))
    if (new_grid_ny < 1) or (new_grid_nx < 1):
        raise ValueError('You need to have at least one grid point for the new grid.')

    # loop through each mds file in mds_files_to_load
    for mds_file in mds_files_to_load:

        # if time steps to load is empty, load all time steps
        if len(time_steps_to_load) == 0:
            # go through each file, pull out the time step, add the time step to a list,
            # and determine the start and end time of each record.

            time_steps_to_load = \
                get_time_steps_from_mds_files(mds_var_dir, mds_file)


        first_meta_fname  = mds_file + '.' + \
            str(time_steps_to_load[0]).zfill(10) + '.meta'


        # get metadata for the first file and determine which variables
        # are present
        meta = xm.utils.parse_meta_file(mds_var_dir + '/' + first_meta_fname)
        vars_here =  meta['fldList']

        if not isinstance(vars_to_load, list):
            vars_to_load = [vars_to_load]

        if 'all' not in vars_to_load:
            num_vars_matching = len(np.intersect1d(vars_to_load, vars_here))

            print ('num vars matching ', num_vars_matching)

            # only proceed if we are sure that the variable we want is in this
            # mds file
            if num_vars_matching == 0:
                print ('none of the variables you want are in ', mds_file)
                print (vars_to_load)
                print (vars_here)

                break
        #%%
        # load the MDS fields
        ecco_dataset_all =  \
                load_ecco_vars_from_mds(mds_var_dir, \
                                         mds_grid_dir,
                                         mds_file,
                                         vars_to_load = vars_to_load,
                                         tiles_to_load=tiles_to_load,
                                         model_time_steps_to_load=time_steps_to_load,
                                         output_freq_code = \
                                              output_freq_code,
                                         meta_variable_specific = \
                                              meta_variable_specific,
                                         meta_common=meta_common,
                                         mds_datatype=mds_datatype,
                                         llc_method = 'bigchunks')

        # do the actual loading. Otherwise, the code may be slow.
        ecco_dataset_all.load()

        # print(ecco_dataset_all.keys())
        # loop through each variable in this dataset,
        for var in ecco_dataset_all.keys():
            print ('    ' + var)
            # obtain the grid information (use fields from time=0)
            # Note that nrtmp would always equal to one,
            # since each outfile will include only one time-record (e.g. daily, monthly avgs.).

            ecco_dataset = ecco_dataset_all.isel(time=[0])

            var_ds = ecco_dataset[var]

            shapetmp = var_ds.shape

            lenshapetmp = len(shapetmp)
            nttmp = 0
            nrtmp = 0
            if(lenshapetmp==4):
                nttmp = shapetmp[0]
                nrtmp = 0
            elif(lenshapetmp==5):
                nttmp = shapetmp[0]
                nrtmp = shapetmp[1]
            else:
                print('Error! ', var_ds.shape)
                sys.exit()

            # Get X,Y of the original grid. They could be XC/YC, XG/YC, XC/YG, etc.
            # Similar for mask.
            # default is XC, YC
            if 'i' in var_ds.coords.keys():
                XX = ecco_dataset['XC']
                XXname = 'XC'
            if 'j' in var_ds.coords.keys():
                YY = ecco_dataset['YC']
                YYname = 'YC'
            varmask = 'maskC'
            iname = 'i'
            jname = 'j'

            if 'i_g' in var_ds.coords.keys():
                XX = ecco_dataset['XG']
                XXname = 'XG'
                varmask = 'maskW'
                iname = 'i_g'
            if 'j_g' in var_ds.coords.keys():
                YY = ecco_dataset['YG']
                YYname = 'YG'
                varmask = 'maskS'
                jname = 'j_g'

            # interpolation
            # To do it fast, set express==1 (default)
            if(express==1):
                orig_lons_1d = XX.values.ravel()
                orig_lats_1d = YY.values.ravel()
                orig_grid = pr.geometry.SwathDefinition(lons=orig_lons_1d,
                                                        lats=orig_lats_1d)

                if (new_grid_ny > 0) and (new_grid_nx > 0):
                    # 1D grid values
                    new_grid_lon, new_grid_lat = np.meshgrid(i_reg, j_reg)

                    # define the lat lon points of the two parts.
                    new_grid  = pr.geometry.GridDefinition(lons=new_grid_lon,
                                                           lats=new_grid_lat)

                    # Get the neighbor info once.
                    # It will be used repeatedly later to resample data
                    # fast for each of the datasets that is based on
                    # the same swath, e.g. for a model variable at different times.
                    valid_input_index, valid_output_index, index_array, distance_array = \
                    pr.kd_tree.get_neighbour_info(orig_grid,
                                               new_grid, radius_of_influence,
                                               neighbours=1)

            # loop through time steps, one at a time.
            for time_step in time_steps_to_load:

                i, = np.where(ecco_dataset_all.timestep == time_step)
                if(verbose):
                    print (ecco_dataset_all.timestep.values)
                    print ('time step ', time_step, i)

                # load the dataset
                ecco_dataset = ecco_dataset_all.isel(time=i)

                # pull out the year, month day, hour, min, sec associated with
                # this time step
                if type(ecco_dataset.time.values) == np.ndarray:
                    cur_time = ecco_dataset.time.values[0]
                else:
                    cur_time = ecco_dataset.time.values
                #print (type(cur_time))
                year, mon, day, hh, mm, ss  = \
                     extract_yyyy_mm_dd_hh_mm_ss_from_datetime64(cur_time)

                print(year, mon, day)

                # if the field comes from an average,
                # extract the time bounds -- we'll use it before we save
                # the variable
                if 'AVG' in output_freq_code:
                    tb = ecco_dataset.time_bnds
                    tb.name = 'tb'

                var_ds = ecco_dataset[var]

                # 3d fields (with Z-axis) for each time record
                if(nttmp != 0 and nrtmp != 0):
                    tmpall = np.zeros((nttmp, nrtmp,new_grid_ny,new_grid_nx))
                    for ir in range(nrtmp): # Z-loop
                        # mask
                        maskloc = ecco_dataset[varmask].values[ir,:]

                        for it in range(nttmp): # time loop
                            # one 2d field at a time
                            var_ds_onechunk = var_ds[it,ir,:]
                            # apply mask
                            var_ds_onechunk.values[maskloc==0]=np.nan
                            orig_field = var_ds_onechunk.values
                            if(express==1):
                                tmp = pr.kd_tree.get_sample_from_neighbour_info(
                                        'nn', new_grid.shape, orig_field,
                                        valid_input_index, valid_output_index,
                                        index_array)

                            else:
                                new_grid_lon, new_grid_lat, tmp = resample_to_latlon(XX, YY, orig_field,
                                                                  new_grid_min_lat,
                                                                  new_grid_max_lat, dlat,
                                                                  new_grid_min_lon,
                                                                  new_grid_max_lon, dlon,
                                                                  nprocs_user=1,
                                                                  mapping_method = 'nearest_neighbor',
                                                                  radius_of_influence=radius_of_influence)
                            tmpall[it,ir,:] = tmp
                # 2d fields (without Z-axis) for each time record
                elif(nttmp != 0):
                    tmpall = np.zeros((nttmp, new_grid_ny,new_grid_nx))
                    # mask
                    maskloc = ecco_dataset[varmask].values[0,:]
                    for it in range(nttmp): # time loop
                        var_ds_onechunk = var_ds[it,:]
                        var_ds_onechunk.values[maskloc==0]=np.nan
                        orig_field = var_ds_onechunk.values
                        if(express==1):
                            tmp = pr.kd_tree.get_sample_from_neighbour_info(
                                    'nn', new_grid.shape, orig_field,
                                    valid_input_index, valid_output_index,
                                    index_array)
                        else:
                            new_grid_lon, new_grid_lat, tmp = resample_to_latlon(XX, YY, orig_field,
                                                              new_grid_min_lat,
                                                              new_grid_max_lat, dlat,
                                                              new_grid_min_lon,
                                                              new_grid_max_lon, dlon,
                                                              nprocs_user=1,
                                                              mapping_method = 'nearest_neighbor',
                                                              radius_of_influence=radius_of_influence)
                        tmpall[it,:] = tmp

                else:
                    print('Error! both nttmp and nrtmp are zeros.')
                    sys.exit()
                # set the coordinates for the new (regular) grid
                # 2d fields
                if(lenshapetmp==4):
                    var_ds_reg = xr.DataArray(tmpall,
                                              coords = {'time': var_ds.coords['time'].values,
                                                        'j': j_reg_idx,
                                                        'i': i_reg_idx},\
                                              dims = ('time', 'j', 'i'))
                # 3d fields
                elif(lenshapetmp==5):
                    # Get the variable name (kvarnm) for Z-axis: k, k_l
                    kvarnm = list(var_ds.coords.keys())[kvarnmidx]

                    if(kvarnm[0]!='k'):
                        kvarnmidxnew = kvarnmidx
                        for iktmp, ktmp in enumerate(var_ds.coords.keys()):
                            if(ktmp[0]=='k'):
                                kvarnmidxnew = iktmp
                        if(kvarnmidxnew==kvarnmidx):
                            print('Error! Seems ', kvarnm, ' is not the vertical axis.')
                            print(var_ds)
                            sys.exit()
                        else:
                            kvarnmidx = kvarnmidxnew
                            kvarnm = list(var_ds.coords.keys())[kvarnmidx]

                    var_ds_reg = xr.DataArray(tmpall,
                                              coords = {'time': var_ds.coords['time'].values,
                                                        kvarnm: var_ds.coords[kvarnm].values,
                                                        'j': j_reg_idx,
                                                        'i': i_reg_idx},\
                                              dims = ('time', kvarnm,'j', 'i'))
                # set the attrs for the new (regular) grid
                var_ds_reg['j'].attrs = var_ds[jname].attrs
                var_ds_reg['i'].attrs = var_ds[iname].attrs
                var_ds_reg['j'].attrs['long_name'] = 'y-dimension'
                var_ds_reg['i'].attrs['long_name'] = 'x-dimension'
                var_ds_reg['j'].attrs['swap_dim'] = 'latitude'
                var_ds_reg['i'].attrs['swap_dim'] = 'longitude'

                var_ds_reg['latitude'] = (('j'), j_reg)
                var_ds_reg['longitude'] = (('i'), i_reg)
                var_ds_reg['latitude'].attrs = ecco_dataset[YYname].attrs
                var_ds_reg['longitude'].attrs = ecco_dataset[XXname].attrs
                var_ds_reg['latitude'].attrs['long_name'] = "latitude at center of grid cell"
                var_ds_reg['longitude'].attrs['long_name'] = "longitude at center of grid cell"

                var_ds_reg.name = var_ds.name

                #keys_to_drop = ['tile','j','i','XC','YC','XG','YG']
                # drop these ancillary fields -- they are in grid anyway
                keys_to_drop = ['CS','SN','Depth','rA','PHrefC','hFacC',\
                                'maskC','drF', 'dxC', 'dyG', 'rAw', 'hFacW',\
                                'rAs','hFacS','maskS','dxG','dyC', 'maskW', \
                                'tile','XC','YC','XG','YG']

                for key_to_drop in keys_to_drop:
                    #print (key_to_drop)
                    if key_to_drop in var_ds.coords.keys():
                        var_ds = var_ds.drop(key_to_drop)

                # any remaining fields, e.g. time, would be included in the interpolated fields.
                for key_to_add in var_ds.coords.keys():
                    if(key_to_add not in var_ds_reg.coords.keys()):
                        if(key_to_add != 'i_g' and key_to_add != 'j_g'):
                            var_ds_reg[key_to_add] = var_ds[key_to_add]

                # use the same global attributes
                var_ds_reg.attrs = var_ds.attrs


                #print(var_ds.coords.keys())
                #%%

                # create the new file path name
                if 'MON' in output_freq_code:

                    fname = var + '_' +  str(year) + '_' + str(mon).zfill(2) + '.nc'

                    newpath = output_dir + '/' + var + '/' + \
                        str(year) + '/'

                elif ('WEEK' in output_freq_code) or \
                     ('DAY' in output_freq_code):

                    fname = var + '_' + \
                            str(year) + '_' + \
                            str(mon).zfill(2) + '_' + \
                            str(day).zfill(2) +  '.nc'
                    d0 = datetime.datetime(year, 1,1)
                    d1 = datetime.datetime(year, mon, day)
                    doy = (d1-d0).days + 1

                    newpath = output_dir + '/' + var + '/' + \
                        str(year) + '/' + str(doy).zfill(3)

                elif 'YEAR' in output_freq_code:

                     fname = var + '_' + str(year) + '.nc'

                     newpath = output_dir + '/' + var + '/' + \
                        str(year)

                else:
                    print ('no valid output frequency code specified')
                    print ('saving to year/mon/day/tile')
                    fname = var + '_' + \
                        str(year) + '_' + \
                        str(mon).zfill(2) + '_' + \
                        str(day).zfill(2) + '.nc'
                    d0 = datetime.datetime(year, 1,1)
                    d1 = datetime.datetime(year, mon, day)
                    doy = (d1-d0).days + 1

                    newpath = output_dir + '/' + var + '/' + \
                        str(year) + '/' + str(doy).zfill(3)


                # create the path if it does not exist/
                if not os.path.exists(newpath):
                    os.makedirs(newpath)

                # convert the data array to a dataset.
                tmp = var_ds_reg.to_dataset()

                # add the time bounds field back in if we have an
                # average field
                if 'AVG' in output_freq_code:
                    tmp = xr.merge((tmp, tb))
                    tmp = tmp.drop('tb')

                # put the metadata back in
                tmp.attrs = ecco_dataset.attrs

                # update the temporal and geospatial metadata
                tmp = update_ecco_dataset_geospatial_metadata(tmp)
                tmp = update_ecco_dataset_temporal_coverage_metadata(tmp)

                # save to netcdf.  it's that simple.
                if(verbose):
                    print ('saving to %s' % newpath + '/' + fname)
                # do not include _FillValue
                encoding = {i: {'_FillValue': False} for i in tmp.variables.keys()}

                tmp.to_netcdf(newpath + '/' + fname, engine='netcdf4',encoding=encoding)



#%%
    ecco_dataset_all.close()
    return ecco_dataset, tmp
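
# --- Standalone sketch (not part of the original source): writing a dataset to netCDF
# while disabling _FillValue for every variable, as done at the end of the function above.
import numpy as np
import xarray as xr

ds = xr.Dataset({'SSH': (('j', 'i'), np.random.rand(4, 5))},
                coords={'latitude': ('j', np.linspace(-60.0, 60.0, 4)),
                        'longitude': ('i', np.linspace(-150.0, 150.0, 5))})
encoding = {name: {'_FillValue': False} for name in ds.variables}
ds.to_netcdf('regular_grid_example.nc', engine='netcdf4', encoding=encoding)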
Example #15
File: _reduce.py  Project: dgergel/xclim
def kmeans_reduce_ensemble(
    data: xarray.DataArray,
    *,
    method: dict = None,
    make_graph: bool = MPL_INSTALLED,
    max_clusters: Optional[int] = None,
    variable_weights: Optional[np.ndarray] = None,
    model_weights: Optional[np.ndarray] = None,
    sample_weights: Optional[np.ndarray] = None,
    random_state: Optional[Union[int, np.random.RandomState]] = None,
) -> Tuple[list, np.ndarray, dict]:
    """Return a sample of ensemble members using k-means clustering.

    The algorithm attempts to reduce the total number of ensemble members while maintaining adequate coverage of
    the ensemble uncertainty in a N-dimensional data space. K-Means clustering is carried out on the input
    selection criteria data-array in order to group individual ensemble members into a reduced number of similar groups.
    Subsequently a single representative simulation is retained from each group.

    Parameters
    ----------
    data : xr.DataArray
      Selection criteria data : 2-D xr.DataArray with dimensions 'realization' (N) and
      'criteria' (P). These are the values used for clustering. Realizations represent the individual original
      ensemble members and criteria the variables/indicators used in the grouping algorithm.
    method : dict
      Dictionary defining selection method and associated value when required. See Notes.
    max_clusters : Optional[int]
      Maximum number of members to include in the output ensemble selection.
      When using 'rsq_optimize' or 'rsq_cutoff' methods, limit the final selection to a maximum number even if method
      results indicate a higher value. Defaults to N.
    variable_weights: Optional[np.ndarray]
      An array of size P. This weighting can be used to influence the weight of the climate indices (criteria dimension)
      on the clustering itself.
    model_weights: Optional[np.ndarray]
      An array of size N. This weighting can be used to influence which realization is selected
      from within each cluster. This parameter has no influence on the clustering itself.
    sample_weights: Optional[np.ndarray]
      An array of size N. sklearn.cluster.KMeans() sample_weight parameter. This weighting can be
      used to influence the weight of simulations on the clustering itself.
      See: https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
    random_state: Optional[Union[int, np.random.RandomState]]
      sklearn.cluster.KMeans() random_state parameter. Determines random number generation for centroid
      initialization. Use an int to make the randomness deterministic.
      See: https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
    make_graph: bool
      output a dictionary of inputs for displaying a plot of R² vs. the number of clusters.
      Defaults to True if matplotlib is installed in the runtime environment.

    Notes
    -----
    Parameters for method in call must follow these conventions:

    rsq_optimize
        Calculate the coefficient of determination (R²) of cluster results for n = 1 to N clusters and determine
        an optimal number of clusters that balances cost / benefit tradeoffs. This is the default setting.
        See supporting information S2 text in https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0152495

        method={'rsq_optimize':None}

    rsq_cutoff
        Calculate the coefficient of determination (R²) of cluster results for n = 1 to N clusters and determine
        the minimum numbers of clusters needed for R² > val.

        val : float between 0 and 1. R² value that must be exceeded by clustering results.

        method={'rsq_cutoff': val}

    n_clusters
        Create a user determined number of clusters.

        val : integer between 1 and N

        method={'n_clusters': val}

    Returns
    -------
    list
      Selected model indexes (positions)
    np.ndarray
      KMeans clustering results
    dict
      Dictionary of input data for creating R² profile plot. 'None' when make_graph=False

    References
    ----------
    Casajus et al. 2016. https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0152495

    Examples
    --------
    >>> import xclim
    >>> from xclim.ensembles import create_ensemble, kmeans_reduce_ensemble
    >>> from xclim.indices import hot_spell_frequency

    Start with ensemble datasets for temperature:

    >>> ensTas = create_ensemble(temperature_datasets)

    Calculate selection criteria -- use annual climate change Δ fields between the 2020-2050 and 1990-2019 periods sliced below.
    First, average annual temperature:

    >>> tg = xclim.atmos.tg_mean(tas=ensTas.tas)
    >>> his_tg = tg.sel(time=slice('1990','2019')).mean(dim='time')
    >>> fut_tg = tg.sel(time=slice('2020','2050')).mean(dim='time')
    >>> dtg = fut_tg - his_tg

    Then, hot spell frequency as a second indicator:

    >>> hs = hot_spell_frequency(tasmax=ensTas.tas, window=2, thresh_tasmax='10 degC')
    >>> his_hs = hs.sel(time=slice('1990','2019')).mean(dim='time')
    >>> fut_hs = hs.sel(time=slice('2020','2050')).mean(dim='time')
    >>> dhs = fut_hs - his_hs

    Create a selection criteria xr.DataArray:

    >>> from xarray import concat
    >>> crit = concat((dtg, dhs), dim='criteria')

    Finally, create clusters and select realization ids of reduced ensemble:

    >>> ids, cluster, fig_data = kmeans_reduce_ensemble(data=crit, method={'rsq_cutoff':0.9}, random_state=42, make_graph=False)
    >>> ids, cluster, fig_data = kmeans_reduce_ensemble(data=crit, method={'rsq_optimize':None}, random_state=42, make_graph=True)
    """
    if make_graph:
        fig_data = {}
        if max_clusters is not None:
            fig_data["max_clusters"] = max_clusters
    else:
        fig_data = None

    data = data.transpose("realization", "criteria")
    # initialize the variables
    n_sim = np.shape(data)[0]  # number of simulations
    n_idx = np.shape(data)[1]  # number of indicators

    # normalize the data matrix
    z = xarray.DataArray(
        scipy.stats.zscore(data, axis=0, ddof=1), coords=data.coords
    )  # ddof=1 to be the same as Matlab's zscore

    if sample_weights is None:
        sample_weights = np.ones(n_sim)
    else:
        # KMeans sample weights of zero cause errors occasionally - set to 1e-15 for now
        sample_weights[sample_weights == 0] = 1e-15
    if model_weights is None:
        model_weights = np.ones(n_sim)
    if variable_weights is None:
        variable_weights = np.ones(shape=(1, n_idx))
    if max_clusters is None:
        max_clusters = n_sim
    if method is None:
        method = {"rsq_optimize": None}

    # normalize the weights (note: I don't know if this is really useful... this was in the MATLAB code)
    sample_weights = sample_weights / np.sum(sample_weights)
    model_weights = model_weights / np.sum(model_weights)
    variable_weights = variable_weights / np.sum(variable_weights)

    z = z * variable_weights
    rsq = _calc_rsq(z, method, make_graph, n_sim, random_state, sample_weights)

    n_clusters = _get_nclust(method, n_sim, rsq, max_clusters)

    if make_graph:
        fig_data["method"] = method
        fig_data["rsq"] = rsq
        fig_data["n_clusters"] = n_clusters
        fig_data["realizations"] = n_sim

    # Final k-means clustering with 1000 iterations to avoid instabilities in the choice of final scenarios
    kmeans = KMeans(
        n_clusters=n_clusters, n_init=1000, max_iter=600, random_state=random_state
    )
    # we use 'fit_' only once, otherwise it computes everything again
    clusters = kmeans.fit_predict(z, sample_weight=sample_weights)

    # squared distance to centroids
    d = np.square(
        kmeans.transform(z)
    )  # squared distance between each point and each centroid

    out = np.empty(
        shape=n_clusters
    )  # prepare an empty array in which to store the results
    r = np.arange(n_sim)

    # in each cluster, find the closest (weighted) simulation and select it
    for i in range(n_clusters):
        d_i = d[
            clusters == i, i
        ]  # distance to the centroid for all simulations within the cluster 'i'
        if d_i.shape[0] >= 2:
            if d_i.shape[0] == 2:
                sig = 1
            else:
                sig = np.std(
                    d_i, ddof=1
                )  # standard deviation of those distances (ddof = 1 gives the same as Matlab's std function)

            like = (
                scipy.stats.norm.pdf(d_i, 0, sig) * model_weights[clusters == i]
            )  # weighted likelihood

            argmax = np.argmax(like)  # index of the maximum likelihood

        else:
            argmax = 0

        r_clust = r[
            clusters == i
        ]  # index of the cluster simulations within the full ensemble

        out[i] = r_clust[argmax]

    out = sorted(out.astype(int))
    # display graph - don't block code execution

    return out, clusters, fig_data
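
To complement the docstring examples above, a small sketch (on synthetic criteria) of the fixed-size 'n_clusters' method described in the Notes; the array values below are made up:

import numpy as np
import xarray as xr
from xclim.ensembles import kmeans_reduce_ensemble

# toy selection criteria: 20 realizations x 3 criteria
crit = xr.DataArray(np.random.rand(20, 3), dims=("realization", "criteria"))
ids, clusters, fig_data = kmeans_reduce_ensemble(
    data=crit, method={"n_clusters": 5}, random_state=0, make_graph=False
)
print(ids)  # positions of the 5 retained realizations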
Example #16
0
    def run(self, hotstart=False):
        '''Performs an AWARE simulation and returns the results (time series).
        
        Returns
        -------
        pandas DataFrame including the results
        '''
        if not self.is_initialized and not self.is_ready:
            print(
                'Error: Model has not been initialized or prepared with initial states.'
            )
            return
        if hotstart:
            if self.is_ready:
                print('AWARE hotstart: Resuming last run ...')
            else:
                print(
                    'Warning: cannot resume run in hotstart mode. Using default initialisation!'
                )
        else:
            self.reset_state_vars()

        start_date = pd.Timestamp(
            self.config.start_date).to_period('M').to_timestamp('M')
        end_date = pd.Timestamp(
            self.config.end_date).to_period('M').to_timestamp('M')
        assert start_date in self.meteo.dates
        assert end_date in self.meteo.dates

        dates = pd.date_range(start=start_date, end=end_date, freq='M')

        rts_catchments = collections.OrderedDict(
        )  # results including upstream areas
        rts_catchments_sub_mean = collections.OrderedDict(
        )  # results sub-catchment only
        rts = pd.DataFrame(index=dates,
                           columns=self._results_time_series_columns,
                           dtype=float)
        for cid in self.catchment_ids:
            rts_catchments[cid] = rts.copy()
            rts_catchments_sub_mean[cid] = rts.copy()

        for date in dates:
            print(date)

            temp, precip = self.meteo.get_meteo(date)
            glaciers = self.state_glacierarea.get_state(
            )  # couple glacier model here!

            for cid in self.computation_order:
                params = self.config.params.catchments[cid]
                catchment = self.catchments[cid]
                cpx = self.catchments[cid].pixels

                cswe, snowmelt, snow_outflow, snowfall, rainfall, melt_avail = catchment.melt.melt(
                    self.state_swe.get_state(cpx),
                    precip[cpx],
                    temp[cpx],
                    params.ddf_snow,
                    glacier_fraction=None)
                self.state_swe.set_state(cswe, cpx)

                snow_outflow_unglacierized = snow_outflow * (1. -
                                                             glaciers[cpx])
                snow_outflow_glacierized = snow_outflow * glaciers[cpx]

                ice_melt_factor = np.minimum(
                    glaciers[cpx], glaciers[cpx] * melt_avail *
                    params.ddf_ice / params.ddf_snow)
                cicewe, icemelt, ice_outflow, _, _, _ = catchment.melt.melt(
                    self.state_icewe.get_state(cpx),
                    precip[cpx] * 0.0,
                    temp[cpx],
                    params.ddf_ice,
                    glacier_fraction=ice_melt_factor)
                self.state_icewe.set_state(cicewe, cpx)

                glacier_outflow = ice_outflow + snow_outflow_glacierized

                # get groundwater state
                gw_storage = self.state_groundwater.get_state(cpx)

                if self.config.enable_soil_model:
                    et_pot = catchment.evapotranspiration.monthly_evapotranspiration(
                        date, temp[cpx], n_etp_summer=params.factor_etp_summer)
                    et_pot *= (1. - glaciers[cpx])

                    csms, perc, runoff_d, et_act = catchment.soil.soil_water_balance(
                        snow_outflow_unglacierized, et_pot,
                        self.state_soilmoisture.get_state(cpx))
                    # sms[cpx] = csms
                    self.state_soilmoisture.set_state(csms, cpx)
                    #percolation = perc.mean()
                    direct_runoff = runoff_d.mean()

                    baseflow, gw_storage = catchment.groundwater.groundwater_model(
                        gw_storage, perc)
                    runoff = (baseflow + direct_runoff) + glacier_outflow
                else:
                    melt_only = snow_outflow_glacierized + glacier_outflow
                    baseflow, gw_storage = catchment.groundwater.groundwater_model(
                        gw_storage, params.gw_n * melt_only)
                    runoff = baseflow + (1 - params.gw_n) * melt_only

                    et_pot = np.zeros(cswe.shape) * np.nan
                    et_act = np.zeros(cswe.shape) * np.nan
                    direct_runoff = np.nan

                self.state_groundwater.set_state(gw_storage, cpx)

                rts_cur = rts_catchments_sub_mean[cid].loc[date]

                # calculate averages for sub-catchment without tributaries
                rts_cur.temp = temp[cpx].mean()
                rts_cur.precip = precip[cpx].mean()
                rts_cur.snowfall = snowfall.mean()
                rts_cur.rainfall = rainfall.mean()
                rts_cur.swe = self.state_swe.get_state(cpx).mean()
                rts_cur.snowmelt = snowmelt.mean()
                rts_cur.icemelt = icemelt.mean()
                rts_cur.melt = rts_cur.snowmelt + rts_cur.icemelt
                rts_cur.snow_outflow = snow_outflow.mean()
                rts_cur.ice_outflow = ice_outflow.mean()
                rts_cur.glacier_outflow = glacier_outflow.mean()
                rts_cur.runoff = runoff.mean()
                rts_cur.sms = self.state_soilmoisture.get_state(cpx).mean()
                rts_cur.et_pot = et_pot.mean()
                rts_cur.et = et_act.mean()
                rts_cur.baseflow = baseflow.mean()
                rts_cur.direct_runoff = direct_runoff
                # rts_cur.icewe = self.state_icewe.get_state(cpx).mean() # activate if required

                # prepare averages
                rts_catchments[cid].loc[date] = self.catchments[
                    cid].area * rts_catchments_sub_mean[cid].loc[date]

                # add results of tributaries
                for ii in range(0, len(self.catchments[cid].upstream_ids)):
                    sub_id = self.catchments[cid].upstream_ids[ii]
                    sub_n = self.catchments[cid].upstream_areas[ii]

                    # tributaries
                    rts_cur_sub = rts_catchments_sub_mean[sub_id].loc[date]
                    # set results to total upstream area
                    rts_cur = rts_catchments[cid].loc[date]

                    rts_cur.temp += sub_n * rts_cur_sub.temp
                    rts_cur.precip += sub_n * rts_cur_sub.precip
                    rts_cur.snowfall += sub_n * rts_cur_sub.snowfall
                    rts_cur.rainfall += sub_n * rts_cur_sub.rainfall
                    rts_cur.swe += sub_n * rts_cur_sub.swe
                    rts_cur.snowmelt += sub_n * rts_cur_sub.snowmelt
                    rts_cur.icemelt += sub_n * rts_cur_sub.icemelt
                    rts_cur.melt += sub_n * rts_cur_sub.melt
                    rts_cur.snow_outflow += sub_n * rts_cur_sub.snow_outflow
                    rts_cur.ice_outflow += sub_n * rts_cur_sub.ice_outflow
                    rts_cur.glacier_outflow += sub_n * rts_cur_sub.glacier_outflow
                    rts_cur.runoff += sub_n * rts_cur_sub.runoff
                    rts_cur.sms += sub_n * rts_cur_sub.sms
                    rts_cur.et_pot += sub_n * rts_cur_sub.et_pot
                    rts_cur.et += sub_n * rts_cur_sub.et
                    rts_cur.baseflow += sub_n * rts_cur_sub.baseflow
                    rts_cur.direct_runoff += sub_n * rts_cur_sub.direct_runoff

            # remember timestamp
            self.timestamp = date

            # write stats if required
            if date in self.write_dates:
                self.write_states(add_timestamp=True, verbose=True)

        results = munch.Munch()
        variables = {
            k: xr.DataArray(v, dims=['time', 'var'])
            for k, v in rts_catchments.items()
        }
        results.ts = xr.Dataset(variables).to_array(
            dim='catchment').to_dataset('var')

        return results
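
The final assembly step above (an OrderedDict of per-catchment DataFrames turned into one xr.Dataset indexed by catchment) can be sketched in isolation with dummy numbers; the column names below are placeholders:

import collections
import numpy as np
import pandas as pd
import xarray as xr

dates = pd.date_range('2000-01-31', periods=3, freq='M')
cols = ['runoff', 'baseflow']
rts_catchments = collections.OrderedDict()
for cid in ['c1', 'c2']:
    rts_catchments[cid] = pd.DataFrame(np.random.rand(len(dates), len(cols)),
                                       index=dates, columns=cols)

variables = {k: xr.DataArray(v, dims=['time', 'var'])
             for k, v in rts_catchments.items()}
ts = xr.Dataset(variables).to_array(dim='catchment').to_dataset('var')
print(ts)  # variables 'runoff'/'baseflow' with dims ('catchment', 'time')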
Example #17
0
File: topo.py  Project: rdenham/geowombat
    def norm_topo(self,
                  data,
                  elev,
                  solar_za,
                  solar_az,
                  slope=None,
                  aspect=None,
                  method='empirical-rotation',
                  slope_thresh=2,
                  nodata=0,
                  elev_nodata=-32768,
                  scale_factor=1,
                  angle_scale=0.01,
                  n_jobs=1,
                  robust=False,
                  min_samples=100,
                  slope_kwargs=None,
                  aspect_kwargs=None,
                  band_coeffs=None):
        """
        Applies topographic normalization

        Args:
            data (2d or 3d DataArray): The data to normalize, in the range 0-1.
            elev (2d DataArray): The elevation data.
            solar_za (2d DataArray): The solar zenith angles (degrees).
            solar_az (2d DataArray): The solar azimuth angles (degrees).
            slope (2d DataArray): The slope data. If not given, slope is calculated from ``elev``.
            aspect (2d DataArray): The aspect data. If not given, aspect is calculated from ``elev``.
            method (Optional[str]): The method to apply. Choices are ['c', 'empirical-rotation'].
            slope_thresh (Optional[float or int]): The slope threshold. Any samples with
                values < ``slope_thresh`` are not adjusted.
            nodata (Optional[int or float]): The 'no data' value for ``data``.
            elev_nodata (Optional[float or int]): The 'no data' value for ``elev``.
            scale_factor (Optional[float]): A scale factor to apply to the input data.
            angle_scale (Optional[float]): The angle scale factor.
            n_jobs (Optional[int]): The number of parallel workers for ``LinearRegression.fit``.
            robust (Optional[bool]): Whether to fit a robust regression.
            min_samples (Optional[int]): The minimum number of samples required to fit a regression.
            slope_kwargs (Optional[dict]): Keyword arguments passed to ``gdal.DEMProcessingOptions``
                to calculate the slope.
            aspect_kwargs (Optional[dict]): Keyword arguments passed to ``gdal.DEMProcessingOptions``
                to calculate the aspect.
            band_coeffs (Optional[dict]): Slope and intercept coefficients for each band.

        References:

            See :cite:`teillet_etal_1982` for the C-correction method.
            See :cite:`tan_etal_2010` for the Empirical Rotation method.

        Returns:
            ``xarray.DataArray``

        Examples:
            >>> import geowombat as gw
            >>> from geowombat.radiometry import Topo
            >>>
            >>> topo = Topo()
            >>>
            >>> # Example where pixel angles are stored in separate GeoTiff files
            >>> with gw.config.update(sensor='l7', scale_factor=0.0001, nodata=0):
            >>>
            >>>     with gw.open('landsat.tif') as src, \
            >>>         gw.open('srtm') as elev, \
            >>>             gw.open('solarz.tif') as solarz, \
            >>>                 gw.open('solara.tif') as solara:
            >>>
            >>>         src_norm = topo.norm_topo(src, elev, solarz, solara, n_jobs=-1)
        """

        method = method.strip().lower()

        if method not in ['c', 'empirical-rotation']:

            logger.exception(
                "  Currently, the only supported methods are 'c' and 'empirical-rotation'."
            )
            raise NameError

        attrs = data.attrs.copy()

        if not nodata:
            nodata = data.gw.nodata

        if scale_factor == 1.0:
            scale_factor = data.gw.scale_factor

        # Scale the reflectance data
        if scale_factor != 1:
            data = data * scale_factor

        if not slope_kwargs:

            slope_kwargs = dict(format='MEM',
                                computeEdges=True,
                                alg='ZevenbergenThorne',
                                slopeFormat='degree')

        if not aspect_kwargs:

            aspect_kwargs = dict(format='MEM',
                                 computeEdges=True,
                                 alg='ZevenbergenThorne',
                                 trigonometric=False,
                                 zeroForFlat=True)

        slope_kwargs['format'] = 'MEM'
        slope_kwargs['slopeFormat'] = 'degree'
        aspect_kwargs['format'] = 'MEM'

        # Force to SRTM resolution
        proc_dims = (int((data.gw.ncols * data.gw.cellx) / 30.0),
                     int((data.gw.nrows * data.gw.celly) / 30.0))

        w = int((5 * 30.0) / data.gw.celly)

        if w % 2 == 0:
            w += 1

        if isinstance(slope, xr.DataArray):
            slope_deg_fd = slope.squeeze().data
        else:

            slope_deg = calc_slope_delayed(elev.squeeze().data,
                                           proc_dims=proc_dims,
                                           w=w,
                                           **slope_kwargs)
            slope_deg_fd = da.from_delayed(slope_deg,
                                           (data.gw.nrows, data.gw.ncols),
                                           dtype='float64')

        if isinstance(aspect, xr.DataArray):
            aspect_deg_fd = aspect.squeeze().data
        else:

            aspect_deg = calc_aspect_delayed(elev.squeeze().data,
                                             proc_dims=proc_dims,
                                             w=w,
                                             **aspect_kwargs)
            aspect_deg_fd = da.from_delayed(aspect_deg,
                                            (data.gw.nrows, data.gw.ncols),
                                            dtype='float64')

        nodata_samps = da.where(
            (elev.data == elev_nodata) | (data.max(dim='band').data == nodata)
            | (slope_deg_fd < slope_thresh), 1, 0)

        slope_rad = da.deg2rad(slope_deg_fd)
        aspect_rad = da.deg2rad(aspect_deg_fd)

        # Convert degrees to radians
        solar_za = da.deg2rad(solar_za.squeeze().data * angle_scale)
        solar_az = da.deg2rad(solar_az.squeeze().data * angle_scale)

        cos_z = da.cos(solar_za)

        # Calculate the illumination angle
        il = da.cos(slope_rad) * cos_z + da.sin(slope_rad) * da.sin(
            solar_za) * da.cos(solar_az - aspect_rad)

        sr_adj = list()
        for band in data.band.values.tolist():

            if method == 'c':

                sr_adj.append(
                    self._method_c(
                        data.sel(band=band).data, il, cos_z, nodata_samps,
                        min_samples, n_jobs, robust, band_coeffs, band))

            else:

                sr_adj.append(
                    self._method_empirical_rotation(
                        data.sel(band=band).data, il, cos_z, nodata_samps,
                        min_samples, n_jobs, robust, band_coeffs, band))

        adj_data = xr.DataArray(data=da.concatenate(sr_adj).reshape(
            (data.gw.nbands, data.gw.nrows, data.gw.ncols)),
                                coords={
                                    'band': data.band.values.tolist(),
                                    'y': data.y.values,
                                    'x': data.x.values
                                },
                                dims=('band', 'y', 'x'),
                                attrs=data.attrs)

        attrs['calibration'] = 'Topographic-adjusted'
        attrs['nodata'] = nodata
        attrs['drange'] = (0, 1)

        adj_data.attrs = attrs

        return adj_data
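
The dask-based illumination term computed above reduces, per pixel, to the usual hillshade-style formula; a tiny scalar sketch with made-up angles (all in degrees):

import numpy as np

slope_deg, aspect_deg = 12.0, 210.0                 # terrain
solar_zenith_deg, solar_azimuth_deg = 35.0, 145.0   # sun position

slope = np.deg2rad(slope_deg)
aspect = np.deg2rad(aspect_deg)
sza = np.deg2rad(solar_zenith_deg)
saz = np.deg2rad(solar_azimuth_deg)

# illumination angle: cos(slope)*cos(sza) + sin(slope)*sin(sza)*cos(saz - aspect)
il = np.cos(slope) * np.cos(sza) + np.sin(slope) * np.sin(sza) * np.cos(saz - aspect)
print(il)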
Example #18
0
def da1():
    """Small plain two-dimensional xr.DataArray."""
    return xr.DataArray([[0, 1], [3, 4], [6, 7]], dims=("x", "y"))
Example #19
0
]
#'gaussian',
#'general_gaussian', 'chebwin',
#'slepian', 'cosine', 'hann']

sig_xyt = example_xyt()
sig_xyt_wth_coast = example_xyt(boundaries=True)

# Testing array
shape = (48, 30, 40)
dims = ('time', 'y', 'x')
ctime = pd.date_range('2000-01-01', periods=48, freq='M')
cy = np.linspace(0.01, 0.5, 30)
cx = np.pi * np.linspace(0, 2, 40)
coords = {'time': ctime, 'y': cy, 'x': cx}
dummy_array = xr.DataArray(np.random.random(shape), dims=dims, coords=coords)


def test_set_nyquist():
    w = dummy_array.window
    w.set(dim=['y', 'x'])
    assert w.fnyq == {
        'x': 1. / (2. * (cx[1] - cx[0])),
        'y': 1. / (2. * (cy[1] - cy[0]))
    }


def test_init_window():
    sig_xyt.window

Example #20
0
def da2():
    """Small plain two-dimensional xr.DataArray with different values compared to
    da1."""
    return xr.DataArray([[0, 1], [5, 6], [6, 7]], dims=("x", "y"))
Example #21
0
def spinup_fn(run,
              field,
              months_list,
              filenames=['atmos_pentad'],
              plevs=[0., 2000., 'all']):

    # Function to open files for a specified month range and filename.
    # Takes annual means
    def open_files(run, months, filename):
        name_temp = '/scratch/rg419/Data_moist/' + run + '/run%03d/' + filename + '.nc'
        names = [name_temp % m for m in range(months[0], months[1])]
        #read data into xarray
        data = xr.open_mfdataset(names,
                                 decode_times=False,
                                 chunks={'time': 30})
        data.coords['year'] = data.time // 360 + 1
        field_yr = data[field].groupby('year').mean(('time'))

        return field_yr, data

    # Combine data from files with different names (eg. atmos_monthly and atmos_pentad) into one time series
    arrays = []
    i = 0
    for filename in filenames:
        field_yr, data = open_files(run, months_list[i], filename)
        arrays.append(field_yr)
        i = i + 1

    field_yr = xr.concat(arrays, dim='year')

    # Check if data is 3D and if so integrate over specified levels
    try:
        p_levs = data.pfull[(data.pfull >= plevs[0])
                            & (data.pfull <= plevs[1])]
        dp = xr.DataArray(np.diff(data.phalf),
                          [('pfull', field_yr.pfull)]) * 100.
        field_yr = (field_yr * dp).sel(pfull=p_levs).sum('pfull') / 9.8
        print('3D field, vertical integral taken')
        three_d = True

    except:
        print('2D field')
        three_d = False

    # Calculate cell areas and take area mean
    area = cell_area(42, '/scratch/rg419/GFDL_model/GFDLmoistModel/')
    area_xr = xr.DataArray(area, [('lat', data.lat), ('lon', data.lon)])
    field_av = (field_yr * area_xr).sum(('lat', 'lon')) / area_xr.sum(
        ('lat', 'lon'))

    # Plot up result and save
    field_av.plot()
    plt.xlabel('Year')
    plt.ylabel(field)
    if three_d:
        plotname = '/scratch/rg419/plots/spinup/' + field + '_' + str(
            plevs[2]) + '_spinup_' + run + '.png'
    else:
        plotname = '/scratch/rg419/plots/spinup/' + field + '_spinup_' + run + '.png'
    plt.savefig(plotname)
    plt.close()

    return field_av
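
The area-weighted mean at the end of spinup_fn can be sketched with synthetic data; here cos(lat) weights stand in for the cell areas returned by cell_area, which is specific to that model setup:

import numpy as np
import xarray as xr

lat = np.linspace(-87.5, 87.5, 36)
lon = np.linspace(0.0, 355.0, 72)
field = xr.DataArray(np.random.rand(lat.size, lon.size),
                     coords=[('lat', lat), ('lon', lon)])

# cos(lat) as a stand-in for grid-cell area
area_xr = xr.DataArray(np.cos(np.deg2rad(lat))[:, None] * np.ones(lon.size),
                       coords=[('lat', lat), ('lon', lon)])

field_av = (field * area_xr).sum(('lat', 'lon')) / area_xr.sum(('lat', 'lon'))
print(float(field_av))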
Example #22
0
def small_verif_da():
    """Very small simulation of a verification product."""
    time = [1990, 1991, 1992, 1993, 1994]
    return xr.DataArray(np.random.rand(len(time)),
                        dims=["time"],
                        coords=[time])
Example #23
0
def compare_XCO2(mod_dir, sat_dir, year, month, str1_mod, str1_sat, mode):
    mod_file = str1_mod + str(year) + month_string(month) + '.nc'
    sat_file = str1_sat + str(year) + month_string(month) + '.nc'
    ds_mod = xr.open_dataset(mod_dir + mod_file)
    ds_sat = xr.open_dataset(sat_dir + sat_file)

    # mask data: predefined region, land vs. ocean, latitudinal bands
    mask_directory = '/geos/u73/msadiq/GEOS-Chem/analysis/inversion/data/'
    mask_name = 'flatten_mask.nc'
    ds_mask = xr.open_dataset(mask_directory + mask_name)

    avg_kern = ds_sat['xco2_averaging_kernel']
    co2_pr = ds_sat['co2_profile_apriori']
    pres_wgt = ds_sat['pressure_weight']
    op_mode = ds_sat['operation_mode']  # 0=Nadir, 1=Glint
    mode_mask = (op_mode - 1) * -1  # mask to mask out glint, 1=Nadir, 0=Glint

    # model simulated CO2 concentration
    co2_model = ds_mod['SpeciesConc_CO2'] * 1e-3  # unit: ppbv to ppmv
    co2_profile = pres_wgt * (
        1 - avg_kern) * co2_pr + pres_wgt * avg_kern * co2_model
    # sum up to get column CO2
    xco2_model = co2_profile.sum(
        dim='levels')  # sum along vertical axis to get column-averaged XCO2 (ppm)

    xco2_model_mode = xco2_model * mode_mask  # extract desired mode of observation: Nadir
    xco2_oco2_mode = ds_sat['xco2'] * mode_mask

    obs_series = xco2_oco2_mode.values
    model_series = xco2_model_mode.values
    obs_series[obs_series == 0] = np.nan
    model_series[model_series == 0] = np.nan

    format_R, format_rmse = r_rmse(obs_series, model_series)
    #R = ma.corrcoef(ma.masked_invalid(obs_series), ma.masked_invalid(model_series))
    #x = obs_series[~np.isnan(obs_series)]
    #y = model_series[~np.isnan(model_series)]
    #rmse = np.sqrt(((y - x) ** 2).mean())
    #format_R = float("{0:.2f}".format(R[0,1]))
    #format_rmse = float("{0:.2f}".format(rmse))
    print('R is:', format_R, ' RMSE is: ', format_rmse)

    xco2_model_land = xco2_model_mode * ds_mask['land'][0:len(xco2_model)]
    xco2_model_land[xco2_model_land == 0] = np.nan
    xco2_oco2_land = xco2_oco2_mode * ds_mask['land'][0:len(xco2_model)]
    xco2_oco2_land[xco2_oco2_land == 0] = np.nan

    fig = plt.figure(figsize=[5, 5])
    name_month = short_name_of_month(month)
    for region in ['high_lat', 'mid_lat', 'low_lat']:
        xco2_model_mask = xco2_model_land * ds_mask[region][
            0:len(xco2_model_land)]
        xco2_model_mask[xco2_model_mask == 0] = np.nan
        xco2_oco2_mask = xco2_oco2_land * ds_mask[region][0:len(xco2_model_land)]
        xco2_oco2_mask[xco2_oco2_mask == 0] = np.nan

        plt.scatter(xco2_oco2_mask, xco2_model_mask, s=0.7, label=region)
        plt.plot([300, 450], [300, 450], c='r')
        plt.ylim(top=420, bottom=395)
        plt.xlim(right=420, left=395)
        plt.title(name_month + ' XCO2 (ppm)')
        plt.ylabel('GEOS-Chem')
        plt.xlabel('OCO2')
        plt.legend(markerscale=4)

        plt.text(x=410, y=399, s='R: ' + str(format_R), size=12)
        plt.text(x=410, y=398, s='RMSE: ' + str(format_rmse), size=12)
        fig.savefig(mod_dir + '/mod_vs_obs_XCO2_latitudinal_' + mode + '_' +
                    name_month + '.png',
                    dpi=300)

    diff = xco2_oco2_land - xco2_model_land  # diff to calculate a posteriori
    new_data = diff.to_dataset(name='diff')
    new_data['xco2_oco2'] = xco2_oco2_land
    new_data['xco2_model'] = xco2_model_land
    new_data['xco2_error'] = ds_sat['xco2_uncertainty']
    new_data['lat'] = ds_mod['lat']
    new_data['lon'] = ds_mod['lon']
    new_data['date'] = ds_mod['date']
    new_data.to_netcdf(mod_dir + 'XCO2_mod_and_oco2_' + mode + '_' +
                       name_month + '.nc')

    # Create target DataArray and Dataset
    lat_res = 2  # horizontal resolution of lat and lon you would want
    lon_res = 2.5
    lat = np.linspace(-90, 90, int(180 / lat_res + 1))  # grid
    lon = np.linspace(-180, 177.5, int(360 / lon_res))
    days = len(diff) / (len(lat) * len(lon))

    var_3d = xr.DataArray(data=np.nan,
                          dims=('days', 'lat', 'lon'),
                          coords={
                              'days': range(int(days)),
                              'lat': lat,
                              'lon': lon
                          },
                          name='diff')
    var_3d = var_3d.astype(dtype='float32')

    diff2 = xco2_model_land - xco2_oco2_land  # diff to map onto global map
    var_3d.values = diff2.values.reshape((int(days), len(lat), len(lon)))

    var_2d = var_3d.mean(dim='days')
    # plot after mapping
    fig = plt.figure(figsize=[8, 8])
    proj = ccrs.PlateCarree()
    ax = plt.subplot(111, projection=proj)
    #
    var_2d.plot(ax=ax, vmax=3, add_labels=False, cbar_kwargs={'shrink': 0.4})
    ax.set_title(name_month + ' XCO2: a posteriori - OCO2', loc='left')
    ax.set_title('ppm', loc='right')
    ax.coastlines()
    ax.gridlines(linestyle='--')

    fig.savefig(mod_dir + 'diff_map_' + name_month + '.png', dpi=300)

    ds_output = var_3d.to_dataset()
    var_3d.values = xco2_model_land.values.reshape(
        (int(days), len(lat), len(lon)))
    ds_output['mod'] = var_3d.copy()
    var_3d.values = xco2_oco2_land.values.reshape(
        (int(days), len(lat), len(lon)))
    ds_output['obs'] = var_3d.copy()
    ds_output.to_netcdf(mod_dir + 'XCO2_diff_' + str(month) + '.nc')
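
The averaging-kernel step above (a priori profile blended with the model profile via pressure weights) reduces to a few lines on dummy profiles; all numbers here are made up:

import numpy as np

nlev = 20
pres_wgt = np.full(nlev, 1.0 / nlev)   # pressure weights, summing to 1
avg_kern = np.random.rand(nlev)        # averaging kernel per level
co2_prior = np.full(nlev, 400.0)       # a priori CO2 profile (ppm)
co2_model = np.full(nlev, 405.0)       # model CO2 profile (ppm)

# column-averaged XCO2 with the satellite averaging kernel applied
xco2_model = np.sum(pres_wgt * (1 - avg_kern) * co2_prior
                    + pres_wgt * avg_kern * co2_model)
print(xco2_model)  # somewhere between 400 and 405 ppm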
Example #24
0
def xr_fix(dtset, model_name='NorESM'):
    """

    :param dtset:
    :param model_name:
    :return:
    """
    # print('xr_fix: Doing various fixes for %s' % model_name)
    log.ger.debug('xr_fix: Doing various fixes for %s' % model_name)
    # Rename stuff:
    # if (model_name != 'NorESM'):
    #    for key in dtset.variables:
    #        print(key)
    #        if (not sizedistribution or key not in constants.list_sized_vars_noresm):
    #            var_name_noresm = translate_var_names.model2NorESM(key, model_name)
    #
    #            if 'orig_name' not in dtset[key].attrs:
    #                dtset[key].attrs['orig_name'] = key
    #            if (len(var_name_noresm) > 0):
    #                print('Translate %s to %s ' % (key, var_name_noresm))
    #                dtset = dtset.rename({key: var_name_noresm})

    ############################
    # NorESM:
    ############################
    if model_name == 'NorESM':
        # print('So far not much to do')
        # time = dtset['time'].values  # do not cast to numpy array yet
        #
        # if isinstance(time[0], float):
        #    time_unit = dtset['time'].attrs['units']
        #    time_convert = num2date(time[:] - 15, time_unit, dtset.time.attrs['calendar'])
        #    dtset.coords['time'] = time_convert
        NCONC_noresm = constants.sized_varListNorESM['NCONC']
        for nconc in NCONC_noresm:
            typ = 'numberconc'
            if nconc in dtset:
                # if (dtset[nconc].attrs['units'] = '#/m3'):
                _ch_unit(dtset, typ, nconc)
        NMR_noresm = constants.sized_varListNorESM['NMR']
        for nmr in NMR_noresm:
            typ = 'NMR'
            if nmr in dtset:
                if dtset[nmr].attrs['units'] == 'm':
                    _ch_unit(dtset, typ, nmr)
        if 'NNAT_0' in dtset.data_vars:
            dtset['SIGMA00'] = dtset['NNAT_0'] * 0 + 1.6  # Kirkevag et al 2018
            dtset['SIGMA00'].attrs['units'] = '-'  # Kirkevag et al 2018
            dtset['NMR00'] = dtset['NNAT_0'] * 0 + 62.6  # nm Kirkevag et al 2018
            dtset['NMR00'].attrs['units'] = 'nm'  # nm Kirkevag et al 2018
            dtset['NCONC00'] = dtset['NNAT_0']
        for cvar in ['AWNC']:
            if cvar in dtset:
                if dtset[cvar].units == 'm-3':
                    dtset[cvar].values = 1.e-6 * dtset[cvar].values
                    dtset[cvar].attrs['units'] = '#/cm^3'
        for cvar in ['ACTNI', 'ACTNL']:
            if cvar in dtset:
                if dtset[cvar].units != '#/cm^3':
                    dtset[cvar].values = 1.e-6 * dtset[cvar].values
                    dtset[cvar].attrs['units'] = '#/cm^3'

        #while cont:
        for i in range(10):

            typ = 'numberconc'
            varSEC = 'nrSO4_SEC%02.0f' % i
            if varSEC in dtset.data_vars:
                _ch_unit(dtset, typ, varSEC)

        for i in range(10):
            varSEC = 'nrSOA_SEC%02.0f' % i
            typ = 'numberconc'
            if varSEC in dtset.data_vars:
                _ch_unit(dtset, typ, varSEC)
        #for mm_var in ['SOA_NA','SO4_NA','SOA_A1','SO4_A1']:
        #    typ='mixingratio'
        #    if mm_var in dtset.data_vars:
        #        _ch_unit(dtset,typ,mm_var)

        for sec_var in [
                'N_secmod', 'nrSO4_SEC_tot', 'nrSOA_SEC_tot', 'nrSEC_tot'
        ] + ['nrSEC%02.0f' % ii for ii in range(1, 6)]:
            typ = 'numberconc'
            if sec_var in dtset:
                if dtset[sec_var].attrs['units'] == 'unit':
                    _ch_unit(dtset, typ, sec_var)
        for ii in np.arange(1, 6):
            typ = 'numberconc'
            sec_nr = 'nrSOA_SEC%02.0f' % ii
            if sec_nr in dtset:
                if dtset[sec_nr].attrs['units'] == 'unit':
                    _ch_unit(dtset, typ, sec_nr)

            sec_nr = 'nrSO4_SEC%02.0f' % ii
            if sec_nr in dtset:
                typ = 'numberconc'
                if dtset[sec_nr].attrs['units'] == 'unit':
                    _ch_unit(dtset, typ, sec_nr)
                    # dtset[sec_nr].values = dtset[sec_nr].values * 1e-6
                    # dtset[sec_nr].attrs['units'] = 'cm-3'

    # get weights:
    if 'lat' in dtset:
        if 'gw' in dtset.data_vars:
            dtset['lat_wg'] = dtset['gw']
        else:
            wgts_ = area_mod.get_wghts_v2(dtset)
            dtset['lat_wg'] = xr.DataArray(wgts_,
                                           coords=[dtset.coords['lat']],
                                           dims=['lat'],
                                           name='lat_wg')
    if 'lon' in dtset:
        if np.min(dtset['lon'].values) >= 0:
            log.ger.debug('xr_fix: shifting lon to -180-->180')
            dtset.coords['lon'] = (dtset['lon'] + 180) % 360 - 180
            dtset = dtset.sortby('lon')

    # index = ['lev is dimension', 'orig_name', 'units']
    # for var in dtset.data_vars:
    #    keys = []
    #    var_entery = []
    #    if 'orig_name' in dtset[var].attrs:
    #        keys.append('original_var_name')
    #        var_entery.append(dtset[var].attrs['orig_name'])
    #    if 'units' in dtset[var].attrs:
    #        keys.append('units')
    #        var_entery.append(dtset[var].attrs['units'])
    #    keys.append('lev_is_dim')
    #    var_entery.append(int('lev' in dtset[var].coords))
    #    var_overview_sql.open_and_create_var_entery(model_name,
    #                                                               dtset.attrs['case_name'],
    #                                                               var, var_entery, keys)

    # dtset.attrs['startyear'] = int(dtset['time.year'].min())
    # dtset.attrs['endyear'] = int(dtset['time.year'].max())

    return dtset
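
The longitude fix near the end of xr_fix (wrapping 0..360 to -180..180 and re-sorting) is a generic pattern worth isolating; a minimal sketch on a bare coordinate:

import numpy as np
import xarray as xr

ds = xr.Dataset(coords={'lon': np.arange(0.0, 360.0, 30.0)})
ds.coords['lon'] = (ds['lon'] + 180) % 360 - 180
ds = ds.sortby('lon')
print(ds['lon'].values)  # -180 ... 150 in ascending order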
Example #25
0
yrst = 1958
yrend = 2016
ventsec = 7
lastinit = 4217

# Ariane input
ds_initial = xr.open_mfdataset(filepath_initial,combine='nested',concat_dim='ntraj')
ds_initial.init_volume.name = 'init_volume'
# Ariane output
ds = xr.open_mfdataset(filepath,combine='nested',concat_dim='ntraj')
ds = xr.merge([ds, ds_initial.init_volume])
ds['final_age'] = ds.final_age.astype('timedelta64[s]').astype('float64')/spy
ds['final_dens'] = calc_sigmantr(ds.final_temp,ds.final_salt)
# Model times
time_vals = np.append(np.array([0]),sio.loadmat(filepath_time)['time'].squeeze())
time = xr.DataArray(time_vals,dims=['nfile'],coords={'nfile':np.arange(time_vals.size)})
# Region limits
region_limits = np.loadtxt(filepath_region)

# Bins
years = np.arange(yrst,yrend+1)
ages = np.arange(-3/12,yrend-yrst+9/12)
densities = np.arange(27.7,28,0.01)
init_t_unique = np.unique(ds.init_t)
inits = np.append(init_t_unique-0.5,init_t_unique[-1]+0.5)
xs = np.arange(region_limits[0,0],region_limits[0,1])
ys = np.arange(region_limits[1,0],region_limits[1,1])

var1 = ds.final_x
var2 = ds.final_y
var3 = ds.final_age
Example #26
0
def construct_tripolar_grid(
    point_type="t", add_attrs=False, attr_fmt="gfdl", retain_coords=False
):
    """Generate a tripolar grid based on a real 5-degree MOM6 configuration

    Parameters
    ----------
    point_type : str, optional
        Grid type. Options are t, u, v, c. By default "t" (tracer)
    add_attrs : bool, optional
        Include lat and lon variable attributes, by default False
    attr_fmt : str, optional
        Modeling center attribute format, by default "gfdl"
    retain_coords : bool, optional
        Keep geolon, geolat, and wet in the dataset, by default False

    Returns
    -------
    xarray.Dataset
        Shell dataset with masked variable and ocean depth field
    """

    ds_in = pkgr.resource_filename("mdtf_test_data", "resources/ocean_static_5deg.nc")
    ds_in = xr.open_dataset(ds_in)

    # -- if CMIP format is requested, use CESM version as output
    attr_fmt = "ncar" if attr_fmt == "cmip" else attr_fmt

    if point_type == "t":
        lat = ds_in["geolat"]
        lon = ds_in["geolon"]
        wet = ds_in["wet"]
    elif point_type == "u":
        lat = ds_in["geolat_u"]
        lon = ds_in["geolon_u"]
        wet = ds_in["wet_u"]
    elif point_type == "v":
        lat = ds_in["geolat_v"]
        lon = ds_in["geolon_v"]
        wet = ds_in["wet_v"]
    elif point_type == "c":
        lat = ds_in["geolat_c"]
        lon = ds_in["geolon_c"]
        wet = ds_in["wet_c"]

    dset = xr.Dataset()
    dset["mask"] = xr.where(wet == 0.0, np.nan, 1.0)

    if point_type == "t":
        dset["depth"] = xr.DataArray(ds_in["depth_ocean"])

    if retain_coords is True:
        dset[lat.name] = lat
        dset[lon.name] = lon
        dset[wet.name] = wet

        if add_attrs is True:
            if attr_fmt == "gfdl":
                dset[lat.name].attrs = {}
                dset[lon.name].attrs = {}
                dset[wet.name].attrs = {}
            elif attr_fmt == "ncar":
                dset[lat.name].attrs = {
                    "axis": "Y",
                    "standard_name": "latitude",
                    "title": "Latitude",
                    "type": "double",
                    "units": "degrees_north",
                    "valid_max": 90.0,
                    "valid_min": -90.0,
                }
                dset[lon.name].attrs = {
                    "axis": "X",
                    "standard_name": "longitude",
                    "title": "Longitude",
                    "type": "double",
                    "units": "degrees_east",
                    "valid_max": 360.0,
                    "valid_min": 0.0,
                }
                dset[wet.name].attrs = {}
            else:
                raise ValueError("Unknown attribute format")

        else:
            dset[lat.name].attrs = {}
            dset[lon.name].attrs = {}
            dset[wet.name].attrs = {}

    if attr_fmt == "ncar":
        dset = dset.rename({"xh": "nlon", "yh": "nlat"})

        lat_range = np.array(np.arange(1, len(dset["nlat"]) + 1, dtype=np.intc))
        dset["nlat"] = xr.DataArray(lat_range, dims=("nlat"))
        dset["nlat"].attrs = {
            "long_name": "cell index along second dimension",
            "units": "1",
        }

        lon_range = np.array(np.arange(1, len(dset["nlon"]) + 1, dtype=np.intc))
        dset["nlon"] = xr.DataArray(lon_range, dims=("nlon"))
        dset["nlon"].attrs = {
            "long_name": "cell index along first dimension",
            "units": "1",
        }

        dset = dset.rename({lat.name: "lat", lon.name: "lon"})

    return dset
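
A possible usage sketch, assuming mdtf_test_data and its bundled ocean_static_5deg.nc resource are installed:

grid = construct_tripolar_grid(point_type="t", attr_fmt="ncar", retain_coords=True)
print(grid)  # Dataset with 'mask', 'depth', renamed lat/lon and nlat/nlon index coords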
Example #27
0
File: read.py  Project: san57/python
def read(
    self,
    name,
    tracdir,
    tracfile,
    varnames,
    dates,
    interpol_flx=False,
    tracer=None,
    model=None,
    **kwargs
):
    """Get fluxes from pre-computed fluxes and load them into a pyCIF
    variables

    Args:
        self: the fluxes Plugin
        name: the name of the component
        tracdir, tracfile: flux directory and file format
        dates: list of dates to extract
        interpol_flx (bool): if True, interpolates fluxes at time t from
        values of surrounding available files

    """

    # Replace tracfile by available information from model
    if tracfile == "":
        tracfile = model.fluxes.file

    # Available files in the directory
    list_files = os.listdir(tracdir)
    list_available = []
    for flx_file in list_files:
        try:
            list_available.append(
                datetime.datetime.strptime(flx_file, tracfile)
            )
        except BaseException:
            continue

    list_available = np.array(list_available)

    # Reading required fluxes files
    trcr_flx = []
    for dd in dates:
        delta = dd - list_available
        mask = delta >= datetime.timedelta(0)
        imin = np.argmin(delta[mask])
        fdates = list_available[mask][imin]

        filein = fdates.strftime("{}/{}".format(tracdir, tracfile))

        data, times = readnc(filein, [name, "Times"])

        # Get the correct hour in the file
        times = [
            datetime.datetime.strptime(
                str(b"".join(s), "utf-8"), "%Y-%m-%d_%H:%M:%S"
            )
            for s in times
        ]
        hour = int((dd - times[0]).total_seconds() // 3600)

        trcr_flx.append(data[hour, ...])
    
    # Building a xarray
    xmod = xr.DataArray(
        trcr_flx, coords={"time": dates}, dims=("time", "lev", "lat", "lon")
    )

    return xmod
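
The file-selection logic in the loop above (take the most recent available date that is not after the requested date) can be checked in isolation; the dates below are arbitrary:

import datetime
import numpy as np

list_available = np.array([datetime.datetime(2019, 1, d) for d in (1, 5, 10)])
dd = datetime.datetime(2019, 1, 7)

delta = dd - list_available
mask = delta >= datetime.timedelta(0)          # keep only files at or before dd
fdate = list_available[mask][np.argmin(delta[mask])]
print(fdate)  # 2019-01-05 00:00:00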
Example #28
0
            print('get alpha for init_time of ',cit)
            alp=alpha.sel(init_time=DOY[cit],nregions=cR)
            # add 1 for fore_time = 0
            alp_zero = alp[0]  # begin by stealing part of another dataarray
            alp_zero = alp_zero.expand_dims('fore_time')
            alp_zero.fore_time[0] = 0
            alp_zero.load()
            alp_zero[0] = 1.0
            alp = xr.concat([alp_zero,alp],dim='fore_time')

            damped = anom.isel(time=cit).values*alp
            damped.name = 'DampedAnom'
            tmp = anom['time'].isel(time=cit).values  # +np.timedelta64(5,'D')
            damptimes = pd.to_datetime( damped.fore_time.values, unit='D',
                                                 origin = tmp)  
            damped.coords['time'] = xr.DataArray(damptimes, dims='fore_time', coords={'fore_time':damped.fore_time})
            damped = damped.swap_dims({'fore_time':'time'})
            #print('damped ',damped)
            damped = damped + ds_climo_reg
            damped = damped.where(damped>0, other=0)
            damped.plot(ax=ax1, color=cc, linewidth=2)
            
        damped.plot(ax=ax1, label=str(cdate.year)+' DampedAnom', color=cc, linewidth=2)
        ds_obs_reg.plot(ax=ax1, label=str(cdate.year)+' Observed', color='m', linewidth=8)
        ax1.set_ylabel('Sea Ice Extent\n [Millions of square km]')

    #     # 1980-2010 Historical Interquartile Range
    #     plt.fill_between(ds_per_mean.time.values, ds_per_mean + ds_per_std, 
    #                  ds_per_mean - ds_per_std, alpha=0.35, label='1980-2010\nInterquartile Range', color='m')
        ax1.set_xlim(cxlims) # fix x limits
        cylims = ax1.get_ylim()
Example #29
0
def get_lutevents(dataset,
                  scale_factor=28e-6,
                  event_dim='number_of_events',
                  x_range=(-0.31, 0.31),
                  y_range=(-0.31, 0.31)):
    """ Build an event lookup table. Assign each event location a "sort of"
        pixel ID based on its fixed grid coordinates, discretized to some step
        interval that is less than the minimum pixel spacing of 224 microrad=8
        km at nadir.

        A new location is assigned to each discretized location (mean of the
        locations of  the constituent events). The time is assigned, uniformly,
        to be the dataset's product time attribute.

        The event lookup table is accompanied by pre-accumulated data at each
        discretized location: the flash, group and event counts; total flash and
        group areas; total event energy.

        Returns a new dataset with dimension "lutevent_id", having an index of
        the same name. The dataset is a (shallow) copy, but a new xarray object.

        If needed, returned dataset lutevents can be added to the original
        dataset with dataset.update(lutevents).

        If the pixel ID were stored as a 32 bit unsigned integer
        (0 to 4294967295), that allows only 65536 unique values per axis for a
        square (x, y) grid, so the minimum safe scale factor for the span of
        the full disk is (0.62e6 microradians)/65536 = 9.46 microradians,
        which is a bit large. Therefore, the implementation uses 64 bit unsigned
        integers to be safe.

        Arguments:
        dataset: GLM dataset in xarray format

        Keyword arguments:
        scale_factor: discretization interval, radians (default 28e-6)
        x_range, y_range: range of possible fixed grid coordinate values
            (default -/+.31 radians, which is larger than the
            full disk at geo. Ref: GOES-R PUG Vol. 3, L1b data.)
    """
    # Make a copy of the dataset so we can update it and return a copy.
    # xarray copies are shallow/cheap, and the xarray docs promote returning new
    # datasets http://xarray.pydata.org/en/stable/combining.html
    dataset = dataset.copy()
    event_x, event_y = dataset.event_x.data, dataset.event_y.data
    event_energy = dataset.event_energy.data
    product_time = dataset.product_time.data
    ev_flash_id = dataset.event_parent_flash_id.data
    ev_group_id = dataset.event_parent_group_id.data
    flash_area = dataset.flash_area.data
    group_area = dataset.group_area.data

    xy_id = discretize_2d_location(event_x, event_y, scale_factor, x_range,
                                   y_range)
    dataset['event_parent_lutevent_id'] = xr.DataArray(xy_id,
                                                       dims=[
                                                           event_dim,
                                                       ])
    eventlut_groups = dataset.groupby('event_parent_lutevent_id')
    n_lutevents = len(eventlut_groups.groups)

    # Create a new dimension for the reduced set of events, with their
    # properties aggregated.
    # - Sum: event_energy, flash_area, group_area
    # - Mean: event_x, event_y
    # - Count: event_id; unique flash_id, group_id
    eventlut_dtype = [('lutevent_id', 'u8'), ('lutevent_x', 'f8'),
                      ('lutevent_y', 'f8'), ('lutevent_energy', 'f8'),
                      ('lutevent_count', 'f4'), ('lutevent_flash_count', 'f4'),
                      ('lutevent_group_count', 'f4'),
                      ('lutevent_total_flash_area', 'f8'),
                      ('lutevent_total_group_area', 'f8'),
                      ('lutevent_time_offset', '<M8[ns]')]

    def event_lut_iter(event_lut_groupby, flash_groupby, group_groupby):
        flash_groups = flash_groupby.groups
        group_groups = group_groupby.groups
        for xy_id, evids in event_lut_groupby.groups.items():
            flash_ids = np.unique(ev_flash_id[evids])
            group_ids = np.unique(ev_group_id[evids])
            flash_count, group_count = len(flash_ids), len(group_ids)
            total_flash_area = sum(
                (flash_area[flash_groups[fid]].sum() for fid in flash_ids))
            total_group_area = sum(
                (group_area[group_groups[gid]].sum() for gid in group_ids))
            yield (xy_id, event_x[evids].mean(), event_y[evids].mean(),
                   event_energy[evids].sum(), len(evids), flash_count,
                   group_count, total_flash_area, total_group_area,
                   product_time)

    lut_iter = event_lut_iter(eventlut_groups, dataset.groupby('flash_id'),
                              dataset.groupby('group_id'))
    event_lut = np.fromiter(lut_iter, dtype=eventlut_dtype, count=n_lutevents)
    lutevents = xr.Dataset.from_dataframe(
        pd.DataFrame(event_lut).set_index('lutevent_id'))
    dataset.update(lutevents)
    return dataset
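
The helper discretize_2d_location is not shown here; the toy function below is a hypothetical re-implementation of the idea described in the docstring (snap fixed-grid coordinates to a scale_factor lattice and fold them into one 64-bit ID), not the actual helper used above:

import numpy as np

def toy_discretize_2d(x, y, scale, x_range, y_range):
    # hypothetical sketch only; column/row folding may differ from the real helper
    ix = np.round((np.asarray(x) - x_range[0]) / scale).astype(np.uint64)
    iy = np.round((np.asarray(y) - y_range[0]) / scale).astype(np.uint64)
    n_cols = np.uint64(np.ceil((x_range[1] - x_range[0]) / scale)) + np.uint64(1)
    return iy * n_cols + ix  # one unsigned 64-bit ID per (x, y) pair

ids = toy_discretize_2d([0.001, 0.001, -0.2], [0.0, 0.0, 0.1],
                        28e-6, (-0.31, 0.31), (-0.31, 0.31))
print(ids)  # the first two events land on the same lookup-table pixel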
Example #30
0
def numpy_to_xarray(array, geobox, name=None):
    """Utility to convert ndarray to DataArray, using a datacube.model.GeoBox"""
    coords=[xarray.IndexVariable(x, geobox.coords[x].values, attrs=dict(units=geobox.coords[x].units))
            for x in geobox.dims]
    return xarray.DataArray(array, coords=coords, attrs=dict(crs=geobox.crs), name=name)